Rubyando59
committed on
Commit
•
6ee389e
1
Parent(s):
b100ca0
Add new SentenceTransformer model.
Browse files- 1_Pooling/config.json +10 -0
- README.md +1167 -0
- config.json +32 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,1167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
datasets: []
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
library_name: sentence-transformers
|
6 |
+
license: apache-2.0
|
7 |
+
metrics:
|
8 |
+
- cosine_accuracy@1
|
9 |
+
- cosine_accuracy@3
|
10 |
+
- cosine_accuracy@5
|
11 |
+
- cosine_accuracy@10
|
12 |
+
- cosine_precision@1
|
13 |
+
- cosine_precision@3
|
14 |
+
- cosine_precision@5
|
15 |
+
- cosine_precision@10
|
16 |
+
- cosine_recall@1
|
17 |
+
- cosine_recall@3
|
18 |
+
- cosine_recall@5
|
19 |
+
- cosine_recall@10
|
20 |
+
- cosine_ndcg@10
|
21 |
+
- cosine_mrr@10
|
22 |
+
- cosine_map@100
|
23 |
+
pipeline_tag: sentence-similarity
|
24 |
+
tags:
|
25 |
+
- sentence-transformers
|
26 |
+
- sentence-similarity
|
27 |
+
- feature-extraction
|
28 |
+
- generated_from_trainer
|
29 |
+
- dataset_size:99145
|
30 |
+
- loss:MatryoshkaLoss
|
31 |
+
- loss:MultipleNegativesRankingLoss
|
32 |
+
widget:
|
33 |
+
- source_sentence: "YouTube provides people with entertainment, information, and opportunities\
|
34 |
+
\ to learn something new. Google Assistant \noffers the best way to get things\
|
35 |
+
\ done seamlessly across different devices, providing intelligent help throughout\
|
36 |
+
\ a \nperson's day, no matter where they are. Google Cloud helps customers solve\
|
37 |
+
\ today’s business challenges, improve \nproductivity, reduce costs, and unlock\
|
38 |
+
\ new growth engines. We are continually innovating and building new products\
|
39 |
+
\ \nand features that will help our users, partners, customers, and communities\
|
40 |
+
\ and have invested more than $150 billion \nin research and development in the\
|
41 |
+
\ last five years in support of these efforts.\nMaking AI Helpful for Everyone\n\
|
42 |
+
AI is a transformational technology that can bring meaningful and positive change\
|
43 |
+
\ to people and societies across \nthe world, and for our business. At Google,\
|
44 |
+
\ we have been bringing AI into our products and services for more than a \ndecade\
|
45 |
+
\ and making them available to our users. Our journey began in 2001, when machine\
|
46 |
+
\ learning was first \nincorporated into Google Search to suggest better spellings\
|
47 |
+
\ to users searching the web. Today, AI in our products is Table of Contents Alphabet\
|
48 |
+
\ Inc.\n4."
|
49 |
+
sentences:
|
50 |
+
- In what ways does Alphabet support the financial health of its employees?
|
51 |
+
- Analyze the potential impact of AI-driven tools on Google’s operational costs
|
52 |
+
and overall financial health.
|
53 |
+
- What strategies can companies implement to mitigate the financial risks associated
|
54 |
+
with problematic content?
|
55 |
+
- source_sentence: "Executive Overview\nThe following table summarizes our consolidated\
|
56 |
+
\ financial results (in millions, except for per share information \nand percentages):\n\
|
57 |
+
Year Ended December 31,\n2022 2023 $ Change % Change\nConsolidated revenues $\
|
58 |
+
\ 282,836 $ 307,394 $ 24,558 9 %\nChange in consolidated constant currency revenues(1)\
|
59 |
+
\ 10 %\nCost of revenues $ 126,203 $ 133,332 $ 7,129 6 %\nOperating expenses\
|
60 |
+
\ $ 81,791 $ 89,769 $ 7,978 10 %\nOperating income $ 74,842 $ 84,293 $ 9,451\
|
61 |
+
\ 13 %\nOperating margin 26 % 27 % 1 %\nOther income (expense), net $ (3,514)\
|
62 |
+
\ $ 1,424 $ 4,938 NM\nNet income $ 59,972 $ 73,795 $ 13,823 23 %\nDiluted EPS\
|
63 |
+
\ $ 4.56 $ 5.80 $ 1.24 27 %\nNM = Not Meaningful\n(1) See \"Use of Non-GAAP Constant\
|
64 |
+
\ Currency Information \" below for details relating to our use of constant currency\
|
65 |
+
\ information. \n•Revenues were $307.4 billion , an increase of 9% year over\
|
66 |
+
\ year, primarily driven by an increase in Google \nServices revenues of $19.0\
|
67 |
+
\ billion , or 8%, and an increase in Google Cloud revenues of $6.8 billion ,\
|
68 |
+
\ or 26%. \n•Total constant currency revenues, which exclude the effect of hedging,\
|
69 |
+
\ increased 10% year over year.\n•Cost of revenues was $133.3 billion , an increase\
|
70 |
+
\ of 6% year over year, primarily driven by increases in content \nacquisition\
|
71 |
+
\ costs , compensation expenses, and TAC . The increase in compensation expenses\
|
72 |
+
\ included \ncharges related to employee severance associated with the reduction\
|
73 |
+
\ in our workforce . Additionally, cost of \nrevenues benefited from a reduction\
|
74 |
+
\ in depreciation due to the change in estimated useful lives of our servers \n\
|
75 |
+
and network equipment.\n•Operating expenses were $89.8 billion , an increase \
|
76 |
+
\ of 10% year over year , primarily driven by an increase in \ncompensation expenses\
|
77 |
+
\ and charges related to our office space optimization efforts . The increase\
|
78 |
+
\ in \ncompensation expenses was largely the result of charges related to employee\
|
79 |
+
\ severance associated with the \nreduction in our workforce and an increase\
|
80 |
+
\ in SBC expense. Operating expenses benefited from the change in \nthe estimated\
|
81 |
+
\ useful lives of our servers and certain network equipment.\nOther Information:\n\
|
82 |
+
•In January 2023, we announced a reduction of our workforce , and as a result\
|
83 |
+
\ we recorded employee \nseverance and related charges of $2.1 billion for the\
|
84 |
+
\ year ended December 31, 2023. In addition, we are \ntaking actions to optimize\
|
85 |
+
\ our global office space. As a result, exit charges recorded during the year\
|
86 |
+
\ ended \nDecember 31, 2023, were $1.8 billion . In addition to these exit charges,\
|
87 |
+
\ for the year ended December 31, \n2023, we incurred $269 million in accelerated\
|
88 |
+
\ rent and accelerated depreciation . For additional information, \nsee Note 8\
|
89 |
+
\ of the Notes to Consolidated Financial Statements included in Item 8 of this\
|
90 |
+
\ Annual Report on \nForm 10-K.\n•In January 2023, we completed an assessment\
|
91 |
+
\ of the useful lives of our servers and network equipment, \nresulting in a change\
|
92 |
+
\ in the estimated useful life of our servers and certain network equipment to\
|
93 |
+
\ six years. \nThe effect of this change was a reduction in depreciation expense\
|
94 |
+
\ of $3.9 billion for the year ended December \n31, 2023, recognized primarily\
|
95 |
+
\ in cost of revenues and R&D expenses. For additional information, see Note 1\
|
96 |
+
\ \nof the Notes to Consolidated Financial Statements included in Item 8 of this\
|
97 |
+
\ Annual Report on Form 10-K.Table of Contents Alphabet Inc.\n34."
|
98 |
+
sentences:
|
99 |
+
- How does Google’s investment in AI research align with its long-term financial
|
100 |
+
strategy and goals?
|
101 |
+
- What role do market and industry factors play in the fluctuation of stock prices,
|
102 |
+
regardless of a company's performance?
|
103 |
+
- What was the total consolidated revenue for the year ended December 31, 2023,
|
104 |
+
and how does it compare to the previous year?
|
105 |
+
- source_sentence: "Furthermore, failure to maintain and enhance our brands could\
|
106 |
+
\ harm our business, reputation, financial condition, \nand operating results.\
|
107 |
+
\ Our success will depend largely on our ability to remain a technology leader\
|
108 |
+
\ and continue to \nprovide high-quality, trustworthy, innovative products and\
|
109 |
+
\ services that are truly useful and play a valuable role in a \nrange of settings.\
|
110 |
+
\ \nWe face a number of manufacturing and supply chain risks that could harm our\
|
111 |
+
\ business, financial \ncondition, and operating results. \nWe face a number of\
|
112 |
+
\ risks related to manufacturing and supply chain management, which could affect\
|
113 |
+
\ our ability \nto supply both our products and our services. \nWe rely on contract\
|
114 |
+
\ manufacturers to manufacture or assemble our devices and servers and networking\
|
115 |
+
\ \nequipment used in our technical infrastructure, and we may supply the contract\
|
116 |
+
\ manufacturers with components to \nassemble the devices and equipment. We\
|
117 |
+
\ also rely on other companies to participate in the supply of components and\
|
118 |
+
\ \ndistribution of our products and services. Our business could be negatively\
|
119 |
+
\ affected if we are not able to engage these \ncompanies with the necessary capabilities\
|
120 |
+
\ or capacity on reasonable terms, or if those we engage fail to meet their Table\
|
121 |
+
\ of Contents Alphabet Inc.\n13."
|
122 |
+
sentences:
|
123 |
+
- Discuss the impact of annual stock-based compensation (SBC) awards on Alphabet
|
124 |
+
Inc.'s financial reporting.
|
125 |
+
- What financial risks does Google face if it fails to comply with the General Data
|
126 |
+
Protection Regulation (GDPR)?
|
127 |
+
- How does the ability to provide innovative products and services correlate with
|
128 |
+
a company's revenue growth?
|
129 |
+
- source_sentence: "For example, in December 2023, a California jury delivered a verdict\
|
130 |
+
\ in Epic Games v. Google finding that Google \nviolated antitrust laws related\
|
131 |
+
\ to Google Play's billing practices. The presiding judge will determine remedies\
|
132 |
+
\ in 2024 \nand the range of potential remedies vary widely. We plan to appeal.\
|
133 |
+
\ In addition, the U.S. Department of Justice, \nvarious U.S. states, and other\
|
134 |
+
\ plaintiffs have filed several antitrust lawsuits about various aspects of our\
|
135 |
+
\ business, \nincluding our advertising technologies and practices, the operation\
|
136 |
+
\ and distribution of Google Search, and the \noperation and distribution of the\
|
137 |
+
\ Android operating system and Play Store. Other regulatory agencies in the U.S.\
|
138 |
+
\ and \naround the world, including competition enforcers, consumer protection\
|
139 |
+
\ agencies, and data protection authorities, have \nchallenged and may continue\
|
140 |
+
\ to challenge our business practices and compliance with laws and regulations.\
|
141 |
+
\ We are \ncooperating with these investigations and defending litigation or\
|
142 |
+
\ appealing decisions where appropriate. \nVarious laws, regulations, investigations,\
|
143 |
+
\ enforcement lawsuits, and regulatory actions have involved in the past , \n\
|
144 |
+
and may in the future result in substantial fines and penalties, injunctive relief,\
|
145 |
+
\ ongoing monitoring and auditing \nobligations, changes to our products and services,\
|
146 |
+
\ alterations to our business models and operations , including \ndivestiture\
|
147 |
+
\ , and collateral related civil litigation or other adverse consequences, all\
|
148 |
+
\ of which could harm our business, \nreputation, financial condition, and operating\
|
149 |
+
\ results. \nAny of these legal proceedings could result in legal costs, diversion\
|
150 |
+
\ of management resources, negative publicity \nand other harms to our business.\
|
151 |
+
\ Estimating liabilities for our pending proceedings is a complex, fact-specific\
|
152 |
+
\ , and \nspeculative process that requires significant judgment, and the amounts\
|
153 |
+
\ we are ultimately liable for may be less than or \nexceed our estimates. The\
|
154 |
+
\ resolution of one or more such proceedings has resulted in, and may in the future\
|
155 |
+
\ result in, \nadditional substantial fines, penalties, injunctions, and other\
|
156 |
+
\ sanctions that could harm our business, reputation, \nfinancial condition, and\
|
157 |
+
\ operating results. \nFor additional information about the ongoing material legal\
|
158 |
+
\ proceedings to which we are subject, see Legal \nProceedings in Part I, Item\
|
159 |
+
\ 3 of this Annual Report on Form 10-K.\nPrivacy, data protection, and data usage\
|
160 |
+
\ regulations are complex and rapidly evolving areas. Any failure \nor alleged\
|
161 |
+
\ failure to comply with these laws could harm our business, reputation, financial\
|
162 |
+
\ condition, and \noperating results. \nAuthorities around the world have adopted\
|
163 |
+
\ and are considering a number of legislative and regulatory proposals \nconcerning\
|
164 |
+
\ data protection, data usage, and encryption of user data. Adverse legal rulings,\
|
165 |
+
\ legislation, or regulation \nhave resulted in, and may continue to result in,\
|
166 |
+
\ fines and orders requiring that we change our practices, which have \nhad and\
|
167 |
+
\ could continue to have an adverse effect on how we provide services, harming\
|
168 |
+
\ our business, reputation, \nfinancial condition, and operating results. These\
|
169 |
+
\ laws and regulations are evolving and subject to interpretation, and \ncompliance\
|
170 |
+
\ obligations could cause us to incur substantial costs or harm the quality and\
|
171 |
+
\ operations of our products \nand services in ways that harm our business. Examples\
|
172 |
+
\ of these laws include : \n•The General Data Protection Regulation and the United\
|
173 |
+
\ Kingdom General Data Protection Regulations, which \napply to all of our activities\
|
174 |
+
\ conducted from an establishment in the EU or the United Kingdom, respectively,\
|
175 |
+
\ or \nrelated to products and services that we offer to EU or the United Kingdom\
|
176 |
+
\ users or customers, respectively, or \nthe monitoring of their behavior in the\
|
177 |
+
\ EU or the UK, respectively.\n•Various comprehensive U.S. state and foreign privacy\
|
178 |
+
\ laws, which give new data privacy rights to their \nrespective residents (including,\
|
179 |
+
\ in California, a private right of action in the event of a data breach resulting\
|
180 |
+
\ \nfrom our failure to implement and maintain reasonable security procedures\
|
181 |
+
\ and practices) and impose \nsignificant obligations on controllers and processors\
|
182 |
+
\ of consumer data.\n•State laws governing the processing of biometric information,\
|
183 |
+
\ such as the Illinois Biometric Information Privacy \nAct and the Texas Capture\
|
184 |
+
\ or Use of Biometric Identifier Act, which impose obligations on businesses that\
|
185 |
+
\ \ncollect or disclose consumer biometric information. \n•Various federal, state,\
|
186 |
+
\ and foreign laws governing how companies provide age appropriate experiences\
|
187 |
+
\ to \nchildren and minors, including the collection and processing of children\
|
188 |
+
\ and minor’s data. These include the \nChildren’s Online Privacy Protection Act\
|
189 |
+
\ of 1998, and the United Kingdom Age-Appropriate Design Code, all of \nwhich\
|
190 |
+
\ address the use and disclosure of the personal data of children and minors and\
|
191 |
+
\ impose obligations on \nonline services or products directed to or likely to\
|
192 |
+
\ be accessed by children. \n•The California Internet of Things Security Law,\
|
193 |
+
\ which regulates the security of data used in connection with \ninternet-connected\
|
194 |
+
\ devices."
|
195 |
+
sentences:
|
196 |
+
- What are the ethical challenges that may arise from the development of new AI
|
197 |
+
products and services?
|
198 |
+
- How might the California Internet of Things Security Law impose additional financial
|
199 |
+
obligations on Google?
|
200 |
+
- In the context of Google Services, what factors contribute to the competitive
|
201 |
+
nature of the device market, and how might these factors affect financial outcomes?
|
202 |
+
- source_sentence: "obligations (whether due to financial difficulties or other reasons),\
|
203 |
+
\ or make adverse changes in the pricing or other \nmaterial terms of our arrangements\
|
204 |
+
\ with them. \nWe have experienced and/or may in the future experience supply\
|
205 |
+
\ shortages, price increases, quality issues, and/\nor longer lead times that\
|
206 |
+
\ could negatively affect our operations, driven by raw material, component availability,\
|
207 |
+
\ \nmanufacturing capacity, labor shortages, industry allocations, logistics capacity,\
|
208 |
+
\ inflation, foreign currency exchange \nrates, tariffs, sanctions and export\
|
209 |
+
\ controls, trade disputes and barriers, forced labor concerns, sustainability\
|
210 |
+
\ sourcing \nrequirements, geopolitical tensions, armed conflicts, natural disasters\
|
211 |
+
\ or pandemics, the effects of climate change \n(such as sea level rise, drought,\
|
212 |
+
\ flooding, heat waves, wildfires and resultant air quality effects and power\
|
213 |
+
\ shutdowns \nassociated with wildfire prevention, and increased storm severity),\
|
214 |
+
\ power loss, and significant changes in the financial \nor business condition\
|
215 |
+
\ of our suppliers. Some of the components we use in our technical infrastructure\
|
216 |
+
\ and our devices \nare available from only one or limited sources, and we may\
|
217 |
+
\ not be able to find replacement vendors on favorable terms \nin the event of\
|
218 |
+
\ a supply chain disruption. A significant supply interruption that affects us\
|
219 |
+
\ or our vendors could delay \ncritical data center upgrades or expansions and\
|
220 |
+
\ delay consumer product availability . \nWe may enter into long-term contracts\
|
221 |
+
\ for materials and products that commit us to significant terms and \nconditions.\
|
222 |
+
\ We may face costs for materials and products that are not consumed due to market\
|
223 |
+
\ demand, technological \nchange, changed consumer preferences, quality, product\
|
224 |
+
\ recalls, and warranty issues. For instance, because certain of \nour hardware\
|
225 |
+
\ supply contracts have volume-based pricing or minimum purchase requirements,\
|
226 |
+
\ if the volume of sales \nof our devices decreases or does not reach projected\
|
227 |
+
\ targets, we could face increased materials and manufacturing \ncosts or other\
|
228 |
+
\ financial liabilities that could make our products more costly per unit to manufacture\
|
229 |
+
\ and harm our \nfinancial condition and operating results. Furthermore, certain\
|
230 |
+
\ of our competitors may negotiate more favorable \ncontractual terms based on\
|
231 |
+
\ volume and other commitments that may provide them with competitive advantages\
|
232 |
+
\ and \nmay affect our supply. \nOur devices have had, and in the future may\
|
233 |
+
\ have, quality issues resulting from design, manufacturing, or \noperations.\
|
234 |
+
\ Sometimes, these issues may be caused by components we purchase from other manufacturers\
|
235 |
+
\ or \nsuppliers. If the quality of our products and services does not meet expectations\
|
236 |
+
\ or our products or services are \ndefective or require a recall, it could harm\
|
237 |
+
\ our reputation, financial condition, and operating results. \nWe require our\
|
238 |
+
\ suppliers and business partners to comply with laws and, where applicable, our\
|
239 |
+
\ company policies \nand practices, such as the Google Supplier Code of Conduct,\
|
240 |
+
\ regarding workplace and employment practices, data \nsecurity, environmental\
|
241 |
+
\ compliance, and intellectual property licensing, but we do not control them\
|
242 |
+
\ or their practices. \nViolations of law or unethical business practices could\
|
243 |
+
\ result in supply chain disruptions, canceled orders, harm to key \nrelationships,\
|
244 |
+
\ and damage to our reputation. Their failure to procure necessary license rights\
|
245 |
+
\ to intellectual property \ncould affect our ability to sell our products or\
|
246 |
+
\ services and expose us to litigation or financial claims. \nInterruption to,\
|
247 |
+
\ interference with, or failure of our complex information technology and communications\
|
248 |
+
\ \nsystems could hurt our ability to effectively provide our products and services,\
|
249 |
+
\ which could harm our \nreputation, financial condition, and operating results.\
|
250 |
+
\ \nThe availability of our products and services and fulfillment of our customer\
|
251 |
+
\ contracts depend on the continuing \noperation of our information technology\
|
252 |
+
\ and communications systems. Our systems are vulnerable to damage, \ninterference,\
|
253 |
+
\ or interruption from modifications or upgrades, terrorist attacks, state-sponsored\
|
254 |
+
\ attacks, natural disasters \nor pandemics, geopolitical tensions or armed conflicts,\
|
255 |
+
\ export controls and sanctions, the effects of climate change \n(such as sea\
|
256 |
+
\ level rise, drought, flooding, heat waves, wildfires and resultant air quality\
|
257 |
+
\ effects and power shutdowns \nassociated with wildfire prevention, and increased\
|
258 |
+
\ storm severity), power loss, utility outages, telecommunications \nfailures,\
|
259 |
+
\ computer viruses, software bugs, ransomware attacks, supply-chain attacks, computer\
|
260 |
+
\ denial of service \nattacks, phishing schemes, or other attempts to harm or\
|
261 |
+
\ access our systems. Some of our data centers are located in \nareas with a high\
|
262 |
+
\ risk of major earthquakes or other natural disasters. Our data centers are also\
|
263 |
+
\ subject to break-ins, \nsabotage, and intentional acts of vandalism, and, in\
|
264 |
+
\ some cases, to potential disruptions resulting from problems \nexperienced by\
|
265 |
+
\ facility operators or disruptions as a result of geopolitical tensions and conflicts\
|
266 |
+
\ happening in the area. \nSome of our systems are not fully redundant, and disaster\
|
267 |
+
\ recovery planning cannot account for all eventualities. The \noccurrence of\
|
268 |
+
\ a natural disaster or pandemic, closure of a facility, or other unanticipated\
|
269 |
+
\ problems affecting our data \ncenters could result in lengthy interruptions\
|
270 |
+
\ in our service."
|
271 |
+
sentences:
|
272 |
+
- What are the implications of increased logistics capacity costs on a company's
|
273 |
+
overall financial performance?
|
274 |
+
- What are the potential risks associated with the company's reliance on consumer
|
275 |
+
subscription-based products for revenue?
|
276 |
+
- How might legal proceedings and regulatory scrutiny affect a company's financial
|
277 |
+
condition and operating results?
|
278 |
+
model-index:
|
279 |
+
- name: SUJET AI bge-base Finance Matryoshka
|
280 |
+
results:
|
281 |
+
- task:
|
282 |
+
type: information-retrieval
|
283 |
+
name: Information Retrieval
|
284 |
+
dataset:
|
285 |
+
name: dim 768
|
286 |
+
type: dim_768
|
287 |
+
metrics:
|
288 |
+
- type: cosine_accuracy@1
|
289 |
+
value: 0.015384615384615385
|
290 |
+
name: Cosine Accuracy@1
|
291 |
+
- type: cosine_accuracy@3
|
292 |
+
value: 0.04657342657342657
|
293 |
+
name: Cosine Accuracy@3
|
294 |
+
- type: cosine_accuracy@5
|
295 |
+
value: 0.06993006993006994
|
296 |
+
name: Cosine Accuracy@5
|
297 |
+
- type: cosine_accuracy@10
|
298 |
+
value: 0.13076923076923078
|
299 |
+
name: Cosine Accuracy@10
|
300 |
+
- type: cosine_precision@1
|
301 |
+
value: 0.015384615384615385
|
302 |
+
name: Cosine Precision@1
|
303 |
+
- type: cosine_precision@3
|
304 |
+
value: 0.015524475524475523
|
305 |
+
name: Cosine Precision@3
|
306 |
+
- type: cosine_precision@5
|
307 |
+
value: 0.013986013986013986
|
308 |
+
name: Cosine Precision@5
|
309 |
+
- type: cosine_precision@10
|
310 |
+
value: 0.013076923076923076
|
311 |
+
name: Cosine Precision@10
|
312 |
+
- type: cosine_recall@1
|
313 |
+
value: 0.015384615384615385
|
314 |
+
name: Cosine Recall@1
|
315 |
+
- type: cosine_recall@3
|
316 |
+
value: 0.04657342657342657
|
317 |
+
name: Cosine Recall@3
|
318 |
+
- type: cosine_recall@5
|
319 |
+
value: 0.06993006993006994
|
320 |
+
name: Cosine Recall@5
|
321 |
+
- type: cosine_recall@10
|
322 |
+
value: 0.13076923076923078
|
323 |
+
name: Cosine Recall@10
|
324 |
+
- type: cosine_ndcg@10
|
325 |
+
value: 0.0620726064588503
|
326 |
+
name: Cosine Ndcg@10
|
327 |
+
- type: cosine_mrr@10
|
328 |
+
value: 0.04157842157842149
|
329 |
+
name: Cosine Mrr@10
|
330 |
+
- type: cosine_map@100
|
331 |
+
value: 0.05757497178689022
|
332 |
+
name: Cosine Map@100
|
333 |
+
- task:
|
334 |
+
type: information-retrieval
|
335 |
+
name: Information Retrieval
|
336 |
+
dataset:
|
337 |
+
name: dim 512
|
338 |
+
type: dim_512
|
339 |
+
metrics:
|
340 |
+
- type: cosine_accuracy@1
|
341 |
+
value: 0.014965034965034965
|
342 |
+
name: Cosine Accuracy@1
|
343 |
+
- type: cosine_accuracy@3
|
344 |
+
value: 0.04531468531468531
|
345 |
+
name: Cosine Accuracy@3
|
346 |
+
- type: cosine_accuracy@5
|
347 |
+
value: 0.06713286713286713
|
348 |
+
name: Cosine Accuracy@5
|
349 |
+
- type: cosine_accuracy@10
|
350 |
+
value: 0.12755244755244755
|
351 |
+
name: Cosine Accuracy@10
|
352 |
+
- type: cosine_precision@1
|
353 |
+
value: 0.014965034965034965
|
354 |
+
name: Cosine Precision@1
|
355 |
+
- type: cosine_precision@3
|
356 |
+
value: 0.015104895104895105
|
357 |
+
name: Cosine Precision@3
|
358 |
+
- type: cosine_precision@5
|
359 |
+
value: 0.013426573426573427
|
360 |
+
name: Cosine Precision@5
|
361 |
+
- type: cosine_precision@10
|
362 |
+
value: 0.012755244755244756
|
363 |
+
name: Cosine Precision@10
|
364 |
+
- type: cosine_recall@1
|
365 |
+
value: 0.014965034965034965
|
366 |
+
name: Cosine Recall@1
|
367 |
+
- type: cosine_recall@3
|
368 |
+
value: 0.04531468531468531
|
369 |
+
name: Cosine Recall@3
|
370 |
+
- type: cosine_recall@5
|
371 |
+
value: 0.06713286713286713
|
372 |
+
name: Cosine Recall@5
|
373 |
+
- type: cosine_recall@10
|
374 |
+
value: 0.12755244755244755
|
375 |
+
name: Cosine Recall@10
|
376 |
+
- type: cosine_ndcg@10
|
377 |
+
value: 0.06036389249600748
|
378 |
+
name: Cosine Ndcg@10
|
379 |
+
- type: cosine_mrr@10
|
380 |
+
value: 0.04032722832722825
|
381 |
+
name: Cosine Mrr@10
|
382 |
+
- type: cosine_map@100
|
383 |
+
value: 0.05606060146944153
|
384 |
+
name: Cosine Map@100
|
385 |
+
- task:
|
386 |
+
type: information-retrieval
|
387 |
+
name: Information Retrieval
|
388 |
+
dataset:
|
389 |
+
name: dim 256
|
390 |
+
type: dim_256
|
391 |
+
metrics:
|
392 |
+
- type: cosine_accuracy@1
|
393 |
+
value: 0.012167832167832168
|
394 |
+
name: Cosine Accuracy@1
|
395 |
+
- type: cosine_accuracy@3
|
396 |
+
value: 0.04055944055944056
|
397 |
+
name: Cosine Accuracy@3
|
398 |
+
- type: cosine_accuracy@5
|
399 |
+
value: 0.06265734265734266
|
400 |
+
name: Cosine Accuracy@5
|
401 |
+
- type: cosine_accuracy@10
|
402 |
+
value: 0.11734265734265734
|
403 |
+
name: Cosine Accuracy@10
|
404 |
+
- type: cosine_precision@1
|
405 |
+
value: 0.012167832167832168
|
406 |
+
name: Cosine Precision@1
|
407 |
+
- type: cosine_precision@3
|
408 |
+
value: 0.013519813519813519
|
409 |
+
name: Cosine Precision@3
|
410 |
+
- type: cosine_precision@5
|
411 |
+
value: 0.012531468531468533
|
412 |
+
name: Cosine Precision@5
|
413 |
+
- type: cosine_precision@10
|
414 |
+
value: 0.011734265734265736
|
415 |
+
name: Cosine Precision@10
|
416 |
+
- type: cosine_recall@1
|
417 |
+
value: 0.012167832167832168
|
418 |
+
name: Cosine Recall@1
|
419 |
+
- type: cosine_recall@3
|
420 |
+
value: 0.04055944055944056
|
421 |
+
name: Cosine Recall@3
|
422 |
+
- type: cosine_recall@5
|
423 |
+
value: 0.06265734265734266
|
424 |
+
name: Cosine Recall@5
|
425 |
+
- type: cosine_recall@10
|
426 |
+
value: 0.11734265734265734
|
427 |
+
name: Cosine Recall@10
|
428 |
+
- type: cosine_ndcg@10
|
429 |
+
value: 0.054805553416946595
|
430 |
+
name: Cosine Ndcg@10
|
431 |
+
- type: cosine_mrr@10
|
432 |
+
value: 0.03612859362859355
|
433 |
+
name: Cosine Mrr@10
|
434 |
+
- type: cosine_map@100
|
435 |
+
value: 0.050715277611358314
|
436 |
+
name: Cosine Map@100
|
437 |
+
- task:
|
438 |
+
type: information-retrieval
|
439 |
+
name: Information Retrieval
|
440 |
+
dataset:
|
441 |
+
name: dim 128
|
442 |
+
type: dim_128
|
443 |
+
metrics:
|
444 |
+
- type: cosine_accuracy@1
|
445 |
+
value: 0.01020979020979021
|
446 |
+
name: Cosine Accuracy@1
|
447 |
+
- type: cosine_accuracy@3
|
448 |
+
value: 0.03538461538461538
|
449 |
+
name: Cosine Accuracy@3
|
450 |
+
- type: cosine_accuracy@5
|
451 |
+
value: 0.05118881118881119
|
452 |
+
name: Cosine Accuracy@5
|
453 |
+
- type: cosine_accuracy@10
|
454 |
+
value: 0.09734265734265735
|
455 |
+
name: Cosine Accuracy@10
|
456 |
+
- type: cosine_precision@1
|
457 |
+
value: 0.01020979020979021
|
458 |
+
name: Cosine Precision@1
|
459 |
+
- type: cosine_precision@3
|
460 |
+
value: 0.011794871794871797
|
461 |
+
name: Cosine Precision@3
|
462 |
+
- type: cosine_precision@5
|
463 |
+
value: 0.01023776223776224
|
464 |
+
name: Cosine Precision@5
|
465 |
+
- type: cosine_precision@10
|
466 |
+
value: 0.009734265734265736
|
467 |
+
name: Cosine Precision@10
|
468 |
+
- type: cosine_recall@1
|
469 |
+
value: 0.01020979020979021
|
470 |
+
name: Cosine Recall@1
|
471 |
+
- type: cosine_recall@3
|
472 |
+
value: 0.03538461538461538
|
473 |
+
name: Cosine Recall@3
|
474 |
+
- type: cosine_recall@5
|
475 |
+
value: 0.05118881118881119
|
476 |
+
name: Cosine Recall@5
|
477 |
+
- type: cosine_recall@10
|
478 |
+
value: 0.09734265734265735
|
479 |
+
name: Cosine Recall@10
|
480 |
+
- type: cosine_ndcg@10
|
481 |
+
value: 0.045562900318375184
|
482 |
+
name: Cosine Ndcg@10
|
483 |
+
- type: cosine_mrr@10
|
484 |
+
value: 0.03009612609612603
|
485 |
+
name: Cosine Mrr@10
|
486 |
+
- type: cosine_map@100
|
487 |
+
value: 0.04272564391942989
|
488 |
+
name: Cosine Map@100
|
489 |
+
- task:
|
490 |
+
type: information-retrieval
|
491 |
+
name: Information Retrieval
|
492 |
+
dataset:
|
493 |
+
name: dim 64
|
494 |
+
type: dim_64
|
495 |
+
metrics:
|
496 |
+
- type: cosine_accuracy@1
|
497 |
+
value: 0.005874125874125874
|
498 |
+
name: Cosine Accuracy@1
|
499 |
+
- type: cosine_accuracy@3
|
500 |
+
value: 0.02125874125874126
|
501 |
+
name: Cosine Accuracy@3
|
502 |
+
- type: cosine_accuracy@5
|
503 |
+
value: 0.03370629370629371
|
504 |
+
name: Cosine Accuracy@5
|
505 |
+
- type: cosine_accuracy@10
|
506 |
+
value: 0.06741258741258742
|
507 |
+
name: Cosine Accuracy@10
|
508 |
+
- type: cosine_precision@1
|
509 |
+
value: 0.005874125874125874
|
510 |
+
name: Cosine Precision@1
|
511 |
+
- type: cosine_precision@3
|
512 |
+
value: 0.007086247086247086
|
513 |
+
name: Cosine Precision@3
|
514 |
+
- type: cosine_precision@5
|
515 |
+
value: 0.006741258741258742
|
516 |
+
name: Cosine Precision@5
|
517 |
+
- type: cosine_precision@10
|
518 |
+
value: 0.006741258741258742
|
519 |
+
name: Cosine Precision@10
|
520 |
+
- type: cosine_recall@1
|
521 |
+
value: 0.005874125874125874
|
522 |
+
name: Cosine Recall@1
|
523 |
+
- type: cosine_recall@3
|
524 |
+
value: 0.02125874125874126
|
525 |
+
name: Cosine Recall@3
|
526 |
+
- type: cosine_recall@5
|
527 |
+
value: 0.03370629370629371
|
528 |
+
name: Cosine Recall@5
|
529 |
+
- type: cosine_recall@10
|
530 |
+
value: 0.06741258741258742
|
531 |
+
name: Cosine Recall@10
|
532 |
+
- type: cosine_ndcg@10
|
533 |
+
value: 0.030435876859011154
|
534 |
+
name: Cosine Ndcg@10
|
535 |
+
- type: cosine_mrr@10
|
536 |
+
value: 0.01942596292596293
|
537 |
+
name: Cosine Mrr@10
|
538 |
+
- type: cosine_map@100
|
539 |
+
value: 0.028981824813925826
|
540 |
+
name: Cosine Map@100
|
541 |
+
---
|
542 |
+
|
543 |
+
# SUJET AI bge-base Finance Matryoshka
|
544 |
+
|
545 |
+
This is a trained [sentence-transformers](https://www.SBERT.net) model. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
546 |
+
|
547 |
+
## Model Details
|
548 |
+
|
549 |
+
### Model Description
|
550 |
+
- **Model Type:** Sentence Transformer
|
551 |
+
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
|
552 |
+
- **Maximum Sequence Length:** 512 tokens
|
553 |
+
- **Output Dimensionality:** 768 dimensions
|
554 |
+
- **Similarity Function:** Cosine Similarity
|
555 |
+
<!-- - **Training Dataset:** Unknown -->
|
556 |
+
- **Language:** en
|
557 |
+
- **License:** apache-2.0
|
558 |
+
|
559 |
+
### Model Sources
|
560 |
+
|
561 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
562 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
563 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
564 |
+
|
565 |
+
### Full Model Architecture
|
566 |
+
|
567 |
+
```
|
568 |
+
SentenceTransformer(
|
569 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
|
570 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
571 |
+
(2): Normalize()
|
572 |
+
)
|
573 |
+
```
|
574 |
+
|
575 |
+
## Usage
|
576 |
+
|
577 |
+
### Direct Usage (Sentence Transformers)
|
578 |
+
|
579 |
+
First install the Sentence Transformers library:
|
580 |
+
|
581 |
+
```bash
|
582 |
+
pip install -U sentence-transformers
|
583 |
+
```
|
584 |
+
|
585 |
+
Then you can load this model and run inference.
|
586 |
+
```python
|
587 |
+
from sentence_transformers import SentenceTransformer
|
588 |
+
|
589 |
+
# Download from the 🤗 Hub
|
590 |
+
model = SentenceTransformer("Rubyando59/bge-base-financial-matryoshka")
|
591 |
+
# Run inference
|
592 |
+
sentences = [
|
593 |
+
'obligations (whether due to financial difficulties or other reasons), or make adverse changes in the pricing or other \nmaterial terms of our arrangements with them. \nWe have experienced and/or may in the future experience supply shortages, price increases, quality issues, and/\nor longer lead times that could negatively affect our operations, driven by raw material, component availability, \nmanufacturing capacity, labor shortages, industry allocations, logistics capacity, inflation, foreign currency exchange \nrates, tariffs, sanctions and export controls, trade disputes and barriers, forced labor concerns, sustainability sourcing \nrequirements, geopolitical tensions, armed conflicts, natural disasters or pandemics, the effects of climate change \n(such as sea level rise, drought, flooding, heat waves, wildfires and resultant air quality effects and power shutdowns \nassociated with wildfire prevention, and increased storm severity), power loss, and significant changes in the financial \nor business condition of our suppliers. Some of the components we use in our technical infrastructure and our device s \nare available from only one or limited sources, and we may not be able to find replacement vendors on favorable terms \nin the event of a supply chain disruption. A significant supply interruption that affects us or our vendors could delay \ncritical data center upgrades or expansions and delay consumer product availability . \nWe may enter into long-term contracts for materials and products that commit us to significant terms and \nconditions. We may face costs for materials and products that are not consumed due to market demand, technological \nchange, changed consumer preferences, quality, product recalls, and warranty issues. 
For instance, because certain of \nour hardware supply contracts have volume-based pricing or minimum purchase requirements, if the volume of sales \nof our devices decreases or does not reach projected targets, we could face increased materials and manufacturing \ncosts or other financial liabilities that could make our products more costly per unit to manufacture and harm our \nfinancial condition and operating results. Furthermore, certain of our competitors may negotiate more favorable \ncontractual terms based on volume and other commitments that may provide them with competitive advantages and \nmay affect our supply. \nOur device s have had, and in the future may have, quality issues resulting from design, manufacturing, or \noperations. Sometimes, these issues may be caused by components we purchase from other manufacturers or \nsuppliers. If the quality of our products and services does not meet expectations or our products or services are \ndefective or require a recall, it could harm our reputation, financial condition, and operating results. \nWe require our suppliers and business partners to comply with laws and, where applicable, our company policies \nand practices, such as the Google Supplier Code of Conduct, regarding workplace and employment practices, data \nsecurity, environmental compliance, and intellectual property licensing, but we do not control them or their practices. \nViolations of law or unethical business practices could result in supply chain disruptions, canceled orders, harm to key \nrelationships, and damage to our reputation. Their failure to procure necessary license rights to intellectual property \ncould affect our ability to sell our products or services and expose us to litigation or financial claims. 
\nInterruption to, interference with, or failure of our complex information technology and communications \nsystems could hurt our ability to effectively provide our products and services, which could harm our \nreputation, financial condition, and operating results. \nThe availability of our products and services and fulfillment of our customer contracts depend on the continuing \noperation of our information technology and communications systems. Our systems are vulnerable to damage, \ninterference, or interruption from modifications or upgrades, terrorist attacks, state-sponsored attacks, natural disasters \nor pandemics, geopolitical tensions or armed conflicts, export controls and sanctions, the effects of climate change \n(such as sea level rise, drought, flooding, heat waves, wildfires and resultant air quality effects and power shutdowns \nassociated with wildfire prevention, and increased storm severity), power loss, utility outages, telecommunications \nfailures, computer viruses, software bugs, ransomware attacks, supply-chain attacks, computer denial of service \nattacks, phishing schemes, or other attempts to harm or access our systems. Some of our data centers are located in \nareas with a high risk of major earthquakes or other natural disasters. Our data centers are also subject to break-ins, \nsabotage, and intentional acts of vandalism, and, in some cases, to potential disruptions resulting from problems \nexperienced by facility operators or disruptions as a result of geopolitical tensions and conflicts happening in the area. \nSome of our systems are not fully redundant, and disaster recovery planning cannot account for all eventualities. The \noccurrence of a natural disaster or pandemic, closure of a facility, or other unanticipated problems affecting our data \ncenters could result in lengthy interruptions in our service.',
|
594 |
+
"What are the implications of increased logistics capacity costs on a company's overall financial performance?",
|
595 |
+
"How might legal proceedings and regulatory scrutiny affect a company's financial condition and operating results?",
|
596 |
+
]
|
597 |
+
embeddings = model.encode(sentences)
|
598 |
+
print(embeddings.shape)
|
599 |
+
# [3, 768]
|
600 |
+
|
601 |
+
# Get the similarity scores for the embeddings
|
602 |
+
similarities = model.similarity(embeddings, embeddings)
|
603 |
+
print(similarities.shape)
|
604 |
+
# [3, 3]
|
605 |
+
```
|
606 |
+
|
607 |
+
<!--
|
608 |
+
### Direct Usage (Transformers)
|
609 |
+
|
610 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
611 |
+
|
612 |
+
</details>
|
613 |
+
-->
|
614 |
+
|
615 |
+
<!--
|
616 |
+
### Downstream Usage (Sentence Transformers)
|
617 |
+
|
618 |
+
You can finetune this model on your own dataset.
|
619 |
+
|
620 |
+
<details><summary>Click to expand</summary>
|
621 |
+
|
622 |
+
</details>
|
623 |
+
-->
|
624 |
+
|
625 |
+
<!--
|
626 |
+
### Out-of-Scope Use
|
627 |
+
|
628 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
629 |
+
-->
|
630 |
+
|
631 |
+
## Evaluation
|
632 |
+
|
633 |
+
### Metrics
|
634 |
+
|
635 |
+
#### Information Retrieval
|
636 |
+
* Dataset: `dim_768`
|
637 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
638 |
+
|
639 |
+
| Metric | Value |
|
640 |
+
|:--------------------|:-----------|
|
641 |
+
| cosine_accuracy@1 | 0.0154 |
|
642 |
+
| cosine_accuracy@3 | 0.0466 |
|
643 |
+
| cosine_accuracy@5 | 0.0699 |
|
644 |
+
| cosine_accuracy@10 | 0.1308 |
|
645 |
+
| cosine_precision@1 | 0.0154 |
|
646 |
+
| cosine_precision@3 | 0.0155 |
|
647 |
+
| cosine_precision@5 | 0.014 |
|
648 |
+
| cosine_precision@10 | 0.0131 |
|
649 |
+
| cosine_recall@1 | 0.0154 |
|
650 |
+
| cosine_recall@3 | 0.0466 |
|
651 |
+
| cosine_recall@5 | 0.0699 |
|
652 |
+
| cosine_recall@10 | 0.1308 |
|
653 |
+
| cosine_ndcg@10 | 0.0621 |
|
654 |
+
| cosine_mrr@10 | 0.0416 |
|
655 |
+
| **cosine_map@100** | **0.0576** |
|
656 |
+
|
657 |
+
#### Information Retrieval
|
658 |
+
* Dataset: `dim_512`
|
659 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
660 |
+
|
661 |
+
| Metric | Value |
|
662 |
+
|:--------------------|:-----------|
|
663 |
+
| cosine_accuracy@1 | 0.015 |
|
664 |
+
| cosine_accuracy@3 | 0.0453 |
|
665 |
+
| cosine_accuracy@5 | 0.0671 |
|
666 |
+
| cosine_accuracy@10 | 0.1276 |
|
667 |
+
| cosine_precision@1 | 0.015 |
|
668 |
+
| cosine_precision@3 | 0.0151 |
|
669 |
+
| cosine_precision@5 | 0.0134 |
|
670 |
+
| cosine_precision@10 | 0.0128 |
|
671 |
+
| cosine_recall@1 | 0.015 |
|
672 |
+
| cosine_recall@3 | 0.0453 |
|
673 |
+
| cosine_recall@5 | 0.0671 |
|
674 |
+
| cosine_recall@10 | 0.1276 |
|
675 |
+
| cosine_ndcg@10 | 0.0604 |
|
676 |
+
| cosine_mrr@10 | 0.0403 |
|
677 |
+
| **cosine_map@100** | **0.0561** |
|
678 |
+
|
679 |
+
#### Information Retrieval
|
680 |
+
* Dataset: `dim_256`
|
681 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
682 |
+
|
683 |
+
| Metric | Value |
|
684 |
+
|:--------------------|:-----------|
|
685 |
+
| cosine_accuracy@1 | 0.0122 |
|
686 |
+
| cosine_accuracy@3 | 0.0406 |
|
687 |
+
| cosine_accuracy@5 | 0.0627 |
|
688 |
+
| cosine_accuracy@10 | 0.1173 |
|
689 |
+
| cosine_precision@1 | 0.0122 |
|
690 |
+
| cosine_precision@3 | 0.0135 |
|
691 |
+
| cosine_precision@5 | 0.0125 |
|
692 |
+
| cosine_precision@10 | 0.0117 |
|
693 |
+
| cosine_recall@1 | 0.0122 |
|
694 |
+
| cosine_recall@3 | 0.0406 |
|
695 |
+
| cosine_recall@5 | 0.0627 |
|
696 |
+
| cosine_recall@10 | 0.1173 |
|
697 |
+
| cosine_ndcg@10 | 0.0548 |
|
698 |
+
| cosine_mrr@10 | 0.0361 |
|
699 |
+
| **cosine_map@100** | **0.0507** |
|
700 |
+
|
701 |
+
#### Information Retrieval
|
702 |
+
* Dataset: `dim_128`
|
703 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
704 |
+
|
705 |
+
| Metric | Value |
|
706 |
+
|:--------------------|:-----------|
|
707 |
+
| cosine_accuracy@1 | 0.0102 |
|
708 |
+
| cosine_accuracy@3 | 0.0354 |
|
709 |
+
| cosine_accuracy@5 | 0.0512 |
|
710 |
+
| cosine_accuracy@10 | 0.0973 |
|
711 |
+
| cosine_precision@1 | 0.0102 |
|
712 |
+
| cosine_precision@3 | 0.0118 |
|
713 |
+
| cosine_precision@5 | 0.0102 |
|
714 |
+
| cosine_precision@10 | 0.0097 |
|
715 |
+
| cosine_recall@1 | 0.0102 |
|
716 |
+
| cosine_recall@3 | 0.0354 |
|
717 |
+
| cosine_recall@5 | 0.0512 |
|
718 |
+
| cosine_recall@10 | 0.0973 |
|
719 |
+
| cosine_ndcg@10 | 0.0456 |
|
720 |
+
| cosine_mrr@10 | 0.0301 |
|
721 |
+
| **cosine_map@100** | **0.0427** |
|
722 |
+
|
723 |
+
#### Information Retrieval
|
724 |
+
* Dataset: `dim_64`
|
725 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
726 |
+
|
727 |
+
| Metric | Value |
|
728 |
+
|:--------------------|:----------|
|
729 |
+
| cosine_accuracy@1 | 0.0059 |
|
730 |
+
| cosine_accuracy@3 | 0.0213 |
|
731 |
+
| cosine_accuracy@5 | 0.0337 |
|
732 |
+
| cosine_accuracy@10 | 0.0674 |
|
733 |
+
| cosine_precision@1 | 0.0059 |
|
734 |
+
| cosine_precision@3 | 0.0071 |
|
735 |
+
| cosine_precision@5 | 0.0067 |
|
736 |
+
| cosine_precision@10 | 0.0067 |
|
737 |
+
| cosine_recall@1 | 0.0059 |
|
738 |
+
| cosine_recall@3 | 0.0213 |
|
739 |
+
| cosine_recall@5 | 0.0337 |
|
740 |
+
| cosine_recall@10 | 0.0674 |
|
741 |
+
| cosine_ndcg@10 | 0.0304 |
|
742 |
+
| cosine_mrr@10 | 0.0194 |
|
743 |
+
| **cosine_map@100** | **0.029** |
|
744 |
+
|
745 |
+
<!--
|
746 |
+
## Bias, Risks and Limitations
|
747 |
+
|
748 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
749 |
+
-->
|
750 |
+
|
751 |
+
<!--
|
752 |
+
### Recommendations
|
753 |
+
|
754 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
755 |
+
-->
|
756 |
+
|
757 |
+
## Training Details
|
758 |
+
|
759 |
+
### Training Hyperparameters
|
760 |
+
#### Non-Default Hyperparameters
|
761 |
+
|
762 |
+
- `eval_strategy`: epoch
|
763 |
+
- `per_device_train_batch_size`: 32
|
764 |
+
- `per_device_eval_batch_size`: 16
|
765 |
+
- `gradient_accumulation_steps`: 16
|
766 |
+
- `learning_rate`: 2e-05
|
767 |
+
- `num_train_epochs`: 10
|
768 |
+
- `lr_scheduler_type`: cosine
|
769 |
+
- `warmup_ratio`: 0.1
|
770 |
+
- `bf16`: True
|
771 |
+
- `tf32`: True
|
772 |
+
- `load_best_model_at_end`: True
|
773 |
+
- `optim`: adamw_torch_fused
|
774 |
+
- `batch_sampler`: no_duplicates
|
775 |
+
|
776 |
+
#### All Hyperparameters
|
777 |
+
<details><summary>Click to expand</summary>
|
778 |
+
|
779 |
+
- `overwrite_output_dir`: False
|
780 |
+
- `do_predict`: False
|
781 |
+
- `eval_strategy`: epoch
|
782 |
+
- `prediction_loss_only`: True
|
783 |
+
- `per_device_train_batch_size`: 32
|
784 |
+
- `per_device_eval_batch_size`: 16
|
785 |
+
- `per_gpu_train_batch_size`: None
|
786 |
+
- `per_gpu_eval_batch_size`: None
|
787 |
+
- `gradient_accumulation_steps`: 16
|
788 |
+
- `eval_accumulation_steps`: None
|
789 |
+
- `learning_rate`: 2e-05
|
790 |
+
- `weight_decay`: 0.0
|
791 |
+
- `adam_beta1`: 0.9
|
792 |
+
- `adam_beta2`: 0.999
|
793 |
+
- `adam_epsilon`: 1e-08
|
794 |
+
- `max_grad_norm`: 1.0
|
795 |
+
- `num_train_epochs`: 10
|
796 |
+
- `max_steps`: -1
|
797 |
+
- `lr_scheduler_type`: cosine
|
798 |
+
- `lr_scheduler_kwargs`: {}
|
799 |
+
- `warmup_ratio`: 0.1
|
800 |
+
- `warmup_steps`: 0
|
801 |
+
- `log_level`: passive
|
802 |
+
- `log_level_replica`: warning
|
803 |
+
- `log_on_each_node`: True
|
804 |
+
- `logging_nan_inf_filter`: True
|
805 |
+
- `save_safetensors`: True
|
806 |
+
- `save_on_each_node`: False
|
807 |
+
- `save_only_model`: False
|
808 |
+
- `restore_callback_states_from_checkpoint`: False
|
809 |
+
- `no_cuda`: False
|
810 |
+
- `use_cpu`: False
|
811 |
+
- `use_mps_device`: False
|
812 |
+
- `seed`: 42
|
813 |
+
- `data_seed`: None
|
814 |
+
- `jit_mode_eval`: False
|
815 |
+
- `use_ipex`: False
|
816 |
+
- `bf16`: True
|
817 |
+
- `fp16`: False
|
818 |
+
- `fp16_opt_level`: O1
|
819 |
+
- `half_precision_backend`: auto
|
820 |
+
- `bf16_full_eval`: False
|
821 |
+
- `fp16_full_eval`: False
|
822 |
+
- `tf32`: True
|
823 |
+
- `local_rank`: 0
|
824 |
+
- `ddp_backend`: None
|
825 |
+
- `tpu_num_cores`: None
|
826 |
+
- `tpu_metrics_debug`: False
|
827 |
+
- `debug`: []
|
828 |
+
- `dataloader_drop_last`: False
|
829 |
+
- `dataloader_num_workers`: 0
|
830 |
+
- `dataloader_prefetch_factor`: None
|
831 |
+
- `past_index`: -1
|
832 |
+
- `disable_tqdm`: False
|
833 |
+
- `remove_unused_columns`: True
|
834 |
+
- `label_names`: None
|
835 |
+
- `load_best_model_at_end`: True
|
836 |
+
- `ignore_data_skip`: False
|
837 |
+
- `fsdp`: []
|
838 |
+
- `fsdp_min_num_params`: 0
|
839 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
840 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
841 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
842 |
+
- `deepspeed`: None
|
843 |
+
- `label_smoothing_factor`: 0.0
|
844 |
+
- `optim`: adamw_torch_fused
|
845 |
+
- `optim_args`: None
|
846 |
+
- `adafactor`: False
|
847 |
+
- `group_by_length`: False
|
848 |
+
- `length_column_name`: length
|
849 |
+
- `ddp_find_unused_parameters`: None
|
850 |
+
- `ddp_bucket_cap_mb`: None
|
851 |
+
- `ddp_broadcast_buffers`: False
|
852 |
+
- `dataloader_pin_memory`: True
|
853 |
+
- `dataloader_persistent_workers`: False
|
854 |
+
- `skip_memory_metrics`: True
|
855 |
+
- `use_legacy_prediction_loop`: False
|
856 |
+
- `push_to_hub`: False
|
857 |
+
- `resume_from_checkpoint`: None
|
858 |
+
- `hub_model_id`: None
|
859 |
+
- `hub_strategy`: every_save
|
860 |
+
- `hub_private_repo`: False
|
861 |
+
- `hub_always_push`: False
|
862 |
+
- `gradient_checkpointing`: False
|
863 |
+
- `gradient_checkpointing_kwargs`: None
|
864 |
+
- `include_inputs_for_metrics`: False
|
865 |
+
- `eval_do_concat_batches`: True
|
866 |
+
- `fp16_backend`: auto
|
867 |
+
- `push_to_hub_model_id`: None
|
868 |
+
- `push_to_hub_organization`: None
|
869 |
+
- `mp_parameters`:
|
870 |
+
- `auto_find_batch_size`: False
|
871 |
+
- `full_determinism`: False
|
872 |
+
- `torchdynamo`: None
|
873 |
+
- `ray_scope`: last
|
874 |
+
- `ddp_timeout`: 1800
|
875 |
+
- `torch_compile`: False
|
876 |
+
- `torch_compile_backend`: None
|
877 |
+
- `torch_compile_mode`: None
|
878 |
+
- `dispatch_batches`: None
|
879 |
+
- `split_batches`: None
|
880 |
+
- `include_tokens_per_second`: False
|
881 |
+
- `include_num_input_tokens_seen`: False
|
882 |
+
- `neftune_noise_alpha`: None
|
883 |
+
- `optim_target_modules`: None
|
884 |
+
- `batch_eval_metrics`: False
|
885 |
+
- `eval_on_start`: False
|
886 |
+
- `batch_sampler`: no_duplicates
|
887 |
+
- `multi_dataset_batch_sampler`: proportional
|
888 |
+
|
889 |
+
</details>
|
890 |
+
|
891 |
+
### Training Logs
|
892 |
+
<details><summary>Click to expand</summary>
|
893 |
+
|
894 |
+
| Epoch | Step | Training Loss | dim_128_cosine_map@100 | dim_256_cosine_map@100 | dim_512_cosine_map@100 | dim_64_cosine_map@100 | dim_768_cosine_map@100 |
|
895 |
+
|:----------:|:-------:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:----------------------:|
|
896 |
+
| 0.0516 | 10 | 6.6963 | - | - | - | - | - |
|
897 |
+
| 0.1033 | 20 | 7.634 | - | - | - | - | - |
|
898 |
+
| 0.1549 | 30 | 6.8573 | - | - | - | - | - |
|
899 |
+
| 0.2065 | 40 | 8.1731 | - | - | - | - | - |
|
900 |
+
| 0.2581 | 50 | 7.2853 | - | - | - | - | - |
|
901 |
+
| 0.3098 | 60 | 7.6009 | - | - | - | - | - |
|
902 |
+
| 0.3614 | 70 | 9.0776 | - | - | - | - | - |
|
903 |
+
| 0.4130 | 80 | 7.8738 | - | - | - | - | - |
|
904 |
+
| 0.4647 | 90 | 10.46 | - | - | - | - | - |
|
905 |
+
| 0.5163 | 100 | 10.7396 | - | - | - | - | - |
|
906 |
+
| 0.5679 | 110 | 10.3513 | - | - | - | - | - |
|
907 |
+
| 0.6196 | 120 | 10.654 | - | - | - | - | - |
|
908 |
+
| 0.6712 | 130 | 12.6157 | - | - | - | - | - |
|
909 |
+
| 0.7228 | 140 | 11.955 | - | - | - | - | - |
|
910 |
+
| 0.7744 | 150 | 13.2498 | - | - | - | - | - |
|
911 |
+
| 0.8261 | 160 | 11.2981 | - | - | - | - | - |
|
912 |
+
| 0.8777 | 170 | 13.8403 | - | - | - | - | - |
|
913 |
+
| 0.9293 | 180 | 9.4428 | - | - | - | - | - |
|
914 |
+
| 0.9810 | 190 | 8.1768 | - | - | - | - | - |
|
915 |
+
| **1.0016** | **194** | **-** | **0.0427** | **0.0507** | **0.0561** | **0.0290** | **0.0576** |
|
916 |
+
| 1.0303 | 200 | 7.0981 | - | - | - | - | - |
|
917 |
+
| 1.0820 | 210 | 7.3113 | - | - | - | - | - |
|
918 |
+
| 1.1336 | 220 | 7.0259 | - | - | - | - | - |
|
919 |
+
| 1.1852 | 230 | 7.5874 | - | - | - | - | - |
|
920 |
+
| 1.2369 | 240 | 7.65 | - | - | - | - | - |
|
921 |
+
| 1.2885 | 250 | 7.2387 | - | - | - | - | - |
|
922 |
+
| 1.3401 | 260 | 9.001 | - | - | - | - | - |
|
923 |
+
| 1.3917 | 270 | 7.5975 | - | - | - | - | - |
|
924 |
+
| 1.4434 | 280 | 9.9568 | - | - | - | - | - |
|
925 |
+
| 1.4950 | 290 | 10.4123 | - | - | - | - | - |
|
926 |
+
| 1.5466 | 300 | 10.5535 | - | - | - | - | - |
|
927 |
+
| 1.5983 | 310 | 9.8199 | - | - | - | - | - |
|
928 |
+
| 1.6499 | 320 | 12.7258 | - | - | - | - | - |
|
929 |
+
| 1.7015 | 330 | 11.9423 | - | - | - | - | - |
|
930 |
+
| 1.7531 | 340 | 12.7364 | - | - | - | - | - |
|
931 |
+
| 1.8048 | 350 | 12.1926 | - | - | - | - | - |
|
932 |
+
| 1.8564 | 360 | 12.926 | - | - | - | - | - |
|
933 |
+
| 1.9080 | 370 | 11.8007 | - | - | - | - | - |
|
934 |
+
| 1.9597 | 380 | 8.7379 | - | - | - | - | - |
|
935 |
+
| 2.0010 | 388 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
936 |
+
| 2.0090 | 390 | 7.1936 | - | - | - | - | - |
|
937 |
+
| 2.0607 | 400 | 6.7359 | - | - | - | - | - |
|
938 |
+
| 2.1123 | 410 | 7.4212 | - | - | - | - | - |
|
939 |
+
| 2.1639 | 420 | 7.346 | - | - | - | - | - |
|
940 |
+
| 2.2156 | 430 | 7.6784 | - | - | - | - | - |
|
941 |
+
| 2.2672 | 440 | 7.5079 | - | - | - | - | - |
|
942 |
+
| 2.3188 | 450 | 7.8875 | - | - | - | - | - |
|
943 |
+
| 2.3704 | 460 | 8.7154 | - | - | - | - | - |
|
944 |
+
| 2.4221 | 470 | 8.1278 | - | - | - | - | - |
|
945 |
+
| 2.4737 | 480 | 11.1214 | - | - | - | - | - |
|
946 |
+
| 2.5253 | 490 | 10.5293 | - | - | - | - | - |
|
947 |
+
| 2.5770 | 500 | 9.9882 | - | - | - | - | - |
|
948 |
+
| 2.6286 | 510 | 11.5283 | - | - | - | - | - |
|
949 |
+
| 2.6802 | 520 | 12.4337 | - | - | - | - | - |
|
950 |
+
| 2.7318 | 530 | 11.641 | - | - | - | - | - |
|
951 |
+
| 2.7835 | 540 | 13.3482 | - | - | - | - | - |
|
952 |
+
| 2.8351 | 550 | 11.7302 | - | - | - | - | - |
|
953 |
+
| 2.8867 | 560 | 13.7171 | - | - | - | - | - |
|
954 |
+
| 2.9384 | 570 | 8.9323 | - | - | - | - | - |
|
955 |
+
| 2.9900 | 580 | 7.4869 | - | - | - | - | - |
|
956 |
+
| 3.0003 | 582 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
957 |
+
| 3.0394 | 590 | 6.9978 | - | - | - | - | - |
|
958 |
+
| 3.0910 | 600 | 7.33 | - | - | - | - | - |
|
959 |
+
| 3.1426 | 610 | 7.1879 | - | - | - | - | - |
|
960 |
+
| 3.1943 | 620 | 7.9204 | - | - | - | - | - |
|
961 |
+
| 3.2459 | 630 | 7.4435 | - | - | - | - | - |
|
962 |
+
| 3.2975 | 640 | 7.4079 | - | - | - | - | - |
|
963 |
+
| 3.3491 | 650 | 9.2445 | - | - | - | - | - |
|
964 |
+
| 3.4008 | 660 | 7.1794 | - | - | - | - | - |
|
965 |
+
| 3.4524 | 670 | 10.4496 | - | - | - | - | - |
|
966 |
+
| 3.5040 | 680 | 10.7556 | - | - | - | - | - |
|
967 |
+
| 3.5557 | 690 | 10.3543 | - | - | - | - | - |
|
968 |
+
| 3.6073 | 700 | 9.9478 | - | - | - | - | - |
|
969 |
+
| 3.6589 | 710 | 12.6559 | - | - | - | - | - |
|
970 |
+
| 3.7106 | 720 | 12.2463 | - | - | - | - | - |
|
971 |
+
| 3.7622 | 730 | 12.8381 | - | - | - | - | - |
|
972 |
+
| 3.8138 | 740 | 11.726 | - | - | - | - | - |
|
973 |
+
| 3.8654 | 750 | 13.4883 | - | - | - | - | - |
|
974 |
+
| 3.9171 | 760 | 10.7751 | - | - | - | - | - |
|
975 |
+
| 3.9687 | 770 | 8.5484 | - | - | - | - | - |
|
976 |
+
| 3.9997 | 776 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
977 |
+
| 4.0181 | 780 | 7.1582 | - | - | - | - | - |
|
978 |
+
| 4.0697 | 790 | 7.0161 | - | - | - | - | - |
|
979 |
+
| 4.1213 | 800 | 7.11 | - | - | - | - | - |
|
980 |
+
| 4.1730 | 810 | 7.4557 | - | - | - | - | - |
|
981 |
+
| 4.2246 | 820 | 7.723 | - | - | - | - | - |
|
982 |
+
| 4.2762 | 830 | 7.2889 | - | - | - | - | - |
|
983 |
+
| 4.3278 | 840 | 8.3884 | - | - | - | - | - |
|
984 |
+
| 4.3795 | 850 | 8.1581 | - | - | - | - | - |
|
985 |
+
| 4.4311 | 860 | 9.1386 | - | - | - | - | - |
|
986 |
+
| 4.4827 | 870 | 10.706 | - | - | - | - | - |
|
987 |
+
| 4.5344 | 880 | 10.4258 | - | - | - | - | - |
|
988 |
+
| 4.5860 | 890 | 9.9659 | - | - | - | - | - |
|
989 |
+
| 4.6376 | 900 | 11.8535 | - | - | - | - | - |
|
990 |
+
| 4.6893 | 910 | 12.5578 | - | - | - | - | - |
|
991 |
+
| 4.7409 | 920 | 11.834 | - | - | - | - | - |
|
992 |
+
| 4.7925 | 930 | 12.5328 | - | - | - | - | - |
|
993 |
+
| 4.8441 | 940 | 12.6998 | - | - | - | - | - |
|
994 |
+
| 4.8958 | 950 | 12.9728 | - | - | - | - | - |
|
995 |
+
| 4.9474 | 960 | 8.9204 | - | - | - | - | - |
|
996 |
+
| 4.9990 | 970 | 7.3909 | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
997 |
+
| 5.0484 | 980 | 6.6683 | - | - | - | - | - |
|
998 |
+
| 5.1000 | 990 | 7.5538 | - | - | - | - | - |
|
999 |
+
| 5.1517 | 1000 | 6.9256 | - | - | - | - | - |
|
1000 |
+
| 5.2033 | 1010 | 8.0908 | - | - | - | - | - |
|
1001 |
+
| 5.2549 | 1020 | 7.254 | - | - | - | - | - |
|
1002 |
+
| 5.3066 | 1030 | 7.6558 | - | - | - | - | - |
|
1003 |
+
| 5.3582 | 1040 | 9.2184 | - | - | - | - | - |
|
1004 |
+
| 5.4098 | 1050 | 7.5886 | - | - | - | - | - |
|
1005 |
+
| 5.4614 | 1060 | 10.4976 | - | - | - | - | - |
|
1006 |
+
| 5.5131 | 1070 | 10.785 | - | - | - | - | - |
|
1007 |
+
| 5.5647 | 1080 | 10.2376 | - | - | - | - | - |
|
1008 |
+
| 5.6163 | 1090 | 10.4871 | - | - | - | - | - |
|
1009 |
+
| 5.6680 | 1100 | 12.6986 | - | - | - | - | - |
|
1010 |
+
| 5.7196 | 1110 | 12.0688 | - | - | - | - | - |
|
1011 |
+
| 5.7712 | 1120 | 13.1161 | - | - | - | - | - |
|
1012 |
+
| 5.8228 | 1130 | 11.3866 | - | - | - | - | - |
|
1013 |
+
| 5.8745 | 1140 | 13.7281 | - | - | - | - | - |
|
1014 |
+
| 5.9261 | 1150 | 9.8432 | - | - | - | - | - |
|
1015 |
+
| 5.9777 | 1160 | 8.2606 | - | - | - | - | - |
|
1016 |
+
| 5.9984 | 1164 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
1017 |
+
| 6.0271 | 1170 | 7.0799 | - | - | - | - | - |
|
1018 |
+
| 6.0787 | 1180 | 7.2981 | - | - | - | - | - |
|
1019 |
+
| 6.1304 | 1190 | 7.0085 | - | - | - | - | - |
|
1020 |
+
| 6.1820 | 1200 | 7.4587 | - | - | - | - | - |
|
1021 |
+
| 6.2336 | 1210 | 7.8467 | - | - | - | - | - |
|
1022 |
+
| 6.2853 | 1220 | 7.2008 | - | - | - | - | - |
|
1023 |
+
| 6.3369 | 1230 | 8.8152 | - | - | - | - | - |
|
1024 |
+
| 6.3885 | 1240 | 7.7205 | - | - | - | - | - |
|
1025 |
+
| 6.4401 | 1250 | 9.9131 | - | - | - | - | - |
|
1026 |
+
| 6.4918 | 1260 | 10.212 | - | - | - | - | - |
|
1027 |
+
| 6.5434 | 1270 | 10.6791 | - | - | - | - | - |
|
1028 |
+
| 6.5950 | 1280 | 9.8454 | - | - | - | - | - |
|
1029 |
+
| 6.6467 | 1290 | 12.4647 | - | - | - | - | - |
|
1030 |
+
| 6.6983 | 1300 | 11.8962 | - | - | - | - | - |
|
1031 |
+
| 6.7499 | 1310 | 12.8014 | - | - | - | - | - |
|
1032 |
+
| 6.8015 | 1320 | 12.1836 | - | - | - | - | - |
|
1033 |
+
| 6.8532 | 1330 | 12.9114 | - | - | - | - | - |
|
1034 |
+
| 6.9048 | 1340 | 12.1711 | - | - | - | - | - |
|
1035 |
+
| 6.9564 | 1350 | 8.8125 | - | - | - | - | - |
|
1036 |
+
| 6.9977 | 1358 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
1037 |
+
| 7.0058 | 1360 | 7.2281 | - | - | - | - | - |
|
1038 |
+
| 7.0574 | 1370 | 6.6681 | - | - | - | - | - |
|
1039 |
+
| 7.1091 | 1380 | 7.5282 | - | - | - | - | - |
|
1040 |
+
| 7.1607 | 1390 | 7.1585 | - | - | - | - | - |
|
1041 |
+
| 7.2123 | 1400 | 7.8507 | - | - | - | - | - |
|
1042 |
+
| 7.2640 | 1410 | 7.4737 | - | - | - | - | - |
|
1043 |
+
| 7.3156 | 1420 | 7.6963 | - | - | - | - | - |
|
1044 |
+
| 7.3672 | 1430 | 8.8799 | - | - | - | - | - |
|
1045 |
+
| 7.4188 | 1440 | 7.9977 | - | - | - | - | - |
|
1046 |
+
| 7.4705 | 1450 | 10.9078 | - | - | - | - | - |
|
1047 |
+
| 7.5221 | 1460 | 10.5731 | - | - | - | - | - |
|
1048 |
+
| 7.5737 | 1470 | 10.1121 | - | - | - | - | - |
|
1049 |
+
| 7.6254 | 1480 | 11.2426 | - | - | - | - | - |
|
1050 |
+
| 7.6770 | 1490 | 12.4832 | - | - | - | - | - |
|
1051 |
+
| 7.7286 | 1500 | 11.6954 | - | - | - | - | - |
|
1052 |
+
| 7.7803 | 1510 | 13.4836 | - | - | - | - | - |
|
1053 |
+
| 7.8319 | 1520 | 11.4752 | - | - | - | - | - |
|
1054 |
+
| 7.8835 | 1530 | 13.8097 | - | - | - | - | - |
|
1055 |
+
| 7.9351 | 1540 | 9.0087 | - | - | - | - | - |
|
1056 |
+
| 7.9868 | 1550 | 7.709 | - | - | - | - | - |
|
1057 |
+
| 8.0023 | 1553 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
1058 |
+
| 8.0361 | 1560 | 7.1515 | - | - | - | - | - |
|
1059 |
+
| 8.0878 | 1570 | 7.2816 | - | - | - | - | - |
|
1060 |
+
| 8.1394 | 1580 | 7.1392 | - | - | - | - | - |
|
1061 |
+
| 8.1910 | 1590 | 7.7863 | - | - | - | - | - |
|
1062 |
+
| 8.2427 | 1600 | 7.4939 | - | - | - | - | - |
|
1063 |
+
| 8.2943 | 1610 | 7.3074 | - | - | - | - | - |
|
1064 |
+
| 8.3459 | 1620 | 9.1739 | - | - | - | - | - |
|
1065 |
+
| 8.3975 | 1630 | 7.3667 | - | - | - | - | - |
|
1066 |
+
| 8.4492 | 1640 | 10.2528 | - | - | - | - | - |
|
1067 |
+
| 8.5008 | 1650 | 10.6824 | - | - | - | - | - |
|
1068 |
+
| 8.5524 | 1660 | 10.3765 | - | - | - | - | - |
|
1069 |
+
| 8.6041 | 1670 | 9.853 | - | - | - | - | - |
|
1070 |
+
| 8.6557 | 1680 | 12.8624 | - | - | - | - | - |
|
1071 |
+
| 8.7073 | 1690 | 12.0849 | - | - | - | - | - |
|
1072 |
+
| 8.7590 | 1700 | 12.7345 | - | - | - | - | - |
|
1073 |
+
| 8.8106 | 1710 | 11.9884 | - | - | - | - | - |
|
1074 |
+
| 8.8622 | 1720 | 13.2117 | - | - | - | - | - |
|
1075 |
+
| 8.9138 | 1730 | 11.1261 | - | - | - | - | - |
|
1076 |
+
| 8.9655 | 1740 | 8.5941 | - | - | - | - | - |
|
1077 |
+
| 9.0016 | 1747 | - | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
1078 |
+
| 9.0148 | 1750 | 7.2587 | - | - | - | - | - |
|
1079 |
+
| 9.0665 | 1760 | 6.8577 | - | - | - | - | - |
|
1080 |
+
| 9.1181 | 1770 | 7.2256 | - | - | - | - | - |
|
1081 |
+
| 9.1697 | 1780 | 7.456 | - | - | - | - | - |
|
1082 |
+
| 9.2214 | 1790 | 7.6563 | - | - | - | - | - |
|
1083 |
+
| 9.2730 | 1800 | 7.3877 | - | - | - | - | - |
|
1084 |
+
| 9.3246 | 1810 | 8.2009 | - | - | - | - | - |
|
1085 |
+
| 9.3763 | 1820 | 8.5318 | - | - | - | - | - |
|
1086 |
+
| 9.4279 | 1830 | 8.5052 | - | - | - | - | - |
|
1087 |
+
| 9.4795 | 1840 | 10.9953 | - | - | - | - | - |
|
1088 |
+
| 9.5311 | 1850 | 10.4012 | - | - | - | - | - |
|
1089 |
+
| 9.5828 | 1860 | 10.0235 | - | - | - | - | - |
|
1090 |
+
| 9.6344 | 1870 | 11.9031 | - | - | - | - | - |
|
1091 |
+
| 9.6860 | 1880 | 12.5293 | - | - | - | - | - |
|
1092 |
+
| 9.7377 | 1890 | 11.5157 | - | - | - | - | - |
|
1093 |
+
| 9.7893 | 1900 | 12.8049 | - | - | - | - | - |
|
1094 |
+
| 9.8409 | 1910 | 12.4659 | - | - | - | - | - |
|
1095 |
+
| 9.8925 | 1920 | 13.1517 | - | - | - | - | - |
|
1096 |
+
| 9.9442 | 1930 | 9.0604 | 0.0427 | 0.0507 | 0.0561 | 0.0290 | 0.0576 |
|
1097 |
+
|
1098 |
+
* The bold row denotes the saved checkpoint.
|
1099 |
+
</details>
|
1100 |
+
|
1101 |
+
### Framework Versions
|
1102 |
+
- Python: 3.10.13
|
1103 |
+
- Sentence Transformers: 3.0.1
|
1104 |
+
- Transformers: 4.42.3
|
1105 |
+
- PyTorch: 2.5.0.dev20240704+cu124
|
1106 |
+
- Accelerate: 0.32.1
|
1107 |
+
- Datasets: 2.20.0
|
1108 |
+
- Tokenizers: 0.19.1
|
1109 |
+
|
1110 |
+
## Citation
|
1111 |
+
|
1112 |
+
### BibTeX
|
1113 |
+
|
1114 |
+
#### Sentence Transformers
|
1115 |
+
```bibtex
|
1116 |
+
@inproceedings{reimers-2019-sentence-bert,
|
1117 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
1118 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
1119 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
1120 |
+
month = "11",
|
1121 |
+
year = "2019",
|
1122 |
+
publisher = "Association for Computational Linguistics",
|
1123 |
+
url = "https://arxiv.org/abs/1908.10084",
|
1124 |
+
}
|
1125 |
+
```
|
1126 |
+
|
1127 |
+
#### MatryoshkaLoss
|
1128 |
+
```bibtex
|
1129 |
+
@misc{kusupati2024matryoshka,
|
1130 |
+
title={Matryoshka Representation Learning},
|
1131 |
+
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
|
1132 |
+
year={2024},
|
1133 |
+
eprint={2205.13147},
|
1134 |
+
archivePrefix={arXiv},
|
1135 |
+
primaryClass={cs.LG}
|
1136 |
+
}
|
1137 |
+
```
|
1138 |
+
|
1139 |
+
#### MultipleNegativesRankingLoss
|
1140 |
+
```bibtex
|
1141 |
+
@misc{henderson2017efficient,
|
1142 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
1143 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
1144 |
+
year={2017},
|
1145 |
+
eprint={1705.00652},
|
1146 |
+
archivePrefix={arXiv},
|
1147 |
+
primaryClass={cs.CL}
|
1148 |
+
}
|
1149 |
+
```
|
1150 |
+
|
1151 |
+
<!--
|
1152 |
+
## Glossary
|
1153 |
+
|
1154 |
+
*Clearly define terms in order to be accessible across audiences.*
|
1155 |
+
-->
|
1156 |
+
|
1157 |
+
<!--
|
1158 |
+
## Model Card Authors
|
1159 |
+
|
1160 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
1161 |
+
-->
|
1162 |
+
|
1163 |
+
<!--
|
1164 |
+
## Model Card Contact
|
1165 |
+
|
1166 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
1167 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "./bge-base-financial-matryoshka/checkpoint-194",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "LABEL_0"
|
14 |
+
},
|
15 |
+
"initializer_range": 0.02,
|
16 |
+
"intermediate_size": 3072,
|
17 |
+
"label2id": {
|
18 |
+
"LABEL_0": 0
|
19 |
+
},
|
20 |
+
"layer_norm_eps": 1e-12,
|
21 |
+
"max_position_embeddings": 512,
|
22 |
+
"model_type": "bert",
|
23 |
+
"num_attention_heads": 12,
|
24 |
+
"num_hidden_layers": 12,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"position_embedding_type": "absolute",
|
27 |
+
"torch_dtype": "float32",
|
28 |
+
"transformers_version": "4.42.3",
|
29 |
+
"type_vocab_size": 2,
|
30 |
+
"use_cache": true,
|
31 |
+
"vocab_size": 30522
|
32 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.42.3",
|
5 |
+
"pytorch": "2.5.0.dev20240704+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3fd10a24bf86833d54a50a03e1b818d1cc3e53792d1fade4318858dcb31fcf4
|
3 |
+
size 437951328
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 512,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|