Add new SentenceTransformer model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +557 -0
- config.json +47 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:305
|
8 |
+
- loss:MultipleNegativesRankingLoss
|
9 |
+
base_model: nomic-ai/modernbert-embed-base
|
10 |
+
widget:
|
11 |
+
- source_sentence: According to the text, when is an approach permitted?
|
12 |
+
sentences:
|
13 |
+
- "25.1.2\tAdjudicate on the Rules of the game;\n25.1.3\tImpose any sanction necessary\
|
14 |
+
\ to control the match;\n25.1.4\tAward Tries and record the progressive score;\n\
|
15 |
+
25.1.5\tMaintain a count of Touches during each Possession;\n25.1.6\tAward Penalties\
|
16 |
+
\ for Infringements against the Rules; and\n25.1.7\tReport to the relevant competition\
|
17 |
+
\ administration any Sin Bins, \nDismissals or injuries to any participant sustained\
|
18 |
+
\ during a Match.25.2\tOnly Team captains are permitted to seek clarification\
|
19 |
+
\ of a decision directly \nfrom the Referee.An approach may only be made during\
|
20 |
+
\ a break in play or at \nthe discretion of the Referee."
|
21 |
+
- "11.2\tA player in Possession may not intentionally kick, pass, flick, knock,\
|
22 |
+
\ throw, \nhand-off or otherwise propel the ball in a Forward direction over an\
|
23 |
+
\ opposition \nplayer and regain Possession.Ruling = A Penalty will be awarded\
|
24 |
+
\ to the Defending Team at the Mark where the ball was \npropelled Forward.12 \
|
25 |
+
\ Ball Touched in Flight \n \n12.1\tIf a player from the Defending Team deliberately\
|
26 |
+
\ makes contact with the ball in \nflight and the ball goes to ground, the Attacking\
|
27 |
+
\ Team retains the ball and the \nTouch Count restarts as zero (0) Touch.12.2\t\
|
28 |
+
If a player from the Defending Team deliberately makes contact with the ball \n\
|
29 |
+
in flight and the ball is retrieved by an attacking player, without touching the\
|
30 |
+
\ \nground, play continues and the next Touch is zero (0) Touch."
|
31 |
+
- "Ruling = A Penalty to the Attacking Team at the point of the Infringement or\
|
32 |
+
\ on the seven (7) \nmetre line directly Forward of the Infringement.15.4\tWhen\
|
33 |
+
\ a Rollball occurs within Defending Team’s Seven Metre Zone or a Penalty \nTap\
|
34 |
+
\ within ten (10) metres of the Defending Team’s Try Line, all players from the\
|
35 |
+
\ \nDefending Team must have both feet on or behind their Try Line and no other\
|
36 |
+
\ \npart of the body in contact with the ground Forward of their Try Line.Ruling\
|
37 |
+
\ = A Penalty to the Attacking Team at the seven (7) metre line directly Forward\
|
38 |
+
\ of the \npoint of the Infringement.15.5\tAfter effecting the Touch, the defending\
|
39 |
+
\ player must retire the required seven \n(7) metres or to the Defending Try Line\
|
40 |
+
\ as indicated by the Referee without \ninterfering with the Attacking Team.Ruling\
|
41 |
+
\ = A Penalty to the Attacking Team ten (10) metres Forward of the Infringement\
|
42 |
+
\ or if \non the Defensive Try Line, on the seven (7) metre line."
|
43 |
+
- source_sentence: If a player intentionally propels the ball forward over an opponent
|
44 |
+
and regains possession, what is the ruling?
|
45 |
+
sentences:
|
46 |
+
- "24.5\tFor the avoidance of doubt for clauses 24.3 and 24.4 the non-offending\
|
47 |
+
\ Team \nwill retain a numerical advantage on the Field of Play during the Drop-Off.25 \
|
48 |
+
\ Match Officials \n25.1\tThe Referee is the sole judge on all match related\
|
49 |
+
\ matters inside the Perimeter \nfor the Duration of a match, has jurisdiction\
|
50 |
+
\ over all players, coaches and \nofficials and is required to:\n25.1.1\tInspect\
|
51 |
+
\ the Field of Play, Line Markings and Markers prior to the \ncommencement of\
|
52 |
+
\ the Match to ensure the safety of all participants.25.1.2\tAdjudicate on the\
|
53 |
+
\ Rules of the game;\n25.1.3\tImpose any sanction necessary to control the match;\n\
|
54 |
+
25.1.4\tAward Tries and record the progressive score;\n25.1.5\tMaintain a count\
|
55 |
+
\ of Touches during each Possession;\n25.1.6\tAward Penalties for Infringements\
|
56 |
+
\ against the Rules; and\n25.1.7\tReport to the relevant competition administration\
|
57 |
+
\ any Sin Bins, \nDismissals or injuries to any participant sustained during a\
|
58 |
+
\ Match."
|
59 |
+
- "See Appendix 1.Forced Interchange\nWhen a player is required to undertake a compulsory\
|
60 |
+
\ Interchange for \nan Infringement ruled more serious than a Penalty but less\
|
61 |
+
\ serious \nthan a Permanent Interchange, Sin Bin or Dismissal.Forward\nA position\
|
62 |
+
\ or direction towards the Dead Ball Line beyond the Team’s \nAttacking Try Line.Full\
|
63 |
+
\ Time\nThe expiration of the second period of time allowed for play.Half\nThe\
|
64 |
+
\ player who takes Possession following a Rollball.Half Time\nThe break in play\
|
65 |
+
\ between the two halves of a match.Imminent\nAbout to occur, it is almost certain\
|
66 |
+
\ to occur.Infringement\nThe action of a player contrary to the Rules of the game.In-Goal\
|
67 |
+
\ Area\nThe area in the Field of Play bounded by the Sidelines, the Try Lines\
|
68 |
+
\ \nand the Dead Ball Lines.There are two (2), one (1) at each end of the \nField\
|
69 |
+
\ of Play.See Appendix 1.Interchange\nThe act of an on-field player leaving the\
|
70 |
+
\ Field of Play to be replaced \nby an off-field player entering the Field of\
|
71 |
+
\ Play."
|
72 |
+
- "Ruling = A Change of Possession occurs at the point of the Touch, or if In-Goal\
|
73 |
+
\ the nearest \npoint on the seven (7) metre line.10.8\tIf a Touch is made in\
|
74 |
+
\ the In-Goal Area before the ball is grounded, the player in \nPossession is\
|
75 |
+
\ to perform a Rollball seven (7) metres from the Team’s Attacking \nTry Line,\
|
76 |
+
\ provided it is not the sixth Touch and the player is not Half.10.9\tIf a player\
|
77 |
+
\ in Possession is Touched while on or behind their Defending Try Line, \nthe\
|
78 |
+
\ Touch counts and once the Referee sets the Mark seven (7) metres directly \n\
|
79 |
+
Forward of the contact point from the Defending Team’s Try Line, a Rollball is\
|
80 |
+
\ \nperformed.10.10\tIf a player in Possession intentionally makes a Touch on\
|
81 |
+
\ an Offside defender \nwho is making every effort to retire and remain out of\
|
82 |
+
\ play, the Touch counts."
|
83 |
+
- source_sentence: How large is the Ruck/Rollball Area?
|
84 |
+
sentences:
|
85 |
+
- "13.5\tA player may only perform a Rollball at the Mark under the following \n\
|
86 |
+
circumstances:\n13.5.1\twhen a Touch has been made; or\n13.5.2\twhen Possession\
|
87 |
+
\ changes following the sixth Touch; or\n13.5.3\twhen Possession changes due to\
|
88 |
+
\ the ball being dropped or passed and \ngoes to the ground; or\n13.5.4\twhen\
|
89 |
+
\ Possession changes due to an Infringement by an attacking player \nat a Penalty,\
|
90 |
+
\ a Tap or a Rollball; or\nFIT Playing Rules - 5th Edition\nCOPYRIGHT © Touch\
|
91 |
+
\ Football Australia 2020\n11\n13.5.5\twhen Possession changes after the Half\
|
92 |
+
\ is Touched or when the Half \nplaces the ball on or over the Try Line; or\n\
|
93 |
+
13.5.6\tin replacement of a Penalty Tap; or\n13.5.7\twhen so directed by the Referee.13.6\t\
|
94 |
+
A player is to perform a Rollball seven (7) metres in-field under the following\
|
95 |
+
\ \ncircumstances:\n13.6.1\twhen a Change of Possession takes place due to a player\
|
96 |
+
\ in Possession \nmaking contact with the Sideline or any ground outside the Field\
|
97 |
+
\ of Play, \nprior to a Touch being made; or\n13.6.2\twhen the ball not in Possession\
|
98 |
+
\ of a player makes contact with the \nSideline or any ground outside the Field\
|
99 |
+
\ of Play."
|
100 |
+
- "13.5\tA player may only perform a Rollball at the Mark under the following \n\
|
101 |
+
circumstances:\n13.5.1\twhen a Touch has been made; or\n13.5.2\twhen Possession\
|
102 |
+
\ changes following the sixth Touch; or\n13.5.3\twhen Possession changes due to\
|
103 |
+
\ the ball being dropped or passed and \ngoes to the ground; or\n13.5.4\twhen\
|
104 |
+
\ Possession changes due to an Infringement by an attacking player \nat a Penalty,\
|
105 |
+
\ a Tap or a Rollball; or\nFIT Playing Rules - 5th Edition\nCOPYRIGHT © Touch\
|
106 |
+
\ Football Australia 2020\n11\n13.5.5\twhen Possession changes after the Half\
|
107 |
+
\ is Touched or when the Half \nplaces the ball on or over the Try Line; or\n\
|
108 |
+
13.5.6\tin replacement of a Penalty Tap; or\n13.5.7\twhen so directed by the Referee.13.6\t\
|
109 |
+
A player is to perform a Rollball seven (7) metres in-field under the following\
|
110 |
+
\ \ncircumstances:\n13.6.1\twhen a Change of Possession takes place due to a player\
|
111 |
+
\ in Possession \nmaking contact with the Sideline or any ground outside the Field\
|
112 |
+
\ of Play, \nprior to a Touch being made; or\n13.6.2\twhen the ball not in Possession\
|
113 |
+
\ of a player makes contact with the \nSideline or any ground outside the Field\
|
114 |
+
\ of Play."
|
115 |
+
- "See Appendix 1.Forced Interchange\nWhen a player is required to undertake a compulsory\
|
116 |
+
\ Interchange for \nan Infringement ruled more serious than a Penalty but less\
|
117 |
+
\ serious \nthan a Permanent Interchange, Sin Bin or Dismissal.Forward\nA position\
|
118 |
+
\ or direction towards the Dead Ball Line beyond the Team’s \nAttacking Try Line.Full\
|
119 |
+
\ Time\nThe expiration of the second period of time allowed for play.Half\nThe\
|
120 |
+
\ player who takes Possession following a Rollball.Half Time\nThe break in play\
|
121 |
+
\ between the two halves of a match.Imminent\nAbout to occur, it is almost certain\
|
122 |
+
\ to occur.Infringement\nThe action of a player contrary to the Rules of the game.In-Goal\
|
123 |
+
\ Area\nThe area in the Field of Play bounded by the Sidelines, the Try Lines\
|
124 |
+
\ \nand the Dead Ball Lines.There are two (2), one (1) at each end of the \nField\
|
125 |
+
\ of Play.See Appendix 1.Interchange\nThe act of an on-field player leaving the\
|
126 |
+
\ Field of Play to be replaced \nby an off-field player entering the Field of\
|
127 |
+
\ Play."
|
128 |
+
- source_sentence: When may players interchange following a try in Touch Rugby?
|
129 |
+
sentences:
|
130 |
+
- "5.2\tA Team must have a minimum of four (4) players on the field for a match\
|
131 |
+
\ to \ncommence or continue, except during a Drop-Off.5.3\tWhere the number of\
|
132 |
+
\ players on the field from one Team falls below four (4), \nthe match is to be\
|
133 |
+
\ abandoned and the non-offending Team is to be declared the \nWinner.5.3.1\t\
|
134 |
+
This does not apply for players sent to the Sin Bin Area.5.4\tIn mixed gender\
|
135 |
+
\ competitions, the maximum number of males allowed on the \nField of Play is\
|
136 |
+
\ three (3), the minimum male requirement is one (1) and the \nminimum female\
|
137 |
+
\ requirement is one (1).6 Team Coach and Team Officials \n6.1\tThe Team coach(s)\
|
138 |
+
\ and Team officials may be permitted inside the Perimeter \nbut shall be required\
|
139 |
+
\ to be positioned either in the Interchange Area or at the \nend of the Field\
|
140 |
+
\ of Play for the duration of the match.6.2\tThe Team coach(s) and Team officials\
|
141 |
+
\ may move from one position to the other \nbut shall do so without delay."
|
142 |
+
- "17.8\tFollowing a Try, players may Interchange at will, without having to wait\
|
143 |
+
\ for the \nplayer to enter the Interchange Area, but must do so prior to the\
|
144 |
+
\ Tap being taken \nto recommence play.18 Penalty \n18.1\tThe Tap must be performed\
|
145 |
+
\ in accordance with the Definitions.Ruling = The Referee will instruct the offending\
|
146 |
+
\ Team to return to the Mark and perform the \nTap again.18.2\tFor Infringements\
|
147 |
+
\ that occur between seven (7) metre lines, the Mark for the \nPenalty Tap is\
|
148 |
+
\ at the point of Infringement unless otherwise indicated by the \nReferee.18.3\t\
|
149 |
+
For Infringements that occur within the Seven Metre Zone the Tap must be \ntaken\
|
150 |
+
\ at the nearest seven (7) metre line."
|
151 |
+
- "4.4\tHats or caps are permitted to be worn during a match provided they are safe\
|
152 |
+
\ \nand meet any NTA regulations.4.5\tSafe footwear must be worn with exceptions\
|
153 |
+
\ allowed for game variants such as \nBeach Touch.4.6\tLight leather or synthetic\
|
154 |
+
\ boots with soft moulded soles are permitted.4.6.1\tShoes with screw-in studs\
|
155 |
+
\ are not to be worn by any player or Referee.4.7\tPlayers are not to participate\
|
156 |
+
\ in any match wearing any item of jewellery, \nchain, identification band/bracelet\
|
157 |
+
\ or similar item that may prove dangerous.Any jewellery or other items that cannot\
|
158 |
+
\ be removed are to be taped to the \nsatisfaction of the Referee.4.8\tLong (extend\
|
159 |
+
\ beyond the finger flesh when viewed from the palm) or sharp \nfingernails are\
|
160 |
+
\ not allowed.4.9\tReferees and players may wear spectacles or sunglasses provided\
|
161 |
+
\ they are safe \nand securely attached."
|
162 |
+
- source_sentence: Is there a time-out for injuries during a standard Touch Rugby
|
163 |
+
match?
|
164 |
+
sentences:
|
165 |
+
- "Line \nMarkings are to be laid out as shown in Appendix 1 - The Field of Play.Sidelines\
|
166 |
+
\ \nextend seven (7) metres beyond the Try Lines to join the Dead Ball Lines and\
|
167 |
+
\ \ndefine the In-Goal Areas which measure fifty (50) metres wide by seven (7)\
|
168 |
+
\ \nmetres in length.1.3\tThe Interchange Areas are located no closer than one\
|
169 |
+
\ (1) metre from each \nSideline.1.4\tSuitably sized markers, cones or corner\
|
170 |
+
\ posts of a distinguishing colour and \nmade from safe and pliable material should\
|
171 |
+
\ be positioned at the intersections of \nthe Sideline and Halfway line and the\
|
172 |
+
\ Sideline and the Try Line.1.4.1\tMarkers, cones or corner posts placed on the\
|
173 |
+
\ junction of the Sideline and \nTry Line are deemed to be in the Field of Play.1.4.2\t\
|
174 |
+
All other markers or cones are deemed to be out of the Field of Play.1.5\tThe\
|
175 |
+
\ standard playing surface is grass.Other surfaces including synthetic grass \n\
|
176 |
+
may be used but shall be subject to NTA approved standards."
|
177 |
+
- "10.10\tIf a player in Possession intentionally makes a Touch on an Offside defender\
|
178 |
+
\ \nwho is making every effort to retire and remain out of play, the Touch counts.FIT\
|
179 |
+
\ Playing Rules - 5th Edition\nCOPYRIGHT © Touch Football Australia 2020\n9\n\
|
180 |
+
10.11\tIf a Touch is made on a player in Possession while the player is juggling\
|
181 |
+
\ the ball \nin an attempt to maintain control of it, the Touch counts if the\
|
182 |
+
\ attacking player \nfollowing the Touch retains Possession.10.12\tIf a player\
|
183 |
+
\ in Possession is Touched and subsequently makes contact with \neither the Sideline,\
|
184 |
+
\ a field marker or the ground outside the Field of Play, the \nTouch counts and\
|
185 |
+
\ play continues with a Rollball at the Mark where the Touch \noccurred.10.13\t\
|
186 |
+
When a player from the Defending Team enters its defensive Seven Metre Zone, \n\
|
187 |
+
the Defending Team must move Forward at a reasonable pace until a Touch is \n\
|
188 |
+
Imminent or made.Ruling = A Penalty to the Attacking Team at the point of the\
|
189 |
+
\ Infringement."
|
190 |
+
- "4.9\tReferees and players may wear spectacles or sunglasses provided they are\
|
191 |
+
\ safe \nand securely attached.4.10\tReferees and players may wear sport monitoring\
|
192 |
+
\ equipment and medical \nsupports such as knee or ankle braces provided, at the\
|
193 |
+
\ sole discretion of \ncompetition’s controlling body, the items are not dangerous.5 \
|
194 |
+
\ Team Composition \n5.1\tA Team consists of a maximum of 14 players, no more\
|
195 |
+
\ than six (6) of whom are \nallowed on the field at any time.FIT Playing Rules\
|
196 |
+
\ - 5th Edition\n6\nCOPYRIGHT © Touch Football Australia 2020\nRuling = A Penalty\
|
197 |
+
\ awarded to the non-offending Team at the time the offence is identified \nseven\
|
198 |
+
\ (7) metres infield on the Halfway Line or the position of the ball, whichever\
|
199 |
+
\ is the \ngreater Advantage.5.2\tA Team must have a minimum of four (4) players\
|
200 |
+
\ on the field for a match to \ncommence or continue, except during a Drop-Off."
|
201 |
+
datasets:
|
202 |
+
- Trelis/touch-rugby-modernbert-pairs
|
203 |
+
pipeline_tag: sentence-similarity
|
204 |
+
library_name: sentence-transformers
|
205 |
+
---
|
206 |
+
|
207 |
+
# SentenceTransformer based on nomic-ai/modernbert-embed-base
|
208 |
+
|
209 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) on the [touch-rugby-modernbert-pairs](https://huggingface.co/datasets/Trelis/touch-rugby-modernbert-pairs) dataset. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
210 |
+
|
211 |
+
## Model Details
|
212 |
+
|
213 |
+
### Model Description
|
214 |
+
- **Model Type:** Sentence Transformer
|
215 |
+
- **Base model:** [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) <!-- at revision 92168cbee600b1abbfc10842aba988aa69572291 -->
|
216 |
+
- **Maximum Sequence Length:** 8192 tokens
|
217 |
+
- **Output Dimensionality:** 768 dimensions
|
218 |
+
- **Similarity Function:** Cosine Similarity
|
219 |
+
- **Training Dataset:**
|
220 |
+
- [touch-rugby-modernbert-pairs](https://huggingface.co/datasets/Trelis/touch-rugby-modernbert-pairs)
|
221 |
+
<!-- - **Language:** Unknown -->
|
222 |
+
<!-- - **License:** Unknown -->
|
223 |
+
|
224 |
+
### Model Sources
|
225 |
+
|
226 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
227 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
228 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
229 |
+
|
230 |
+
### Full Model Architecture
|
231 |
+
|
232 |
+
```
|
233 |
+
SentenceTransformer(
|
234 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ModernBertModel
|
235 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
236 |
+
(2): Normalize()
|
237 |
+
)
|
238 |
+
```
|
239 |
+
|
240 |
+
## Usage
|
241 |
+
|
242 |
+
### Direct Usage (Sentence Transformers)
|
243 |
+
|
244 |
+
First install the Sentence Transformers library:
|
245 |
+
|
246 |
+
```bash
|
247 |
+
pip install -U sentence-transformers
|
248 |
+
```
|
249 |
+
|
250 |
+
Then you can load this model and run inference.
|
251 |
+
```python
|
252 |
+
from sentence_transformers import SentenceTransformer
|
253 |
+
|
254 |
+
# Download from the 🤗 Hub
|
255 |
+
model = SentenceTransformer("Trelis/modernbert-embed-base-touch-rugby-ft")
|
256 |
+
# Run inference
|
257 |
+
sentences = [
|
258 |
+
'Is there a time-out for injuries during a standard Touch Rugby match?',
|
259 |
+
'4.9\tReferees and players may wear spectacles or sunglasses provided they are safe \nand securely attached.4.10\tReferees and players may wear sport monitoring equipment and medical \nsupports such as knee or ankle braces provided, at the sole discretion of \ncompetition’s controlling body, the items are not dangerous.5\u2002 Team Composition \n5.1\tA Team consists of a maximum of 14 players, no more than six (6) of whom are \nallowed on the field at any time.FIT Playing Rules - 5th Edition\n6\nCOPYRIGHT © Touch Football Australia 2020\nRuling = A Penalty awarded to the non-offending Team at the time the offence is identified \nseven (7) metres infield on the Halfway Line or the position of the ball, whichever is the \ngreater Advantage.5.2\tA Team must have a minimum of four (4) players on the field for a match to \ncommence or continue, except during a Drop-Off.',
|
260 |
+
'10.10\tIf a player in Possession intentionally makes a Touch on an Offside defender \nwho is making every effort to retire and remain out of play, the Touch counts.FIT Playing Rules - 5th Edition\nCOPYRIGHT © Touch Football Australia 2020\n9\n10.11\tIf a Touch is made on a player in Possession while the player is juggling the ball \nin an attempt to maintain control of it, the Touch counts if the attacking player \nfollowing the Touch retains Possession.10.12\tIf a player in Possession is Touched and subsequently makes contact with \neither the Sideline, a field marker or the ground outside the Field of Play, the \nTouch counts and play continues with a Rollball at the Mark where the Touch \noccurred.10.13\tWhen a player from the Defending Team enters its defensive Seven Metre Zone, \nthe Defending Team must move Forward at a reasonable pace until a Touch is \nImminent or made.Ruling = A Penalty to the Attacking Team at the point of the Infringement.',
|
261 |
+
]
|
262 |
+
embeddings = model.encode(sentences)
|
263 |
+
print(embeddings.shape)
|
264 |
+
# (3, 768)
|
265 |
+
|
266 |
+
# Get the similarity scores for the embeddings
|
267 |
+
similarities = model.similarity(embeddings, embeddings)
|
268 |
+
print(similarities.shape)
|
269 |
+
# torch.Size([3, 3])
|
270 |
+
```
|
271 |
+
|
272 |
+
<!--
|
273 |
+
### Direct Usage (Transformers)
|
274 |
+
|
275 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
276 |
+
|
277 |
+
</details>
|
278 |
+
-->
|
279 |
+
|
280 |
+
<!--
|
281 |
+
### Downstream Usage (Sentence Transformers)
|
282 |
+
|
283 |
+
You can finetune this model on your own dataset.
|
284 |
+
|
285 |
+
<details><summary>Click to expand</summary>
|
286 |
+
|
287 |
+
</details>
|
288 |
+
-->
|
289 |
+
|
290 |
+
<!--
|
291 |
+
### Out-of-Scope Use
|
292 |
+
|
293 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
294 |
+
-->
|
295 |
+
|
296 |
+
<!--
|
297 |
+
## Bias, Risks and Limitations
|
298 |
+
|
299 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
300 |
+
-->
|
301 |
+
|
302 |
+
<!--
|
303 |
+
### Recommendations
|
304 |
+
|
305 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
306 |
+
-->
|
307 |
+
|
308 |
+
## Training Details
|
309 |
+
|
310 |
+
### Training Dataset
|
311 |
+
|
312 |
+
#### touch-rugby-modernbert-pairs
|
313 |
+
|
314 |
+
* Dataset: [touch-rugby-modernbert-pairs](https://huggingface.co/datasets/Trelis/touch-rugby-modernbert-pairs) at [7cb0ae2](https://huggingface.co/datasets/Trelis/touch-rugby-modernbert-pairs/tree/7cb0ae2222504ad98d6ca368b68a657ba2b33e22)
|
315 |
+
* Size: 305 training samples
|
316 |
+
* Columns: <code>question</code> and <code>related_chunk</code>
|
317 |
+
* Approximate statistics based on the first 305 samples:
|
318 |
+
| | question | related_chunk |
|
319 |
+
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
320 |
+
| type | string | string |
|
321 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 18.73 tokens</li><li>max: 36 tokens</li></ul> | <ul><li>min: 147 tokens</li><li>mean: 231.2 tokens</li><li>max: 319 tokens</li></ul> |
|
322 |
+
* Samples:
|
323 |
+
| question | related_chunk |
|
324 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
325 |
+
| <code>What is defined as 'Forward' in the context of Touch Rugby?</code> | <code>Referee<br>The match official(s) appointed to make Rulings during the conduct <br>of a match.Rollball<br>The act of bringing the ball into play following a Touch or a Change <br>of Possession.Ruck/Rollball Area<br>The area, not exceeding one (1) metre in distance, between the <br>player performing a Rollball and the Half.Ruling<br>The decision made by a Referee as a result of particular <br>circumstance and may result in a Play On, a Tap Penalty, a discipline <br>option, Change of Possession or a Try.Seven Metre Zone<br>The area between the seven (7) metre line and the Try Line.See <br>Appendix 1.Sidelines<br>The side boundaries of the Field of Play.See Appendix 1.Sin Bin<br>A player sent to the Sin-Bin Area for a period of four (4) completed <br>Possessions.The player is counted as a player on the Field of Play <br>and cannot be replaced or Interchanged.</code> |
|
326 |
+
| <code>What is the numerical difference between the teams on the field of play during a Drop-Off if a player has been sent to the sin bin?</code> | <code>24.5 For the avoidance of doubt for clauses 24.3 and 24.4 the non-offending Team <br>will retain a numerical advantage on the Field of Play during the Drop-Off.25 Match Officials <br>25.1 The Referee is the sole judge on all match related matters inside the Perimeter <br>for the Duration of a match, has jurisdiction over all players, coaches and <br>officials and is required to:<br>25.1.1 Inspect the Field of Play, Line Markings and Markers prior to the <br>commencement of the Match to ensure the safety of all participants.25.1.2 Adjudicate on the Rules of the game;<br>25.1.3 Impose any sanction necessary to control the match;<br>25.1.4 Award Tries and record the progressive score;<br>25.1.5 Maintain a count of Touches during each Possession;<br>25.1.6 Award Penalties for Infringements against the Rules; and<br>25.1.7 Report to the relevant competition administration any Sin Bins, <br>Dismissals or injuries to any participant sustained during a Match.</code> |
|
327 |
+
| <code>What happens if neither team is leading after two minutes of play in a Drop-Off?</code> | <code>24.1.5 Should neither Team be leading at the expiration of two (2) minutes, a <br>signal is given and the match will pause at the next Touch or Dead Ball.Each Team will then remove another player from the Field of Play.24.1.6 The Match will recommence immediately after the players have left the <br>field at the same place where it paused (i.e.the Team retains Possession <br>at the designated number of Touches, or at Change of Possession due to <br>some Infringement or the sixth Touch) and the Match will continue until a <br>Try is scored.24.1.7 There is no time off during the Drop-Off and the clock does not stop at <br>the two (2) minute interval.24.1.8 Substitution during the Drop-Off is permitted in accordance with normal <br>Interchange Rules.24.2 Mixed gender Teams may have no more than (2) males on the field during the <br>Drop-Off.</code> |
|
328 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
329 |
+
```json
|
330 |
+
{
|
331 |
+
"scale": 20.0,
|
332 |
+
"similarity_fct": "cos_sim"
|
333 |
+
}
|
334 |
+
```
|
335 |
+
|
336 |
+
### Evaluation Dataset
|
337 |
+
|
338 |
+
#### touch-rugby-modernbert-pairs
|
339 |
+
|
340 |
+
* Dataset: [touch-rugby-modernbert-pairs](https://huggingface.co/datasets/Trelis/touch-rugby-modernbert-pairs) at [7cb0ae2](https://huggingface.co/datasets/Trelis/touch-rugby-modernbert-pairs/tree/7cb0ae2222504ad98d6ca368b68a657ba2b33e22)
|
341 |
+
* Size: 305 evaluation samples
|
342 |
+
* Columns: <code>question</code> and <code>related_chunk</code>
|
343 |
+
* Approximate statistics based on the first 305 samples:
|
344 |
+
| | question | related_chunk |
|
345 |
+
|:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
346 |
+
| type | string | string |
|
347 |
+
| details | <ul><li>min: 13 tokens</li><li>mean: 17.61 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 147 tokens</li><li>mean: 230.39 tokens</li><li>max: 319 tokens</li></ul> |
|
348 |
+
* Samples:
|
349 |
+
| question | related_chunk |
|
350 |
+
|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
351 |
+
| <code>What is the penalty for an attacking player obstructing a defender in Touch Rugby?</code> | <code>Ruling = A Penalty to the Attacking Team at the point of the Infringement or on the seven (7) <br>metre line directly Forward of the Infringement.15.4 When a Rollball occurs within Defending Team’s Seven Metre Zone or a Penalty <br>Tap within ten (10) metres of the Defending Team’s Try Line, all players from the <br>Defending Team must have both feet on or behind their Try Line and no other <br>part of the body in contact with the ground Forward of their Try Line.Ruling = A Penalty to the Attacking Team at the seven (7) metre line directly Forward of the <br>point of the Infringement.15.5 After effecting the Touch, the defending player must retire the required seven <br>(7) metres or to the Defending Try Line as indicated by the Referee without <br>interfering with the Attacking Team.Ruling = A Penalty to the Attacking Team ten (10) metres Forward of the Infringement or if <br>on the Defensive Try Line, on the seven (7) metre line.</code> |
|
352 |
+
| <code>When must a player perform a Rollball seven metres in-field?</code> | <code>13.5 A player may only perform a Rollball at the Mark under the following <br>circumstances:<br>13.5.1 when a Touch has been made; or<br>13.5.2 when Possession changes following the sixth Touch; or<br>13.5.3 when Possession changes due to the ball being dropped or passed and <br>goes to the ground; or<br>13.5.4 when Possession changes due to an Infringement by an attacking player <br>at a Penalty, a Tap or a Rollball; or<br>FIT Playing Rules - 5th Edition<br>COPYRIGHT © Touch Football Australia 2020<br>11<br>13.5.5 when Possession changes after the Half is Touched or when the Half <br>places the ball on or over the Try Line; or<br>13.5.6 in replacement of a Penalty Tap; or<br>13.5.7 when so directed by the Referee.13.6 A player is to perform a Rollball seven (7) metres in-field under the following <br>circumstances:<br>13.6.1 when a Change of Possession takes place due to a player in Possession <br>making contact with the Sideline or any ground outside the Field of Play, <br>prior to a Touch being made; or<br>13.6.2 when the ball not in Poss...</code> |
|
353 |
+
| <code>What is the ruling if a player uses excessive force when making a touch?</code> | <code>FIT Playing Rules - 5th Edition<br>8<br>COPYRIGHT © Touch Football Australia 2020<br>9.6 If a player mishandles the ball and even if in an effort to gain control, the ball <br>is accidentally knocked Forward into any other Player, a Change of Possession <br>results.10 The Touch <br>10.1 A Touch may be made by either a defending player or a player in Possession.10.2 A defending player may not claim a Touch if contact has not been made.If a <br>player claims a Touch has been made, but the Referee is unsure the Touch will <br>count.Ruling = A Penalty to the Attacking Team at the point of the Infringement and the offending <br>player sent to the Sin Bin.10.3 Players of both Defending and Attacking Teams are to use the minimum force <br>necessary to make a Touch.Players must ensure that the method employed in <br>making a Touch does not pose an unnecessary risk to player safety.Ruling = A Penalty to the non-offending Team at the point of the Infringement.</code> |
|
354 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
355 |
+
```json
|
356 |
+
{
|
357 |
+
"scale": 20.0,
|
358 |
+
"similarity_fct": "cos_sim"
|
359 |
+
}
|
360 |
+
```
|
361 |
+
|
362 |
+
### Training Hyperparameters
|
363 |
+
#### Non-Default Hyperparameters
|
364 |
+
|
365 |
+
- `eval_strategy`: steps
|
366 |
+
- `per_device_train_batch_size`: 32
|
367 |
+
- `per_device_eval_batch_size`: 32
|
368 |
+
- `learning_rate`: 2e-05
|
369 |
+
- `num_train_epochs`: 1
|
370 |
+
- `lr_scheduler_type`: cosine
|
371 |
+
- `warmup_ratio`: 0.3
|
372 |
+
|
373 |
+
#### All Hyperparameters
|
374 |
+
<details><summary>Click to expand</summary>
|
375 |
+
|
376 |
+
- `overwrite_output_dir`: False
|
377 |
+
- `do_predict`: False
|
378 |
+
- `eval_strategy`: steps
|
379 |
+
- `prediction_loss_only`: True
|
380 |
+
- `per_device_train_batch_size`: 32
|
381 |
+
- `per_device_eval_batch_size`: 32
|
382 |
+
- `per_gpu_train_batch_size`: None
|
383 |
+
- `per_gpu_eval_batch_size`: None
|
384 |
+
- `gradient_accumulation_steps`: 1
|
385 |
+
- `eval_accumulation_steps`: None
|
386 |
+
- `torch_empty_cache_steps`: None
|
387 |
+
- `learning_rate`: 2e-05
|
388 |
+
- `weight_decay`: 0.0
|
389 |
+
- `adam_beta1`: 0.9
|
390 |
+
- `adam_beta2`: 0.999
|
391 |
+
- `adam_epsilon`: 1e-08
|
392 |
+
- `max_grad_norm`: 1.0
|
393 |
+
- `num_train_epochs`: 1
|
394 |
+
- `max_steps`: -1
|
395 |
+
- `lr_scheduler_type`: cosine
|
396 |
+
- `lr_scheduler_kwargs`: {}
|
397 |
+
- `warmup_ratio`: 0.3
|
398 |
+
- `warmup_steps`: 0
|
399 |
+
- `log_level`: passive
|
400 |
+
- `log_level_replica`: warning
|
401 |
+
- `log_on_each_node`: True
|
402 |
+
- `logging_nan_inf_filter`: True
|
403 |
+
- `save_safetensors`: True
|
404 |
+
- `save_on_each_node`: False
|
405 |
+
- `save_only_model`: False
|
406 |
+
- `restore_callback_states_from_checkpoint`: False
|
407 |
+
- `no_cuda`: False
|
408 |
+
- `use_cpu`: False
|
409 |
+
- `use_mps_device`: False
|
410 |
+
- `seed`: 42
|
411 |
+
- `data_seed`: None
|
412 |
+
- `jit_mode_eval`: False
|
413 |
+
- `use_ipex`: False
|
414 |
+
- `bf16`: False
|
415 |
+
- `fp16`: False
|
416 |
+
- `fp16_opt_level`: O1
|
417 |
+
- `half_precision_backend`: auto
|
418 |
+
- `bf16_full_eval`: False
|
419 |
+
- `fp16_full_eval`: False
|
420 |
+
- `tf32`: None
|
421 |
+
- `local_rank`: 0
|
422 |
+
- `ddp_backend`: None
|
423 |
+
- `tpu_num_cores`: None
|
424 |
+
- `tpu_metrics_debug`: False
|
425 |
+
- `debug`: []
|
426 |
+
- `dataloader_drop_last`: False
|
427 |
+
- `dataloader_num_workers`: 0
|
428 |
+
- `dataloader_prefetch_factor`: None
|
429 |
+
- `past_index`: -1
|
430 |
+
- `disable_tqdm`: False
|
431 |
+
- `remove_unused_columns`: True
|
432 |
+
- `label_names`: None
|
433 |
+
- `load_best_model_at_end`: False
|
434 |
+
- `ignore_data_skip`: False
|
435 |
+
- `fsdp`: []
|
436 |
+
- `fsdp_min_num_params`: 0
|
437 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
438 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
439 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
440 |
+
- `deepspeed`: None
|
441 |
+
- `label_smoothing_factor`: 0.0
|
442 |
+
- `optim`: adamw_torch
|
443 |
+
- `optim_args`: None
|
444 |
+
- `adafactor`: False
|
445 |
+
- `group_by_length`: False
|
446 |
+
- `length_column_name`: length
|
447 |
+
- `ddp_find_unused_parameters`: None
|
448 |
+
- `ddp_bucket_cap_mb`: None
|
449 |
+
- `ddp_broadcast_buffers`: False
|
450 |
+
- `dataloader_pin_memory`: True
|
451 |
+
- `dataloader_persistent_workers`: False
|
452 |
+
- `skip_memory_metrics`: True
|
453 |
+
- `use_legacy_prediction_loop`: False
|
454 |
+
- `push_to_hub`: False
|
455 |
+
- `resume_from_checkpoint`: None
|
456 |
+
- `hub_model_id`: None
|
457 |
+
- `hub_strategy`: every_save
|
458 |
+
- `hub_private_repo`: None
|
459 |
+
- `hub_always_push`: False
|
460 |
+
- `gradient_checkpointing`: False
|
461 |
+
- `gradient_checkpointing_kwargs`: None
|
462 |
+
- `include_inputs_for_metrics`: False
|
463 |
+
- `include_for_metrics`: []
|
464 |
+
- `eval_do_concat_batches`: True
|
465 |
+
- `fp16_backend`: auto
|
466 |
+
- `push_to_hub_model_id`: None
|
467 |
+
- `push_to_hub_organization`: None
|
468 |
+
- `mp_parameters`:
|
469 |
+
- `auto_find_batch_size`: False
|
470 |
+
- `full_determinism`: False
|
471 |
+
- `torchdynamo`: None
|
472 |
+
- `ray_scope`: last
|
473 |
+
- `ddp_timeout`: 1800
|
474 |
+
- `torch_compile`: False
|
475 |
+
- `torch_compile_backend`: None
|
476 |
+
- `torch_compile_mode`: None
|
477 |
+
- `dispatch_batches`: None
|
478 |
+
- `split_batches`: None
|
479 |
+
- `include_tokens_per_second`: False
|
480 |
+
- `include_num_input_tokens_seen`: False
|
481 |
+
- `neftune_noise_alpha`: None
|
482 |
+
- `optim_target_modules`: None
|
483 |
+
- `batch_eval_metrics`: False
|
484 |
+
- `eval_on_start`: False
|
485 |
+
- `use_liger_kernel`: False
|
486 |
+
- `eval_use_gather_object`: False
|
487 |
+
- `average_tokens_across_devices`: False
|
488 |
+
- `prompts`: None
|
489 |
+
- `batch_sampler`: batch_sampler
|
490 |
+
- `multi_dataset_batch_sampler`: proportional
|
491 |
+
|
492 |
+
</details>
|
493 |
+
|
494 |
+
### Training Logs
|
495 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
496 |
+
|:------:|:----:|:-------------:|:---------------:|
|
497 |
+
| 0.2222 | 2 | 2.7671 | nan |
|
498 |
+
| 0.4444 | 4 | 0.0 | nan |
|
499 |
+
| 0.6667 | 6 | 0.0 | nan |
|
500 |
+
| 0.8889 | 8 | 0.0 | nan |
|
501 |
+
|
502 |
+
|
503 |
+
### Framework Versions
|
504 |
+
- Python: 3.12.4
|
505 |
+
- Sentence Transformers: 3.3.1
|
506 |
+
- Transformers: 4.48.0
|
507 |
+
- PyTorch: 2.5.1
|
508 |
+
- Accelerate: 1.3.0
|
509 |
+
- Datasets: 2.17.1
|
510 |
+
- Tokenizers: 0.21.0
|
511 |
+
|
512 |
+
## Citation
|
513 |
+
|
514 |
+
### BibTeX
|
515 |
+
|
516 |
+
#### Sentence Transformers
|
517 |
+
```bibtex
|
518 |
+
@inproceedings{reimers-2019-sentence-bert,
|
519 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
520 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
521 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
522 |
+
month = "11",
|
523 |
+
year = "2019",
|
524 |
+
publisher = "Association for Computational Linguistics",
|
525 |
+
url = "https://arxiv.org/abs/1908.10084",
|
526 |
+
}
|
527 |
+
```
|
528 |
+
|
529 |
+
#### MultipleNegativesRankingLoss
|
530 |
+
```bibtex
|
531 |
+
@misc{henderson2017efficient,
|
532 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
533 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
534 |
+
year={2017},
|
535 |
+
eprint={1705.00652},
|
536 |
+
archivePrefix={arXiv},
|
537 |
+
primaryClass={cs.CL}
|
538 |
+
}
|
539 |
+
```
|
540 |
+
|
541 |
+
<!--
|
542 |
+
## Glossary
|
543 |
+
|
544 |
+
*Clearly define terms in order to be accessible across audiences.*
|
545 |
+
-->
|
546 |
+
|
547 |
+
<!--
|
548 |
+
## Model Card Authors
|
549 |
+
|
550 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
551 |
+
-->
|
552 |
+
|
553 |
+
<!--
|
554 |
+
## Model Card Contact
|
555 |
+
|
556 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
557 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nomic-ai/modernbert-embed-base",
|
3 |
+
"architectures": [
|
4 |
+
"ModernBertModel"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 50281,
|
9 |
+
"classifier_activation": "gelu",
|
10 |
+
"classifier_bias": false,
|
11 |
+
"classifier_dropout": 0.0,
|
12 |
+
"classifier_pooling": "mean",
|
13 |
+
"cls_token_id": 50281,
|
14 |
+
"decoder_bias": true,
|
15 |
+
"deterministic_flash_attn": false,
|
16 |
+
"embedding_dropout": 0.0,
|
17 |
+
"eos_token_id": 50282,
|
18 |
+
"global_attn_every_n_layers": 3,
|
19 |
+
"global_rope_theta": 160000.0,
|
20 |
+
"gradient_checkpointing": false,
|
21 |
+
"hidden_activation": "gelu",
|
22 |
+
"hidden_size": 768,
|
23 |
+
"initializer_cutoff_factor": 2.0,
|
24 |
+
"initializer_range": 0.02,
|
25 |
+
"intermediate_size": 1152,
|
26 |
+
"layer_norm_eps": 1e-05,
|
27 |
+
"local_attention": 128,
|
28 |
+
"local_rope_theta": 10000.0,
|
29 |
+
"max_position_embeddings": 8192,
|
30 |
+
"mlp_bias": false,
|
31 |
+
"mlp_dropout": 0.0,
|
32 |
+
"model_type": "modernbert",
|
33 |
+
"norm_bias": false,
|
34 |
+
"norm_eps": 1e-05,
|
35 |
+
"num_attention_heads": 12,
|
36 |
+
"num_hidden_layers": 22,
|
37 |
+
"pad_token_id": 50283,
|
38 |
+
"position_embedding_type": "absolute",
|
39 |
+
"reference_compile": false,
|
40 |
+
"repad_logits_with_grad": false,
|
41 |
+
"sep_token_id": 50282,
|
42 |
+
"sparse_pred_ignore_index": -100,
|
43 |
+
"sparse_prediction": false,
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.48.0",
|
46 |
+
"vocab_size": 50368
|
47 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.3.1",
|
4 |
+
"transformers": "4.48.0",
|
5 |
+
"pytorch": "2.5.1"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c17a60cc07bc4248e3ffd4d05a9ea38401c38601df4e40ae334d15d280c9350
|
3 |
+
size 596070136
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": true,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "|||IP_ADDRESS|||",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": true,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": false
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<|padding|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"50254": {
|
20 |
+
"content": " ",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": true,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": false
|
26 |
+
},
|
27 |
+
"50255": {
|
28 |
+
"content": " ",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
},
|
35 |
+
"50256": {
|
36 |
+
"content": " ",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": false
|
42 |
+
},
|
43 |
+
"50257": {
|
44 |
+
"content": " ",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"50258": {
|
52 |
+
"content": " ",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"50259": {
|
60 |
+
"content": " ",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": true,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"50260": {
|
68 |
+
"content": " ",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": true,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"50261": {
|
76 |
+
"content": " ",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": true,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": false
|
82 |
+
},
|
83 |
+
"50262": {
|
84 |
+
"content": " ",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": true,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": false
|
90 |
+
},
|
91 |
+
"50263": {
|
92 |
+
"content": " ",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": true,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": false
|
98 |
+
},
|
99 |
+
"50264": {
|
100 |
+
"content": " ",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": true,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": false
|
106 |
+
},
|
107 |
+
"50265": {
|
108 |
+
"content": " ",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": true,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": false
|
114 |
+
},
|
115 |
+
"50266": {
|
116 |
+
"content": " ",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": true,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": false
|
122 |
+
},
|
123 |
+
"50267": {
|
124 |
+
"content": " ",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": true,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": false
|
130 |
+
},
|
131 |
+
"50268": {
|
132 |
+
"content": " ",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": true,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": false
|
138 |
+
},
|
139 |
+
"50269": {
|
140 |
+
"content": " ",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": true,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": false
|
146 |
+
},
|
147 |
+
"50270": {
|
148 |
+
"content": " ",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": true,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": false
|
154 |
+
},
|
155 |
+
"50271": {
|
156 |
+
"content": " ",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": true,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": false
|
162 |
+
},
|
163 |
+
"50272": {
|
164 |
+
"content": " ",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": true,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": false
|
170 |
+
},
|
171 |
+
"50273": {
|
172 |
+
"content": " ",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": true,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": false
|
178 |
+
},
|
179 |
+
"50274": {
|
180 |
+
"content": " ",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": true,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": false
|
186 |
+
},
|
187 |
+
"50275": {
|
188 |
+
"content": " ",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": true,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": false
|
194 |
+
},
|
195 |
+
"50276": {
|
196 |
+
"content": " ",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": true,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": false
|
202 |
+
},
|
203 |
+
"50277": {
|
204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": true,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": false
|
210 |
+
},
|
211 |
+
"50278": {
|
212 |
+
"content": "|||PHONE_NUMBER|||",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": true,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": false
|
218 |
+
},
|
219 |
+
"50279": {
|
220 |
+
"content": "<|endoftext|>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"50280": {
|
228 |
+
"content": "[UNK]",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"50281": {
|
236 |
+
"content": "[CLS]",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"50282": {
|
244 |
+
"content": "[SEP]",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"50283": {
|
252 |
+
"content": "[PAD]",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"50284": {
|
260 |
+
"content": "[MASK]",
|
261 |
+
"lstrip": true,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"50285": {
|
268 |
+
"content": "[unused0]",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": true,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": false
|
274 |
+
},
|
275 |
+
"50286": {
|
276 |
+
"content": "[unused1]",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": true,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": false
|
282 |
+
},
|
283 |
+
"50287": {
|
284 |
+
"content": "[unused2]",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": true,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": false
|
290 |
+
},
|
291 |
+
"50288": {
|
292 |
+
"content": "[unused3]",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": true,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": false
|
298 |
+
},
|
299 |
+
"50289": {
|
300 |
+
"content": "[unused4]",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": true,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": false
|
306 |
+
},
|
307 |
+
"50290": {
|
308 |
+
"content": "[unused5]",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": true,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": false
|
314 |
+
},
|
315 |
+
"50291": {
|
316 |
+
"content": "[unused6]",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": true,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": false
|
322 |
+
},
|
323 |
+
"50292": {
|
324 |
+
"content": "[unused7]",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": true,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": false
|
330 |
+
},
|
331 |
+
"50293": {
|
332 |
+
"content": "[unused8]",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": true,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": false
|
338 |
+
},
|
339 |
+
"50294": {
|
340 |
+
"content": "[unused9]",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": true,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": false
|
346 |
+
},
|
347 |
+
"50295": {
|
348 |
+
"content": "[unused10]",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": true,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": false
|
354 |
+
},
|
355 |
+
"50296": {
|
356 |
+
"content": "[unused11]",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": true,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": false
|
362 |
+
},
|
363 |
+
"50297": {
|
364 |
+
"content": "[unused12]",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": true,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": false
|
370 |
+
},
|
371 |
+
"50298": {
|
372 |
+
"content": "[unused13]",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": true,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": false
|
378 |
+
},
|
379 |
+
"50299": {
|
380 |
+
"content": "[unused14]",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": true,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": false
|
386 |
+
},
|
387 |
+
"50300": {
|
388 |
+
"content": "[unused15]",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": true,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": false
|
394 |
+
},
|
395 |
+
"50301": {
|
396 |
+
"content": "[unused16]",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": true,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": false
|
402 |
+
},
|
403 |
+
"50302": {
|
404 |
+
"content": "[unused17]",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": true,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": false
|
410 |
+
},
|
411 |
+
"50303": {
|
412 |
+
"content": "[unused18]",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": true,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"50304": {
|
420 |
+
"content": "[unused19]",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": true,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"50305": {
|
428 |
+
"content": "[unused20]",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": true,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"50306": {
|
436 |
+
"content": "[unused21]",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": true,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"50307": {
|
444 |
+
"content": "[unused22]",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": true,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"50308": {
|
452 |
+
"content": "[unused23]",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": true,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"50309": {
|
460 |
+
"content": "[unused24]",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": true,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"50310": {
|
468 |
+
"content": "[unused25]",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": true,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"50311": {
|
476 |
+
"content": "[unused26]",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": true,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"50312": {
|
484 |
+
"content": "[unused27]",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": true,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
+
},
|
491 |
+
"50313": {
|
492 |
+
"content": "[unused28]",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": true,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": false
|
498 |
+
},
|
499 |
+
"50314": {
|
500 |
+
"content": "[unused29]",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": true,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": false
|
506 |
+
},
|
507 |
+
"50315": {
|
508 |
+
"content": "[unused30]",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": true,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": false
|
514 |
+
},
|
515 |
+
"50316": {
|
516 |
+
"content": "[unused31]",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": true,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": false
|
522 |
+
},
|
523 |
+
"50317": {
|
524 |
+
"content": "[unused32]",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": true,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": false
|
530 |
+
},
|
531 |
+
"50318": {
|
532 |
+
"content": "[unused33]",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": true,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": false
|
538 |
+
},
|
539 |
+
"50319": {
|
540 |
+
"content": "[unused34]",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": true,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": false
|
546 |
+
},
|
547 |
+
"50320": {
|
548 |
+
"content": "[unused35]",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": true,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": false
|
554 |
+
},
|
555 |
+
"50321": {
|
556 |
+
"content": "[unused36]",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": true,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": false
|
562 |
+
},
|
563 |
+
"50322": {
|
564 |
+
"content": "[unused37]",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": true,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": false
|
570 |
+
},
|
571 |
+
"50323": {
|
572 |
+
"content": "[unused38]",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": true,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": false
|
578 |
+
},
|
579 |
+
"50324": {
|
580 |
+
"content": "[unused39]",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": true,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": false
|
586 |
+
},
|
587 |
+
"50325": {
|
588 |
+
"content": "[unused40]",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": true,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": false
|
594 |
+
},
|
595 |
+
"50326": {
|
596 |
+
"content": "[unused41]",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": true,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": false
|
602 |
+
},
|
603 |
+
"50327": {
|
604 |
+
"content": "[unused42]",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": true,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": false
|
610 |
+
},
|
611 |
+
"50328": {
|
612 |
+
"content": "[unused43]",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": true,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": false
|
618 |
+
},
|
619 |
+
"50329": {
|
620 |
+
"content": "[unused44]",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": true,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": false
|
626 |
+
},
|
627 |
+
"50330": {
|
628 |
+
"content": "[unused45]",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": true,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": false
|
634 |
+
},
|
635 |
+
"50331": {
|
636 |
+
"content": "[unused46]",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": true,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": false
|
642 |
+
},
|
643 |
+
"50332": {
|
644 |
+
"content": "[unused47]",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": true,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": false
|
650 |
+
},
|
651 |
+
"50333": {
|
652 |
+
"content": "[unused48]",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": true,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": false
|
658 |
+
},
|
659 |
+
"50334": {
|
660 |
+
"content": "[unused49]",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": true,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": false
|
666 |
+
},
|
667 |
+
"50335": {
|
668 |
+
"content": "[unused50]",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": true,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": false
|
674 |
+
},
|
675 |
+
"50336": {
|
676 |
+
"content": "[unused51]",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": true,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": false
|
682 |
+
},
|
683 |
+
"50337": {
|
684 |
+
"content": "[unused52]",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": true,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": false
|
690 |
+
},
|
691 |
+
"50338": {
|
692 |
+
"content": "[unused53]",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": true,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": false
|
698 |
+
},
|
699 |
+
"50339": {
|
700 |
+
"content": "[unused54]",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": true,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": false
|
706 |
+
},
|
707 |
+
"50340": {
|
708 |
+
"content": "[unused55]",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": true,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": false
|
714 |
+
},
|
715 |
+
"50341": {
|
716 |
+
"content": "[unused56]",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": true,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": false
|
722 |
+
},
|
723 |
+
"50342": {
|
724 |
+
"content": "[unused57]",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": true,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": false
|
730 |
+
},
|
731 |
+
"50343": {
|
732 |
+
"content": "[unused58]",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": true,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": false
|
738 |
+
},
|
739 |
+
"50344": {
|
740 |
+
"content": "[unused59]",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": true,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": false
|
746 |
+
},
|
747 |
+
"50345": {
|
748 |
+
"content": "[unused60]",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": true,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": false
|
754 |
+
},
|
755 |
+
"50346": {
|
756 |
+
"content": "[unused61]",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": true,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": false
|
762 |
+
},
|
763 |
+
"50347": {
|
764 |
+
"content": "[unused62]",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": true,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": false
|
770 |
+
},
|
771 |
+
"50348": {
|
772 |
+
"content": "[unused63]",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": true,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": false
|
778 |
+
},
|
779 |
+
"50349": {
|
780 |
+
"content": "[unused64]",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": true,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": false
|
786 |
+
},
|
787 |
+
"50350": {
|
788 |
+
"content": "[unused65]",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": true,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": false
|
794 |
+
},
|
795 |
+
"50351": {
|
796 |
+
"content": "[unused66]",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": true,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": false
|
802 |
+
},
|
803 |
+
"50352": {
|
804 |
+
"content": "[unused67]",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": true,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": false
|
810 |
+
},
|
811 |
+
"50353": {
|
812 |
+
"content": "[unused68]",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": true,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": false
|
818 |
+
},
|
819 |
+
"50354": {
|
820 |
+
"content": "[unused69]",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": true,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": false
|
826 |
+
},
|
827 |
+
"50355": {
|
828 |
+
"content": "[unused70]",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": true,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"50356": {
|
836 |
+
"content": "[unused71]",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": true,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": false
|
842 |
+
},
|
843 |
+
"50357": {
|
844 |
+
"content": "[unused72]",
|
845 |
+
"lstrip": false,
|
846 |
+
"normalized": true,
|
847 |
+
"rstrip": false,
|
848 |
+
"single_word": false,
|
849 |
+
"special": false
|
850 |
+
},
|
851 |
+
"50358": {
|
852 |
+
"content": "[unused73]",
|
853 |
+
"lstrip": false,
|
854 |
+
"normalized": true,
|
855 |
+
"rstrip": false,
|
856 |
+
"single_word": false,
|
857 |
+
"special": false
|
858 |
+
},
|
859 |
+
"50359": {
|
860 |
+
"content": "[unused74]",
|
861 |
+
"lstrip": false,
|
862 |
+
"normalized": true,
|
863 |
+
"rstrip": false,
|
864 |
+
"single_word": false,
|
865 |
+
"special": false
|
866 |
+
},
|
867 |
+
"50360": {
|
868 |
+
"content": "[unused75]",
|
869 |
+
"lstrip": false,
|
870 |
+
"normalized": true,
|
871 |
+
"rstrip": false,
|
872 |
+
"single_word": false,
|
873 |
+
"special": false
|
874 |
+
},
|
875 |
+
"50361": {
|
876 |
+
"content": "[unused76]",
|
877 |
+
"lstrip": false,
|
878 |
+
"normalized": true,
|
879 |
+
"rstrip": false,
|
880 |
+
"single_word": false,
|
881 |
+
"special": false
|
882 |
+
},
|
883 |
+
"50362": {
|
884 |
+
"content": "[unused77]",
|
885 |
+
"lstrip": false,
|
886 |
+
"normalized": true,
|
887 |
+
"rstrip": false,
|
888 |
+
"single_word": false,
|
889 |
+
"special": false
|
890 |
+
},
|
891 |
+
"50363": {
|
892 |
+
"content": "[unused78]",
|
893 |
+
"lstrip": false,
|
894 |
+
"normalized": true,
|
895 |
+
"rstrip": false,
|
896 |
+
"single_word": false,
|
897 |
+
"special": false
|
898 |
+
},
|
899 |
+
"50364": {
|
900 |
+
"content": "[unused79]",
|
901 |
+
"lstrip": false,
|
902 |
+
"normalized": true,
|
903 |
+
"rstrip": false,
|
904 |
+
"single_word": false,
|
905 |
+
"special": false
|
906 |
+
},
|
907 |
+
"50365": {
|
908 |
+
"content": "[unused80]",
|
909 |
+
"lstrip": false,
|
910 |
+
"normalized": true,
|
911 |
+
"rstrip": false,
|
912 |
+
"single_word": false,
|
913 |
+
"special": false
|
914 |
+
},
|
915 |
+
"50366": {
|
916 |
+
"content": "[unused81]",
|
917 |
+
"lstrip": false,
|
918 |
+
"normalized": true,
|
919 |
+
"rstrip": false,
|
920 |
+
"single_word": false,
|
921 |
+
"special": false
|
922 |
+
},
|
923 |
+
"50367": {
|
924 |
+
"content": "[unused82]",
|
925 |
+
"lstrip": false,
|
926 |
+
"normalized": true,
|
927 |
+
"rstrip": false,
|
928 |
+
"single_word": false,
|
929 |
+
"special": false
|
930 |
+
}
|
931 |
+
},
|
932 |
+
"clean_up_tokenization_spaces": true,
|
933 |
+
"cls_token": "[CLS]",
|
934 |
+
"extra_special_tokens": {},
|
935 |
+
"mask_token": "[MASK]",
|
936 |
+
"model_input_names": [
|
937 |
+
"input_ids",
|
938 |
+
"attention_mask"
|
939 |
+
],
|
940 |
+
"model_max_length": 8192,
|
941 |
+
"pad_token": "[PAD]",
|
942 |
+
"sep_token": "[SEP]",
|
943 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
944 |
+
"unk_token": "[UNK]"
|
945 |
+
}
|