deepapaikar
committed on
Upload folder using huggingface_hub
Browse files- 1_Pooling/config.json +10 -0
- README.md +509 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,509 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: thenlper/gte-small
|
3 |
+
library_name: sentence-transformers
|
4 |
+
pipeline_tag: sentence-similarity
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:4319
|
11 |
+
- loss:MultipleNegativesRankingLoss
|
12 |
+
widget:
|
13 |
+
- source_sentence: whine stare jelli comfort fairmount former poni guttur innoc latitud
|
14 |
+
ceas firm spoil impress base sentiment aeroplan globe usurp monogram keen frau
|
15 |
+
opposit reimburs express ever craze oil bade directric save notic helmet proper
|
16 |
+
schmierkäs pale engrav chateau pair sleep cautious audibl squar disinclin keeper
|
17 |
+
gosh ravag brigandish stammer death colleg suit treason year storekeep rib wake
|
18 |
+
gaunt appear perforc afterward intim noisi goodi fear illumin marvel mantel volum
|
19 |
+
health belief soften conclus rode social sip tip mina wing suppli tommi slight
|
20 |
+
pomatum bombard simpli foreign mustard leg semi twist bend helen flush rough wound
|
21 |
+
rifl unqualifi shill soon enorm gloomi depress scratch soil car broken slam count
|
22 |
+
grace millineri pour swirl depriv smell strode direct wholli method modest shift
|
23 |
+
immigr finger leon differ worker fetch advis hord affair ardmor rivet white four
|
24 |
+
occup burst ridg exclam knuckl interest dead dentist wreckag drizzl kraus famous
|
25 |
+
chariti vite permiss jabez written twice pile loyalti frenchman shine wave meat
|
26 |
+
rang civilian deserv forev slip halt see charact feed foremost uniform chime within
|
27 |
+
suspect mount bash often lath outlin aim straw distract past erupt seren whoever
|
28 |
+
whenev fall doughboy remain scoot lawn task spectat altar bavarian spirit either
|
29 |
+
sold individu hillsid easili wick dream brain guess floor start decor incid dramat
|
30 |
+
condit guest accus monteith rumbl camp wherev truer blanket brown puzzl distanc
|
31 |
+
sight addit head withdrew everybodi ahead morn sought lost besid fli snatch local
|
32 |
+
mutil contribut pat cover behalf contest whistl futur hope patriot weari law area
|
33 |
+
brancardi block nose push girl conceiv offer fisherman stroll appeal bail stone
|
34 |
+
suppos caught amaz subsid bundl tack poorest roadsid princ miss clamor held retir
|
35 |
+
felt signific annex 1918 wild come whether demur mutter hesit panopli hetti soundless
|
36 |
+
felin patri bombast dyke concoct busili result impati covert stall mademoisell
|
37 |
+
danger moon grave abrupt dim goggl light overturn brought imaginari statement
|
38 |
+
commiss unbroken found
|
39 |
+
sentences:
|
40 |
+
- Is the content related to fiction genere
|
41 |
+
- Is the content related to non-fiction genere
|
42 |
+
- Is the content related to fiction genere
|
43 |
+
- source_sentence: condemn elaps reunion sword swept brow file kept hors high love
|
44 |
+
laughter phase heavier roof screen process fire skill deck moan remov moqui turn
|
45 |
+
welfar suspici buri countersign accept shrewd life cloud enjoy stiff rave ripe
|
46 |
+
insist shut pressur rule barb contact horizont handcart nutshel circumst oblig
|
47 |
+
attract summer brew sens gas raptur lest glimps depart ought rattler scene boat
|
48 |
+
tone price good famili valu wooden machin wheelwright dismiss instrument soul
|
49 |
+
self cannon scotch reconsid cling pall unfold temporarili influenc esteem astir
|
50 |
+
first tide stock lamp decid hush sign none choic note particl fizzl call entrench
|
51 |
+
steel retail horribl retain throw els temptat follow terribl labor kiss vers prouder
|
52 |
+
dawn cane anthoni wear quarter ransom flea unkind charg stream namur anoth skillet
|
53 |
+
glad bustl woebegon greater emin cord batch dame badg briefli shini answer lodg
|
54 |
+
bolt east suspicion milk lookout pronounc detect villa conclud hurt heard sprinter
|
55 |
+
charl neither wrinkl look associ reach pilot spite furi messag copybook dispos
|
56 |
+
rush plummet commenc adventur eventu left emperor strongest thank popul truth
|
57 |
+
chooser feroci gruel smash without halfbre bled possess engin rub athlet sympathet
|
58 |
+
riski bold thirti undertak surmount astronomi told citizen furnitur tenac do laddi
|
59 |
+
albert netherland princip assail brief poss hero drew swift rake philip victim
|
60 |
+
broil rate object thereof higher press discov conjectur cement clumsi tribut whirl
|
61 |
+
unaid great assur burnt alter bridg invad sprawl succeed think valuabl conquer
|
62 |
+
billow molest shaki motiv develop ruefulli bullet pretend dread special unruli
|
63 |
+
insinu confin vein reckon defi supplement sale popular ghost unpleas opportun
|
64 |
+
zeppelin heap pigskin readi fame sore forlorn seventh luncheon difficulti oven
|
65 |
+
sledg meredith interrupt linen sank live mistak hast cherish ambuscad mistaken
|
66 |
+
egg bridl whole neck snake pulp even cours gallant vocabulari protest repent tubbi
|
67 |
+
anchorag stay shuttl import allay plenti convict blindfold thousand timber crown
|
68 |
+
owner boundari echo suffic poke nearer
|
69 |
+
sentences:
|
70 |
+
- Is the content related to romance genere
|
71 |
+
- Is the content related to romance genere
|
72 |
+
- Is the content related to non-fiction genere
|
73 |
+
- source_sentence: hugo fume immortell memori shrine salut end withdraw potenti famin
|
74 |
+
stain scrawl gross avranch accomplish forgo queen tardet torn spoke rhone freedom
|
75 |
+
anglo priest boar bohemia rubicon discern collar myth lie captur cite uninstruct
|
76 |
+
waterwork child exist recoup eav commonwealth algebra hundr uphil whimsic wit
|
77 |
+
dignifi agreement trap draft ripost strait excit asylum conceal alfr nervii william
|
78 |
+
ask footnot inmost astonish hypothesi crisi spur deepest coalesc bottl fabric
|
79 |
+
pillar domqueur omnibus provoc cliff stuf spectacl rare mosqu hunger upkeep visit
|
80 |
+
magic scot melbourn frozen bind visigoth band portus complain smallest glasgow
|
81 |
+
irrig obtain starv hoof mould passion trial russia lad signal acquiesc john keat
|
82 |
+
sheet huntsman acceler immun easier trouser papaci mental expans ear roncesvall
|
83 |
+
topograph napl increas stupend bayeux fulli household tomfooleri briberi argonn
|
84 |
+
iliad contour elsewher inevit curios clean exterior snow generous normal leather
|
85 |
+
1030 mankind agincourt product apologia laughabl outer northeast stead appal kick
|
86 |
+
indic limeston algier river stage bout indomit residenti measur paradis vengeanc
|
87 |
+
statist observ ball aumal scientif guardian impecc rhetor crimin majest visibl
|
88 |
+
doorstep dauphin blown mantelshelf bethlehem earlier peculiar compli snort rocquefort
|
89 |
+
snowstorm tore llygnant expound unroof thenc misfortun tenpenc bookstal crosier
|
90 |
+
bowl reform expect fantast £100 barg prima notabl rock malign croker southward
|
91 |
+
scoundrel margin militari capstan injur decay prime amount sprang decis attitud
|
92 |
+
incapac brood monday econom valparaiso evid dimmer drought receptacl aesthet walk
|
93 |
+
fraud truli 1905 volley allen leader rumin troubl activ irredeem harmoni headlong
|
94 |
+
slant prig guadarrama peasantri began coupl climax british profound ayrshir tenth
|
95 |
+
convey platform will drunk proport cinematograph imago mother prolong equabl brick
|
96 |
+
approxim search foothil cun truest crusad report woman step tenur might anim frame
|
97 |
+
gaze threaten bosh renom explan whose crier parasit compel quest surviv lisieux
|
98 |
+
monograph hunt real
|
99 |
+
sentences:
|
100 |
+
- Is the content related to non-fiction genere
|
101 |
+
- Is the content related to romance genere
|
102 |
+
- Is the content related to romance genere
|
103 |
+
- source_sentence: argu hubertmil grace copious alphabet plombier beaumont fete fontanel
|
104 |
+
profus treasuri boyer fleurieu unworthili subprefect stuttgart majesti disinterest
|
105 |
+
hilar perish finger valencienn hord quell occup varieti madman diabl preoccupi
|
106 |
+
interest gras ordeal legion soap nail tract ostrich infanta imbu swaddl picturesqu
|
107 |
+
cent canon robust blasphemi titl pfister slay provenc outlin award indol past
|
108 |
+
pillag erupt sweat lawn remain fortif spectat pallid decreas resum gros heartrend
|
109 |
+
dizzi costaz hillsid guess sourc umbrella talleyrand kremlin overshadow dramat
|
110 |
+
condit blotch purport drank flore wherev circus chessman addit withdrew vien regim
|
111 |
+
benign 1792 brenta nazzolini cover snatch local contest contribut sabin victoir
|
112 |
+
walsh joiner necessarili wealth law talisman payment ivri vent raider push pichegru
|
113 |
+
beri conceiv pleiss haversack epidem steepl allot subsid embroid religi volney
|
114 |
+
broadest clamor summari wild inexhaust serent come transmiss vase midway danger
|
115 |
+
lill warrior guastalla gravest canouvill stormi partridg solemn drawer bray benefact
|
116 |
+
hurri ladl uphold chicken bereft ghiesubel let cower pacifi dragoon element smother
|
117 |
+
inexcus humili yield engag intermingl camel pillow must adversari modern heavili
|
118 |
+
writer porcelain harvill thing leav horizon baudemont freder carnot relat phrase
|
119 |
+
mistress adolph 1814 postur sordi moskwa quadrill alp arrang buit devast renown
|
120 |
+
deepli wismar three belliger domin ness disappoint cordial mahogani sampl 1788
|
121 |
+
poultri orosman surrend succumb commerc late promulg reput bouill legislatur compar
|
122 |
+
finess fair carbonari round danc crush particip move mold huddl benumb overlook
|
123 |
+
cleopatra varengo erad delic rais trumpet emili xviii calcul specimen suburb geograph
|
124 |
+
ukrain surveil disparag sceptr hinguerlot enlarg discus steer aright assum maul
|
125 |
+
loud oper cher immedi exploit fesch wealthi filial lieg imperator sleepi marriag
|
126 |
+
nich rise pend baudin revolv stupor fool voic manner malet thereupon cargo orfevr
|
127 |
+
mountain general chariot moldrecht immens amend sulmett allevi flew intox poet
|
128 |
+
laid indemn ugli
|
129 |
+
sentences:
|
130 |
+
- Is the content related to romance genere
|
131 |
+
- Is the content related to non-fiction genere
|
132 |
+
- Is the content related to romance genere
|
133 |
+
- source_sentence: celebr hitt correspond windmil doivent take june hove sequel petition
|
134 |
+
hamlet crash mond knotti grudg sportsman prowl morrow semblanc jargon reap full
|
135 |
+
ancestress cheruel manabozho merit buoy governor dine plain misstat grand dwelt
|
136 |
+
fir kind joint around hound san moranget cricket confirm frosti balk straggl regret
|
137 |
+
tenant invoc crop fervent tie uncharit savag omaha chassagoac conqueror infer
|
138 |
+
repast crack répondu mèmoir splendor anywher match sept divan prey caus pratiqu
|
139 |
+
theft dot disguis crime chaff incubus ouabouskiaou strike regardless disk croyant
|
140 |
+
auec top droitur brulé 1701 much infuri morass misconceiv back rigg midnight atroci
|
141 |
+
femm audess disput avail reluct tree shield andast peac solac utica set déchargent
|
142 |
+
ouasi resté lock nativ kaskaskia negoti renounc confeder crude luth part horseback
|
143 |
+
treacher orang réserv sit speedili mohegan enmiti pretens motionless giraff platt
|
144 |
+
estr clap accliv proceed pervers access fish probabl ambassador faillon visag
|
145 |
+
extend bow ottawa islinoi vexilla diver foment accuraci canton loutr bark level
|
146 |
+
spring asthmat carolina term assent antonio considér jesuit bishop disprov daumont
|
147 |
+
aver tangibao seneca amiti defect letter confluenc french dabbl threshold tomb
|
148 |
+
inquiri travel proprieti bush espèc idl dreami document descend courag foray downward
|
149 |
+
fring sandston incorrect parrot menez expressli displeasur eagl sépultur indec
|
150 |
+
escarpé dens strip quiet mush eastern evinc natur pick honnêt coureur 83me eighti
|
151 |
+
lichen toriman bell cachent confer stealthili spear waist catharin transfer merg
|
152 |
+
ferland gratitud blue friabl paw forget prochain risk caution still generos awar
|
153 |
+
burlesqu concentr mingl cinquièm pourtant altern us somebodi suppress unscrupul
|
154 |
+
discord coat dog pierron loup campaign mangèrent cloth theme rope unnatur discipl
|
155 |
+
haw battl superfici spendthrift empti tavern threat épuisé deliv deceas vicious
|
156 |
+
employ trunk endow notwithstand jansenist baptism offend sustain complic almost
|
157 |
+
larger commit villag invect green careen ownership request lightn braveri sunday
|
158 |
+
remedi current
|
159 |
+
sentences:
|
160 |
+
- Is the content related to romance genere
|
161 |
+
- Is the content related to non-fiction genere
|
162 |
+
- Is the content related to romance genere
|
163 |
+
---
|
164 |
+
|
165 |
+
# SentenceTransformer based on thenlper/gte-small
|
166 |
+
|
167 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [thenlper/gte-small](https://huggingface.co/thenlper/gte-small). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
168 |
+
|
169 |
+
## Model Details
|
170 |
+
|
171 |
+
### Model Description
|
172 |
+
- **Model Type:** Sentence Transformer
|
173 |
+
- **Base model:** [thenlper/gte-small](https://huggingface.co/thenlper/gte-small) <!-- at revision 50c7dd33df1027ef560fd504d95e277948c3c886 -->
|
174 |
+
- **Maximum Sequence Length:** 512 tokens
|
175 |
+
- **Output Dimensionality:** 384 dimensions
|
176 |
+
- **Similarity Function:** Cosine Similarity
|
177 |
+
<!-- - **Training Dataset:** Unknown -->
|
178 |
+
<!-- - **Language:** Unknown -->
|
179 |
+
<!-- - **License:** Unknown -->
|
180 |
+
|
181 |
+
### Model Sources
|
182 |
+
|
183 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
184 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
185 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
186 |
+
|
187 |
+
### Full Model Architecture
|
188 |
+
|
189 |
+
```
|
190 |
+
SentenceTransformer(
|
191 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
|
192 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
193 |
+
(2): Normalize()
|
194 |
+
)
|
195 |
+
```
|
196 |
+
|
197 |
+
## Usage
|
198 |
+
|
199 |
+
### Direct Usage (Sentence Transformers)
|
200 |
+
|
201 |
+
First install the Sentence Transformers library:
|
202 |
+
|
203 |
+
```bash
|
204 |
+
pip install -U sentence-transformers
|
205 |
+
```
|
206 |
+
|
207 |
+
Then you can load this model and run inference.
|
208 |
+
```python
|
209 |
+
from sentence_transformers import SentenceTransformer
|
210 |
+
|
211 |
+
# Download from the 🤗 Hub
|
212 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
213 |
+
# Run inference
|
214 |
+
sentences = [
|
215 |
+
'celebr hitt correspond windmil doivent take june hove sequel petition hamlet crash mond knotti grudg sportsman prowl morrow semblanc jargon reap full ancestress cheruel manabozho merit buoy governor dine plain misstat grand dwelt fir kind joint around hound san moranget cricket confirm frosti balk straggl regret tenant invoc crop fervent tie uncharit savag omaha chassagoac conqueror infer repast crack répondu mèmoir splendor anywher match sept divan prey caus pratiqu theft dot disguis crime chaff incubus ouabouskiaou strike regardless disk croyant auec top droitur brulé 1701 much infuri morass misconceiv back rigg midnight atroci femm audess disput avail reluct tree shield andast peac solac utica set déchargent ouasi resté lock nativ kaskaskia negoti renounc confeder crude luth part horseback treacher orang réserv sit speedili mohegan enmiti pretens motionless giraff platt estr clap accliv proceed pervers access fish probabl ambassador faillon visag extend bow ottawa islinoi vexilla diver foment accuraci canton loutr bark level spring asthmat carolina term assent antonio considér jesuit bishop disprov daumont aver tangibao seneca amiti defect letter confluenc french dabbl threshold tomb inquiri travel proprieti bush espèc idl dreami document descend courag foray downward fring sandston incorrect parrot menez expressli displeasur eagl sépultur indec escarpé dens strip quiet mush eastern evinc natur pick honnêt coureur 83me eighti lichen toriman bell cachent confer stealthili spear waist catharin transfer merg ferland gratitud blue friabl paw forget prochain risk caution still generos awar burlesqu concentr mingl cinquièm pourtant altern us somebodi suppress unscrupul discord coat dog pierron loup campaign mangèrent cloth theme rope unnatur discipl haw battl superfici spendthrift empti tavern threat épuisé deliv deceas vicious employ trunk endow notwithstand jansenist baptism offend sustain complic almost larger commit villag invect green careen ownership request 
lightn braveri sunday remedi current',
|
216 |
+
'Is the content related to romance genere',
|
217 |
+
'Is the content related to romance genere',
|
218 |
+
]
|
219 |
+
embeddings = model.encode(sentences)
|
220 |
+
print(embeddings.shape)
|
221 |
+
# [3, 384]
|
222 |
+
|
223 |
+
# Get the similarity scores for the embeddings
|
224 |
+
similarities = model.similarity(embeddings, embeddings)
|
225 |
+
print(similarities.shape)
|
226 |
+
# [3, 3]
|
227 |
+
```
|
228 |
+
|
229 |
+
<!--
|
230 |
+
### Direct Usage (Transformers)
|
231 |
+
|
232 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
233 |
+
|
234 |
+
</details>
|
235 |
+
-->
|
236 |
+
|
237 |
+
<!--
|
238 |
+
### Downstream Usage (Sentence Transformers)
|
239 |
+
|
240 |
+
You can finetune this model on your own dataset.
|
241 |
+
|
242 |
+
<details><summary>Click to expand</summary>
|
243 |
+
|
244 |
+
</details>
|
245 |
+
-->
|
246 |
+
|
247 |
+
<!--
|
248 |
+
### Out-of-Scope Use
|
249 |
+
|
250 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
251 |
+
-->
|
252 |
+
|
253 |
+
<!--
|
254 |
+
## Bias, Risks and Limitations
|
255 |
+
|
256 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
257 |
+
-->
|
258 |
+
|
259 |
+
<!--
|
260 |
+
### Recommendations
|
261 |
+
|
262 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
263 |
+
-->
|
264 |
+
|
265 |
+
## Training Details
|
266 |
+
|
267 |
+
### Training Dataset
|
268 |
+
|
269 |
+
#### Unnamed Dataset
|
270 |
+
|
271 |
+
|
272 |
+
* Size: 4,319 training samples
|
273 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
274 |
+
* Approximate statistics based on the first 1000 samples:
|
275 |
+
| | anchor | positive |
|
276 |
+
|:--------|:--------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
277 |
+
| type | string | string |
|
278 |
+
| details | <ul><li>min: 449 tokens</li><li>mean: 506.92 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 10.73 tokens</li><li>max: 12 tokens</li></ul> |
|
279 |
+
* Samples:
|
280 |
+
| anchor | positive |
|
281 |
+
|:-------|:---------|
|
282 |
+
| <code>assum discredit loud immedi incumb wealthi speck flare sleepi marriag intang rise revolv stupor fool voic manner thereupon abhorr mountain general amend flew posi intox poet laid tel ugli issu insult armament assert croak illus deign discourag trust fund pray irregular aristocraci shoulder overcom dumb devil pas grass unnecessari heat event factotum shot stabl innumer fleshi later struggl vike arrog orchardward tune dissatisfact presum reclus seven behavior fine hebe hind ripen irrate brother annoy whitewash sunris curtain indulg delirium youth labori would unlucki unwrinkl initi hark bliss occas everyth folli subordin stamp glossi finish consist hall cave insight forg matter forward familiar hidden sandi noblest undevelop acr masonri wand took endeavor joke standpoint loveli picket caress nicknam coil temper unknown pledg sunk looker abil subterranean wari effemin go spit denounc recoveri violenc moorish gloomili wind stove religion senior stiffli shudder lean encount luckili pull weld approach liveli glyphi plagu funnel soulless inquir pearl tenabl unsaf justifi unhero curious subject laboratori societi afford dose hundredth thief tremor grizzl villan tumult knocker rainbow boy drama pitiless cynosur demeanor communic ironi lurk loftiest freshen offenc environ mixtur habitu blunt shirt straightway lieuten sofa lineament poison hypothet nonsens censor æon applaus blew blade sanguin caller heavenward resist readili tempor hatr rivalri purpl coward barber damask dialogu carpet seat disadvantag gad littl insignific rather apolog surpris frivol aloft uproari boot review ad thrown lavish trod curv join infirm wise undecid seclud protector humorist quiver peep repossess transit brewer warn swimmer reproduc failur upon rob draw wrist triumphant horror unusu leastway larg field rig durabl lord brink barrist show probe grow redund jacob sincer work twain sleev betroth anyon undo sadden darksom satin saint entreati central breez unconsid permit intellig gallon 
photograph whenc asid aristocrat taint ceil aloud</code> | <code>Is the content related to non-fiction genere</code> |
|
283 |
+
| <code>last highest gynê smoke proximum inclin synapteon gladden ekeinên flutter could ænian lead exact sleeper ascend faithless alik satisfi orcus merus nave frustra delphi muse balm realli regain arist convoy formid sell recal surest blast respect carnean mead envelop better dare moriar reduc talk glori mightest dicendum shrink abroad calm altisono sin ultima xxviii pous subterran kisso rage entha marri naught seldom upros race taphian restor elthont weather bewar forcibl lydian serm xenophon rest xeinôn rebuk spectr verum consilio satisfactorili medicin unfavor anthrôpoi prodess ætas 1437 lighten epebaiên across practic taken seer recommend dramont handsom tenor lepton hydatôn hêtis rose ill audiat mempto scalig propos suspicient falsehood long wetstein unintellig pluto enslav agit cross continu size lamb latebo ktypou cloudi like superstiti perchanc account colchian oaken euripidê delight infidel wed pitnonta excito mate liber discreet libya unpract whither gall murder weapon mean subsist cityless sepulchra nêpie eurota hyperechthairei antiop stop prosgelai earli achill metr suffici mellonta spot abiôton arbylê aveng catastroph kephalêi natal argous beyond sped known substant line parallel aeri given hew pavement euergesiôn egomet atmospher titan peal flatteri pheroean hygrotêt inclos givest tempt endear ôkeanou onta assonat payest realiti congeni sound pella unto advantag dynatai skimpôni apt expedi patro horrent illustri libri nautic beard stab seem situat lesser floweri success odyssey commemoratio unsulli palla lyei 1209 singular mellein unhonor languag surg regular eriosteptoi assertest gynaiko populac daphni scandal allianc stroke monk aught counter putter extinguish varianc elegi polydor pedest per fright bridegroom stadii unfortun skeptic horai solicitud publish offici kachla 1840 nation korytha corruptus kain topôn lament uncal olympus reveng cineri charon remittest length sipylus lolaus greatest unadorn shoot kalyptê nowher hospit blomfield promiscu 
iron shelter tipto stori unquest penthêrê</code> | <code>Is the content related to non-fiction genere</code> |
|
284 |
+
| <code>sank driven interrupt linen live sledg hast mistak alban cherish egg rhyme chief ezekiel whole excess neck shepherd robber snake even cours 160th neckti vocabulari wherefor vibrat protest repent stay import fanat pedestrian plenti convict threw thousand net timber crown owner echo poke battlement bugl nearer tole blush fresh darrel sail client warden happi colli strand congress eastward run limit scamp liberti celebr sacr squint treat outbreak dost offic hear bedroom brakeman correspond guilt glibli gabl son take jolli june mullen depot havin septemb leech guard bard extraordinari hamlet scarf tender juri knotti thurst unfad helpless strap hole rous slow shallow frequent morrow jargon befriend reap ocean spatter slaveri caesar isaiah forrest mile eliot full win pan wrong confront knee shear nice slid arrear angrili fourteen tentat merit governor bear togeth shook dine sermon fortitud web plain banker thrash sixteen grand grim forsooth railroad dwelt harrow burglar fir kind sober expector around hound joint hypocrit question clover skull snap bulli upper undu forehead sum cuff tramp cricket float speaker invis gestur mebb tax skeleton volcano drill tellin foreclosur editor confirm frosti scrambl regret ravel fiction hous holiday break schoolhous card pretenc crop fervent vittl tie mire whereon haughti fellow choos manag dinner infer crack dig index uneasi done drover foot agre studious verdict hand feat graven counterfeit brindl anywher fore thrill wolf partner heartbroken match martha prey caus imit muzzl public chalk beat welcom root celtic fifti person ladi excel confidenti jealousi damnabl xvii unutt sharpli crime sower train wrung manhood sunlight darken sharper secret grill elizabethan handwrit lay minut heav strike stalk horn amber near beg preacher loos christma discont rugos sleepless america tast consider top kidnap power buck much wreck ring merrier trick hard mischiev dagger mouth back knife prospect tear midnight cocoanut best pike abe gust 
dungeon poverti bond cassia gobbler exercis eben</code> | <code>Is the content related to fiction genere</code> |
|
285 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
286 |
+
```json
|
287 |
+
{
|
288 |
+
"scale": 20.0,
|
289 |
+
"similarity_fct": "cos_sim"
|
290 |
+
}
|
291 |
+
```
|
292 |
+
|
293 |
+
### Evaluation Dataset
|
294 |
+
|
295 |
+
#### Unnamed Dataset
|
296 |
+
|
297 |
+
|
298 |
+
* Size: 1,234 evaluation samples
|
299 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
300 |
+
* Approximate statistics based on the first 1000 samples:
|
301 |
+
| | anchor | positive |
|
302 |
+
|:--------|:-------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
303 |
+
| type | string | string |
|
304 |
+
| details | <ul><li>min: 453 tokens</li><li>mean: 507.1 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 10.71 tokens</li><li>max: 12 tokens</li></ul> |
|
305 |
+
* Samples:
|
306 |
+
| anchor | positive |
|
307 |
+
|:-------|:---------|
|
308 |
+
| <code>domest creed valentinian tone proclam peaceabl 1843 weakest incompet proscript realm esteem brigandag stock none incom authent competit follow labor vers wear ensembl impair student unalter glad cisalpin damocl sang perfidi pardon impera stupefi villa monopoli charl look link adag monomania messag hypocrisi priori counterpois publica gorgia redeem thank uncivil unwound fetter pascal serpit honorari maim superintend told homo promenad furnitur brief extract nehemiah furthermor competitor billion teas victim rate terminus higher mariti sacrileg behold bridg predecessor episcopi billow annot develop yardstick pretend special insinu kingship francai reckon sale devoid ghost difficulti driven falsifi pattern chief fatten contin retract dido repent thousand scholast ell librarian owner suffic fresh changer cartesian journeyman run treat offic ingenu war spontan bard extraordinari telescop extort assumpt gracious strategi frequent shallow aliquid manufactori ocean sibyl augustus mile galvan wrong usucapio knee beautifi wardenship bear togeth cart shook executor allobrog auger chapsal fortifi budget question implac entwin arbitrari float facto dearest logic commandit apprentic fiction advent traffic choos incred foot partner wolf evalu noel rioter muzzl root 1862 florenc manhood geometr nostra horn theseus beg overs melodrama inscript habent refrain helvetius disagre nodier similitud blanqui unemancip pike exercis obvious alli preambl wife ostens conquest compens coars cherbourg grantor invent duti epicur loss futil evapor gaul raison approb athenian insincer asham whim purpos unchang destruct imposit lacedaemonian wish conson pocket boobi commune relish ablest track cook blow friend geometri railway tiberius wash detriment meyer render teller ess amen arous idea personag sacrif repres stood david confrer fond sad cratch doubli attain advic vineyard pound habetur urgent britain communiti majorat juggleri biblic trim equal villein hazard expropri selfish declar 
taught ingratitud satisfact deliber wiser enthusiast</code> | <code>Is the content related to non-fiction genere</code> |
|
309 |
+
| <code>conscious chronolog leapt close sis drift lump station rank destitut contriv swivel grate stuck spare monoton thicket mesh yellow air fault choost reward scorn intent applic pestilenti contemptu greenhous mix pipe persuad plung avoid displac trustet ahoy concern critic sowsand name jounc downtown involuntari establish peril also settl flash voter mighti bang necess vial bewitch characterist adorn beauti sate decrepit citronell naturalist know conscienc fontenett laden strock deceiv inde pursuer xxii aimless moonlight archangel detain infatu frighten bought drows lucki pine trickl juic owfool pathet sunbeam tent needl gusti twas clung worthi diseas outrag recov made exhaust second begrudg cobweb privat corridor speak seventi bawn undress tarri remind enamour prompt lip graver ventur obedi basement forgotten crowd other sing incident breakwat excus wile rebound entangl philosophi flabbi deliver believ outang affect arriv vision soak bug realiz cruel frock promis pahdon everi modesti suzann fickl le african relief fortun laundri serenest ash straight damp awri lessen evil loudeh fonteett tardi spill hale hostess ladder avow medit seal longer well rebuff maintain quicker exclus donkey season hug wreath emphasi fill flag devious disturb bit tiger stolen intend drench unclean deep flourish apprehend admir veight flesh shiltren week anxieti violet how richard unbear everywher prefac conduct saunt stumbl though peopl sinc someth despair obey moor moral sill strang kine compassion mark doze flow dreamless wors crouch acquaint sugar typic doorsil leafi redempt unchalleng delug tarpaulin troop circumv hither reserv wander dirti crib cistern plead ruin serious slept scholar gradual drove fan mellow meet entertain till mantlepiec fairili sorri gasp southern heighten seed attempt joseph drown notion fascin constel rich consent speech teeth tire glorious pencil convuls glisten diffid lose citat dappl feast sooner belong splendid cigarett hoist sick midday tail fairst 
honor scorch savedt apathi color alvay inspect</code> | <code>Is the content related to romance genere</code> |
|
310 |
+
| <code>greatness late reput alarum compar fair rediscov realis round swell danc sayl crush particip move huddl materialis benumb prophesi infel unlaw rais suspit trumpet canibal herculean calcul yong specimen superiour forbear encreas fairest enlarg steer fama barrisor pediss preval shepheard umbra altum tergo catholiqu voluntatem assum bethink spar perplext oper immedi crab exploit wealthi catterpillar marriag labour rise revolv fool voic manner thereupon impo recevra montsureau abhorr enseam rendrer mountain general chariot amend flew poet laid tel issu insult radical assert bounti illus attyr discourag trust eundo penian mishap pray predict irregular unright lot expon shoulder overcom outright dumb treilli devil embrew enterd pas massing cognoscer essay splene exemplari grass traiter unnecessari prix scape heat event shot usd stabl highness syrtibus bel fleshi monsurri obsequi later struggl arrog intervent tune presum throne mess indur seven afflig judicial cyclop shakespearean fine sori prompter hind brother annoy inordin whosoev indulg lachesi perus youth span juli sadness 1888 would administr initi greediness obay hark bliss vellet occas folli subordin palladi stamp glossi finish consist hall cave lettr mercer forg 1865 gondomar matter forward niec stomack familiar noblest audaci thrid scap cure goos took crestfaln dispenc acheson zeal bodenstedt £300 temper pindus stephen barricado unknown elucid hostag desertful delighteth cruell 1681 clapdish wari go existen spit denounc violenc familia occisi wind religion eie lean oppidani encount bussii quellen desart cornhil pull approach haut plagu leas ornaverat epictetus nere pearl spenser sicil arrogantia justifi riot curious skirmish overlap subject faciebat societi afford celestial poultron villan humer jigg emrod 1903 boy drama dan communic 3830 offenc shaksper environ habitu blunt crafti down inviol men lieuten coit poison nonsens legitimaci scoff applaus blew letcher 1557 resist readili uncredit tempor hatr 
rivalri purpl coward librari errour sphære</code> | <code>Is the content related to romance genere</code> |
|
311 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
312 |
+
```json
|
313 |
+
{
|
314 |
+
"scale": 20.0,
|
315 |
+
"similarity_fct": "cos_sim"
|
316 |
+
}
|
317 |
+
```
|
318 |
+
|
319 |
+
### Training Hyperparameters
|
320 |
+
#### Non-Default Hyperparameters
|
321 |
+
|
322 |
+
- `eval_strategy`: steps
|
323 |
+
- `per_device_train_batch_size`: 16
|
324 |
+
- `per_device_eval_batch_size`: 16
|
325 |
+
- `num_train_epochs`: 2
|
326 |
+
- `warmup_ratio`: 0.1
|
327 |
+
- `fp16`: True
|
328 |
+
- `batch_sampler`: no_duplicates
|
329 |
+
|
330 |
+
#### All Hyperparameters
|
331 |
+
<details><summary>Click to expand</summary>
|
332 |
+
|
333 |
+
- `overwrite_output_dir`: False
|
334 |
+
- `do_predict`: False
|
335 |
+
- `eval_strategy`: steps
|
336 |
+
- `prediction_loss_only`: True
|
337 |
+
- `per_device_train_batch_size`: 16
|
338 |
+
- `per_device_eval_batch_size`: 16
|
339 |
+
- `per_gpu_train_batch_size`: None
|
340 |
+
- `per_gpu_eval_batch_size`: None
|
341 |
+
- `gradient_accumulation_steps`: 1
|
342 |
+
- `eval_accumulation_steps`: None
|
343 |
+
- `learning_rate`: 5e-05
|
344 |
+
- `weight_decay`: 0.0
|
345 |
+
- `adam_beta1`: 0.9
|
346 |
+
- `adam_beta2`: 0.999
|
347 |
+
- `adam_epsilon`: 1e-08
|
348 |
+
- `max_grad_norm`: 1.0
|
349 |
+
- `num_train_epochs`: 2
|
350 |
+
- `max_steps`: -1
|
351 |
+
- `lr_scheduler_type`: linear
|
352 |
+
- `lr_scheduler_kwargs`: {}
|
353 |
+
- `warmup_ratio`: 0.1
|
354 |
+
- `warmup_steps`: 0
|
355 |
+
- `log_level`: passive
|
356 |
+
- `log_level_replica`: warning
|
357 |
+
- `log_on_each_node`: True
|
358 |
+
- `logging_nan_inf_filter`: True
|
359 |
+
- `save_safetensors`: True
|
360 |
+
- `save_on_each_node`: False
|
361 |
+
- `save_only_model`: False
|
362 |
+
- `restore_callback_states_from_checkpoint`: False
|
363 |
+
- `no_cuda`: False
|
364 |
+
- `use_cpu`: False
|
365 |
+
- `use_mps_device`: False
|
366 |
+
- `seed`: 42
|
367 |
+
- `data_seed`: None
|
368 |
+
- `jit_mode_eval`: False
|
369 |
+
- `use_ipex`: False
|
370 |
+
- `bf16`: False
|
371 |
+
- `fp16`: True
|
372 |
+
- `fp16_opt_level`: O1
|
373 |
+
- `half_precision_backend`: auto
|
374 |
+
- `bf16_full_eval`: False
|
375 |
+
- `fp16_full_eval`: False
|
376 |
+
- `tf32`: None
|
377 |
+
- `local_rank`: 0
|
378 |
+
- `ddp_backend`: None
|
379 |
+
- `tpu_num_cores`: None
|
380 |
+
- `tpu_metrics_debug`: False
|
381 |
+
- `debug`: []
|
382 |
+
- `dataloader_drop_last`: False
|
383 |
+
- `dataloader_num_workers`: 0
|
384 |
+
- `dataloader_prefetch_factor`: None
|
385 |
+
- `past_index`: -1
|
386 |
+
- `disable_tqdm`: False
|
387 |
+
- `remove_unused_columns`: True
|
388 |
+
- `label_names`: None
|
389 |
+
- `load_best_model_at_end`: False
|
390 |
+
- `ignore_data_skip`: False
|
391 |
+
- `fsdp`: []
|
392 |
+
- `fsdp_min_num_params`: 0
|
393 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
394 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
395 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
396 |
+
- `deepspeed`: None
|
397 |
+
- `label_smoothing_factor`: 0.0
|
398 |
+
- `optim`: adamw_torch
|
399 |
+
- `optim_args`: None
|
400 |
+
- `adafactor`: False
|
401 |
+
- `group_by_length`: False
|
402 |
+
- `length_column_name`: length
|
403 |
+
- `ddp_find_unused_parameters`: None
|
404 |
+
- `ddp_bucket_cap_mb`: None
|
405 |
+
- `ddp_broadcast_buffers`: False
|
406 |
+
- `dataloader_pin_memory`: True
|
407 |
+
- `dataloader_persistent_workers`: False
|
408 |
+
- `skip_memory_metrics`: True
|
409 |
+
- `use_legacy_prediction_loop`: False
|
410 |
+
- `push_to_hub`: False
|
411 |
+
- `resume_from_checkpoint`: None
|
412 |
+
- `hub_model_id`: None
|
413 |
+
- `hub_strategy`: every_save
|
414 |
+
- `hub_private_repo`: False
|
415 |
+
- `hub_always_push`: False
|
416 |
+
- `gradient_checkpointing`: False
|
417 |
+
- `gradient_checkpointing_kwargs`: None
|
418 |
+
- `include_inputs_for_metrics`: False
|
419 |
+
- `eval_do_concat_batches`: True
|
420 |
+
- `fp16_backend`: auto
|
421 |
+
- `push_to_hub_model_id`: None
|
422 |
+
- `push_to_hub_organization`: None
|
423 |
+
- `mp_parameters`:
|
424 |
+
- `auto_find_batch_size`: False
|
425 |
+
- `full_determinism`: False
|
426 |
+
- `torchdynamo`: None
|
427 |
+
- `ray_scope`: last
|
428 |
+
- `ddp_timeout`: 1800
|
429 |
+
- `torch_compile`: False
|
430 |
+
- `torch_compile_backend`: None
|
431 |
+
- `torch_compile_mode`: None
|
432 |
+
- `dispatch_batches`: None
|
433 |
+
- `split_batches`: None
|
434 |
+
- `include_tokens_per_second`: False
|
435 |
+
- `include_num_input_tokens_seen`: False
|
436 |
+
- `neftune_noise_alpha`: None
|
437 |
+
- `optim_target_modules`: None
|
438 |
+
- `batch_eval_metrics`: False
|
439 |
+
- `eval_on_start`: False
|
440 |
+
- `batch_sampler`: no_duplicates
|
441 |
+
- `multi_dataset_batch_sampler`: proportional
|
442 |
+
|
443 |
+
</details>
|
444 |
+
|
445 |
+
### Training Logs
|
446 |
+
| Epoch  | Step | Training Loss | Validation Loss |
|
447 |
+
|:------:|:----:|:-------------:|:------:|
|
448 |
+
| 0.3704 | 100 | 1.0978 | 0.9591 |
|
449 |
+
| 0.7407 | 200 | 1.089 | 1.0138 |
|
450 |
+
| 1.1111 | 300 | 1.0538 | 0.9570 |
|
451 |
+
| 1.4815 | 400 | 1.0502 | 0.9178 |
|
452 |
+
| 1.8519 | 500 | 1.0611 | 0.9197 |
|
453 |
+
|
454 |
+
|
455 |
+
### Framework Versions
|
456 |
+
- Python: 3.10.12
|
457 |
+
- Sentence Transformers: 3.1.0
|
458 |
+
- Transformers: 4.42.4
|
459 |
+
- PyTorch: 2.3.1+cu121
|
460 |
+
- Accelerate: 0.32.1
|
461 |
+
- Datasets: 3.0.0
|
462 |
+
- Tokenizers: 0.19.1
|
463 |
+
|
464 |
+
## Citation
|
465 |
+
|
466 |
+
### BibTeX
|
467 |
+
|
468 |
+
#### Sentence Transformers
|
469 |
+
```bibtex
|
470 |
+
@inproceedings{reimers-2019-sentence-bert,
|
471 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
472 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
473 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
474 |
+
month = "11",
|
475 |
+
year = "2019",
|
476 |
+
publisher = "Association for Computational Linguistics",
|
477 |
+
url = "https://arxiv.org/abs/1908.10084",
|
478 |
+
}
|
479 |
+
```
|
480 |
+
|
481 |
+
#### MultipleNegativesRankingLoss
|
482 |
+
```bibtex
|
483 |
+
@misc{henderson2017efficient,
|
484 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
485 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
486 |
+
year={2017},
|
487 |
+
eprint={1705.00652},
|
488 |
+
archivePrefix={arXiv},
|
489 |
+
primaryClass={cs.CL}
|
490 |
+
}
|
491 |
+
```
|
492 |
+
|
493 |
+
<!--
|
494 |
+
## Glossary
|
495 |
+
|
496 |
+
*Clearly define terms in order to be accessible across audiences.*
|
497 |
+
-->
|
498 |
+
|
499 |
+
<!--
|
500 |
+
## Model Card Authors
|
501 |
+
|
502 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
503 |
+
-->
|
504 |
+
|
505 |
+
<!--
|
506 |
+
## Model Card Contact
|
507 |
+
|
508 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
509 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "thenlper/gte-small",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 384,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 1536,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.42.4",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.0",
|
4 |
+
"transformers": "4.42.4",
|
5 |
+
"pytorch": "2.3.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c94ea68ec7712edb2159c4dad7053d4940d08e1efd7e24a42805f8b9f468d4d2
|
3 |
+
size 133462128
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|