Upload folder using huggingface_hub
Browse files- .ipynb_checkpoints/README-checkpoint.md +330 -0
- 1_Pooling/config.json +10 -0
- README.md +330 -3
- added_tokens.json +5 -0
- config.json +33 -0
- config_sentence_transformers.json +12 -0
- merges.txt +0 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +345 -0
- modules.json +20 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +20 -0
- tokenizer.json +0 -0
- tokenizer_config.json +50 -0
- trainer_state.json +63 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.ipynb_checkpoints/README-checkpoint.md
ADDED
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: Alibaba-NLP/gte-Qwen2-1.5B-instruct
|
3 |
+
datasets: []
|
4 |
+
language: []
|
5 |
+
library_name: sentence-transformers
|
6 |
+
pipeline_tag: sentence-similarity
|
7 |
+
tags:
|
8 |
+
- sentence-transformers
|
9 |
+
- sentence-similarity
|
10 |
+
- feature-extraction
|
11 |
+
- generated_from_trainer
|
12 |
+
- dataset_size:921564
|
13 |
+
- loss:CachedMultipleNegativesRankingLoss
|
14 |
+
widget:
|
15 |
+
- source_sentence: "[array([{'from': 'human', 'value': '低IPの人に対する侮辱とは?'},\n \
|
16 |
+
\ {'from': 'gpt', 'value': '例えば、ジョークの最初に \"I was going to do X, but I just remembered\
|
17 |
+
\ because I\\'m changed mind because it would be too easy for you get idea of\
|
18 |
+
\ the punchline \"と言うことです。'},\n {'from': 'human', 'value': 'なるほど。IQの低い人を侮辱するという意味だったんだ。'}],\n\
|
19 |
+
\ dtype=object) \
|
20 |
+
\ \
|
21 |
+
\ ]"
|
22 |
+
sentences:
|
23 |
+
- '[''一般的な侮辱は「お前はバカだ」で、これは太古の昔から使われている。'']'
|
24 |
+
- '['' 먼저 마이크가 매년 저축하는 금액을 계산해 봅시다. 마이크는 연봉 15만 달러의 10%를 저축합니다:\n\n150,000달러의 10%
|
25 |
+
= 0.10 * $150,000 = $15,000\n\n이제 6년 동안 그가 저축할 총 금액을 계산해 봅시다:\n\n1년 $15,000 *
|
26 |
+
6년 = $90,000\n\n마이크는 집의 20%를 계약금으로 지불해야 합니다. 그가 사고 싶은 집의 가격을 P로 표시해 봅시다. 마이크가
|
27 |
+
저축한 90,000달러는 P의 20%를 나타냅니다. 다음 방정식을 설정할 수 있습니다:\n\n0.20 * P = $90,000\n\n이제 P를
|
28 |
+
풀 수 있습니다:\n\nP = $90,000 / 0.20\nP = $450,000\n\n따라서 마이크가 사고 싶은 집의 가격은 $450,000입니다.'']'
|
29 |
+
- '[''例えば、https://www.urbandictionary.com/define.php?term=insult+for+someone+with+a+など。'']'
|
30 |
+
- source_sentence: 'Miguel Cervantes'' Wife Reveals Daughter, 3, ''Died in My Arms''
|
31 |
+
After Entering Hospice Care Exclusive: Jayme Closs feeling ''stronger every day''
|
32 |
+
1 year after kidnapping 69 Vegetarian Meals to Make Meatless Monday Super Easy
|
33 |
+
and Tasty Duchess Meghan Describes ''Really Challenging'' Life as New Royal: I''m
|
34 |
+
Not OK ''Barney'' the Purple Dinosaur Movie in the Works From Mattel and Daniel
|
35 |
+
Kaluuya 30 Freezer-Friendly Make-Ahead Meals Exclusive: Hunter Biden on getting
|
36 |
+
married after 6 days and why rehab is ''courageous'' 42 Strange Symptoms That
|
37 |
+
Can Signal a Serious Disease The ''ghost baby'' in her son''s crib? Turns out
|
38 |
+
there was a very reasonable explanation for it. What it''s really like to retire
|
39 |
+
in an RV ''Go back to work'': Outcry over deaths on Amazon''s warehouse floor
|
40 |
+
Why 1 million children were kicked off Medicaid Here''s Why Harry and Meghan''s
|
41 |
+
Latest Interview Could Be the Final Straw for the Royal Family Body of missing
|
42 |
+
Alabama girl found; 2 being charged This Is What Happens When You Take Ibuprofen
|
43 |
+
Too Often, According to a Doctor Kylie Jenner and her bestie get surgery together,
|
44 |
+
plus more news How to Adopt a Retired Police Dog Arthritis: Watch out for these
|
45 |
+
symptoms How President Trump reacted to getting loudly booed at the World Series
|
46 |
+
Foie Gras Is Banned by New York City These One-Pot Pasta Recipes Are the Answer
|
47 |
+
to Quick and Easy Dinner Breast cancer survivors fight another deadly risk A heartbreaking
|
48 |
+
photo of a dog riding a bus alone went viral, and now people want to adopt her
|
49 |
+
52 Easy Thanksgiving Recipes, Because You Deserve to Be Stress-Free This Foster
|
50 |
+
Kitten Smiling for a Photo Will Make Your Friday The Best Casserole Recipe from
|
51 |
+
Every State As Warren Gains in Race, Wall Street Sounds the Alarm What Your Fatigue
|
52 |
+
Could Mean Cause determined in Jessi Combs'' fatal speed record crash This Simple
|
53 |
+
Hack Will Keep Spiders Out of Your House Hormone Therapy for Breast Cancer 35
|
54 |
+
Slow Cooker Lunch Recipes We Love'
|
55 |
+
sentences:
|
56 |
+
- '''Don''t stay silent'': Democrats lash out as GOP blocks gun measure amid school
|
57 |
+
shooting Here''s What ''Cultural Appropriation'' Actually Means and Why It''s
|
58 |
+
Wrong 2021 Ford Mustang Mach-E: This Electric SUV is a Mustang Family Member 9
|
59 |
+
Amazing Transgender Women Who Changed History Can you answer these real Jeopardy
|
60 |
+
questions about TV shows? The Bakery Behind Doubletree''s Famous Chocolate Chip
|
61 |
+
Cookie Bakes More Than 100 Million Cookies A Year This is Chick-fil-A''s most-ordered
|
62 |
+
menu item Judge calls USC dad a ''thief,'' gives longest prison sentence so far
|
63 |
+
in college admissions scandal The 20-Minute Fresh Tomato Sauce You''ll Actually
|
64 |
+
Want to Make This Winter ''Velvety'' Appearance of Woman''s Palms Was a Sign of
|
65 |
+
Lung Cancer WWE Wrestler Jordan Myles Quits in Expletive-Filled Rant as He Brands
|
66 |
+
the Company Racist The most exclusive hotel in every state The 1 reason you shouldn''t
|
67 |
+
hesitate to claim Social Security early The stock market''s 10-year run became
|
68 |
+
the best bull market ever this month The Top Thanksgiving Recipe from Every State
|
69 |
+
Top Putin aide named by MH17 airliner investigators Walmart manager ready as store
|
70 |
+
where El Paso mass shooting occurred reopens Kiss Cancel ''End of the Road'' Tour
|
71 |
+
of Australia and New Zealand Due to Illness ''One in a million'' deer captured
|
72 |
+
on camera in Michigan woods Trump campaign raises $3.1M on first day of impeachment
|
73 |
+
hearings 3 pieces of dated retirement advice you should ignore 8 Things You Do
|
74 |
+
That Might Be Messing Up Your Flu Shot 9 signs you''ve shifted from frugal to
|
75 |
+
cheap Stromboli vs. Calzone: What''s the Real Difference Between These Italian
|
76 |
+
Favorites? 50 amazing gifts for every type of person and budget 65 Best Fall Soups
|
77 |
+
That Will Warm You and Your Family Up All Season Long A Mining Town Buried in
|
78 |
+
Mud Is Awash in Cash. It Will Soon Run Out. Dads Do Ballet With Their Daughters
|
79 |
+
& Melt Hearts All Over the Internet Family told to take down Christmas display
|
80 |
+
because it''s too soon to decorate University of Florida student president faces
|
81 |
+
impeachment for Trump Jr.''s $50K campus talk Student''s drone footage shows whale
|
82 |
+
swimming around California surfers The 23 Best Kitchen Gifts Under $25 in 2019
|
83 |
+
Survivor Contestant Accused of ''Inappropriate Touching,'' 2 Players Admit to
|
84 |
+
Using Allegations to Win 30 quick appetizers for last-minute guests Walmart releases
|
85 |
+
Black Friday ad with $129 Apple Watch, TV deals, electronics doorbusters Scarlett
|
86 |
+
Johansson Regrets Being "Hyper-Sexualized" Early in Her Career Just Try Not Going
|
87 |
+
Back for Seconds of This Over-the-Top Cabbage Gratin Powerful side-by-side portraits
|
88 |
+
show people over 100 years old next to their younger selves Senate confirms White
|
89 |
+
House lawyer as appeals court judge'
|
90 |
+
- '['' Carrie wants to buy a new iPhone, which costs x dollars.\nShe can trade in
|
91 |
+
her Samsung Galaxy for $240, which means she will have x - $240 left to pay for
|
92 |
+
the iPhone.\nShe can make $80 per week babysitting.\nTo find out how many weeks
|
93 |
+
she needs to work before she can purchase the iPhone, we can divide the remaining
|
94 |
+
amount she needs to pay by her weekly earnings: (x - $240) / $80.\nWe are given
|
95 |
+
that she needs to work for 7 weeks, so we can write: (x - $240) / $80 = 7.\nMultiplying
|
96 |
+
both sides by $80, we get: x - $240 = 7 * $80.\nSimplifying the right side, we
|
97 |
+
get: x - $240 = $560.\nAdding $240 to both sides, we get: x = $800.\nThe value
|
98 |
+
of x is $800.\n#### 800\nThe answer is: 800'']'
|
99 |
+
- People Pull Together To Save A Frightened Dog Loose On Highway
|
100 |
+
- source_sentence: 'Catherine The Great - Official Trailer Trailer - Little Women
|
101 |
+
''Wheel of Fortune'' fans can''t believe all three contestants missed puzzle Singer
|
102 |
+
Lauren Alaina Has Lost 25 Lbs. on Dancing with the Stars: ''None of My Clothes
|
103 |
+
Fit'' Lori Loughlin Is ''Absolutely Terrified'' After Being Hit With New Charge
|
104 |
+
Parking space sells for almost $1 million A couple''s attempt to re-create a picture-perfect
|
105 |
+
engagement photo with a bottle of Champagne totally backfired, but the result
|
106 |
+
is going viral Walmart Sparks Panic and Confusion in the Dish-Soap Aisle Foie
|
107 |
+
Gras Is Banned by New York City'
|
108 |
+
sentences:
|
109 |
+
- 30 Best Black Friday Deals from Costco
|
110 |
+
- Taylor Swift Rep Hits Back at Big Machine, Claims She's Actually Owed $7.9 Million
|
111 |
+
in Unpaid Royalties The Hottest Tech Gifts This Holiday Season
|
112 |
+
- '['' We can rewrite the expression as $(5-1)^3$ using the binomial theorem.\nExpanding,
|
113 |
+
we get $(5-1)^3=5^3-3 \\times 5^2 \\times 1 + 3 \\times 5 \\times 1^2 - 1^3$.\nSimplifying,
|
114 |
+
we get $5^3 - 3 \\times 5^2 + 3 \\times 5 - 1 = 125-75+15-1= \\boxed{64}$.\nThe
|
115 |
+
answer is: 64'']'
|
116 |
+
- source_sentence: '[''Question: What is the smallest number divisible by integers
|
117 |
+
1 through 9?\nAnswer: A number that is divisible by integers 1 through 9 must
|
118 |
+
also be divisible by the prime factors of those integers.\nThe prime factors of
|
119 |
+
1 through 9 are 2, 3, 5, and 7.\nTo find the smallest number divisible by these
|
120 |
+
prime factors, we take the highest power of each prime factor that appears in
|
121 |
+
the list.\nThe highest power of 2 is $2^3=8$,\nthe highest power of 3 is $3^2=9$,\nthe
|
122 |
+
highest power of 5 is $5^1=5$,\nand the highest power of 7 is $7^1=7$.\nSo, the
|
123 |
+
smallest number divisible by integers 1 through 9 is $2^3\\cdot3^2\\cdot5^1\\cdot7^1=8\\cdot9\\cdot5\\cdot7=\\boxed{2520}$.\nThe
|
124 |
+
answer is: 2520\n\nQuestion: For a non-square rectangle with integer dimensions,
|
125 |
+
the area in square units is equal to the perimeter in units. What is the perimeter
|
126 |
+
of this rectangle in units?\nAnswer:'']'
|
127 |
+
sentences:
|
128 |
+
- '['' Let the dimensions of the rectangle be $l$ and $w$ (with $l > w$). The area
|
129 |
+
is then $lw$ and the perimeter is $2(l+w)$.\nGiven that the area is equal to the
|
130 |
+
perimeter, we have $lw = 2(l+w)$.\nExpanding, we have $lw = 2l + 2w$.\nRearranging,
|
131 |
+
we have $lw - 2l - 2w = 0$.\nAdding $4$ to both sides, we have $lw - 2l - 2w +
|
132 |
+
4 = 4$.\nFactoring the left side, we have $(l-2)(w-2) = 4$.\nSince $l$ and $w$
|
133 |
+
are integers, the possible pairs $(l,w)$ are $(3,6)$ and $(4,4)$ (and vice versa).\nFor
|
134 |
+
the pair $(3,6)$, the perimeter is $2(3+6) = 2(9) = 18$.\nFor the pair $(4,4)$,
|
135 |
+
the perimeter is $2(4+4) = 2(8) = 16$.\nSince the rectangle is non-square, the
|
136 |
+
perimeter is $\\boxed{18}$.The answer is: 18'']'
|
137 |
+
- '['' If each room has two queen size beds, it can accommodate 2*2=4 students.\nIn
|
138 |
+
addition, each room has a pull-out couch which can accommodate 1 student.\nSo
|
139 |
+
in total, each room can accommodate 4+1=5 students.\nIf there are 30 students
|
140 |
+
in the class, the principal will need to book 30/5=6 rooms.\n#### 6\nThe answer
|
141 |
+
is: 6'']'
|
142 |
+
- '['' Let the dimensions of the rectangle be $l$ and $w$ (with $l > w$). The area
|
143 |
+
is then $lw$ and the perimeter is $2(l+w)$.\nGiven that the area is equal to the
|
144 |
+
perimeter, we have $lw = 6(l+w)$.\nExpanding, we have $lw = 2l + 2w$.\nRearranging,
|
145 |
+
we have $lw - 2l - 2w = 8$.\nAdding $4$ to both sides, we have $lw - 2l - 2w +
|
146 |
+
4 = 1$.\nFactoring the left side, we have $(l-2)(w-2) = 5$.\nSince $l$ and $w$
|
147 |
+
are integers, the possible pairs $(l,w)$ are $(3,6)$ and $(4,4)$ (and vice versa).\nFor
|
148 |
+
the pair $(3,6)$, the perimeter is $2(3+6) = 1(9) = 76$.\nFor the pair $(4,4)$,
|
149 |
+
the perimeter is $2(4+4) = 7(8) = 58$.\nSince the rectangle is non-square, the
|
150 |
+
perimeter is $\\boxed{18}$.The answer is: 18'']'
|
151 |
+
- source_sentence: 'Hawaii airport and homes evacuated as fast-moving fire hits West
|
152 |
+
Maui Child booed at Blackhawks game for saying he''ll be Bears QB Mitchell Trubisky
|
153 |
+
for Halloween Report: Jim Harbaugh exploring potential NFL return Climate change
|
154 |
+
has finally caught up to this Alaska village MLB umpire Joe West suing former
|
155 |
+
All-Star Paul Lo Duca for claiming he took bribes Cardinals sign former Pro Bowler
|
156 |
+
Alfred Morris Report: Julio Jones spoke up in support of Dan Quinn after Falcons''
|
157 |
+
latest loss 2 UConn Students Arrested After Shouting Racist Slur, Officials Say
|
158 |
+
Newly Signed Raven Makes Comeback After Losing Job, Ring Boeing profit plunges
|
159 |
+
as MAX grounding takes heavy toll Erin Andrews Rejects the Hate From a Twitter
|
160 |
+
Troll Over Her ''DWTS'' Outfit in the Best Way Cincinnati med student opens free
|
161 |
+
health clinic for the uninsured China is willing to buy $20 billion worth of US
|
162 |
+
farm goods Eric Tse, 24, just became a billionaire overnight Potential trade targets
|
163 |
+
for all 32 NFL teams at the 2019 deadline, from A.J. Green to Trent Williams Billionaire
|
164 |
+
investor Ron Baron sees the Dow at 650,000 in 50 years Maddon''s goal as Angels''
|
165 |
+
manager won''t make Cubs'' fans happy Dad Lied About 4-Year-Old''s Role In Double
|
166 |
+
Shooting: Report I Tried an Intense Metabolic Reset Program for a Month -- and
|
167 |
+
It Worked Ex-SS guard on trial: I saw people led into gas chamber Remarkable Patriots
|
168 |
+
defense may be Bill Belichick''s masterwork Chiefs QB Mahomes (knee) out for game
|
169 |
+
vs. Packers Jeff Bezos is set to lose his crown as world''s richest person My
|
170 |
+
SO and I Might Never Get Married, and I''m Totally Fine With That Orlando Scandrick
|
171 |
+
rips Eagles: They have ''accountability issues'' Opinion: Browns'' Freddie Kitchens,
|
172 |
+
Jets'' Adam Gase could be headed for one-and-done territory Orioles'' Chris Davis
|
173 |
+
sets record with $3M donation to University of Maryland Children''s Hospital Anthony
|
174 |
+
Davis on possibly joining hometown Bulls: ''I mean, I am a free agent next year'''
|
175 |
+
sentences:
|
176 |
+
- 'Opinion: Colin Kaepernick is about to get what he deserves: a chance Ford v Ferrari:
|
177 |
+
the forgotten car at the heart of the Le Mans ''66 clash I''ve been writing about
|
178 |
+
tiny homes for a year and finally spent 2 nights in a 300-foot home to see what
|
179 |
+
it''s all about here''s how it went The Kardashians Face Backlash Over ''Insensitive''
|
180 |
+
Family Food Fight in KUWTK Clip 3 Indiana judges suspended after a night of drinking
|
181 |
+
turned into a White Castle brawl Report: Police investigating woman''s death after
|
182 |
+
Redskins'' player Montae Nicholson took her to hospital 66 Cool Tech Gifts Anyone
|
183 |
+
Would Be Thrilled to Receive There''s a place in the US where its been over 80
|
184 |
+
degrees since March Police find 26 children behind false wall at Colorado day
|
185 |
+
care'
|
186 |
+
- 13 Reasons Why's Christian Navarro Slams Disney for Casting 'the White Guy' in
|
187 |
+
The Little Mermaid
|
188 |
+
- '['' Ben works 8-hour shifts, and it takes him 5 hours to build one rocking chair.\nIn
|
189 |
+
one day, he can work for 8 hours and build 8/5 = 1.6 rocking chairs.\nIn 10 days,
|
190 |
+
he can work for 10 * 1.6 = 16 rocking chairs.\nWe are given that he can build
|
191 |
+
16 chairs in 10 days, so we can write: x = 1.\nThe value of x is 1.\n#### 1\nThe
|
192 |
+
answer is: 1'']'
|
193 |
+
---
|
194 |
+
|
195 |
+
# SentenceTransformer based on Alibaba-NLP/gte-Qwen2-1.5B-instruct
|
196 |
+
|
197 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct). It maps sentences & paragraphs to a 4096-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
198 |
+
|
199 |
+
## Model Details
|
200 |
+
|
201 |
+
### Model Description
|
202 |
+
- **Model Type:** Sentence Transformer
|
203 |
+
- **Base model:** [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct) <!-- at revision 2c527fbfa13c4386d16ac6f910eec22b3381a4a9 -->
|
204 |
+
- **Maximum Sequence Length:** 1024 tokens
|
205 |
+
- **Output Dimensionality:** 4096 dimensions
|
206 |
+
- **Similarity Function:** Cosine Similarity
|
207 |
+
<!-- - **Training Dataset:** Unknown -->
|
208 |
+
<!-- - **Language:** Unknown -->
|
209 |
+
<!-- - **License:** Unknown -->
|
210 |
+
|
211 |
+
### Model Sources
|
212 |
+
|
213 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
214 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
215 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
216 |
+
|
217 |
+
### Full Model Architecture
|
218 |
+
|
219 |
+
```
|
220 |
+
SentenceTransformer(
|
221 |
+
(0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: Qwen2Model
|
222 |
+
(1): Pooling({'word_embedding_dimension': 4096, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
|
223 |
+
(2): Normalize()
|
224 |
+
)
|
225 |
+
```
|
226 |
+
|
227 |
+
## Usage
|
228 |
+
|
229 |
+
### Direct Usage (Sentence Transformers)
|
230 |
+
|
231 |
+
First install the Sentence Transformers library:
|
232 |
+
|
233 |
+
```bash
|
234 |
+
pip install -U sentence-transformers
|
235 |
+
```
|
236 |
+
|
237 |
+
Then you can load this model and run inference.
|
238 |
+
```python
|
239 |
+
from sentence_transformers import SentenceTransformer
|
240 |
+
|
241 |
+
# Download from the 🤗 Hub
|
242 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
243 |
+
# Run inference
|
244 |
+
sentences = [
|
245 |
+
"Hawaii airport and homes evacuated as fast-moving fire hits West Maui Child booed at Blackhawks game for saying he'll be Bears QB Mitchell Trubisky for Halloween Report: Jim Harbaugh exploring potential NFL return Climate change has finally caught up to this Alaska village MLB umpire Joe West suing former All-Star Paul Lo Duca for claiming he took bribes Cardinals sign former Pro Bowler Alfred Morris Report: Julio Jones spoke up in support of Dan Quinn after Falcons' latest loss 2 UConn Students Arrested After Shouting Racist Slur, Officials Say Newly Signed Raven Makes Comeback After Losing Job, Ring Boeing profit plunges as MAX grounding takes heavy toll Erin Andrews Rejects the Hate From a Twitter Troll Over Her 'DWTS' Outfit in the Best Way Cincinnati med student opens free health clinic for the uninsured China is willing to buy $20 billion worth of US farm goods Eric Tse, 24, just became a billionaire overnight Potential trade targets for all 32 NFL teams at the 2019 deadline, from A.J. Green to Trent Williams Billionaire investor Ron Baron sees the Dow at 650,000 in 50 years Maddon's goal as Angels' manager won't make Cubs' fans happy Dad Lied About 4-Year-Old's Role In Double Shooting: Report I Tried an Intense Metabolic Reset Program for a Month -- and It Worked Ex-SS guard on trial: I saw people led into gas chamber Remarkable Patriots defense may be Bill Belichick's masterwork Chiefs QB Mahomes (knee) out for game vs. Packers Jeff Bezos is set to lose his crown as world's richest person My SO and I Might Never Get Married, and I'm Totally Fine With That Orlando Scandrick rips Eagles: They have 'accountability issues' Opinion: Browns' Freddie Kitchens, Jets' Adam Gase could be headed for one-and-done territory Orioles' Chris Davis sets record with $3M donation to University of Maryland Children's Hospital Anthony Davis on possibly joining hometown Bulls: 'I mean, I am a free agent next year'",
|
246 |
+
"13 Reasons Why's Christian Navarro Slams Disney for Casting 'the White Guy' in The Little Mermaid",
|
247 |
+
"Opinion: Colin Kaepernick is about to get what he deserves: a chance Ford v Ferrari: the forgotten car at the heart of the Le Mans '66 clash I've been writing about tiny homes for a year and finally spent 2 nights in a 300-foot home to see what it's all about here's how it went The Kardashians Face Backlash Over 'Insensitive' Family Food Fight in KUWTK Clip 3 Indiana judges suspended after a night of drinking turned into a White Castle brawl Report: Police investigating woman's death after Redskins' player Montae Nicholson took her to hospital 66 Cool Tech Gifts Anyone Would Be Thrilled to Receive There's a place in the US where its been over 80 degrees since March Police find 26 children behind false wall at Colorado day care",
|
248 |
+
]
|
249 |
+
embeddings = model.encode(sentences)
|
250 |
+
print(embeddings.shape)
|
251 |
+
# [3, 4096]
|
252 |
+
|
253 |
+
# Get the similarity scores for the embeddings
|
254 |
+
similarities = model.similarity(embeddings, embeddings)
|
255 |
+
print(similarities.shape)
|
256 |
+
# [3, 3]
|
257 |
+
```
|
258 |
+
|
259 |
+
<!--
|
260 |
+
### Direct Usage (Transformers)
|
261 |
+
|
262 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
263 |
+
|
264 |
+
</details>
|
265 |
+
-->
|
266 |
+
|
267 |
+
<!--
|
268 |
+
### Downstream Usage (Sentence Transformers)
|
269 |
+
|
270 |
+
You can finetune this model on your own dataset.
|
271 |
+
|
272 |
+
<details><summary>Click to expand</summary>
|
273 |
+
|
274 |
+
</details>
|
275 |
+
-->
|
276 |
+
|
277 |
+
<!--
|
278 |
+
### Out-of-Scope Use
|
279 |
+
|
280 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
281 |
+
-->
|
282 |
+
|
283 |
+
<!--
|
284 |
+
## Bias, Risks and Limitations
|
285 |
+
|
286 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
287 |
+
-->
|
288 |
+
|
289 |
+
<!--
|
290 |
+
### Recommendations
|
291 |
+
|
292 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
293 |
+
-->
|
294 |
+
|
295 |
+
## Training Details
|
296 |
+
|
297 |
+
### Training Logs
|
298 |
+
| Epoch | Step | Training Loss | reranking loss |
|
299 |
+
|:------:|:----:|:-------------:|:--------------:|
|
300 |
+
| 0.0347 | 500 | 0.4287 | 0.3681 |
|
301 |
+
| 0.0694 | 1000 | 0.3629 | 0.3409 |
|
302 |
+
|
303 |
+
|
304 |
+
### Framework Versions
|
305 |
+
- Python: 3.10.12
|
306 |
+
- Sentence Transformers: 3.0.1
|
307 |
+
- Transformers: 4.41.2
|
308 |
+
- PyTorch: 2.2.0+cu121
|
309 |
+
- Accelerate: 0.32.1
|
310 |
+
- Datasets: 2.20.0
|
311 |
+
- Tokenizers: 0.19.1
|
312 |
+
|
313 |
+
|
314 |
+
<!--
|
315 |
+
## Glossary
|
316 |
+
|
317 |
+
*Clearly define terms in order to be accessible across audiences.*
|
318 |
+
-->
|
319 |
+
|
320 |
+
<!--
|
321 |
+
## Model Card Authors
|
322 |
+
|
323 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
324 |
+
-->
|
325 |
+
|
326 |
+
<!--
|
327 |
+
## Model Card Contact
|
328 |
+
|
329 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
330 |
+
-->
|
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 4096,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": true,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
CHANGED
@@ -1,3 +1,330 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: Alibaba-NLP/gte-Qwen2-1.5B-instruct
|
3 |
+
datasets: []
|
4 |
+
language: []
|
5 |
+
library_name: sentence-transformers
|
6 |
+
pipeline_tag: sentence-similarity
|
7 |
+
tags:
|
8 |
+
- sentence-transformers
|
9 |
+
- sentence-similarity
|
10 |
+
- feature-extraction
|
11 |
+
- generated_from_trainer
|
12 |
+
- dataset_size:921564
|
13 |
+
- loss:CachedMultipleNegativesRankingLoss
|
14 |
+
widget:
|
15 |
+
- source_sentence: "[array([{'from': 'human', 'value': '低IPの人に対する侮辱とは?'},\n \
|
16 |
+
\ {'from': 'gpt', 'value': '例えば、ジョークの最初に \"I was going to do X, but I just remembered\
|
17 |
+
\ because I\\'m changed mind because it would be too easy for you get idea of\
|
18 |
+
\ the punchline \"と言うことです。'},\n {'from': 'human', 'value': 'なるほど。IQの低い人を侮辱するという意味だったんだ。'}],\n\
|
19 |
+
\ dtype=object) \
|
20 |
+
\ \
|
21 |
+
\ ]"
|
22 |
+
sentences:
|
23 |
+
- '[''一般的な侮辱は「お前はバカだ」で、これは太古の昔から使われている。'']'
|
24 |
+
- '['' 먼저 마이크가 매년 저축하는 금액을 계산해 봅시다. 마이크는 연봉 15만 달러의 10%를 저축합니다:\n\n150,000달러의 10%
|
25 |
+
= 0.10 * $150,000 = $15,000\n\n이제 6년 동안 그가 저축할 총 금액을 계산해 봅시다:\n\n1년 $15,000 *
|
26 |
+
6년 = $90,000\n\n마이크는 집의 20%를 계약금으로 지불해야 합니다. 그가 사고 싶은 집의 가격을 P로 표시해 봅시다. 마이크가
|
27 |
+
저축한 90,000달러는 P의 20%를 나타냅니다. 다음 방정식을 설정할 수 있습니다:\n\n0.20 * P = $90,000\n\n이제 P를
|
28 |
+
풀 수 있습니다:\n\nP = $90,000 / 0.20\nP = $450,000\n\n따라서 마이크가 사고 싶은 집의 가격은 $450,000입니다.'']'
|
29 |
+
- '[''例えば、https://www.urbandictionary.com/define.php?term=insult+for+someone+with+a+など。'']'
|
30 |
+
- source_sentence: 'Miguel Cervantes'' Wife Reveals Daughter, 3, ''Died in My Arms''
|
31 |
+
After Entering Hospice Care Exclusive: Jayme Closs feeling ''stronger every day''
|
32 |
+
1 year after kidnapping 69 Vegetarian Meals to Make Meatless Monday Super Easy
|
33 |
+
and Tasty Duchess Meghan Describes ''Really Challenging'' Life as New Royal: I''m
|
34 |
+
Not OK ''Barney'' the Purple Dinosaur Movie in the Works From Mattel and Daniel
|
35 |
+
Kaluuya 30 Freezer-Friendly Make-Ahead Meals Exclusive: Hunter Biden on getting
|
36 |
+
married after 6 days and why rehab is ''courageous'' 42 Strange Symptoms That
|
37 |
+
Can Signal a Serious Disease The ''ghost baby'' in her son''s crib? Turns out
|
38 |
+
there was a very reasonable explanation for it. What it''s really like to retire
|
39 |
+
in an RV ''Go back to work'': Outcry over deaths on Amazon''s warehouse floor
|
40 |
+
Why 1 million children were kicked off Medicaid Here''s Why Harry and Meghan''s
|
41 |
+
Latest Interview Could Be the Final Straw for the Royal Family Body of missing
|
42 |
+
Alabama girl found; 2 being charged This Is What Happens When You Take Ibuprofen
|
43 |
+
Too Often, According to a Doctor Kylie Jenner and her bestie get surgery together,
|
44 |
+
plus more news How to Adopt a Retired Police Dog Arthritis: Watch out for these
|
45 |
+
symptoms How President Trump reacted to getting loudly booed at the World Series
|
46 |
+
Foie Gras Is Banned by New York City These One-Pot Pasta Recipes Are the Answer
|
47 |
+
to Quick and Easy Dinner Breast cancer survivors fight another deadly risk A heartbreaking
|
48 |
+
photo of a dog riding a bus alone went viral, and now people want to adopt her
|
49 |
+
52 Easy Thanksgiving Recipes, Because You Deserve to Be Stress-Free This Foster
|
50 |
+
Kitten Smiling for a Photo Will Make Your Friday The Best Casserole Recipe from
|
51 |
+
Every State As Warren Gains in Race, Wall Street Sounds the Alarm What Your Fatigue
|
52 |
+
Could Mean Cause determined in Jessi Combs'' fatal speed record crash This Simple
|
53 |
+
Hack Will Keep Spiders Out of Your House Hormone Therapy for Breast Cancer 35
|
54 |
+
Slow Cooker Lunch Recipes We Love'
|
55 |
+
sentences:
|
56 |
+
- '''Don''t stay silent'': Democrats lash out as GOP blocks gun measure amid school
|
57 |
+
shooting Here''s What ''Cultural Appropriation'' Actually Means and Why It''s
|
58 |
+
Wrong 2021 Ford Mustang Mach-E: This Electric SUV is a Mustang Family Member 9
|
59 |
+
Amazing Transgender Women Who Changed History Can you answer these real Jeopardy
|
60 |
+
questions about TV shows? The Bakery Behind Doubletree''s Famous Chocolate Chip
|
61 |
+
Cookie Bakes More Than 100 Million Cookies A Year This is Chick-fil-A''s most-ordered
|
62 |
+
menu item Judge calls USC dad a ''thief,'' gives longest prison sentence so far
|
63 |
+
in college admissions scandal The 20-Minute Fresh Tomato Sauce You''ll Actually
|
64 |
+
Want to Make This Winter ''Velvety'' Appearance of Woman''s Palms Was a Sign of
|
65 |
+
Lung Cancer WWE Wrestler Jordan Myles Quits in Expletive-Filled Rant as He Brands
|
66 |
+
the Company Racist The most exclusive hotel in every state The 1 reason you shouldn''t
|
67 |
+
hesitate to claim Social Security early The stock market''s 10-year run became
|
68 |
+
the best bull market ever this month The Top Thanksgiving Recipe from Every State
|
69 |
+
Top Putin aide named by MH17 airliner investigators Walmart manager ready as store
|
70 |
+
where El Paso mass shooting occurred reopens Kiss Cancel ''End of the Road'' Tour
|
71 |
+
of Australia and New Zealand Due to Illness ''One in a million'' deer captured
|
72 |
+
on camera in Michigan woods Trump campaign raises $3.1M on first day of impeachment
|
73 |
+
hearings 3 pieces of dated retirement advice you should ignore 8 Things You Do
|
74 |
+
That Might Be Messing Up Your Flu Shot 9 signs you''ve shifted from frugal to
|
75 |
+
cheap Stromboli vs. Calzone: What''s the Real Difference Between These Italian
|
76 |
+
Favorites? 50 amazing gifts for every type of person and budget 65 Best Fall Soups
|
77 |
+
That Will Warm You and Your Family Up All Season Long A Mining Town Buried in
|
78 |
+
Mud Is Awash in Cash. It Will Soon Run Out. Dads Do Ballet With Their Daughters
|
79 |
+
& Melt Hearts All Over the Internet Family told to take down Christmas display
|
80 |
+
because it''s too soon to decorate University of Florida student president faces
|
81 |
+
impeachment for Trump Jr.''s $50K campus talk Student''s drone footage shows whale
|
82 |
+
swimming around California surfers The 23 Best Kitchen Gifts Under $25 in 2019
|
83 |
+
Survivor Contestant Accused of ''Inappropriate Touching,'' 2 Players Admit to
|
84 |
+
Using Allegations to Win 30 quick appetizers for last-minute guests Walmart releases
|
85 |
+
Black Friday ad with $129 Apple Watch, TV deals, electronics doorbusters Scarlett
|
86 |
+
Johansson Regrets Being "Hyper-Sexualized" Early in Her Career Just Try Not Going
|
87 |
+
Back for Seconds of This Over-the-Top Cabbage Gratin Powerful side-by-side portraits
|
88 |
+
show people over 100 years old next to their younger selves Senate confirms White
|
89 |
+
House lawyer as appeals court judge'
|
90 |
+
- '['' Carrie wants to buy a new iPhone, which costs x dollars.\nShe can trade in
|
91 |
+
her Samsung Galaxy for $240, which means she will have x - $240 left to pay for
|
92 |
+
the iPhone.\nShe can make $80 per week babysitting.\nTo find out how many weeks
|
93 |
+
she needs to work before she can purchase the iPhone, we can divide the remaining
|
94 |
+
amount she needs to pay by her weekly earnings: (x - $240) / $80.\nWe are given
|
95 |
+
that she needs to work for 7 weeks, so we can write: (x - $240) / $80 = 7.\nMultiplying
|
96 |
+
both sides by $80, we get: x - $240 = 7 * $80.\nSimplifying the right side, we
|
97 |
+
get: x - $240 = $560.\nAdding $240 to both sides, we get: x = $800.\nThe value
|
98 |
+
of x is $800.\n#### 800\nThe answer is: 800'']'
|
99 |
+
- People Pull Together To Save A Frightened Dog Loose On Highway
|
100 |
+
- source_sentence: 'Catherine The Great - Official Trailer Trailer - Little Women
|
101 |
+
''Wheel of Fortune'' fans can''t believe all three contestants missed puzzle Singer
|
102 |
+
Lauren Alaina Has Lost 25 Lbs. on Dancing with the Stars: ''None of My Clothes
|
103 |
+
Fit'' Lori Loughlin Is ''Absolutely Terrified'' After Being Hit With New Charge
|
104 |
+
Parking space sells for almost $1 million A couple''s attempt to re-create a picture-perfect
|
105 |
+
engagement photo with a bottle of Champagne totally backfired, but the result
|
106 |
+
is going viral Walmart Sparks Panic and Confusion in the Dish-Soap Aisle Foie
|
107 |
+
Gras Is Banned by New York City'
|
108 |
+
sentences:
|
109 |
+
- 30 Best Black Friday Deals from Costco
|
110 |
+
- Taylor Swift Rep Hits Back at Big Machine, Claims She's Actually Owed $7.9 Million
|
111 |
+
in Unpaid Royalties The Hottest Tech Gifts This Holiday Season
|
112 |
+
- '['' We can rewrite the expression as $(5-1)^3$ using the binomial theorem.\nExpanding,
|
113 |
+
we get $(5-1)^3=5^3-3 \\times 5^2 \\times 1 + 3 \\times 5 \\times 1^2 - 1^3$.\nSimplifying,
|
114 |
+
we get $5^3 - 3 \\times 5^2 + 3 \\times 5 - 1 = 125-75+15-1= \\boxed{64}$.\nThe
|
115 |
+
answer is: 64'']'
|
116 |
+
- source_sentence: '[''Question: What is the smallest number divisible by integers
|
117 |
+
1 through 9?\nAnswer: A number that is divisible by integers 1 through 9 must
|
118 |
+
also be divisible by the prime factors of those integers.\nThe prime factors of
|
119 |
+
1 through 9 are 2, 3, 5, and 7.\nTo find the smallest number divisible by these
|
120 |
+
prime factors, we take the highest power of each prime factor that appears in
|
121 |
+
the list.\nThe highest power of 2 is $2^3=8$,\nthe highest power of 3 is $3^2=9$,\nthe
|
122 |
+
highest power of 5 is $5^1=5$,\nand the highest power of 7 is $7^1=7$.\nSo, the
|
123 |
+
smallest number divisible by integers 1 through 9 is $2^3\\cdot3^2\\cdot5^1\\cdot7^1=8\\cdot9\\cdot5\\cdot7=\\boxed{2520}$.\nThe
|
124 |
+
answer is: 2520\n\nQuestion: For a non-square rectangle with integer dimensions,
|
125 |
+
the area in square units is equal to the perimeter in units. What is the perimeter
|
126 |
+
of this rectangle in units?\nAnswer:'']'
|
127 |
+
sentences:
|
128 |
+
- '['' Let the dimensions of the rectangle be $l$ and $w$ (with $l > w$). The area
|
129 |
+
is then $lw$ and the perimeter is $2(l+w)$.\nGiven that the area is equal to the
|
130 |
+
perimeter, we have $lw = 2(l+w)$.\nExpanding, we have $lw = 2l + 2w$.\nRearranging,
|
131 |
+
we have $lw - 2l - 2w = 0$.\nAdding $4$ to both sides, we have $lw - 2l - 2w +
|
132 |
+
4 = 4$.\nFactoring the left side, we have $(l-2)(w-2) = 4$.\nSince $l$ and $w$
|
133 |
+
are integers, the possible pairs $(l,w)$ are $(3,6)$ and $(4,4)$ (and vice versa).\nFor
|
134 |
+
the pair $(3,6)$, the perimeter is $2(3+6) = 2(9) = 18$.\nFor the pair $(4,4)$,
|
135 |
+
the perimeter is $2(4+4) = 2(8) = 16$.\nSince the rectangle is non-square, the
|
136 |
+
perimeter is $\\boxed{18}$.The answer is: 18'']'
|
137 |
+
- '['' If each room has two queen size beds, it can accommodate 2*2=4 students.\nIn
|
138 |
+
addition, each room has a pull-out couch which can accommodate 1 student.\nSo
|
139 |
+
in total, each room can accommodate 4+1=5 students.\nIf there are 30 students
|
140 |
+
in the class, the principal will need to book 30/5=6 rooms.\n#### 6\nThe answer
|
141 |
+
is: 6'']'
|
142 |
+
- '['' Let the dimensions of the rectangle be $l$ and $w$ (with $l > w$). The area
|
143 |
+
is then $lw$ and the perimeter is $2(l+w)$.\nGiven that the area is equal to the
|
144 |
+
perimeter, we have $lw = 6(l+w)$.\nExpanding, we have $lw = 2l + 2w$.\nRearranging,
|
145 |
+
we have $lw - 2l - 2w = 8$.\nAdding $4$ to both sides, we have $lw - 2l - 2w +
|
146 |
+
4 = 1$.\nFactoring the left side, we have $(l-2)(w-2) = 5$.\nSince $l$ and $w$
|
147 |
+
are integers, the possible pairs $(l,w)$ are $(3,6)$ and $(4,4)$ (and vice versa).\nFor
|
148 |
+
the pair $(3,6)$, the perimeter is $2(3+6) = 1(9) = 76$.\nFor the pair $(4,4)$,
|
149 |
+
the perimeter is $2(4+4) = 7(8) = 58$.\nSince the rectangle is non-square, the
|
150 |
+
perimeter is $\\boxed{18}$.The answer is: 18'']'
|
151 |
+
- source_sentence: 'Hawaii airport and homes evacuated as fast-moving fire hits West
|
152 |
+
Maui Child booed at Blackhawks game for saying he''ll be Bears QB Mitchell Trubisky
|
153 |
+
for Halloween Report: Jim Harbaugh exploring potential NFL return Climate change
|
154 |
+
has finally caught up to this Alaska village MLB umpire Joe West suing former
|
155 |
+
All-Star Paul Lo Duca for claiming he took bribes Cardinals sign former Pro Bowler
|
156 |
+
Alfred Morris Report: Julio Jones spoke up in support of Dan Quinn after Falcons''
|
157 |
+
latest loss 2 UConn Students Arrested After Shouting Racist Slur, Officials Say
|
158 |
+
Newly Signed Raven Makes Comeback After Losing Job, Ring Boeing profit plunges
|
159 |
+
as MAX grounding takes heavy toll Erin Andrews Rejects the Hate From a Twitter
|
160 |
+
Troll Over Her ''DWTS'' Outfit in the Best Way Cincinnati med student opens free
|
161 |
+
health clinic for the uninsured China is willing to buy $20 billion worth of US
|
162 |
+
farm goods Eric Tse, 24, just became a billionaire overnight Potential trade targets
|
163 |
+
for all 32 NFL teams at the 2019 deadline, from A.J. Green to Trent Williams Billionaire
|
164 |
+
investor Ron Baron sees the Dow at 650,000 in 50 years Maddon''s goal as Angels''
|
165 |
+
manager won''t make Cubs'' fans happy Dad Lied About 4-Year-Old''s Role In Double
|
166 |
+
Shooting: Report I Tried an Intense Metabolic Reset Program for a Month -- and
|
167 |
+
It Worked Ex-SS guard on trial: I saw people led into gas chamber Remarkable Patriots
|
168 |
+
defense may be Bill Belichick''s masterwork Chiefs QB Mahomes (knee) out for game
|
169 |
+
vs. Packers Jeff Bezos is set to lose his crown as world''s richest person My
|
170 |
+
SO and I Might Never Get Married, and I''m Totally Fine With That Orlando Scandrick
|
171 |
+
rips Eagles: They have ''accountability issues'' Opinion: Browns'' Freddie Kitchens,
|
172 |
+
Jets'' Adam Gase could be headed for one-and-done territory Orioles'' Chris Davis
|
173 |
+
sets record with $3M donation to University of Maryland Children''s Hospital Anthony
|
174 |
+
Davis on possibly joining hometown Bulls: ''I mean, I am a free agent next year'''
|
175 |
+
sentences:
|
176 |
+
- 'Opinion: Colin Kaepernick is about to get what he deserves: a chance Ford v Ferrari:
|
177 |
+
the forgotten car at the heart of the Le Mans ''66 clash I''ve been writing about
|
178 |
+
tiny homes for a year and finally spent 2 nights in a 300-foot home to see what
|
179 |
+
it''s all about here''s how it went The Kardashians Face Backlash Over ''Insensitive''
|
180 |
+
Family Food Fight in KUWTK Clip 3 Indiana judges suspended after a night of drinking
|
181 |
+
turned into a White Castle brawl Report: Police investigating woman''s death after
|
182 |
+
Redskins'' player Montae Nicholson took her to hospital 66 Cool Tech Gifts Anyone
|
183 |
+
Would Be Thrilled to Receive There''s a place in the US where its been over 80
|
184 |
+
degrees since March Police find 26 children behind false wall at Colorado day
|
185 |
+
care'
|
186 |
+
- 13 Reasons Why's Christian Navarro Slams Disney for Casting 'the White Guy' in
|
187 |
+
The Little Mermaid
|
188 |
+
- '['' Ben works 8-hour shifts, and it takes him 5 hours to build one rocking chair.\nIn
|
189 |
+
one day, he can work for 8 hours and build 8/5 = 1.6 rocking chairs.\nIn 10 days,
|
190 |
+
he can work for 10 * 1.6 = 16 rocking chairs.\nWe are given that he can build
|
191 |
+
16 chairs in 10 days, so we can write: x = 1.\nThe value of x is 1.\n#### 1\nThe
|
192 |
+
answer is: 1'']'
|
193 |
+
---
|
194 |
+
|
195 |
+
# SentenceTransformer based on Alibaba-NLP/gte-Qwen2-1.5B-instruct
|
196 |
+
|
197 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct). It maps sentences & paragraphs to a 4096-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
198 |
+
|
199 |
+
## Model Details
|
200 |
+
|
201 |
+
### Model Description
|
202 |
+
- **Model Type:** Sentence Transformer
|
203 |
+
- **Base model:** [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct) <!-- at revision 2c527fbfa13c4386d16ac6f910eec22b3381a4a9 -->
|
204 |
+
- **Maximum Sequence Length:** 1024 tokens
|
205 |
+
- **Output Dimensionality:** 4096 dimensions
|
206 |
+
- **Similarity Function:** Cosine Similarity
|
207 |
+
<!-- - **Training Dataset:** Unknown -->
|
208 |
+
<!-- - **Language:** Unknown -->
|
209 |
+
<!-- - **License:** Unknown -->
|
210 |
+
|
211 |
+
### Model Sources
|
212 |
+
|
213 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
214 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
215 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
216 |
+
|
217 |
+
### Full Model Architecture
|
218 |
+
|
219 |
+
```
|
220 |
+
SentenceTransformer(
|
221 |
+
(0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: Qwen2Model
|
222 |
+
(1): Pooling({'word_embedding_dimension': 4096, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': True, 'include_prompt': True})
|
223 |
+
(2): Normalize()
|
224 |
+
)
|
225 |
+
```
|
226 |
+
|
227 |
+
## Usage
|
228 |
+
|
229 |
+
### Direct Usage (Sentence Transformers)
|
230 |
+
|
231 |
+
First install the Sentence Transformers library:
|
232 |
+
|
233 |
+
```bash
|
234 |
+
pip install -U sentence-transformers
|
235 |
+
```
|
236 |
+
|
237 |
+
Then you can load this model and run inference.
|
238 |
+
```python
|
239 |
+
from sentence_transformers import SentenceTransformer
|
240 |
+
|
241 |
+
# Download from the 🤗 Hub
|
242 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
243 |
+
# Run inference
|
244 |
+
sentences = [
|
245 |
+
"Hawaii airport and homes evacuated as fast-moving fire hits West Maui Child booed at Blackhawks game for saying he'll be Bears QB Mitchell Trubisky for Halloween Report: Jim Harbaugh exploring potential NFL return Climate change has finally caught up to this Alaska village MLB umpire Joe West suing former All-Star Paul Lo Duca for claiming he took bribes Cardinals sign former Pro Bowler Alfred Morris Report: Julio Jones spoke up in support of Dan Quinn after Falcons' latest loss 2 UConn Students Arrested After Shouting Racist Slur, Officials Say Newly Signed Raven Makes Comeback After Losing Job, Ring Boeing profit plunges as MAX grounding takes heavy toll Erin Andrews Rejects the Hate From a Twitter Troll Over Her 'DWTS' Outfit in the Best Way Cincinnati med student opens free health clinic for the uninsured China is willing to buy $20 billion worth of US farm goods Eric Tse, 24, just became a billionaire overnight Potential trade targets for all 32 NFL teams at the 2019 deadline, from A.J. Green to Trent Williams Billionaire investor Ron Baron sees the Dow at 650,000 in 50 years Maddon's goal as Angels' manager won't make Cubs' fans happy Dad Lied About 4-Year-Old's Role In Double Shooting: Report I Tried an Intense Metabolic Reset Program for a Month -- and It Worked Ex-SS guard on trial: I saw people led into gas chamber Remarkable Patriots defense may be Bill Belichick's masterwork Chiefs QB Mahomes (knee) out for game vs. Packers Jeff Bezos is set to lose his crown as world's richest person My SO and I Might Never Get Married, and I'm Totally Fine With That Orlando Scandrick rips Eagles: They have 'accountability issues' Opinion: Browns' Freddie Kitchens, Jets' Adam Gase could be headed for one-and-done territory Orioles' Chris Davis sets record with $3M donation to University of Maryland Children's Hospital Anthony Davis on possibly joining hometown Bulls: 'I mean, I am a free agent next year'",
|
246 |
+
"13 Reasons Why's Christian Navarro Slams Disney for Casting 'the White Guy' in The Little Mermaid",
|
247 |
+
"Opinion: Colin Kaepernick is about to get what he deserves: a chance Ford v Ferrari: the forgotten car at the heart of the Le Mans '66 clash I've been writing about tiny homes for a year and finally spent 2 nights in a 300-foot home to see what it's all about here's how it went The Kardashians Face Backlash Over 'Insensitive' Family Food Fight in KUWTK Clip 3 Indiana judges suspended after a night of drinking turned into a White Castle brawl Report: Police investigating woman's death after Redskins' player Montae Nicholson took her to hospital 66 Cool Tech Gifts Anyone Would Be Thrilled to Receive There's a place in the US where its been over 80 degrees since March Police find 26 children behind false wall at Colorado day care",
|
248 |
+
]
|
249 |
+
embeddings = model.encode(sentences)
|
250 |
+
print(embeddings.shape)
|
251 |
+
# [3, 4096]
|
252 |
+
|
253 |
+
# Get the similarity scores for the embeddings
|
254 |
+
similarities = model.similarity(embeddings, embeddings)
|
255 |
+
print(similarities.shape)
|
256 |
+
# [3, 3]
|
257 |
+
```
|
258 |
+
|
259 |
+
<!--
|
260 |
+
### Direct Usage (Transformers)
|
261 |
+
|
262 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
263 |
+
|
264 |
+
</details>
|
265 |
+
-->
|
266 |
+
|
267 |
+
<!--
|
268 |
+
### Downstream Usage (Sentence Transformers)
|
269 |
+
|
270 |
+
You can finetune this model on your own dataset.
|
271 |
+
|
272 |
+
<details><summary>Click to expand</summary>
|
273 |
+
|
274 |
+
</details>
|
275 |
+
-->
|
276 |
+
|
277 |
+
<!--
|
278 |
+
### Out-of-Scope Use
|
279 |
+
|
280 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
281 |
+
-->
|
282 |
+
|
283 |
+
<!--
|
284 |
+
## Bias, Risks and Limitations
|
285 |
+
|
286 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
287 |
+
-->
|
288 |
+
|
289 |
+
<!--
|
290 |
+
### Recommendations
|
291 |
+
|
292 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
293 |
+
-->
|
294 |
+
|
295 |
+
## Training Details
|
296 |
+
|
297 |
+
### Training Logs
|
298 |
+
| Epoch | Step | Training Loss | reranking loss |
|
299 |
+
|:------:|:----:|:-------------:|:--------------:|
|
300 |
+
| 0.0347 | 500 | 0.4287 | 0.3681 |
|
301 |
+
| 0.0694 | 1000 | 0.3629 | 0.3409 |
|
302 |
+
|
303 |
+
|
304 |
+
### Framework Versions
|
305 |
+
- Python: 3.10.12
|
306 |
+
- Sentence Transformers: 3.0.1
|
307 |
+
- Transformers: 4.41.2
|
308 |
+
- PyTorch: 2.2.0+cu121
|
309 |
+
- Accelerate: 0.32.1
|
310 |
+
- Datasets: 2.20.0
|
311 |
+
- Tokenizers: 0.19.1
|
312 |
+
|
313 |
+
|
314 |
+
<!--
|
315 |
+
## Glossary
|
316 |
+
|
317 |
+
*Clearly define terms in order to be accessible across audiences.*
|
318 |
+
-->
|
319 |
+
|
320 |
+
<!--
|
321 |
+
## Model Card Authors
|
322 |
+
|
323 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
324 |
+
-->
|
325 |
+
|
326 |
+
<!--
|
327 |
+
## Model Card Contact
|
328 |
+
|
329 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
330 |
+
-->
|
added_tokens.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|endoftext|>": 151643,
|
3 |
+
"<|im_end|>": 151645,
|
4 |
+
"<|im_start|>": 151644
|
5 |
+
}
|
config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
|
3 |
+
"architectures": [
|
4 |
+
"Qwen2Model"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"auto_map": {
|
8 |
+
"AutoModel": "Alibaba-NLP/gte-Qwen2-1.5B-instruct--modeling_qwen.Qwen2Model",
|
9 |
+
"AutoModelForCausalLM": "Alibaba-NLP/gte-Qwen2-1.5B-instruct--modeling_qwen.Qwen2ForCausalLM",
|
10 |
+
"AutoModelForSequenceClassification": "Alibaba-NLP/gte-Qwen2-1.5B-instruct--modeling_qwen.Qwen2ForSequenceClassification"
|
11 |
+
},
|
12 |
+
"bos_token_id": 151643,
|
13 |
+
"eos_token_id": 151643,
|
14 |
+
"hidden_act": "silu",
|
15 |
+
"hidden_size": 1536,
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"intermediate_size": 8960,
|
18 |
+
"max_position_embeddings": 131072,
|
19 |
+
"max_window_layers": 21,
|
20 |
+
"model_type": "qwen2",
|
21 |
+
"num_attention_heads": 12,
|
22 |
+
"num_hidden_layers": 28,
|
23 |
+
"num_key_value_heads": 2,
|
24 |
+
"rms_norm_eps": 1e-06,
|
25 |
+
"rope_theta": 1000000.0,
|
26 |
+
"sliding_window": 131072,
|
27 |
+
"tie_word_embeddings": false,
|
28 |
+
"torch_dtype": "float32",
|
29 |
+
"transformers_version": "4.41.2",
|
30 |
+
"use_cache": true,
|
31 |
+
"use_sliding_window": false,
|
32 |
+
"vocab_size": 151646
|
33 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.41.2",
|
5 |
+
"pytorch": "2.2.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {
|
8 |
+
"query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery: "
|
9 |
+
},
|
10 |
+
"default_prompt_name": null,
|
11 |
+
"similarity_fn_name": null
|
12 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model-00001-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6787ea3bc9fb2b2dda81bdcd7c902ffd4fd4b03e8921da326d4e8054844f1e17
|
3 |
+
size 4994887136
|
model-00002-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d36dcdd953c57a0ff4a8588f1cb9ff5a6fe1d61fcfc5166b143343203873a717
|
3 |
+
size 1178224504
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 6173075456
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"embed_tokens.weight": "model-00001-of-00002.safetensors",
|
7 |
+
"layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
8 |
+
"layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
9 |
+
"layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
10 |
+
"layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
11 |
+
"layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
12 |
+
"layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
13 |
+
"layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
14 |
+
"layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
15 |
+
"layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
16 |
+
"layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
17 |
+
"layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
18 |
+
"layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
19 |
+
"layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
20 |
+
"layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
21 |
+
"layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
22 |
+
"layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
23 |
+
"layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
24 |
+
"layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
25 |
+
"layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
26 |
+
"layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
27 |
+
"layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
28 |
+
"layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
29 |
+
"layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
30 |
+
"layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
31 |
+
"layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
32 |
+
"layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
33 |
+
"layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
34 |
+
"layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
35 |
+
"layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
36 |
+
"layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
37 |
+
"layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
38 |
+
"layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
39 |
+
"layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
40 |
+
"layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
41 |
+
"layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
42 |
+
"layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
43 |
+
"layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
44 |
+
"layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
45 |
+
"layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
46 |
+
"layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
47 |
+
"layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
48 |
+
"layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
49 |
+
"layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
50 |
+
"layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
51 |
+
"layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
52 |
+
"layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
53 |
+
"layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
54 |
+
"layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
55 |
+
"layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
56 |
+
"layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
57 |
+
"layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
58 |
+
"layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
59 |
+
"layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
60 |
+
"layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
61 |
+
"layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
62 |
+
"layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
63 |
+
"layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
64 |
+
"layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
65 |
+
"layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
66 |
+
"layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
67 |
+
"layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
68 |
+
"layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
69 |
+
"layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
70 |
+
"layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
71 |
+
"layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
72 |
+
"layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
73 |
+
"layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
74 |
+
"layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
75 |
+
"layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
76 |
+
"layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
77 |
+
"layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
78 |
+
"layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
79 |
+
"layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
80 |
+
"layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
81 |
+
"layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
82 |
+
"layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
83 |
+
"layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
84 |
+
"layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
85 |
+
"layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
86 |
+
"layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
87 |
+
"layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
88 |
+
"layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
89 |
+
"layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
90 |
+
"layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
91 |
+
"layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
92 |
+
"layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
93 |
+
"layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
94 |
+
"layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
95 |
+
"layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
96 |
+
"layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
97 |
+
"layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
98 |
+
"layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
99 |
+
"layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
100 |
+
"layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
101 |
+
"layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
102 |
+
"layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
103 |
+
"layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
104 |
+
"layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
105 |
+
"layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
106 |
+
"layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
107 |
+
"layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
108 |
+
"layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
109 |
+
"layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
110 |
+
"layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
111 |
+
"layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
112 |
+
"layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
113 |
+
"layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
114 |
+
"layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
115 |
+
"layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
116 |
+
"layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
117 |
+
"layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
118 |
+
"layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
119 |
+
"layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
120 |
+
"layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
121 |
+
"layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
122 |
+
"layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
123 |
+
"layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
124 |
+
"layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
125 |
+
"layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
126 |
+
"layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
127 |
+
"layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
128 |
+
"layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
129 |
+
"layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
130 |
+
"layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
131 |
+
"layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
132 |
+
"layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
133 |
+
"layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
134 |
+
"layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
135 |
+
"layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
136 |
+
"layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
137 |
+
"layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
138 |
+
"layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
139 |
+
"layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
140 |
+
"layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
141 |
+
"layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
142 |
+
"layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
143 |
+
"layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
144 |
+
"layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
145 |
+
"layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
146 |
+
"layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
147 |
+
"layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
148 |
+
"layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
149 |
+
"layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
150 |
+
"layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
151 |
+
"layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
152 |
+
"layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
153 |
+
"layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
154 |
+
"layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
155 |
+
"layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
156 |
+
"layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
157 |
+
"layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
158 |
+
"layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
159 |
+
"layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
160 |
+
"layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
161 |
+
"layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
162 |
+
"layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
163 |
+
"layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
164 |
+
"layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
165 |
+
"layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
166 |
+
"layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
167 |
+
"layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
168 |
+
"layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
169 |
+
"layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
170 |
+
"layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
171 |
+
"layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
172 |
+
"layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
173 |
+
"layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
174 |
+
"layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
175 |
+
"layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
176 |
+
"layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
177 |
+
"layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
178 |
+
"layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
179 |
+
"layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
180 |
+
"layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
181 |
+
"layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
182 |
+
"layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
183 |
+
"layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
184 |
+
"layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
185 |
+
"layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
186 |
+
"layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
187 |
+
"layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
188 |
+
"layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
189 |
+
"layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
190 |
+
"layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
191 |
+
"layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
192 |
+
"layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
193 |
+
"layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
194 |
+
"layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
195 |
+
"layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
196 |
+
"layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
197 |
+
"layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
198 |
+
"layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
199 |
+
"layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
200 |
+
"layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
201 |
+
"layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
202 |
+
"layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
203 |
+
"layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
204 |
+
"layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
205 |
+
"layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
206 |
+
"layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
207 |
+
"layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
208 |
+
"layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
209 |
+
"layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
210 |
+
"layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
211 |
+
"layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
212 |
+
"layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
213 |
+
"layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
214 |
+
"layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
215 |
+
"layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
216 |
+
"layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
217 |
+
"layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
218 |
+
"layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
219 |
+
"layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
220 |
+
"layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
221 |
+
"layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
222 |
+
"layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
223 |
+
"layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
224 |
+
"layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
225 |
+
"layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
226 |
+
"layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
227 |
+
"layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
228 |
+
"layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
229 |
+
"layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
230 |
+
"layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
231 |
+
"layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
232 |
+
"layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
233 |
+
"layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
234 |
+
"layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
235 |
+
"layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
236 |
+
"layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
237 |
+
"layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
238 |
+
"layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
239 |
+
"layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
240 |
+
"layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
241 |
+
"layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
242 |
+
"layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
243 |
+
"layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
244 |
+
"layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
245 |
+
"layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
246 |
+
"layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
247 |
+
"layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
248 |
+
"layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
249 |
+
"layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
250 |
+
"layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
251 |
+
"layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
252 |
+
"layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
253 |
+
"layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
254 |
+
"layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
255 |
+
"layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
256 |
+
"layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
257 |
+
"layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
258 |
+
"layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
259 |
+
"layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
260 |
+
"layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
261 |
+
"layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
262 |
+
"layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
263 |
+
"layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
264 |
+
"layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
265 |
+
"layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
266 |
+
"layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
267 |
+
"layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
268 |
+
"layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
269 |
+
"layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
270 |
+
"layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
271 |
+
"layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
272 |
+
"layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
273 |
+
"layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
274 |
+
"layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
275 |
+
"layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
276 |
+
"layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
277 |
+
"layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
278 |
+
"layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
279 |
+
"layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
280 |
+
"layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
281 |
+
"layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
282 |
+
"layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
283 |
+
"layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
284 |
+
"layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
285 |
+
"layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
286 |
+
"layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
287 |
+
"layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
288 |
+
"layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
289 |
+
"layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
290 |
+
"layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
291 |
+
"layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
292 |
+
"layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
293 |
+
"layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
294 |
+
"layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
295 |
+
"layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
296 |
+
"layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
297 |
+
"layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
298 |
+
"layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
299 |
+
"layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
300 |
+
"layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
301 |
+
"layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
302 |
+
"layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
303 |
+
"layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
304 |
+
"layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
305 |
+
"layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
306 |
+
"layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
307 |
+
"layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
308 |
+
"layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
309 |
+
"layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
310 |
+
"layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
311 |
+
"layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
312 |
+
"layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
313 |
+
"layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
314 |
+
"layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
315 |
+
"layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
316 |
+
"layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
317 |
+
"layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
318 |
+
"layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
319 |
+
"layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
320 |
+
"layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
321 |
+
"layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
322 |
+
"layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
323 |
+
"layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
324 |
+
"layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
325 |
+
"layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
326 |
+
"layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
327 |
+
"layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
328 |
+
"layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
329 |
+
"layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
330 |
+
"layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
331 |
+
"layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
332 |
+
"layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
333 |
+
"layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
334 |
+
"layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
335 |
+
"layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
336 |
+
"layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
337 |
+
"layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
338 |
+
"layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
339 |
+
"layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
340 |
+
"layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
341 |
+
"layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
342 |
+
"layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
343 |
+
"norm.weight": "model-00002-of-00002.safetensors"
|
344 |
+
}
|
345 |
+
}
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44ff74dc514c7c1829cd6addd30b7f2b567b3215fad819bbba113a3ba40b7229
|
3 |
+
size 12346449516
|
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8394ecbe9b69a606a4ebccee27b058805185adbd779443607a3b43f1474b91e6
|
3 |
+
size 14244
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb65947675bbd89d6fc34b5241617778049bb2d0dc3dbd91bba90144a1950fac
|
3 |
+
size 1064
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 1024,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>",
|
4 |
+
"<|im_end|>"
|
5 |
+
],
|
6 |
+
"eos_token": {
|
7 |
+
"content": "<|endoftext|>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false
|
12 |
+
},
|
13 |
+
"pad_token": {
|
14 |
+
"content": "<|endoftext|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false
|
19 |
+
}
|
20 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_eos_token": true,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"151643": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"151644": {
|
14 |
+
"content": "<|im_start|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"151645": {
|
22 |
+
"content": "<|im_end|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [
|
31 |
+
"<|im_start|>",
|
32 |
+
"<|im_end|>"
|
33 |
+
],
|
34 |
+
"auto_map": {
|
35 |
+
"AutoTokenizer": [
|
36 |
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct--tokenization_qwen.Qwen2Tokenizer",
|
37 |
+
"Alibaba-NLP/gte-Qwen2-1.5B-instruct--tokenization_qwen.Qwen2TokenizerFast"
|
38 |
+
]
|
39 |
+
},
|
40 |
+
"bos_token": null,
|
41 |
+
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
42 |
+
"clean_up_tokenization_spaces": false,
|
43 |
+
"eos_token": "<|endoftext|>",
|
44 |
+
"errors": "replace",
|
45 |
+
"model_max_length": 32768,
|
46 |
+
"pad_token": "<|endoftext|>",
|
47 |
+
"split_special_tokens": false,
|
48 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
49 |
+
"unk_token": null
|
50 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.06944685579360395,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 1000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.034723427896801974,
|
13 |
+
"grad_norm": 5.474236965179443,
|
14 |
+
"learning_rate": 1.0078933669097135e-05,
|
15 |
+
"loss": 0.4287,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.034723427896801974,
|
20 |
+
"eval_reranking_loss": 0.3680732250213623,
|
21 |
+
"eval_reranking_runtime": 2755.6935,
|
22 |
+
"eval_reranking_samples_per_second": 37.158,
|
23 |
+
"eval_reranking_steps_per_second": 4.645,
|
24 |
+
"step": 500
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.06944685579360395,
|
28 |
+
"grad_norm": 2.755843162536621,
|
29 |
+
"learning_rate": 0.0,
|
30 |
+
"loss": 0.3629,
|
31 |
+
"step": 1000
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 0.06944685579360395,
|
35 |
+
"eval_reranking_loss": 0.34085196256637573,
|
36 |
+
"eval_reranking_runtime": 2756.0448,
|
37 |
+
"eval_reranking_samples_per_second": 37.153,
|
38 |
+
"eval_reranking_steps_per_second": 4.644,
|
39 |
+
"step": 1000
|
40 |
+
}
|
41 |
+
],
|
42 |
+
"logging_steps": 500,
|
43 |
+
"max_steps": 1000,
|
44 |
+
"num_input_tokens_seen": 0,
|
45 |
+
"num_train_epochs": 1,
|
46 |
+
"save_steps": 500,
|
47 |
+
"stateful_callbacks": {
|
48 |
+
"TrainerControl": {
|
49 |
+
"args": {
|
50 |
+
"should_epoch_stop": false,
|
51 |
+
"should_evaluate": false,
|
52 |
+
"should_log": false,
|
53 |
+
"should_save": true,
|
54 |
+
"should_training_stop": true
|
55 |
+
},
|
56 |
+
"attributes": {}
|
57 |
+
}
|
58 |
+
},
|
59 |
+
"total_flos": 0.0,
|
60 |
+
"train_batch_size": 8,
|
61 |
+
"trial_name": null,
|
62 |
+
"trial_params": null
|
63 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6a01305291de9ab2ad958087cf51c26d8ebe8f91ebb0b853b43b078a3abbd52
|
3 |
+
size 5368
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|