knguyennguyen
committed on
Add new SentenceTransformer model.
Browse files
- 1_Pooling/config.json +10 -0
- README.md +499 -0
- config.json +28 -0
- config_sentence_transformers.json +10 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.json +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,499 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:4693
|
8 |
+
- loss:MultipleNegativesRankingLoss
|
9 |
+
base_model: sentence-transformers/all-distilroberta-v1
|
10 |
+
widget:
|
11 |
+
- source_sentence: I'm looking for a protective summer accessory for my little girl
|
12 |
+
that has a wide brim to shield her from the sun. It should be adjustable for a
|
13 |
+
perfect fit and made from breathable, soft material. A variety of colors would
|
14 |
+
be great too!
|
15 |
+
sentences:
|
16 |
+
- "Title: Goodthreads Men's Lightweight French Terry Crewneck Sweatshirt Descripion:\
|
17 |
+
\ ['An Amazon brand - This lightweight French terry cotton crewneck sweatshirt\
|
18 |
+
\ is a go-to layer for workouts and lounging'\n \"Goodthreads' collection of men's\
|
19 |
+
\ clothing crafted with care takes wear-everywhere apparel to the next level.\
|
20 |
+
\ Create can't-miss pairings with long- and short-sleeve button-down shirts in\
|
21 |
+
\ standard and slim fits, plus chino pants and shorts made from wrinkle-free non-iron\
|
22 |
+
\ fabric. With these classics-and T-shirts, polo shirts, and outerwear to round\
|
23 |
+
\ out your look-Goodthreads is your go-to for wardrobe staples with the style\
|
24 |
+
\ you want.\"]"
|
25 |
+
- "Title: Baby Sun Hat UPF 50+ for Girls Wide Brim Toddler Sun Hat Adjustable Summer\
|
26 |
+
\ Hat Descripion: ['Features' \"High sun protection factor to protect children's\
|
27 |
+
\ safety\"\n 'The material is safe and will not cause any harm to children'\n\
|
28 |
+
\ 'Convenient to carry, you can easily tuck the hat into the bag'\n 'Good breathability,\
|
29 |
+
\ prevent the child from sultry, maintain a refreshing feeling'\n 'Size Adjustment'\n\
|
30 |
+
\ \"Features a cord tie to help the hat fit perfectly with each child. You can\
|
31 |
+
\ adjust the size of the hat to make sure it doesn't come off.\"\n 'multicolor'\n\
|
32 |
+
\ 'We have prepared variety different colors for the baby sun hat, comfortable\
|
33 |
+
\ and soft material, a must-have protective tool for the baby to go out'\n 'Note:'\n\
|
34 |
+
\ '1.If you have any doubts, please contact customer service for suggestions;'\n\
|
35 |
+
\ '2.Colors may appear slightly different via website due to computer picture\
|
36 |
+
\ resolution and monitor settings;'\n '3.There is 0.79\"-1.18\" difference according\
|
37 |
+
\ to manual measurement.']"
|
38 |
+
- 'Title: Uber Eats Washed Adjustable Baseball Cap Peaked Cap Dad Hat Unisex Breathable
|
39 |
+
Quick-Drying Top Hat Baseball Cap Men''s Hat… Black Descripion: [''Men Women Cotton
|
40 |
+
Adjustable Washed Twill Baseball Cap Hat.A Personalized Cowboy Cap Is Great Fun
|
41 |
+
And A Perfect Gift For Yourself/Friends/Lover.'']'
|
42 |
+
- source_sentence: I'm looking for a cute summer outfit for a little girl that includes
|
43 |
+
a stylish top and matching pants. It should be comfortable and perfect for warm
|
44 |
+
weather, ideally with a playful design.
|
45 |
+
sentences:
|
46 |
+
- "Title: Toddler Baby Girl Clothes Baby Girl Crop Top + Leopard Flare Long Pants 2PCS Little\
|
47 |
+
\ Girl Summer Outfits Sets Descripion: ['Size\\xa0Chart'\n 'Size\\xa090:\\xa0Top\\\
|
48 |
+
xa0Length\\xa010.6inch,Bust\\xa0*11.8inch,Pants Length\\xa018.9inch,\\xa0For\\\
|
49 |
+
xa0Age\\xa02-3\\xa0T.'\n 'Size\\xa0100:\\xa0Top\\xa0Length\\xa011.4inch,Bust\\\
|
50 |
+
xa0*12.2inch,Pants Length\\xa020.4inch, For\\xa0Age\\xa03-4\\xa0T.'\n 'Size\\\
|
51 |
+
xa0110:\\xa0Top\\xa0Length\\xa012.2inch,Bust\\xa0*12.6inch,Pants Length\\xa022.0inch,\
|
52 |
+
\ For\\xa0Age\\xa04-5\\xa0T.'\n 'Size\\xa0120:\\xa0Top\\xa0Length\\xa013.0inch,Bust\\\
|
53 |
+
xa0*13.0inch,Pants Length\\xa023.6inch, For\\xa0Age\\xa05-6\\xa0T.'\n 'Customer\\\
|
54 |
+
xa0Service'\n 'If\\xa0you\\xa0encounter\\xa0any\\xa0problems,\\xa0please\\xa0contact\\\
|
55 |
+
xa0us,\\xa0we\\xa0will\\xa0actively\\xa0deal\\xa0with\\xa0your\\xa0problem\\xa0within\\\
|
56 |
+
xa024\\xa0hours'\n 'Wish\\xa0your\\xa0baby\\xa0a\\xa0happy\\xa0and\\xa0healthy\\\
|
57 |
+
xa0growth!\\xa0!\\xa0!']"
|
58 |
+
- 'Title: Dad EST Keychain Gifts for First Time Dad, New Daddy Soon Promoted To
|
59 |
+
Be Pregnancy Gift for Men Fathers Day Dad Birthday Descripion: [''Dad EST Keychain
|
60 |
+
Gifts for First Time Dad, New Daddy Soon Promoted To Be Pregnancy Gift for Men
|
61 |
+
Fathers Day Dad Birthday Present'']'
|
62 |
+
- "Title: Goodthreads Men's Lightweight French Terry Crewneck Sweatshirt Descripion:\
|
63 |
+
\ ['An Amazon brand - This lightweight French terry cotton crewneck sweatshirt\
|
64 |
+
\ is a go-to layer for workouts and lounging'\n \"Goodthreads' collection of men's\
|
65 |
+
\ clothing crafted with care takes wear-everywhere apparel to the next level.\
|
66 |
+
\ Create can't-miss pairings with long- and short-sleeve button-down shirts in\
|
67 |
+
\ standard and slim fits, plus chino pants and shorts made from wrinkle-free non-iron\
|
68 |
+
\ fabric. With these classics-and T-shirts, polo shirts, and outerwear to round\
|
69 |
+
\ out your look-Goodthreads is your go-to for wardrobe staples with the style\
|
70 |
+
\ you want.\"]"
|
71 |
+
- source_sentence: I'm looking for a comprehensive kit that can help maintain the
|
72 |
+
sparkle and shine of my precious jewelry, particularly for items made of gold
|
73 |
+
and diamonds. It should be easy to use and include various tools for cleaning
|
74 |
+
and polishing, making it suitable for regular upkeep and travel.
|
75 |
+
sentences:
|
76 |
+
- "Title: Connoisseurs Jewelry Cleaning Kit for Gold, Platinum, Diamonds & Precious\
|
77 |
+
\ Stones. Includes Dip-in Solution, Polishing Cloth for Gold and Diamond Dazzle\
|
78 |
+
\ Stik for mounts and settings. Bundle of 3 items. Descripion: ['Daily wear can\
|
79 |
+
\ take a toll and cause your jewelry to look dull and opaque, that’s why we have\
|
80 |
+
\ one of the most trustworthy and convenient jewelry cleaning kits available to\
|
81 |
+
\ make your pieces shine for a long time.'\n 'Includes:' '1 x Fine Jewelry Cleaner'\n\
|
82 |
+
\ 'Advanced cleaning formula that reduces the appearance of tiny scratches caused\
|
83 |
+
\ by regular use. Brings shine to gold as well as platinum, diamonds and other\
|
84 |
+
\ precious stones. Includes a dipping tray. Ammonia-Free.'\n '1 x UltraSoft Jewelry\
|
85 |
+
\ Polishing Cloth for Gold'\n 'Easy 2-step cleaning and polishing system that\
|
86 |
+
\ removes tarnish and helps prevent further buildup by creating an anti-tarnish\
|
87 |
+
\ shield. This cloth leaves your pieces soft and shiny. Also great for polishing\
|
88 |
+
\ watches. Made with 100% renewable cotton fiber.'\n '1 x Diamond Dazzle Stik'\n\
|
89 |
+
\ 'The Stik is a cleaning pen that delivers a micro-fine cleansing and polishing\
|
90 |
+
\ solution to an anti-scratch brush. The brush was designed to clean mountings\
|
91 |
+
\ and settings. Turns dull and dirty diamonds to sparkling diamonds. Regular use\
|
92 |
+
\ reduces the appearance of tiny scratches in settings. Safe to use on Diamonds\
|
93 |
+
\ and precious and semi-precious gems, including platinum and gold settings. Cosmetic\
|
94 |
+
\ pen-sized. Great for travel. Do not use on Pearls.'\n 'Directions:'\n 'Use the\
|
95 |
+
\ Fine Jewelry Cleaner to dip in your most tarnished jewelry for 30 seconds. Remove\
|
96 |
+
\ using the included tray and rinse with warm water. Dry with a lint-free cloth.\
|
97 |
+
\ Once dry, buff your jewelry with the included cloth. First, gently rub your\
|
98 |
+
\ jewelry with the lighter colored side of the cloth to remove any remaining dirt\
|
99 |
+
\ and tarnish. Second, use the darker side of the cloth to buff and polish to\
|
100 |
+
\ shine. The cloth can be used on gold and platinum. Use the Diamond Dazzle Stik\
|
101 |
+
\ for detailing and cleaning behind stones and other mounting areas. Do not use\
|
102 |
+
\ the Stik on pearls.']"
|
103 |
+
- 'Title: Grip Boost Boys Yellow Peace Youth Football Gloves Pro Elite - Youth Kids
|
104 |
+
Sizes - $44.95 Youth Large Descripion: [''IMPROVED GRIP: With our proprietary
|
105 |
+
formula, Grip Boost youth football glove grip is made from crab shells. This unique
|
106 |
+
layer is made with advanced suspended polymerization technology that provides
|
107 |
+
the #1 grip in football performance gear. PEACE: Patented palm design is used
|
108 |
+
to show your Cheetah speed on the field. ADJUSTABLE FOR PERFECT FIT: The back-of-hand
|
109 |
+
has a unique compression material that is designed to conform to the contour of
|
110 |
+
your hand, with a large gusset to provide a custom fit football glove. Grip Boost
|
111 |
+
Stealth Pro Elite Football Gloves fit like a second skin. ALL WEATHER: Grip Boost
|
112 |
+
Football Gloves is specially formulated to react and stick in any weather. Play
|
113 |
+
like the pros, rain-or-shine. These football receiver gloves mens and youth are
|
114 |
+
perfect for bringing in a clean catch without fumble. COMFORTABLE FEEL: Keep control
|
115 |
+
with precision thanks to the thin, barely-there feel material with grip and support
|
116 |
+
when you need it most. Moisture-wicking and ventilation prevent chafing and reduce
|
117 |
+
distraction. These receiver gloves offer optimal function thats fit for the pros,
|
118 |
+
designed by the pros, yet perfect for every player. Grip Boost Stealth Pro Elite
|
119 |
+
Solid Color youth kids football gloves is a product that you can rely on for seasons
|
120 |
+
to come. Designed to enhance overall performance, Grip Boost Youth football gloves
|
121 |
+
provide the #1 Grip In Footballtechnology in the game. Â\xa0A formula conceptualized
|
122 |
+
through a blend of passion for both performance and science, these football gloves
|
123 |
+
provide the best grip for professional athletes, youth football gloves, and high
|
124 |
+
school football players.Â'']'
|
125 |
+
- 'Title: Kids Full-Coverage Tube FaceMask Bandanas, UV Protection Neck Gaiter Headband,
|
126 |
+
boy and girl breathable elastic FaceMask dustproof towel neck leggings multifunctional
|
127 |
+
headband boy and girl balaclava Descripion: [''100% Microfiber Polyester with
|
128 |
+
high UV protection,Highly stretchable and Seamless keep you comfortable in any
|
129 |
+
position,They are soft,breathable fabric. LIGHTWEIGHT & BREATHABLE - The moisture
|
130 |
+
wicking fabric of our face covers transfer heat and humidity away from your body
|
131 |
+
and outside of the mouth cover, keeping you cool. The quick dry fabric technology
|
132 |
+
lets your facemask bandana dry in minutes, instead of hours. Wear it as a face
|
133 |
+
facemask to protect your face from getting burned from the sun! STRONG PROTECTION:These
|
134 |
+
neck gaiter protects you from the harmful elements, like dust, dirt, sand, UV,
|
135 |
+
When you are running or riding in outdoor, these neck gaiters are a need to wear
|
136 |
+
and they can help keep your nose and mouth clean PERFECT ACCESSORY - These festival
|
137 |
+
rave facemask are great for dusty music festivals, concerts, biking, raves, and
|
138 |
+
outdoor sports like paintball, hiking, yoga, exercising, yard work, riding or
|
139 |
+
whenever you want to live dust free! ONE SIZE FITS MOST - These facemask bandanas
|
140 |
+
feature a super stretchy fabric that accommodates most head sizes (men, women
|
141 |
+
& kids).'']'
|
142 |
+
- source_sentence: I'm looking for a charming accessory that can be a thoughtful present.
|
143 |
+
It should feature a unique design and be durable enough to withstand occasional
|
144 |
+
exposure to water.
|
145 |
+
sentences:
|
146 |
+
- 'Title: Tokyo Ghoul Costume Woman Men Trucker Jacket Classic Ripped Slim Denim
|
147 |
+
Jacket with Holes Kaneki Ken Hooded Jacket Cosplay Descripion: ["Women''s Ripped
|
148 |
+
Distressed Casual Denim Jacket Men''s Classic Trucker Jacket Women''s Stretch
|
149 |
+
Denim Jacket Anime men and women couple denim clothes Cartoon character print
|
150 |
+
zipper, pocket, fashion coat 1piece set: jacket With or without liner: No liner
|
151 |
+
Sleeve length: long sleeve Whether Hooded: Hooded Thickness: General Error range:
|
152 |
+
2-3CM Suitable for the season: four seasons Main fabric composition:polyester
|
153 |
+
Pattern: cartoon Style details: printing Edition type: Loose Process: hot stamping
|
154 |
+
About size: (S): Suitable for height 57-59 inches,bust 44 inches (M): Suitable
|
155 |
+
for height 59-63 inches,bust 45inches (L): Suitable for height 63-65 inches,bust
|
156 |
+
47 inches (XL): Suitable for height 65-67inches,bust 48.5 inches (2XL): Suitable
|
157 |
+
for height 67-68 inches,bust 50 inches (3XL): Suitable for height 68-70 inches,bust
|
158 |
+
51 inches"]'
|
159 |
+
- 'Title: CD Projekt Red Samurai Cyberpunk 2077 Logo T-Shirt for Men - Official
|
160 |
+
Apparel Inspired by The Hit Video Game Descripion: ["Wash inside out in cold water
|
161 |
+
with similar colors, don''t use a dryer, don''t iron decorative parts. Tee comes
|
162 |
+
with its clothing tag and yellow collar. THICKNESS 170 GSM"]'
|
163 |
+
- 'Title: Star Singer Double Sided Bezel Pendant Key Chain for Gift Descripion:
|
164 |
+
[''This pendant is water resistant but not waterproof. The key chain is very beautiful
|
165 |
+
and can be used as a gift.'']'
|
166 |
+
- source_sentence: I'm looking for a versatile and comfortable outerwear option that
|
167 |
+
can provide warmth and protection from the wind. It should be suitable for both
|
168 |
+
adults and kids, easy to clean, and made from a soft and breathable material.
|
169 |
+
sentences:
|
170 |
+
- 'Title: Cover Custom Western Texas Stars Washable and Reusable Warm Windproof
|
171 |
+
for Women Men Boys Girls Kids Descripion: [''Our Products Are Made Of High-Quality
|
172 |
+
100% Polyester Fiber, Which Is Very Soft, Breathable And Washable.'']'
|
173 |
+
- "Title: Goodthreads Men's Soft Cotton Long-Sleeve Pullover Hoodie T-Shirt Descripion:\
|
174 |
+
\ ['An Amazon brand - This pullover hoodie in soft cotton features a self-tie\
|
175 |
+
\ drawstring and a kangaroo pocket. Made in our Signature Tumbled Cotton for a\
|
176 |
+
\ soft, yet sturdy, hand. We utilize a unique Heritage Wash to give our garments\
|
177 |
+
\ a custom, lived-in feel right away'\n \"Goodthreads' collection of men's clothing\
|
178 |
+
\ crafted with care takes wear-everywhere apparel to the next level. Create can't-miss\
|
179 |
+
\ pairings with long- and short-sleeve button-down shirts in standard and slim\
|
180 |
+
\ fits, plus chino pants and shorts made from wrinkle-free non-iron fabric. With\
|
181 |
+
\ these classics-and T-shirts, polo shirts, and outerwear to round out your look-Goodthreads\
|
182 |
+
\ is your go-to for wardrobe staples with the style you want.\"]"
|
183 |
+
- 'Title: Sicilia Mini Boxing Gloves - Perfect To Hang in Car Mirror | Sicily Flag
|
184 |
+
Gloves Descripion: [''Made with pride from high-quality materials, these tiny
|
185 |
+
gloves sporting the Sicilian flag are the ultimate invitation to celebrate your
|
186 |
+
heritage. Hang them from your rear view mirror, or on your new vintage backpack
|
187 |
+
for a celebratory impact of attention.'']'
|
188 |
+
pipeline_tag: sentence-similarity
|
189 |
+
library_name: sentence-transformers
|
190 |
+
---
|
191 |
+
|
192 |
+
# SentenceTransformer based on sentence-transformers/all-distilroberta-v1
|
193 |
+
|
194 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-distilroberta-v1](https://huggingface.co/sentence-transformers/all-distilroberta-v1). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
195 |
+
|
196 |
+
## Model Details
|
197 |
+
|
198 |
+
### Model Description
|
199 |
+
- **Model Type:** Sentence Transformer
|
200 |
+
- **Base model:** [sentence-transformers/all-distilroberta-v1](https://huggingface.co/sentence-transformers/all-distilroberta-v1) <!-- at revision 8d88b92a34345fd6a139aa47768c9881720006ce -->
|
201 |
+
- **Maximum Sequence Length:** 128 tokens
|
202 |
+
- **Output Dimensionality:** 768 dimensions
|
203 |
+
- **Similarity Function:** Cosine Similarity
|
204 |
+
<!-- - **Training Dataset:** Unknown -->
|
205 |
+
<!-- - **Language:** Unknown -->
|
206 |
+
<!-- - **License:** Unknown -->
|
207 |
+
|
208 |
+
### Model Sources
|
209 |
+
|
210 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
211 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
212 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
213 |
+
|
214 |
+
### Full Model Architecture
|
215 |
+
|
216 |
+
```
|
217 |
+
SentenceTransformer(
|
218 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: RobertaModel
|
219 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
220 |
+
)
|
221 |
+
```
|
222 |
+
|
223 |
+
## Usage
|
224 |
+
|
225 |
+
### Direct Usage (Sentence Transformers)
|
226 |
+
|
227 |
+
First install the Sentence Transformers library:
|
228 |
+
|
229 |
+
```bash
|
230 |
+
pip install -U sentence-transformers
|
231 |
+
```
|
232 |
+
|
233 |
+
Then you can load this model and run inference.
|
234 |
+
```python
|
235 |
+
from sentence_transformers import SentenceTransformer
|
236 |
+
|
237 |
+
# Download from the 🤗 Hub
|
238 |
+
model = SentenceTransformer("knguyennguyen/distill_fashion_5k")
|
239 |
+
# Run inference
|
240 |
+
sentences = [
|
241 |
+
"I'm looking for a versatile and comfortable outerwear option that can provide warmth and protection from the wind. It should be suitable for both adults and kids, easy to clean, and made from a soft and breathable material.",
|
242 |
+
"Title: Cover Custom Western Texas Stars Washable and Reusable Warm Windproof for Women Men Boys Girls Kids Descripion: ['Our Products Are Made Of High-Quality 100% Polyester Fiber, Which Is Very Soft, Breathable And Washable.']",
|
243 |
+
'Title: Goodthreads Men\'s Soft Cotton Long-Sleeve Pullover Hoodie T-Shirt Descripion: [\'An Amazon brand - This pullover hoodie in soft cotton features a self-tie drawstring and a kangaroo pocket. Made in our Signature Tumbled Cotton for a soft, yet sturdy, hand. We utilize a unique Heritage Wash to give our garments a custom, lived-in feel right away\'\n "Goodthreads\' collection of men\'s clothing crafted with care takes wear-everywhere apparel to the next level. Create can\'t-miss pairings with long- and short-sleeve button-down shirts in standard and slim fits, plus chino pants and shorts made from wrinkle-free non-iron fabric. With these classics-and T-shirts, polo shirts, and outerwear to round out your look-Goodthreads is your go-to for wardrobe staples with the style you want."]',
|
244 |
+
]
|
245 |
+
embeddings = model.encode(sentences)
|
246 |
+
print(embeddings.shape)
|
247 |
+
# [3, 768]
|
248 |
+
|
249 |
+
# Get the similarity scores for the embeddings
|
250 |
+
similarities = model.similarity(embeddings, embeddings)
|
251 |
+
print(similarities.shape)
|
252 |
+
# [3, 3]
|
253 |
+
```
|
254 |
+
|
255 |
+
<!--
|
256 |
+
### Direct Usage (Transformers)
|
257 |
+
|
258 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
259 |
+
|
260 |
+
</details>
|
261 |
+
-->
|
262 |
+
|
263 |
+
<!--
|
264 |
+
### Downstream Usage (Sentence Transformers)
|
265 |
+
|
266 |
+
You can finetune this model on your own dataset.
|
267 |
+
|
268 |
+
<details><summary>Click to expand</summary>
|
269 |
+
|
270 |
+
</details>
|
271 |
+
-->
|
272 |
+
|
273 |
+
<!--
|
274 |
+
### Out-of-Scope Use
|
275 |
+
|
276 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
277 |
+
-->
|
278 |
+
|
279 |
+
<!--
|
280 |
+
## Bias, Risks and Limitations
|
281 |
+
|
282 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
283 |
+
-->
|
284 |
+
|
285 |
+
<!--
|
286 |
+
### Recommendations
|
287 |
+
|
288 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
289 |
+
-->
|
290 |
+
|
291 |
+
## Training Details
|
292 |
+
|
293 |
+
### Training Dataset
|
294 |
+
|
295 |
+
#### Unnamed Dataset
|
296 |
+
|
297 |
+
|
298 |
+
* Size: 4,693 training samples
|
299 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
300 |
+
* Approximate statistics based on the first 1000 samples:
|
301 |
+
| | sentence_0 | sentence_1 |
|
302 |
+
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
303 |
+
| type | string | string |
|
304 |
+
| details | <ul><li>min: 26 tokens</li><li>mean: 44.67 tokens</li><li>max: 88 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 107.26 tokens</li><li>max: 128 tokens</li></ul> |
|
305 |
+
* Samples:
|
306 |
+
| sentence_0 | sentence_1 |
|
307 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
308 |
+
| <code>I'm looking for a bulk selection of decorative pieces that can be used to create unique jewelry and craft projects. They should come in a variety of styles, with a classic gold finish, and be suitable for personalizing items like necklaces or bracelets. These accessories should also be versatile enough for various crafting applications and make a lovely gift for special occasions.</code> | <code>Title: Wholesale Bulk 50PCS Mixed Gold Charms Pendants DIY for Jewelry Making and Crafting Descripion: ['50pcs Mixed KC Gold Charms Pendants for Jewelry Making, DIY Craft Charms Bulk for Necklace Bracelet Jewelry Making Crafting'<br> 'Color:' 'KC Gold.' 'Size:' '0.42" - 1.15" (11 - 28 mm).'<br> 'Main Material:' 'Alloy, Metal.' 'Package Include:'<br> "50pcs gold charm. 1 pcs chamois cloth of SUNEEY.(Suitable for cleaning and polishing jewelry.) Gold charm, it's fashion, creative, full of special means, is a very useful accessory,Exquisite and classical design charms.It can be used in all kinds of decoration. A good gift for yourself or friends, or birthday gift, anniversary present. Suit for key chain, bag pendant, sweater chain pendants, ornaments, escort cards, scrapbooking, and other crafts etc. These charm can be used to create earrings, necklaces, charm bracelets, and all kinds of jewelry making and craft projects. Perfect for scrapbooking project, necklace pendant drop, jewelry making accessories. Jewelry Making Accessory Mixed wholesale metal charms Assorted themes great array of subjects and styles for all kinds of interests Great for parties or groups. This gold pendant set is very charming with the unique design. It is simple and understated but gorgeous and classy. This pretty design can highlight your appearance, grasp everyone's eyes in the crowd. We’re confident that you will love them,as it will make you stand out."]</code> |
|
309 |
+
| <code>I'm looking for a stylish and comfortable cropped top that I can easily throw on after my workouts. It should be versatile enough for casual outings and have a fit that flatters the figure.</code> | <code>Title: Core 10 Women's Soft Workout Cropped Hoodie Sweatshirt Descripion: ['An Amazon brand - This cropped Hoodie is a wardrobe-essential sweatshirt that features an easy, flattering fit for all your post-workout, layering, or everyday styling needs'<br> 'Empowering women to reach their full potential is at the heart of what we do. Because when you’re wearing Core 10, you’re ready to experience more. Be more. Live more.']</code> |
|
310 |
+
| <code>I'm looking for a comfortable and stylish top for women that has a relaxed fit and a hood. It should come in trendy colors and allow for easy movement, making it perfect for casual outings or workouts.</code> | <code>Title: PUMA Ladies' Hooded Tee Descripion: ['Features: PUMA Colors: Black, Pink, and Purple Dropped shoulder seams Side slits for enhanced range of motion Mesh interior lined hood Puma Branded taping inside of neckline Content: 60% Cotton | 40% Modal Sizing: S-XL Relaxed fit Size Conversion: S = 4-6 | M = 8-10 | L = 12-14 | XL = 16-18 Model is wearing a size small Color & size subject to availability Care Instructions: Machine wash in cool water Permanent press cycle Do not bleach Do not tumble dry Drying horizontally Cool iron Do not dry clean']</code> |
|
311 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
312 |
+
```json
|
313 |
+
{
|
314 |
+
"scale": 20.0,
|
315 |
+
"similarity_fct": "cos_sim"
|
316 |
+
}
|
317 |
+
```
|
318 |
+
|
319 |
+
### Training Hyperparameters
|
320 |
+
#### Non-Default Hyperparameters
|
321 |
+
|
322 |
+
- `per_device_train_batch_size`: 128
|
323 |
+
- `per_device_eval_batch_size`: 128
|
324 |
+
- `num_train_epochs`: 5
|
325 |
+
- `multi_dataset_batch_sampler`: round_robin
|
326 |
+
|
327 |
+
#### All Hyperparameters
|
328 |
+
<details><summary>Click to expand</summary>
|
329 |
+
|
330 |
+
- `overwrite_output_dir`: False
|
331 |
+
- `do_predict`: False
|
332 |
+
- `eval_strategy`: no
|
333 |
+
- `prediction_loss_only`: True
|
334 |
+
- `per_device_train_batch_size`: 128
|
335 |
+
- `per_device_eval_batch_size`: 128
|
336 |
+
- `per_gpu_train_batch_size`: None
|
337 |
+
- `per_gpu_eval_batch_size`: None
|
338 |
+
- `gradient_accumulation_steps`: 1
|
339 |
+
- `eval_accumulation_steps`: None
|
340 |
+
- `torch_empty_cache_steps`: None
|
341 |
+
- `learning_rate`: 5e-05
|
342 |
+
- `weight_decay`: 0.0
|
343 |
+
- `adam_beta1`: 0.9
|
344 |
+
- `adam_beta2`: 0.999
|
345 |
+
- `adam_epsilon`: 1e-08
|
346 |
+
- `max_grad_norm`: 1
|
347 |
+
- `num_train_epochs`: 5
|
348 |
+
- `max_steps`: -1
|
349 |
+
- `lr_scheduler_type`: linear
|
350 |
+
- `lr_scheduler_kwargs`: {}
|
351 |
+
- `warmup_ratio`: 0.0
|
352 |
+
- `warmup_steps`: 0
|
353 |
+
- `log_level`: passive
|
354 |
+
- `log_level_replica`: warning
|
355 |
+
- `log_on_each_node`: True
|
356 |
+
- `logging_nan_inf_filter`: True
|
357 |
+
- `save_safetensors`: True
|
358 |
+
- `save_on_each_node`: False
|
359 |
+
- `save_only_model`: False
|
360 |
+
- `restore_callback_states_from_checkpoint`: False
|
361 |
+
- `no_cuda`: False
|
362 |
+
- `use_cpu`: False
|
363 |
+
- `use_mps_device`: False
|
364 |
+
- `seed`: 42
|
365 |
+
- `data_seed`: None
|
366 |
+
- `jit_mode_eval`: False
|
367 |
+
- `use_ipex`: False
|
368 |
+
- `bf16`: False
|
369 |
+
- `fp16`: False
|
370 |
+
- `fp16_opt_level`: O1
|
371 |
+
- `half_precision_backend`: auto
|
372 |
+
- `bf16_full_eval`: False
|
373 |
+
- `fp16_full_eval`: False
|
374 |
+
- `tf32`: None
|
375 |
+
- `local_rank`: 0
|
376 |
+
- `ddp_backend`: None
|
377 |
+
- `tpu_num_cores`: None
|
378 |
+
- `tpu_metrics_debug`: False
|
379 |
+
- `debug`: []
|
380 |
+
- `dataloader_drop_last`: False
|
381 |
+
- `dataloader_num_workers`: 0
|
382 |
+
- `dataloader_prefetch_factor`: None
|
383 |
+
- `past_index`: -1
|
384 |
+
- `disable_tqdm`: False
|
385 |
+
- `remove_unused_columns`: True
|
386 |
+
- `label_names`: None
|
387 |
+
- `load_best_model_at_end`: False
|
388 |
+
- `ignore_data_skip`: False
|
389 |
+
- `fsdp`: []
|
390 |
+
- `fsdp_min_num_params`: 0
|
391 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
392 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
393 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
394 |
+
- `deepspeed`: None
|
395 |
+
- `label_smoothing_factor`: 0.0
|
396 |
+
- `optim`: adamw_torch
|
397 |
+
- `optim_args`: None
|
398 |
+
- `adafactor`: False
|
399 |
+
- `group_by_length`: False
|
400 |
+
- `length_column_name`: length
|
401 |
+
- `ddp_find_unused_parameters`: None
|
402 |
+
- `ddp_bucket_cap_mb`: None
|
403 |
+
- `ddp_broadcast_buffers`: False
|
404 |
+
- `dataloader_pin_memory`: True
|
405 |
+
- `dataloader_persistent_workers`: False
|
406 |
+
- `skip_memory_metrics`: True
|
407 |
+
- `use_legacy_prediction_loop`: False
|
408 |
+
- `push_to_hub`: False
|
409 |
+
- `resume_from_checkpoint`: None
|
410 |
+
- `hub_model_id`: None
|
411 |
+
- `hub_strategy`: every_save
|
412 |
+
- `hub_private_repo`: False
|
413 |
+
- `hub_always_push`: False
|
414 |
+
- `gradient_checkpointing`: False
|
415 |
+
- `gradient_checkpointing_kwargs`: None
|
416 |
+
- `include_inputs_for_metrics`: False
|
417 |
+
- `eval_do_concat_batches`: True
|
418 |
+
- `fp16_backend`: auto
|
419 |
+
- `push_to_hub_model_id`: None
|
420 |
+
- `push_to_hub_organization`: None
|
421 |
+
- `mp_parameters`:
|
422 |
+
- `auto_find_batch_size`: False
|
423 |
+
- `full_determinism`: False
|
424 |
+
- `torchdynamo`: None
|
425 |
+
- `ray_scope`: last
|
426 |
+
- `ddp_timeout`: 1800
|
427 |
+
- `torch_compile`: False
|
428 |
+
- `torch_compile_backend`: None
|
429 |
+
- `torch_compile_mode`: None
|
430 |
+
- `dispatch_batches`: None
|
431 |
+
- `split_batches`: None
|
432 |
+
- `include_tokens_per_second`: False
|
433 |
+
- `include_num_input_tokens_seen`: False
|
434 |
+
- `neftune_noise_alpha`: None
|
435 |
+
- `optim_target_modules`: None
|
436 |
+
- `batch_eval_metrics`: False
|
437 |
+
- `eval_on_start`: False
|
438 |
+
- `use_liger_kernel`: False
|
439 |
+
- `eval_use_gather_object`: False
|
440 |
+
- `batch_sampler`: batch_sampler
|
441 |
+
- `multi_dataset_batch_sampler`: round_robin
|
442 |
+
|
443 |
+
</details>
|
444 |
+
|
445 |
+
### Framework Versions
|
446 |
+
- Python: 3.11.11
|
447 |
+
- Sentence Transformers: 3.1.1
|
448 |
+
- Transformers: 4.45.2
|
449 |
+
- PyTorch: 2.5.1+cu121
|
450 |
+
- Accelerate: 1.2.1
|
451 |
+
- Datasets: 3.2.0
|
452 |
+
- Tokenizers: 0.20.3
|
453 |
+
|
454 |
+
## Citation
|
455 |
+
|
456 |
+
### BibTeX
|
457 |
+
|
458 |
+
#### Sentence Transformers
|
459 |
+
```bibtex
|
460 |
+
@inproceedings{reimers-2019-sentence-bert,
|
461 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
462 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
463 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
464 |
+
month = "11",
|
465 |
+
year = "2019",
|
466 |
+
publisher = "Association for Computational Linguistics",
|
467 |
+
url = "https://arxiv.org/abs/1908.10084",
|
468 |
+
}
|
469 |
+
```
|
470 |
+
|
471 |
+
#### MultipleNegativesRankingLoss
|
472 |
+
```bibtex
|
473 |
+
@misc{henderson2017efficient,
|
474 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
475 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
476 |
+
year={2017},
|
477 |
+
eprint={1705.00652},
|
478 |
+
archivePrefix={arXiv},
|
479 |
+
primaryClass={cs.CL}
|
480 |
+
}
|
481 |
+
```
|
482 |
+
|
483 |
+
<!--
|
484 |
+
## Glossary
|
485 |
+
|
486 |
+
*Clearly define terms in order to be accessible across audiences.*
|
487 |
+
-->
|
488 |
+
|
489 |
+
<!--
|
490 |
+
## Model Card Authors
|
491 |
+
|
492 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
493 |
+
-->
|
494 |
+
|
495 |
+
<!--
|
496 |
+
## Model Card Contact
|
497 |
+
|
498 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
499 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/all-distilroberta-v1",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
+
"layer_norm_eps": 1e-05,
|
17 |
+
"max_position_embeddings": 514,
|
18 |
+
"model_type": "roberta",
|
19 |
+
"num_attention_heads": 12,
|
20 |
+
"num_hidden_layers": 6,
|
21 |
+
"pad_token_id": 1,
|
22 |
+
"position_embedding_type": "absolute",
|
23 |
+
"torch_dtype": "float32",
|
24 |
+
"transformers_version": "4.45.2",
|
25 |
+
"type_vocab_size": 1,
|
26 |
+
"use_cache": true,
|
27 |
+
"vocab_size": 50265
|
28 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.1",
|
4 |
+
"transformers": "4.45.2",
|
5 |
+
"pytorch": "2.5.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da63f88cd2849f1e51aa33ad0f00545c3b7a0bdd4c9cb57b4b94fb463315b226
|
3 |
+
size 328485128
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "<unk>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<pad>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"3": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"50264": {
|
37 |
+
"content": "<mask>",
|
38 |
+
"lstrip": true,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"clean_up_tokenization_spaces": false,
|
47 |
+
"cls_token": "<s>",
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"errors": "replace",
|
50 |
+
"mask_token": "<mask>",
|
51 |
+
"max_length": 128,
|
52 |
+
"model_max_length": 128,
|
53 |
+
"pad_to_multiple_of": null,
|
54 |
+
"pad_token": "<pad>",
|
55 |
+
"pad_token_type_id": 0,
|
56 |
+
"padding_side": "right",
|
57 |
+
"sep_token": "</s>",
|
58 |
+
"stride": 0,
|
59 |
+
"tokenizer_class": "RobertaTokenizer",
|
60 |
+
"trim_offsets": true,
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "<unk>"
|
64 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|