ShauryaNova
committed on
Upload folder using huggingface_hub
Browse files
- 1_Pooling/config.json +10 -0
- README.md +72 -0
- checkpoint-870/1_Pooling/config.json +10 -0
- checkpoint-870/README.md +507 -0
- checkpoint-870/config.json +26 -0
- checkpoint-870/config_sentence_transformers.json +10 -0
- checkpoint-870/model.safetensors +3 -0
- checkpoint-870/modules.json +20 -0
- checkpoint-870/optimizer.pt +3 -0
- checkpoint-870/rng_state.pth +3 -0
- checkpoint-870/scheduler.pt +3 -0
- checkpoint-870/sentence_bert_config.json +4 -0
- checkpoint-870/special_tokens_map.json +37 -0
- checkpoint-870/tokenizer.json +0 -0
- checkpoint-870/tokenizer_config.json +64 -0
- checkpoint-870/trainer_state.json +319 -0
- checkpoint-870/training_args.bin +3 -0
- checkpoint-870/vocab.txt +0 -0
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- runs/Jun28_17-15-52_r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg/events.out.tfevents.1719594954.r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg.100.0 +2 -2
- runs/Jun28_17-15-52_r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg/events.out.tfevents.1719596615.r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg.100.1 +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- training_args.bin +3 -0
- training_params.json +33 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
---
|
3 |
+
library_name: sentence-transformers
|
4 |
+
tags:
|
5 |
+
- sentence-transformers
|
6 |
+
- sentence-similarity
|
7 |
+
- feature-extraction
|
8 |
+
- autotrain
|
9 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
10 |
+
widget:
|
11 |
+
- source_sentence: 'search_query: i love autotrain'
|
12 |
+
sentences:
|
13 |
+
- 'search_query: huggingface auto train'
|
14 |
+
- 'search_query: hugging face auto train'
|
15 |
+
- 'search_query: i love autotrain'
|
16 |
+
pipeline_tag: sentence-similarity
|
17 |
+
---
|
18 |
+
|
19 |
+
# Model Trained Using AutoTrain
|
20 |
+
|
21 |
+
- Problem type: Sentence Transformers
|
22 |
+
|
23 |
+
## Validation Metrics
|
24 |
+
loss: 0.056603044271469116
|
25 |
+
|
26 |
+
cosine_accuracy: 1.0
|
27 |
+
|
28 |
+
dot_accuracy: 0.0
|
29 |
+
|
30 |
+
manhattan_accuracy: 1.0
|
31 |
+
|
32 |
+
euclidean_accuracy: 1.0
|
33 |
+
|
34 |
+
max_accuracy: 1.0
|
35 |
+
|
36 |
+
runtime: 43.9603
|
37 |
+
|
38 |
+
samples_per_second: 13.194
|
39 |
+
|
40 |
+
steps_per_second: 0.842
|
41 |
+
|
42 |
+
epoch: 3.0
|
43 |
+
|
44 |
+
## Usage
|
45 |
+
|
46 |
+
### Direct Usage (Sentence Transformers)
|
47 |
+
|
48 |
+
First install the Sentence Transformers library:
|
49 |
+
|
50 |
+
```bash
|
51 |
+
pip install -U sentence-transformers
|
52 |
+
```
|
53 |
+
|
54 |
+
Then you can load this model and run inference.
|
55 |
+
```python
|
56 |
+
from sentence_transformers import SentenceTransformer
|
57 |
+
|
58 |
+
# Download from the Hugging Face Hub
|
59 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
60 |
+
# Run inference
|
61 |
+
sentences = [
|
62 |
+
'search_query: autotrain',
|
63 |
+
'search_query: auto train',
|
64 |
+
'search_query: i love autotrain',
|
65 |
+
]
|
66 |
+
embeddings = model.encode(sentences)
|
67 |
+
print(embeddings.shape)
|
68 |
+
|
69 |
+
# Get the similarity scores for the embeddings
|
70 |
+
similarities = model.similarity(embeddings, embeddings)
|
71 |
+
print(similarities.shape)
|
72 |
+
```
|
checkpoint-870/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
checkpoint-870/README.md
ADDED
@@ -0,0 +1,507 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
3 |
+
datasets: []
|
4 |
+
language: []
|
5 |
+
library_name: sentence-transformers
|
6 |
+
metrics:
|
7 |
+
- cosine_accuracy
|
8 |
+
- dot_accuracy
|
9 |
+
- manhattan_accuracy
|
10 |
+
- euclidean_accuracy
|
11 |
+
- max_accuracy
|
12 |
+
pipeline_tag: sentence-similarity
|
13 |
+
tags:
|
14 |
+
- sentence-transformers
|
15 |
+
- sentence-similarity
|
16 |
+
- feature-extraction
|
17 |
+
- generated_from_trainer
|
18 |
+
- dataset_size:2320
|
19 |
+
- loss:MultipleNegativesRankingLoss
|
20 |
+
widget:
|
21 |
+
- source_sentence: DENNIE FOSTE Men's Poly Cotton Washed Light Blue Jeans(DF-JNS-015)
|
22 |
+
sentences:
|
23 |
+
- https://www.amazon.in/dp/B0BZDFGSCR
|
24 |
+
- DENNIE FOSTE presents this streachable fabric Polycotton jeans. It's good quality
|
25 |
+
fabric would certainly make you feel good and confident when you wear it. Comfortable
|
26 |
+
front pockets, comfortable back pockets, highly durable and stretchable jeans
|
27 |
+
for man. Perfect for casual, beach parties wear high on style and quality, these
|
28 |
+
stretchable jeans are as versatile as they are comfortable. Wear it with a casual
|
29 |
+
tee for a smart look. Wear it casually and be at ease throughout the day or it
|
30 |
+
can also blend to perfection on your special ocassions.
|
31 |
+
- urbano fashion mens slim fit jeans
|
32 |
+
- source_sentence: ZESICA Women's 2023 Summer Bohemian Solid Color Lace Trim Flowy
|
33 |
+
A Line Beach Long Maxi Skirt with Pockets
|
34 |
+
sentences:
|
35 |
+
- aratlench acrylic pendant necklace earrings – long statement leaf charm necklace
|
36 |
+
tortoise resin palm leaf earrings fashion necklaces earrings for women girls
|
37 |
+
- https://www.amazon.com/dp/B09X19HV5D
|
38 |
+
- zesica womens 2023 summer bohemian solid color lace trim flowy a line beach long
|
39 |
+
maxi skirt with pockets
|
40 |
+
- source_sentence: DHRUVI TRENDZ Men's Shirts || Rayon Tropical Printed Shirts for
|
41 |
+
Men || Summer Wear Shirt for Men || Perfect for Outing || Vacation || DateWear
|
42 |
+
Shirt for Boys || Gift for Men
|
43 |
+
sentences:
|
44 |
+
- om sai latest creation shirt for men rayon shirts for men tropical leaf printed
|
45 |
+
short sleeve spread collar shirts for boy casual beach wear festive shirt for
|
46 |
+
men
|
47 |
+
- https://www.amazon.in/dp/B0C18PR364
|
48 |
+
- Men's Fashion Products Are Our partywear outfit collection for men includes a
|
49 |
+
shirt neckline, Short-sleeves, and a button placket on the front. Perfect Regular
|
50 |
+
Fit with Best Look. simple spread collar and soft felt in the fabric which makes
|
51 |
+
the shirt very easy and comfortable to wear casually. From the newest designs
|
52 |
+
and trendiest styles for men we are making fashionable clothing affordable. Shirts
|
53 |
+
feel soft and light on the body. Pairing with the right colored denim we can imagine
|
54 |
+
the outfit is best suited for dining parties and night outs. Our men's Tropical
|
55 |
+
shirts are made of the Best fabric which is lightweight and breathable. Perfect
|
56 |
+
for summer and hot weather keeps your body dry and comfortable all day. This casual
|
57 |
+
summer shirts design with a Fancy Hawaii collar, short sleeve, botton down, Tropical
|
58 |
+
print and classic regular fit. This beach shirts with multiple unique color and
|
59 |
+
pattern, each of which is a unique experience, make you shine this summer. Perfect
|
60 |
+
gift for yourself, families, or friends. Perfect for camp, sun beach, birthday
|
61 |
+
party, vacation, bachelor party, cruise, camp, or any casual daily wear.
|
62 |
+
- source_sentence: Molie Bridal Austrian Crystal Necklace and Earrings Jewelry Set
|
63 |
+
Gifts fit with Wedding Dress
|
64 |
+
sentences:
|
65 |
+
- You should have this jewelry set near you all the time since it is so fashion
|
66 |
+
and eye-catching. You can wear it and have it with you to support you wherever
|
67 |
+
you go. Make a statement with this wonderful jewelry set. Molie Molie has been
|
68 |
+
found for many years, referred to "Molie", which denotes to treat all of the world's
|
69 |
+
women like an Molie jewelry and meet their fantasies and satisfactions. We have
|
70 |
+
our own factory to ensure our items' plating and the strict criteria of the plating
|
71 |
+
thickness. The physical characteristics of human require us to adopt a higher
|
72 |
+
standard of plating process. At the same time, it create a good condition to reduce
|
73 |
+
production cost while maintain high quality of our item. Moreover, We are committed
|
74 |
+
to provide customers with competitive products and best customer services, since
|
75 |
+
its inception has been its high quality themselves, stylish design, superb manufacturing
|
76 |
+
process. Besides, we concentrate on improving the service based on the creative,
|
77 |
+
showing brand attributes. All in all, we take Customers' satisfactions as our
|
78 |
+
first priority.
|
79 |
+
- https://www.amazon.com/dp/B071VM3BKW
|
80 |
+
- coofandy mens short sleeve hoodie relaxed fit fashion casual sweatshirts lightweight
|
81 |
+
hip hop streetwear t shirts
|
82 |
+
- source_sentence: Steve Madden Clutch Crossbody
|
83 |
+
sentences:
|
84 |
+
- https://www.amazon.com/dp/B07VCDT9VR
|
85 |
+
- See and BSCENE with this Clear bag. Carry it as a crossbody or clutch. The exterior
|
86 |
+
is Clear and includes an internal pouch.
|
87 |
+
- womens dezier mens regular shirt 6032sformal1110multicolor extra large
|
88 |
+
model-index:
|
89 |
+
- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
90 |
+
results:
|
91 |
+
- task:
|
92 |
+
type: triplet
|
93 |
+
name: Triplet
|
94 |
+
dataset:
|
95 |
+
name: Unknown
|
96 |
+
type: unknown
|
97 |
+
metrics:
|
98 |
+
- type: cosine_accuracy
|
99 |
+
value: 1.0
|
100 |
+
name: Cosine Accuracy
|
101 |
+
- type: dot_accuracy
|
102 |
+
value: 0.0
|
103 |
+
name: Dot Accuracy
|
104 |
+
- type: manhattan_accuracy
|
105 |
+
value: 1.0
|
106 |
+
name: Manhattan Accuracy
|
107 |
+
- type: euclidean_accuracy
|
108 |
+
value: 1.0
|
109 |
+
name: Euclidean Accuracy
|
110 |
+
- type: max_accuracy
|
111 |
+
value: 1.0
|
112 |
+
name: Max Accuracy
|
113 |
+
---
|
114 |
+
|
115 |
+
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
116 |
+
|
117 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
118 |
+
|
119 |
+
## Model Details
|
120 |
+
|
121 |
+
### Model Description
|
122 |
+
- **Model Type:** Sentence Transformer
|
123 |
+
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision 8b3219a92973c328a8e22fadcfa821b5dc75636a -->
|
124 |
+
- **Maximum Sequence Length:** 256 tokens
|
125 |
+
- **Output Dimensionality:** 384 dimensions
|
126 |
+
- **Similarity Function:** Cosine Similarity
|
127 |
+
<!-- - **Training Dataset:** Unknown -->
|
128 |
+
<!-- - **Language:** Unknown -->
|
129 |
+
<!-- - **License:** Unknown -->
|
130 |
+
|
131 |
+
### Model Sources
|
132 |
+
|
133 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
134 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
135 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
136 |
+
|
137 |
+
### Full Model Architecture
|
138 |
+
|
139 |
+
```
|
140 |
+
SentenceTransformer(
|
141 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
|
142 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
143 |
+
(2): Normalize()
|
144 |
+
)
|
145 |
+
```
|
146 |
+
|
147 |
+
## Usage
|
148 |
+
|
149 |
+
### Direct Usage (Sentence Transformers)
|
150 |
+
|
151 |
+
First install the Sentence Transformers library:
|
152 |
+
|
153 |
+
```bash
|
154 |
+
pip install -U sentence-transformers
|
155 |
+
```
|
156 |
+
|
157 |
+
Then you can load this model and run inference.
|
158 |
+
```python
|
159 |
+
from sentence_transformers import SentenceTransformer
|
160 |
+
|
161 |
+
# Download from the 🤗 Hub
|
162 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
163 |
+
# Run inference
|
164 |
+
sentences = [
|
165 |
+
'Steve Madden Clutch Crossbody',
|
166 |
+
'See and BSCENE with this Clear bag. Carry it as a crossbody or clutch. The exterior is Clear and includes an internal pouch.',
|
167 |
+
'https://www.amazon.com/dp/B07VCDT9VR',
|
168 |
+
]
|
169 |
+
embeddings = model.encode(sentences)
|
170 |
+
print(embeddings.shape)
|
171 |
+
# [3, 384]
|
172 |
+
|
173 |
+
# Get the similarity scores for the embeddings
|
174 |
+
similarities = model.similarity(embeddings, embeddings)
|
175 |
+
print(similarities.shape)
|
176 |
+
# [3, 3]
|
177 |
+
```
|
178 |
+
|
179 |
+
<!--
|
180 |
+
### Direct Usage (Transformers)
|
181 |
+
|
182 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
183 |
+
|
184 |
+
</details>
|
185 |
+
-->
|
186 |
+
|
187 |
+
<!--
|
188 |
+
### Downstream Usage (Sentence Transformers)
|
189 |
+
|
190 |
+
You can finetune this model on your own dataset.
|
191 |
+
|
192 |
+
<details><summary>Click to expand</summary>
|
193 |
+
|
194 |
+
</details>
|
195 |
+
-->
|
196 |
+
|
197 |
+
<!--
|
198 |
+
### Out-of-Scope Use
|
199 |
+
|
200 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
201 |
+
-->
|
202 |
+
|
203 |
+
## Evaluation
|
204 |
+
|
205 |
+
### Metrics
|
206 |
+
|
207 |
+
#### Triplet
|
208 |
+
|
209 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
210 |
+
|
211 |
+
| Metric | Value |
|
212 |
+
|:-------------------|:--------|
|
213 |
+
| cosine_accuracy | 1.0 |
|
214 |
+
| dot_accuracy | 0.0 |
|
215 |
+
| manhattan_accuracy | 1.0 |
|
216 |
+
| euclidean_accuracy | 1.0 |
|
217 |
+
| **max_accuracy** | **1.0** |
|
218 |
+
|
219 |
+
<!--
|
220 |
+
## Bias, Risks and Limitations
|
221 |
+
|
222 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
223 |
+
-->
|
224 |
+
|
225 |
+
<!--
|
226 |
+
### Recommendations
|
227 |
+
|
228 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
229 |
+
-->
|
230 |
+
|
231 |
+
## Training Details
|
232 |
+
|
233 |
+
### Training Dataset
|
234 |
+
|
235 |
+
#### Unnamed Dataset
|
236 |
+
|
237 |
+
|
238 |
+
* Size: 2,320 training samples
|
239 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
240 |
+
* Approximate statistics based on the first 1000 samples:
|
241 |
+
| | anchor | positive | negative |
|
242 |
+
|:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
243 |
+
| type | string | string | string |
|
244 |
+
| details | <ul><li>min: 5 tokens</li><li>mean: 21.75 tokens</li><li>max: 55 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 59.78 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 23.3 tokens</li><li>max: 25 tokens</li></ul> |
|
245 |
+
* Samples:
|
246 |
+
| anchor | positive | negative |
|
247 |
+
|:--------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------|
|
248 |
+
| <code>Shiaili Classic Plus Size Skirts for Women Flowy Pleated Midi Length Skirt</code> | <code>shiaili classic plus size skirts for women flowy pleated midi length skirt</code> | <code>https://www.amazon.com/dp/B0BMTRJRG6</code> |
|
249 |
+
| <code>ANRABESS Women's Casual Long Sleeve Draped Open Front Knit Pockets Long Cardigan Jackets Sweater</code> | <code>anrabess womens casual long sleeve draped open front knit pockets long cardigan jackets sweater</code> | <code>https://www.amazon.com/dp/B0B2W6QGYB</code> |
|
250 |
+
| <code>RipSkirt Hawaii | Length 2 with Pockets | Quick Wrap, Quick Dry, Travel Skirt with Side Pockets</code> | <code>RipSkirt Hawaii is the active woman’s perfect skirt. Wear your RipSkirt straight from the beach to the bistro, we’ve got you covered. Our custom fabric doesn’t cling, flatters almost every figure, repels water, and dries quickly if soaked. [no more wet bum marks when leaving the pool] Length 2 is our most popular length and is perfect for work, play, and around town and has side pockets deep enough for a large phone. Content: 93% polyester 7% spandex</code> | <code>https://www.amazon.com/dp/B09X714HBM</code> |
|
251 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
252 |
+
```json
|
253 |
+
{
|
254 |
+
"scale": 20.0,
|
255 |
+
"similarity_fct": "cos_sim"
|
256 |
+
}
|
257 |
+
```
|
258 |
+
|
259 |
+
### Evaluation Dataset
|
260 |
+
|
261 |
+
#### Unnamed Dataset
|
262 |
+
|
263 |
+
|
264 |
+
* Size: 580 evaluation samples
|
265 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
266 |
+
* Approximate statistics based on the first 1000 samples:
|
267 |
+
| | anchor | positive | negative |
|
268 |
+
|:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
269 |
+
| type | string | string | string |
|
270 |
+
| details | <ul><li>min: 4 tokens</li><li>mean: 21.92 tokens</li><li>max: 60 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 55.98 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 23.37 tokens</li><li>max: 25 tokens</li></ul> |
|
271 |
+
* Samples:
|
272 |
+
| anchor | positive | negative |
|
273 |
+
|:---------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------|
|
274 |
+
| <code>Hotouch Lightweight Crochet Cardigan for Women Long Sleeve Open Front Knit Oversized Cardigans Sweaters</code> | <code>hotouch lightweight crochet cardigan for women long sleeve open front knit oversized cardigans sweaters</code> | <code>https://www.amazon.com/dp/B0C1FM1JDZ</code> |
|
275 |
+
| <code>SEIKO Men's SNK809 5 Automatic Stainless Steel Watch with Black Canvas Strap</code> | <code>Black dial. Silver-tone stainless steel case with a black canvas band. Automatic movement. 30 meters / 100 feet water resistance. Fixed bezel. Tang clasp. Case size 37 mm x 11 mm. Seiko SNK809 Seiko 5 Watch.The Seiko 5 Men's Automatic Black Strap Black Dial Watch is a stylish timepiece with the convenience of automatic movement. A uniquely designed, black dial features white Arabic numbers marking the hours on an inner circle and the minutes on an outer circle, while small, bar indexes encircle the dial on an outside minute track. Silver-tone hands with luminous fill make it easy to tell time day or night, and the slim second hand is detailed with a red accent. For added convenience, a day and date display are set at three o'clock. The polished stainless steel case extends to meet the black nylon strap, which wraps comfortably around the wrist and fastens with a traditional buckle. Water resistant to 30 feet (100 meters), this high-performance watch is perfect for everyday wear.This is an automatic mechanical watch. Automatic watches do not operate on batteries, instead, they are powered automatically by the movement of the wearer’s arm. If the main spring in your automatic watch is not wound sufficiently, timekeeping may become less accurate. In order to maintain accuracy, wear the watch for 8 hours or more per day, or manually wind the main spring by turning the crown. When not in use, automatic watches may be kept charged with an automatic watch winder – a watch storage unit which may be purchased separately. From Humble beginnings, Kintaro Hattori’s Vision for Seiko has become reality. A consuming passion for excellence - imprinted in our Corporate DNA passed from generation to generation. Seiko, for 125 years committed to the art and science of time. A culture of innovation connects a 19th century Tokyo clock shop with 20th century advances in timekeeping to an extraordinary 21st century "quiet revolution." Continually driven by dedication and passion, established a multitude of world’s first technologies… transforming the principles of timekeeping. The first quartz wristwatch – changed the history of time. The first Kinetic – marked a new era in quartz watch technology. In 1969, Seiko Astron, the first quartz wristwatch - was introduced. In an instant, Seiko exponentially improved the accuracy of wristwatches –And Seiko technology firmly established today’s standard in Olympic and sports timing. 1984, another celebrated first – Kinetic Technology – powered by body movement. Kinetic – a quartz mechanism with unparalleled accuracy –the driving force behind more world’s firsts. Kinetic Chronograph – the next generation of high performance timekeeping. Kinetic Auto Relay – automatically resets to the correct time. Kinetic Perpetual - combining the date perfect technology of perpetual calendar with the genius of Kinetic Auto Relay. And now Kinetic Direct Drive – move, and the watch is powered automatically. Or hand wind it and see the power you are generating in real time. In the realm of fine watches, time is measured by Seiko innovation – A heritage of dedication to the art and science of time.See more</code> | <code>https://www.amazon.com/dp/B002SSUQFG</code> |
|
276 |
+
| <code>Carhartt Men's Rain Defender Loose Fit Midweight Thermal-Lined Full-Zip Sweatshirt</code> | <code>This men's full-zip sweatshirt is equipped for light rain. Made from midweight fleece with a water-repellent finish and thermal lining. Features inner and outer pockets that include storage for your phone. 10.5-ounce, 50% cotton / 50% polyester fleece. Polyester fleece lining for warmth. Rain Defender® durable water repellent (DWR) keeps you dry and moving in light rain. Original fit. Full-zip front with brass zipper. Attached, thermal-lined three-piece hood with drawcord closure. Spandex-reinforced rib-knit cuffs and waist help keep out the cold. Two front handwarmer pockets with flaps for added security. Hidden media pocket. Inside pocket with zipper closure. Locker loop.</code> | <code>https://www.amazon.com/dp/B08BG5V4KR</code> |
|
277 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
278 |
+
```json
|
279 |
+
{
|
280 |
+
"scale": 20.0,
|
281 |
+
"similarity_fct": "cos_sim"
|
282 |
+
}
|
283 |
+
```
|
284 |
+
|
285 |
+
### Training Hyperparameters
|
286 |
+
#### Non-Default Hyperparameters
|
287 |
+
|
288 |
+
- `eval_strategy`: epoch
|
289 |
+
- `per_device_eval_batch_size`: 16
|
290 |
+
- `learning_rate`: 3e-05
|
291 |
+
- `lr_scheduler_type`: cosine
|
292 |
+
- `warmup_ratio`: 0.1
|
293 |
+
- `load_best_model_at_end`: True
|
294 |
+
- `ddp_find_unused_parameters`: False
|
295 |
+
|
296 |
+
#### All Hyperparameters
|
297 |
+
<details><summary>Click to expand</summary>
|
298 |
+
|
299 |
+
- `overwrite_output_dir`: False
|
300 |
+
- `do_predict`: False
|
301 |
+
- `eval_strategy`: epoch
|
302 |
+
- `prediction_loss_only`: True
|
303 |
+
- `per_device_train_batch_size`: 8
|
304 |
+
- `per_device_eval_batch_size`: 16
|
305 |
+
- `per_gpu_train_batch_size`: None
|
306 |
+
- `per_gpu_eval_batch_size`: None
|
307 |
+
- `gradient_accumulation_steps`: 1
|
308 |
+
- `eval_accumulation_steps`: None
|
309 |
+
- `learning_rate`: 3e-05
|
310 |
+
- `weight_decay`: 0.0
|
311 |
+
- `adam_beta1`: 0.9
|
312 |
+
- `adam_beta2`: 0.999
|
313 |
+
- `adam_epsilon`: 1e-08
|
314 |
+
- `max_grad_norm`: 1.0
|
315 |
+
- `num_train_epochs`: 3
|
316 |
+
- `max_steps`: -1
|
317 |
+
- `lr_scheduler_type`: cosine
|
318 |
+
- `lr_scheduler_kwargs`: {}
|
319 |
+
- `warmup_ratio`: 0.1
|
320 |
+
- `warmup_steps`: 0
|
321 |
+
- `log_level`: passive
|
322 |
+
- `log_level_replica`: warning
|
323 |
+
- `log_on_each_node`: True
|
324 |
+
- `logging_nan_inf_filter`: True
|
325 |
+
- `save_safetensors`: True
|
326 |
+
- `save_on_each_node`: False
|
327 |
+
- `save_only_model`: False
|
328 |
+
- `restore_callback_states_from_checkpoint`: False
|
329 |
+
- `no_cuda`: False
|
330 |
+
- `use_cpu`: False
|
331 |
+
- `use_mps_device`: False
|
332 |
+
- `seed`: 42
|
333 |
+
- `data_seed`: None
|
334 |
+
- `jit_mode_eval`: False
|
335 |
+
- `use_ipex`: False
|
336 |
+
- `bf16`: False
|
337 |
+
- `fp16`: False
|
338 |
+
- `fp16_opt_level`: O1
|
339 |
+
- `half_precision_backend`: auto
|
340 |
+
- `bf16_full_eval`: False
|
341 |
+
- `fp16_full_eval`: False
|
342 |
+
- `tf32`: None
|
343 |
+
- `local_rank`: 0
|
344 |
+
- `ddp_backend`: None
|
345 |
+
- `tpu_num_cores`: None
|
346 |
+
- `tpu_metrics_debug`: False
|
347 |
+
- `debug`: []
|
348 |
+
- `dataloader_drop_last`: False
|
349 |
+
- `dataloader_num_workers`: 0
|
350 |
+
- `dataloader_prefetch_factor`: None
|
351 |
+
- `past_index`: -1
|
352 |
+
- `disable_tqdm`: False
|
353 |
+
- `remove_unused_columns`: True
|
354 |
+
- `label_names`: None
|
355 |
+
- `load_best_model_at_end`: True
|
356 |
+
- `ignore_data_skip`: False
|
357 |
+
- `fsdp`: []
|
358 |
+
- `fsdp_min_num_params`: 0
|
359 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
360 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
361 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
362 |
+
- `deepspeed`: None
|
363 |
+
- `label_smoothing_factor`: 0.0
|
364 |
+
- `optim`: adamw_torch
|
365 |
+
- `optim_args`: None
|
366 |
+
- `adafactor`: False
|
367 |
+
- `group_by_length`: False
|
368 |
+
- `length_column_name`: length
|
369 |
+
- `ddp_find_unused_parameters`: False
|
370 |
+
- `ddp_bucket_cap_mb`: None
|
371 |
+
- `ddp_broadcast_buffers`: False
|
372 |
+
- `dataloader_pin_memory`: True
|
373 |
+
- `dataloader_persistent_workers`: False
|
374 |
+
- `skip_memory_metrics`: True
|
375 |
+
- `use_legacy_prediction_loop`: False
|
376 |
+
- `push_to_hub`: False
|
377 |
+
- `resume_from_checkpoint`: None
|
378 |
+
- `hub_model_id`: None
|
379 |
+
- `hub_strategy`: every_save
|
380 |
+
- `hub_private_repo`: False
|
381 |
+
- `hub_always_push`: False
|
382 |
+
- `gradient_checkpointing`: False
|
383 |
+
- `gradient_checkpointing_kwargs`: None
|
384 |
+
- `include_inputs_for_metrics`: False
|
385 |
+
- `eval_do_concat_batches`: True
|
386 |
+
- `fp16_backend`: auto
|
387 |
+
- `push_to_hub_model_id`: None
|
388 |
+
- `push_to_hub_organization`: None
|
389 |
+
- `mp_parameters`:
|
390 |
+
- `auto_find_batch_size`: False
|
391 |
+
- `full_determinism`: False
|
392 |
+
- `torchdynamo`: None
|
393 |
+
- `ray_scope`: last
|
394 |
+
- `ddp_timeout`: 1800
|
395 |
+
- `torch_compile`: False
|
396 |
+
- `torch_compile_backend`: None
|
397 |
+
- `torch_compile_mode`: None
|
398 |
+
- `dispatch_batches`: None
|
399 |
+
- `split_batches`: None
|
400 |
+
- `include_tokens_per_second`: False
|
401 |
+
- `include_num_input_tokens_seen`: False
|
402 |
+
- `neftune_noise_alpha`: None
|
403 |
+
- `optim_target_modules`: None
|
404 |
+
- `batch_eval_metrics`: False
|
405 |
+
- `eval_on_start`: False
|
406 |
+
- `batch_sampler`: batch_sampler
|
407 |
+
- `multi_dataset_batch_sampler`: proportional
|
408 |
+
|
409 |
+
</details>
|
410 |
+
|
411 |
+
### Training Logs
|
412 |
+
| Epoch | Step | Training Loss | loss | max_accuracy |
|
413 |
+
|:------:|:----:|:-------------:|:------:|:------------:|
|
414 |
+
| 0.0862 | 25 | 0.3631 | - | - |
|
415 |
+
| 0.1724 | 50 | 0.1219 | - | - |
|
416 |
+
| 0.2586 | 75 | 0.1909 | - | - |
|
417 |
+
| 0.3448 | 100 | 0.24 | - | - |
|
418 |
+
| 0.4310 | 125 | 0.1607 | - | - |
|
419 |
+
| 0.5172 | 150 | 0.1103 | - | - |
|
420 |
+
| 0.6034 | 175 | 0.0952 | - | - |
|
421 |
+
| 0.6897 | 200 | 0.1139 | - | - |
|
422 |
+
| 0.7759 | 225 | 0.1335 | - | - |
|
423 |
+
| 0.8621 | 250 | 0.0758 | - | - |
|
424 |
+
| 0.9483 | 275 | 0.0902 | - | - |
|
425 |
+
| 1.0 | 290 | - | 0.0700 | 1.0 |
|
426 |
+
| 1.0345 | 300 | 0.0951 | - | - |
|
427 |
+
| 1.1207 | 325 | 0.0373 | - | - |
|
428 |
+
| 1.2069 | 350 | 0.086 | - | - |
|
429 |
+
| 1.2931 | 375 | 0.0418 | - | - |
|
430 |
+
| 1.3793 | 400 | 0.0522 | - | - |
|
431 |
+
| 1.4655 | 425 | 0.0387 | - | - |
|
432 |
+
| 1.5517 | 450 | 0.0217 | - | - |
|
433 |
+
| 1.6379 | 475 | 0.0455 | - | - |
|
434 |
+
| 1.7241 | 500 | 0.0424 | - | - |
|
435 |
+
| 1.8103 | 525 | 0.0238 | - | - |
|
436 |
+
| 1.8966 | 550 | 0.0355 | - | - |
|
437 |
+
| 1.9828 | 575 | 0.0283 | - | - |
|
438 |
+
| 2.0 | 580 | - | 0.0597 | 1.0 |
|
439 |
+
| 2.0690 | 600 | 0.0213 | - | - |
|
440 |
+
| 2.1552 | 625 | 0.0219 | - | - |
|
441 |
+
| 2.2414 | 650 | 0.0254 | - | - |
|
442 |
+
| 2.3276 | 675 | 0.0204 | - | - |
|
443 |
+
| 2.4138 | 700 | 0.0052 | - | - |
|
444 |
+
| 2.5 | 725 | 0.0248 | - | - |
|
445 |
+
| 2.5862 | 750 | 0.0507 | - | - |
|
446 |
+
| 2.6724 | 775 | 0.0191 | - | - |
|
447 |
+
| 2.7586 | 800 | 0.018 | - | - |
|
448 |
+
| 2.8448 | 825 | 0.0176 | - | - |
|
449 |
+
| 2.9310 | 850 | 0.0193 | - | - |
|
450 |
+
| 3.0 | 870 | - | 0.0566 | 1.0 |
|
451 |
+
|
452 |
+
|
453 |
+
### Framework Versions
|
454 |
+
- Python: 3.10.14
|
455 |
+
- Sentence Transformers: 3.0.1
|
456 |
+
- Transformers: 4.42.2
|
457 |
+
- PyTorch: 2.3.0
|
458 |
+
- Accelerate: 0.31.0
|
459 |
+
- Datasets: 2.19.1
|
460 |
+
- Tokenizers: 0.19.1
|
461 |
+
|
462 |
+
## Citation
|
463 |
+
|
464 |
+
### BibTeX
|
465 |
+
|
466 |
+
#### Sentence Transformers
|
467 |
+
```bibtex
|
468 |
+
@inproceedings{reimers-2019-sentence-bert,
|
469 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
470 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
471 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
472 |
+
month = "11",
|
473 |
+
year = "2019",
|
474 |
+
publisher = "Association for Computational Linguistics",
|
475 |
+
url = "https://arxiv.org/abs/1908.10084",
|
476 |
+
}
|
477 |
+
```
|
478 |
+
|
479 |
+
#### MultipleNegativesRankingLoss
|
480 |
+
```bibtex
|
481 |
+
@misc{henderson2017efficient,
|
482 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
483 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
484 |
+
year={2017},
|
485 |
+
eprint={1705.00652},
|
486 |
+
archivePrefix={arXiv},
|
487 |
+
primaryClass={cs.CL}
|
488 |
+
}
|
489 |
+
```
|
490 |
+
|
491 |
+
<!--
|
492 |
+
## Glossary
|
493 |
+
|
494 |
+
*Clearly define terms in order to be accessible across audiences.*
|
495 |
+
-->
|
496 |
+
|
497 |
+
<!--
|
498 |
+
## Model Card Authors
|
499 |
+
|
500 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
501 |
+
-->
|
502 |
+
|
503 |
+
<!--
|
504 |
+
## Model Card Contact
|
505 |
+
|
506 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
507 |
+
-->
|
checkpoint-870/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 6,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.42.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
checkpoint-870/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.42.2",
|
5 |
+
"pytorch": "2.3.0"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
checkpoint-870/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69780659bede833deee14ca959b873c99913be07d30b0c0c73b280eef5c9a9fd
|
3 |
+
size 90864192
|
checkpoint-870/modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
checkpoint-870/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e03f6d8282b128bb663573387512f87aab2b554007f0a8a97403b6e47645874
|
3 |
+
size 180604922
|
checkpoint-870/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10aec0abd52399d06400eeec3b930f163c21faa6bc2fee365117253ee1fc74d3
|
3 |
+
size 13990
|
checkpoint-870/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:699b01bf70f7f8c2baf866acb81eb20509ed86cb04c73c7831fc212ff22c8b46
|
3 |
+
size 1064
|
checkpoint-870/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
checkpoint-870/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
checkpoint-870/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-870/tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 256,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
checkpoint-870/trainer_state.json
ADDED
@@ -0,0 +1,319 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.056603044271469116,
|
3 |
+
"best_model_checkpoint": "autotrain-rp16o-pxwa0/checkpoint-870",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 870,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08620689655172414,
|
13 |
+
"grad_norm": 3.290644645690918,
|
14 |
+
"learning_rate": 8.620689655172414e-06,
|
15 |
+
"loss": 0.3631,
|
16 |
+
"step": 25
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.1724137931034483,
|
20 |
+
"grad_norm": 10.116286277770996,
|
21 |
+
"learning_rate": 1.7241379310344828e-05,
|
22 |
+
"loss": 0.1219,
|
23 |
+
"step": 50
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.25862068965517243,
|
27 |
+
"grad_norm": 0.6143497228622437,
|
28 |
+
"learning_rate": 2.586206896551724e-05,
|
29 |
+
"loss": 0.1909,
|
30 |
+
"step": 75
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.3448275862068966,
|
34 |
+
"grad_norm": 0.007788954768329859,
|
35 |
+
"learning_rate": 2.9979600208641352e-05,
|
36 |
+
"loss": 0.24,
|
37 |
+
"step": 100
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.43103448275862066,
|
41 |
+
"grad_norm": 6.688470840454102,
|
42 |
+
"learning_rate": 2.9825994400778473e-05,
|
43 |
+
"loss": 0.1607,
|
44 |
+
"step": 125
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.5172413793103449,
|
48 |
+
"grad_norm": 6.3299055099487305,
|
49 |
+
"learning_rate": 2.952334410903845e-05,
|
50 |
+
"loss": 0.1103,
|
51 |
+
"step": 150
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.603448275862069,
|
55 |
+
"grad_norm": 0.13505378365516663,
|
56 |
+
"learning_rate": 2.907469185153564e-05,
|
57 |
+
"loss": 0.0952,
|
58 |
+
"step": 175
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.6896551724137931,
|
62 |
+
"grad_norm": 6.772790431976318,
|
63 |
+
"learning_rate": 2.8484547891956387e-05,
|
64 |
+
"loss": 0.1139,
|
65 |
+
"step": 200
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.7758620689655172,
|
69 |
+
"grad_norm": 2.550690174102783,
|
70 |
+
"learning_rate": 2.775884489825476e-05,
|
71 |
+
"loss": 0.1335,
|
72 |
+
"step": 225
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8620689655172413,
|
76 |
+
"grad_norm": 11.232149124145508,
|
77 |
+
"learning_rate": 2.6904878302036937e-05,
|
78 |
+
"loss": 0.0758,
|
79 |
+
"step": 250
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.9482758620689655,
|
83 |
+
"grad_norm": 1.9099515676498413,
|
84 |
+
"learning_rate": 2.5931232958196343e-05,
|
85 |
+
"loss": 0.0902,
|
86 |
+
"step": 275
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.0,
|
90 |
+
"eval_cosine_accuracy": 1.0,
|
91 |
+
"eval_dot_accuracy": 0.0,
|
92 |
+
"eval_euclidean_accuracy": 1.0,
|
93 |
+
"eval_loss": 0.06996160000562668,
|
94 |
+
"eval_manhattan_accuracy": 1.0,
|
95 |
+
"eval_max_accuracy": 1.0,
|
96 |
+
"eval_runtime": 44.8595,
|
97 |
+
"eval_samples_per_second": 12.929,
|
98 |
+
"eval_steps_per_second": 0.825,
|
99 |
+
"step": 290
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 1.0344827586206897,
|
103 |
+
"grad_norm": 0.029423370957374573,
|
104 |
+
"learning_rate": 2.48476968420842e-05,
|
105 |
+
"loss": 0.0951,
|
106 |
+
"step": 300
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"epoch": 1.1206896551724137,
|
110 |
+
"grad_norm": 0.830912709236145,
|
111 |
+
"learning_rate": 2.3665162651810512e-05,
|
112 |
+
"loss": 0.0373,
|
113 |
+
"step": 325
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 1.206896551724138,
|
117 |
+
"grad_norm": 8.924799919128418,
|
118 |
+
"learning_rate": 2.2395518304859387e-05,
|
119 |
+
"loss": 0.086,
|
120 |
+
"step": 350
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"epoch": 1.293103448275862,
|
124 |
+
"grad_norm": 8.515233039855957,
|
125 |
+
"learning_rate": 2.105152742984713e-05,
|
126 |
+
"loss": 0.0418,
|
127 |
+
"step": 375
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"epoch": 1.3793103448275863,
|
131 |
+
"grad_norm": 0.07946328073740005,
|
132 |
+
"learning_rate": 1.964670105482938e-05,
|
133 |
+
"loss": 0.0522,
|
134 |
+
"step": 400
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 1.4655172413793103,
|
138 |
+
"grad_norm": 0.027938440442085266,
|
139 |
+
"learning_rate": 1.8195161782064143e-05,
|
140 |
+
"loss": 0.0387,
|
141 |
+
"step": 425
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"epoch": 1.5517241379310345,
|
145 |
+
"grad_norm": 0.20512813329696655,
|
146 |
+
"learning_rate": 1.6711501814670373e-05,
|
147 |
+
"loss": 0.0217,
|
148 |
+
"step": 450
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"epoch": 1.6379310344827587,
|
152 |
+
"grad_norm": 0.04595763236284256,
|
153 |
+
"learning_rate": 1.5210636262428347e-05,
|
154 |
+
"loss": 0.0455,
|
155 |
+
"step": 475
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"epoch": 1.7241379310344827,
|
159 |
+
"grad_norm": 0.002501540817320347,
|
160 |
+
"learning_rate": 1.3707653201426321e-05,
|
161 |
+
"loss": 0.0424,
|
162 |
+
"step": 500
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"epoch": 1.8103448275862069,
|
166 |
+
"grad_norm": 0.4840604364871979,
|
167 |
+
"learning_rate": 1.2217661994891308e-05,
|
168 |
+
"loss": 0.0238,
|
169 |
+
"step": 525
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"epoch": 1.896551724137931,
|
173 |
+
"grad_norm": 13.134848594665527,
|
174 |
+
"learning_rate": 1.075564140002207e-05,
|
175 |
+
"loss": 0.0355,
|
176 |
+
"step": 550
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"epoch": 1.9827586206896552,
|
180 |
+
"grad_norm": 1.2007379531860352,
|
181 |
+
"learning_rate": 9.33628898779359e-06,
|
182 |
+
"loss": 0.0283,
|
183 |
+
"step": 575
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"epoch": 2.0,
|
187 |
+
"eval_cosine_accuracy": 1.0,
|
188 |
+
"eval_dot_accuracy": 0.0,
|
189 |
+
"eval_euclidean_accuracy": 1.0,
|
190 |
+
"eval_loss": 0.059745438396930695,
|
191 |
+
"eval_manhattan_accuracy": 1.0,
|
192 |
+
"eval_max_accuracy": 1.0,
|
193 |
+
"eval_runtime": 45.055,
|
194 |
+
"eval_samples_per_second": 12.873,
|
195 |
+
"eval_steps_per_second": 0.821,
|
196 |
+
"step": 580
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"epoch": 2.0689655172413794,
|
200 |
+
"grad_norm": 0.010960499756038189,
|
201 |
+
"learning_rate": 7.97387338950315e-06,
|
202 |
+
"loss": 0.0213,
|
203 |
+
"step": 600
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"epoch": 2.1551724137931036,
|
207 |
+
"grad_norm": 0.004453401546925306,
|
208 |
+
"learning_rate": 6.682090855411221e-06,
|
209 |
+
"loss": 0.0219,
|
210 |
+
"step": 625
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"epoch": 2.2413793103448274,
|
214 |
+
"grad_norm": 7.022181987762451,
|
215 |
+
"learning_rate": 5.473927567481096e-06,
|
216 |
+
"loss": 0.0254,
|
217 |
+
"step": 650
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"epoch": 2.3275862068965516,
|
221 |
+
"grad_norm": 1.2289260625839233,
|
222 |
+
"learning_rate": 4.361529090375834e-06,
|
223 |
+
"loss": 0.0204,
|
224 |
+
"step": 675
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"epoch": 2.413793103448276,
|
228 |
+
"grad_norm": 0.006588762626051903,
|
229 |
+
"learning_rate": 3.35607827311076e-06,
|
230 |
+
"loss": 0.0052,
|
231 |
+
"step": 700
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"epoch": 2.5,
|
235 |
+
"grad_norm": 2.5081865787506104,
|
236 |
+
"learning_rate": 2.467682828805956e-06,
|
237 |
+
"loss": 0.0248,
|
238 |
+
"step": 725
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"epoch": 2.586206896551724,
|
242 |
+
"grad_norm": 17.075759887695312,
|
243 |
+
"learning_rate": 1.7052737226901876e-06,
|
244 |
+
"loss": 0.0507,
|
245 |
+
"step": 750
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"epoch": 2.6724137931034484,
|
249 |
+
"grad_norm": 0.9192395806312561,
|
250 |
+
"learning_rate": 1.0765153898531083e-06,
|
251 |
+
"loss": 0.0191,
|
252 |
+
"step": 775
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"epoch": 2.7586206896551726,
|
256 |
+
"grad_norm": 0.03410585597157478,
|
257 |
+
"learning_rate": 5.877286853191999e-07,
|
258 |
+
"loss": 0.018,
|
259 |
+
"step": 800
|
260 |
+
},
|
261 |
+
{
|
262 |
+
"epoch": 2.844827586206897,
|
263 |
+
"grad_norm": 0.09378518909215927,
|
264 |
+
"learning_rate": 2.438273410199598e-07,
|
265 |
+
"loss": 0.0176,
|
266 |
+
"step": 825
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"epoch": 2.9310344827586206,
|
270 |
+
"grad_norm": 0.024510715156793594,
|
271 |
+
"learning_rate": 4.826856845703165e-08,
|
272 |
+
"loss": 0.0193,
|
273 |
+
"step": 850
|
274 |
+
},
|
275 |
+
{
|
276 |
+
"epoch": 3.0,
|
277 |
+
"eval_cosine_accuracy": 1.0,
|
278 |
+
"eval_dot_accuracy": 0.0,
|
279 |
+
"eval_euclidean_accuracy": 1.0,
|
280 |
+
"eval_loss": 0.056603044271469116,
|
281 |
+
"eval_manhattan_accuracy": 1.0,
|
282 |
+
"eval_max_accuracy": 1.0,
|
283 |
+
"eval_runtime": 43.366,
|
284 |
+
"eval_samples_per_second": 13.375,
|
285 |
+
"eval_steps_per_second": 0.853,
|
286 |
+
"step": 870
|
287 |
+
}
|
288 |
+
],
|
289 |
+
"logging_steps": 25,
|
290 |
+
"max_steps": 870,
|
291 |
+
"num_input_tokens_seen": 0,
|
292 |
+
"num_train_epochs": 3,
|
293 |
+
"save_steps": 500,
|
294 |
+
"stateful_callbacks": {
|
295 |
+
"EarlyStoppingCallback": {
|
296 |
+
"args": {
|
297 |
+
"early_stopping_patience": 5,
|
298 |
+
"early_stopping_threshold": 0.01
|
299 |
+
},
|
300 |
+
"attributes": {
|
301 |
+
"early_stopping_patience_counter": 0
|
302 |
+
}
|
303 |
+
},
|
304 |
+
"TrainerControl": {
|
305 |
+
"args": {
|
306 |
+
"should_epoch_stop": false,
|
307 |
+
"should_evaluate": false,
|
308 |
+
"should_log": false,
|
309 |
+
"should_save": true,
|
310 |
+
"should_training_stop": true
|
311 |
+
},
|
312 |
+
"attributes": {}
|
313 |
+
}
|
314 |
+
},
|
315 |
+
"total_flos": 0.0,
|
316 |
+
"train_batch_size": 8,
|
317 |
+
"trial_name": null,
|
318 |
+
"trial_params": null
|
319 |
+
}
|
checkpoint-870/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a06674a005f23df1e09ea7e154679be6e3365d4bc0690dd5d156589705444192
|
3 |
+
size 5368
|
checkpoint-870/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 6,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.42.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.42.2",
|
5 |
+
"pytorch": "2.3.0"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69780659bede833deee14ca959b873c99913be07d30b0c0c73b280eef5c9a9fd
|
3 |
+
size 90864192
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
runs/Jun28_17-15-52_r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg/events.out.tfevents.1719594954.r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg.100.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57386aed1a8b3c333cd78582f5bebebea4f2d00fc1d6a82bcfd30d20531ddb23
|
3 |
+
size 13439
|
runs/Jun28_17-15-52_r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg/events.out.tfevents.1719596615.r-shauryanova-fashio-f0iyutqz-dc8a7-17rpg.100.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75de8ae06f52c3a1b48b5e34503358ba4d841c6c3faa31748042d8d1222f1e96
|
3 |
+
size 654
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 256,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a06674a005f23df1e09ea7e154679be6e3365d4bc0690dd5d156589705444192
|
3 |
+
size 5368
|
training_params.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"data_path": "autotrain-rp16o-pxwa0/autotrain-data",
|
3 |
+
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
4 |
+
"lr": 3e-05,
|
5 |
+
"epochs": 3,
|
6 |
+
"max_seq_length": 128,
|
7 |
+
"batch_size": 8,
|
8 |
+
"warmup_ratio": 0.1,
|
9 |
+
"gradient_accumulation": 1,
|
10 |
+
"optimizer": "adamw_torch",
|
11 |
+
"scheduler": "cosine",
|
12 |
+
"weight_decay": 0.0,
|
13 |
+
"max_grad_norm": 1.0,
|
14 |
+
"seed": 42,
|
15 |
+
"train_split": "train",
|
16 |
+
"valid_split": "validation",
|
17 |
+
"logging_steps": -1,
|
18 |
+
"project_name": "autotrain-rp16o-pxwa0",
|
19 |
+
"auto_find_batch_size": false,
|
20 |
+
"mixed_precision": "none",
|
21 |
+
"save_total_limit": 1,
|
22 |
+
"push_to_hub": true,
|
23 |
+
"eval_strategy": "epoch",
|
24 |
+
"username": "ShauryaNova",
|
25 |
+
"log": "tensorboard",
|
26 |
+
"early_stopping_patience": 5,
|
27 |
+
"early_stopping_threshold": 0.01,
|
28 |
+
"trainer": "triplet",
|
29 |
+
"sentence1_column": "autotrain_sentence1",
|
30 |
+
"sentence2_column": "autotrain_sentence2",
|
31 |
+
"sentence3_column": "autotrain_sentence3",
|
32 |
+
"target_column": "autotrain_target"
|
33 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|