Upload 13 files
Browse files- README.md +247 -211
- config_sentence_transformers.json +2 -2
- model.onnx +1 -1
- model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
---
|
2 |
base_model: Alibaba-NLP/gte-base-en-v1.5
|
3 |
-
datasets: []
|
4 |
language:
|
5 |
- en
|
6 |
library_name: sentence-transformers
|
@@ -75,7 +74,7 @@ widget:
|
|
75 |
- What lessons can be learned from the historical context of employee relations
|
76 |
in large corporations?
|
77 |
model-index:
|
78 |
-
- name:
|
79 |
results:
|
80 |
- task:
|
81 |
type: information-retrieval
|
@@ -85,49 +84,49 @@ model-index:
|
|
85 |
type: dim_768
|
86 |
metrics:
|
87 |
- type: cosine_accuracy@1
|
88 |
-
value: 0.
|
89 |
name: Cosine Accuracy@1
|
90 |
- type: cosine_accuracy@3
|
91 |
-
value: 0.
|
92 |
name: Cosine Accuracy@3
|
93 |
- type: cosine_accuracy@5
|
94 |
-
value: 0.
|
95 |
name: Cosine Accuracy@5
|
96 |
- type: cosine_accuracy@10
|
97 |
-
value: 0.
|
98 |
name: Cosine Accuracy@10
|
99 |
- type: cosine_precision@1
|
100 |
-
value: 0.
|
101 |
name: Cosine Precision@1
|
102 |
- type: cosine_precision@3
|
103 |
-
value: 0.
|
104 |
name: Cosine Precision@3
|
105 |
- type: cosine_precision@5
|
106 |
-
value: 0.
|
107 |
name: Cosine Precision@5
|
108 |
- type: cosine_precision@10
|
109 |
-
value: 0.
|
110 |
name: Cosine Precision@10
|
111 |
- type: cosine_recall@1
|
112 |
-
value: 0.
|
113 |
name: Cosine Recall@1
|
114 |
- type: cosine_recall@3
|
115 |
-
value: 0.
|
116 |
name: Cosine Recall@3
|
117 |
- type: cosine_recall@5
|
118 |
-
value: 0.
|
119 |
name: Cosine Recall@5
|
120 |
- type: cosine_recall@10
|
121 |
-
value: 0.
|
122 |
name: Cosine Recall@10
|
123 |
- type: cosine_ndcg@10
|
124 |
-
value: 0.
|
125 |
name: Cosine Ndcg@10
|
126 |
- type: cosine_mrr@10
|
127 |
-
value: 0.
|
128 |
name: Cosine Mrr@10
|
129 |
- type: cosine_map@100
|
130 |
-
value: 0.
|
131 |
name: Cosine Map@100
|
132 |
- task:
|
133 |
type: information-retrieval
|
@@ -137,49 +136,49 @@ model-index:
|
|
137 |
type: dim_512
|
138 |
metrics:
|
139 |
- type: cosine_accuracy@1
|
140 |
-
value: 0.
|
141 |
name: Cosine Accuracy@1
|
142 |
- type: cosine_accuracy@3
|
143 |
-
value: 0.
|
144 |
name: Cosine Accuracy@3
|
145 |
- type: cosine_accuracy@5
|
146 |
-
value: 0.
|
147 |
name: Cosine Accuracy@5
|
148 |
- type: cosine_accuracy@10
|
149 |
-
value: 0.
|
150 |
name: Cosine Accuracy@10
|
151 |
- type: cosine_precision@1
|
152 |
-
value: 0.
|
153 |
name: Cosine Precision@1
|
154 |
- type: cosine_precision@3
|
155 |
-
value: 0.
|
156 |
name: Cosine Precision@3
|
157 |
- type: cosine_precision@5
|
158 |
-
value: 0.
|
159 |
name: Cosine Precision@5
|
160 |
- type: cosine_precision@10
|
161 |
-
value: 0.
|
162 |
name: Cosine Precision@10
|
163 |
- type: cosine_recall@1
|
164 |
-
value: 0.
|
165 |
name: Cosine Recall@1
|
166 |
- type: cosine_recall@3
|
167 |
-
value: 0.
|
168 |
name: Cosine Recall@3
|
169 |
- type: cosine_recall@5
|
170 |
-
value: 0.
|
171 |
name: Cosine Recall@5
|
172 |
- type: cosine_recall@10
|
173 |
-
value: 0.
|
174 |
name: Cosine Recall@10
|
175 |
- type: cosine_ndcg@10
|
176 |
-
value: 0.
|
177 |
name: Cosine Ndcg@10
|
178 |
- type: cosine_mrr@10
|
179 |
-
value: 0.
|
180 |
name: Cosine Mrr@10
|
181 |
- type: cosine_map@100
|
182 |
-
value: 0.
|
183 |
name: Cosine Map@100
|
184 |
- task:
|
185 |
type: information-retrieval
|
@@ -189,49 +188,49 @@ model-index:
|
|
189 |
type: dim_256
|
190 |
metrics:
|
191 |
- type: cosine_accuracy@1
|
192 |
-
value: 0.
|
193 |
name: Cosine Accuracy@1
|
194 |
- type: cosine_accuracy@3
|
195 |
-
value: 0.
|
196 |
name: Cosine Accuracy@3
|
197 |
- type: cosine_accuracy@5
|
198 |
-
value: 0.
|
199 |
name: Cosine Accuracy@5
|
200 |
- type: cosine_accuracy@10
|
201 |
-
value: 0.
|
202 |
name: Cosine Accuracy@10
|
203 |
- type: cosine_precision@1
|
204 |
-
value: 0.
|
205 |
name: Cosine Precision@1
|
206 |
- type: cosine_precision@3
|
207 |
-
value: 0.
|
208 |
name: Cosine Precision@3
|
209 |
- type: cosine_precision@5
|
210 |
-
value: 0.
|
211 |
name: Cosine Precision@5
|
212 |
- type: cosine_precision@10
|
213 |
-
value: 0.
|
214 |
name: Cosine Precision@10
|
215 |
- type: cosine_recall@1
|
216 |
-
value: 0.
|
217 |
name: Cosine Recall@1
|
218 |
- type: cosine_recall@3
|
219 |
-
value: 0.
|
220 |
name: Cosine Recall@3
|
221 |
- type: cosine_recall@5
|
222 |
-
value: 0.
|
223 |
name: Cosine Recall@5
|
224 |
- type: cosine_recall@10
|
225 |
-
value: 0.
|
226 |
name: Cosine Recall@10
|
227 |
- type: cosine_ndcg@10
|
228 |
-
value: 0.
|
229 |
name: Cosine Ndcg@10
|
230 |
- type: cosine_mrr@10
|
231 |
-
value: 0.
|
232 |
name: Cosine Mrr@10
|
233 |
- type: cosine_map@100
|
234 |
-
value: 0.
|
235 |
name: Cosine Map@100
|
236 |
- task:
|
237 |
type: information-retrieval
|
@@ -241,49 +240,49 @@ model-index:
|
|
241 |
type: dim_128
|
242 |
metrics:
|
243 |
- type: cosine_accuracy@1
|
244 |
-
value: 0.
|
245 |
name: Cosine Accuracy@1
|
246 |
- type: cosine_accuracy@3
|
247 |
-
value: 0.
|
248 |
name: Cosine Accuracy@3
|
249 |
- type: cosine_accuracy@5
|
250 |
-
value: 0.
|
251 |
name: Cosine Accuracy@5
|
252 |
- type: cosine_accuracy@10
|
253 |
-
value: 0.
|
254 |
name: Cosine Accuracy@10
|
255 |
- type: cosine_precision@1
|
256 |
-
value: 0.
|
257 |
name: Cosine Precision@1
|
258 |
- type: cosine_precision@3
|
259 |
-
value: 0.
|
260 |
name: Cosine Precision@3
|
261 |
- type: cosine_precision@5
|
262 |
-
value: 0.
|
263 |
name: Cosine Precision@5
|
264 |
- type: cosine_precision@10
|
265 |
-
value: 0.
|
266 |
name: Cosine Precision@10
|
267 |
- type: cosine_recall@1
|
268 |
-
value: 0.
|
269 |
name: Cosine Recall@1
|
270 |
- type: cosine_recall@3
|
271 |
-
value: 0.
|
272 |
name: Cosine Recall@3
|
273 |
- type: cosine_recall@5
|
274 |
-
value: 0.
|
275 |
name: Cosine Recall@5
|
276 |
- type: cosine_recall@10
|
277 |
-
value: 0.
|
278 |
name: Cosine Recall@10
|
279 |
- type: cosine_ndcg@10
|
280 |
-
value: 0.
|
281 |
name: Cosine Ndcg@10
|
282 |
- type: cosine_mrr@10
|
283 |
-
value: 0.
|
284 |
name: Cosine Mrr@10
|
285 |
- type: cosine_map@100
|
286 |
-
value: 0.
|
287 |
name: Cosine Map@100
|
288 |
- task:
|
289 |
type: information-retrieval
|
@@ -293,55 +292,55 @@ model-index:
|
|
293 |
type: dim_64
|
294 |
metrics:
|
295 |
- type: cosine_accuracy@1
|
296 |
-
value: 0.
|
297 |
name: Cosine Accuracy@1
|
298 |
- type: cosine_accuracy@3
|
299 |
-
value: 0.
|
300 |
name: Cosine Accuracy@3
|
301 |
- type: cosine_accuracy@5
|
302 |
-
value: 0.
|
303 |
name: Cosine Accuracy@5
|
304 |
- type: cosine_accuracy@10
|
305 |
-
value: 0.
|
306 |
name: Cosine Accuracy@10
|
307 |
- type: cosine_precision@1
|
308 |
-
value: 0.
|
309 |
name: Cosine Precision@1
|
310 |
- type: cosine_precision@3
|
311 |
-
value: 0.
|
312 |
name: Cosine Precision@3
|
313 |
- type: cosine_precision@5
|
314 |
-
value: 0.
|
315 |
name: Cosine Precision@5
|
316 |
- type: cosine_precision@10
|
317 |
-
value: 0.
|
318 |
name: Cosine Precision@10
|
319 |
- type: cosine_recall@1
|
320 |
-
value: 0.
|
321 |
name: Cosine Recall@1
|
322 |
- type: cosine_recall@3
|
323 |
-
value: 0.
|
324 |
name: Cosine Recall@3
|
325 |
- type: cosine_recall@5
|
326 |
-
value: 0.
|
327 |
name: Cosine Recall@5
|
328 |
- type: cosine_recall@10
|
329 |
-
value: 0.
|
330 |
name: Cosine Recall@10
|
331 |
- type: cosine_ndcg@10
|
332 |
-
value: 0.
|
333 |
name: Cosine Ndcg@10
|
334 |
- type: cosine_mrr@10
|
335 |
-
value: 0.
|
336 |
name: Cosine Mrr@10
|
337 |
- type: cosine_map@100
|
338 |
-
value: 0.
|
339 |
name: Cosine Map@100
|
340 |
---
|
341 |
|
342 |
-
#
|
343 |
|
344 |
-
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
345 |
|
346 |
## Model Details
|
347 |
|
@@ -351,7 +350,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
|
|
351 |
- **Maximum Sequence Length:** 8192 tokens
|
352 |
- **Output Dimensionality:** 768 dimensions
|
353 |
- **Similarity Function:** Cosine Similarity
|
354 |
-
|
|
|
355 |
- **Language:** en
|
356 |
- **License:** apache-2.0
|
357 |
|
@@ -436,21 +436,21 @@ You can finetune this model on your own dataset.
|
|
436 |
|
437 |
| Metric | Value |
|
438 |
|:--------------------|:-----------|
|
439 |
-
| cosine_accuracy@1 | 0.
|
440 |
-
| cosine_accuracy@3 | 0.
|
441 |
-
| cosine_accuracy@5 | 0.
|
442 |
-
| cosine_accuracy@10 | 0.
|
443 |
-
| cosine_precision@1 | 0.
|
444 |
-
| cosine_precision@3 | 0.
|
445 |
-
| cosine_precision@5 | 0.
|
446 |
-
| cosine_precision@10 | 0.
|
447 |
-
| cosine_recall@1 | 0.
|
448 |
-
| cosine_recall@3 | 0.
|
449 |
-
| cosine_recall@5 | 0.
|
450 |
-
| cosine_recall@10 | 0.
|
451 |
-
| cosine_ndcg@10 | 0.
|
452 |
-
| cosine_mrr@10 | 0.
|
453 |
-
| **cosine_map@100** | **0.
|
454 |
|
455 |
#### Information Retrieval
|
456 |
* Dataset: `dim_512`
|
@@ -458,43 +458,43 @@ You can finetune this model on your own dataset.
|
|
458 |
|
459 |
| Metric | Value |
|
460 |
|:--------------------|:-----------|
|
461 |
-
| cosine_accuracy@1 | 0.
|
462 |
-
| cosine_accuracy@3 | 0.
|
463 |
-
| cosine_accuracy@5 | 0.
|
464 |
-
| cosine_accuracy@10 | 0.
|
465 |
-
| cosine_precision@1 | 0.
|
466 |
-
| cosine_precision@3 | 0.
|
467 |
-
| cosine_precision@5 | 0.
|
468 |
-
| cosine_precision@10 | 0.
|
469 |
-
| cosine_recall@1 | 0.
|
470 |
-
| cosine_recall@3 | 0.
|
471 |
-
| cosine_recall@5 | 0.
|
472 |
-
| cosine_recall@10 | 0.
|
473 |
-
| cosine_ndcg@10 | 0.
|
474 |
-
| cosine_mrr@10 | 0.
|
475 |
-
| **cosine_map@100** | **0.
|
476 |
|
477 |
#### Information Retrieval
|
478 |
* Dataset: `dim_256`
|
479 |
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
480 |
|
481 |
-
| Metric | Value
|
482 |
-
|
483 |
-
| cosine_accuracy@1 | 0.
|
484 |
-
| cosine_accuracy@3 | 0.
|
485 |
-
| cosine_accuracy@5 | 0.
|
486 |
-
| cosine_accuracy@10 | 0.
|
487 |
-
| cosine_precision@1 | 0.
|
488 |
-
| cosine_precision@3 | 0.
|
489 |
-
| cosine_precision@5 | 0.
|
490 |
-
| cosine_precision@10 | 0.
|
491 |
-
| cosine_recall@1 | 0.
|
492 |
-
| cosine_recall@3 | 0.
|
493 |
-
| cosine_recall@5 | 0.
|
494 |
-
| cosine_recall@10 | 0.
|
495 |
-
| cosine_ndcg@10 | 0.
|
496 |
-
| cosine_mrr@10 | 0.
|
497 |
-
| **cosine_map@100** | **0.
|
498 |
|
499 |
#### Information Retrieval
|
500 |
* Dataset: `dim_128`
|
@@ -502,21 +502,21 @@ You can finetune this model on your own dataset.
|
|
502 |
|
503 |
| Metric | Value |
|
504 |
|:--------------------|:-----------|
|
505 |
-
| cosine_accuracy@1 | 0.
|
506 |
-
| cosine_accuracy@3 | 0.
|
507 |
-
| cosine_accuracy@5 | 0.
|
508 |
-
| cosine_accuracy@10 | 0.
|
509 |
-
| cosine_precision@1 | 0.
|
510 |
-
| cosine_precision@3 | 0.
|
511 |
-
| cosine_precision@5 | 0.
|
512 |
-
| cosine_precision@10 | 0.
|
513 |
-
| cosine_recall@1 | 0.
|
514 |
-
| cosine_recall@3 | 0.
|
515 |
-
| cosine_recall@5 | 0.
|
516 |
-
| cosine_recall@10 | 0.
|
517 |
-
| cosine_ndcg@10 | 0.
|
518 |
-
| cosine_mrr@10 | 0.
|
519 |
-
| **cosine_map@100** | **0.
|
520 |
|
521 |
#### Information Retrieval
|
522 |
* Dataset: `dim_64`
|
@@ -524,21 +524,21 @@ You can finetune this model on your own dataset.
|
|
524 |
|
525 |
| Metric | Value |
|
526 |
|:--------------------|:-----------|
|
527 |
-
| cosine_accuracy@1 | 0.
|
528 |
-
| cosine_accuracy@3 | 0.
|
529 |
-
| cosine_accuracy@5 | 0.
|
530 |
-
| cosine_accuracy@10 | 0.
|
531 |
-
| cosine_precision@1 | 0.
|
532 |
-
| cosine_precision@3 | 0.
|
533 |
-
| cosine_precision@5 | 0.
|
534 |
-
| cosine_precision@10 | 0.
|
535 |
-
| cosine_recall@1 | 0.
|
536 |
-
| cosine_recall@3 | 0.
|
537 |
-
| cosine_recall@5 | 0.
|
538 |
-
| cosine_recall@10 | 0.
|
539 |
-
| cosine_ndcg@10 | 0.
|
540 |
-
| cosine_mrr@10 | 0.
|
541 |
-
| **cosine_map@100** | **0.
|
542 |
|
543 |
<!--
|
544 |
## Bias, Risks and Limitations
|
@@ -556,9 +556,9 @@ You can finetune this model on your own dataset.
|
|
556 |
|
557 |
### Training Dataset
|
558 |
|
559 |
-
####
|
560 |
-
|
561 |
|
|
|
562 |
* Size: 32,833 training samples
|
563 |
* Columns: <code>positive</code> and <code>anchor</code>
|
564 |
* Approximate statistics based on the first 1000 samples:
|
@@ -598,11 +598,11 @@ You can finetune this model on your own dataset.
|
|
598 |
#### Non-Default Hyperparameters
|
599 |
|
600 |
- `eval_strategy`: epoch
|
601 |
-
- `per_device_train_batch_size`:
|
602 |
-
- `per_device_eval_batch_size`:
|
603 |
-
- `gradient_accumulation_steps`:
|
604 |
-
- `learning_rate`:
|
605 |
-
- `num_train_epochs`:
|
606 |
- `lr_scheduler_type`: cosine
|
607 |
- `warmup_ratio`: 0.1
|
608 |
- `bf16`: True
|
@@ -616,20 +616,20 @@ You can finetune this model on your own dataset.
|
|
616 |
- `do_predict`: False
|
617 |
- `eval_strategy`: epoch
|
618 |
- `prediction_loss_only`: True
|
619 |
-
- `per_device_train_batch_size`:
|
620 |
-
- `per_device_eval_batch_size`:
|
621 |
- `per_gpu_train_batch_size`: None
|
622 |
- `per_gpu_eval_batch_size`: None
|
623 |
-
- `gradient_accumulation_steps`:
|
624 |
- `eval_accumulation_steps`: None
|
625 |
- `torch_empty_cache_steps`: None
|
626 |
-
- `learning_rate`:
|
627 |
- `weight_decay`: 0.0
|
628 |
- `adam_beta1`: 0.9
|
629 |
- `adam_beta2`: 0.999
|
630 |
- `adam_epsilon`: 1e-08
|
631 |
- `max_grad_norm`: 1.0
|
632 |
-
- `num_train_epochs`:
|
633 |
- `max_steps`: -1
|
634 |
- `lr_scheduler_type`: cosine
|
635 |
- `lr_scheduler_kwargs`: {}
|
@@ -727,52 +727,88 @@ You can finetune this model on your own dataset.
|
|
727 |
</details>
|
728 |
|
729 |
### Training Logs
|
730 |
-
| Epoch | Step
|
731 |
-
|
732 |
-
| 0.
|
733 |
-
| 0.
|
734 |
-
| 0.
|
735 |
-
| 0.
|
736 |
-
| 0.
|
737 |
-
| 0.
|
738 |
-
|
|
739 |
-
|
|
740 |
-
|
|
741 |
-
|
|
742 |
-
|
|
743 |
-
|
|
744 |
-
|
|
745 |
-
|
|
746 |
-
|
|
747 |
-
|
|
748 |
-
|
|
749 |
-
|
|
750 |
-
|
|
751 |
-
|
|
752 |
-
|
|
753 |
-
|
|
754 |
-
|
|
755 |
-
|
|
756 |
-
|
|
757 |
-
|
|
758 |
-
|
|
759 |
-
|
|
760 |
-
|
|
761 |
-
|
|
762 |
-
|
|
763 |
-
|
|
764 |
-
|
|
765 |
-
|
|
766 |
-
|
|
767 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
768 |
|
769 |
* The bold row denotes the saved checkpoint.
|
770 |
|
771 |
### Framework Versions
|
772 |
- Python: 3.12.5
|
773 |
-
- Sentence Transformers: 3.
|
774 |
- Transformers: 4.44.2
|
775 |
-
- PyTorch: 2.4.
|
776 |
- Accelerate: 0.33.0
|
777 |
- Datasets: 2.21.0
|
778 |
- Tokenizers: 0.19.1
|
@@ -797,7 +833,7 @@ You can finetune this model on your own dataset.
|
|
797 |
#### MatryoshkaLoss
|
798 |
```bibtex
|
799 |
@misc{kusupati2024matryoshka,
|
800 |
-
title={Matryoshka Representation Learning},
|
801 |
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
|
802 |
year={2024},
|
803 |
eprint={2205.13147},
|
@@ -809,7 +845,7 @@ You can finetune this model on your own dataset.
|
|
809 |
#### MultipleNegativesRankingLoss
|
810 |
```bibtex
|
811 |
@misc{henderson2017efficient,
|
812 |
-
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
813 |
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
814 |
year={2017},
|
815 |
eprint={1705.00652},
|
|
|
1 |
---
|
2 |
base_model: Alibaba-NLP/gte-base-en-v1.5
|
|
|
3 |
language:
|
4 |
- en
|
5 |
library_name: sentence-transformers
|
|
|
74 |
- What lessons can be learned from the historical context of employee relations
|
75 |
in large corporations?
|
76 |
model-index:
|
77 |
+
- name: Alchemy Embedding - Anudit Nagar
|
78 |
results:
|
79 |
- task:
|
80 |
type: information-retrieval
|
|
|
84 |
type: dim_768
|
85 |
metrics:
|
86 |
- type: cosine_accuracy@1
|
87 |
+
value: 0.782012613106663
|
88 |
name: Cosine Accuracy@1
|
89 |
- type: cosine_accuracy@3
|
90 |
+
value: 0.8889498217713189
|
91 |
name: Cosine Accuracy@3
|
92 |
- type: cosine_accuracy@5
|
93 |
+
value: 0.9248697559638058
|
94 |
name: Cosine Accuracy@5
|
95 |
- type: cosine_accuracy@10
|
96 |
+
value: 0.9520153550863724
|
97 |
name: Cosine Accuracy@10
|
98 |
- type: cosine_precision@1
|
99 |
+
value: 0.782012613106663
|
100 |
name: Cosine Precision@1
|
101 |
- type: cosine_precision@3
|
102 |
+
value: 0.29631660725710623
|
103 |
name: Cosine Precision@3
|
104 |
- type: cosine_precision@5
|
105 |
+
value: 0.1849739511927612
|
106 |
name: Cosine Precision@5
|
107 |
- type: cosine_precision@10
|
108 |
+
value: 0.09520153550863725
|
109 |
name: Cosine Precision@10
|
110 |
- type: cosine_recall@1
|
111 |
+
value: 0.782012613106663
|
112 |
name: Cosine Recall@1
|
113 |
- type: cosine_recall@3
|
114 |
+
value: 0.8889498217713189
|
115 |
name: Cosine Recall@3
|
116 |
- type: cosine_recall@5
|
117 |
+
value: 0.9248697559638058
|
118 |
name: Cosine Recall@5
|
119 |
- type: cosine_recall@10
|
120 |
+
value: 0.9520153550863724
|
121 |
name: Cosine Recall@10
|
122 |
- type: cosine_ndcg@10
|
123 |
+
value: 0.867555587052628
|
124 |
name: Cosine Ndcg@10
|
125 |
- type: cosine_mrr@10
|
126 |
+
value: 0.8402608580220322
|
127 |
name: Cosine Mrr@10
|
128 |
- type: cosine_map@100
|
129 |
+
value: 0.8422322227138224
|
130 |
name: Cosine Map@100
|
131 |
- task:
|
132 |
type: information-retrieval
|
|
|
136 |
type: dim_512
|
137 |
metrics:
|
138 |
- type: cosine_accuracy@1
|
139 |
+
value: 0.780367425281053
|
140 |
name: Cosine Accuracy@1
|
141 |
- type: cosine_accuracy@3
|
142 |
+
value: 0.8848368522072937
|
143 |
name: Cosine Accuracy@3
|
144 |
- type: cosine_accuracy@5
|
145 |
+
value: 0.9221277762544557
|
146 |
name: Cosine Accuracy@5
|
147 |
- type: cosine_accuracy@10
|
148 |
+
value: 0.9514669591445023
|
149 |
name: Cosine Accuracy@10
|
150 |
- type: cosine_precision@1
|
151 |
+
value: 0.780367425281053
|
152 |
name: Cosine Precision@1
|
153 |
- type: cosine_precision@3
|
154 |
+
value: 0.2949456174024312
|
155 |
name: Cosine Precision@3
|
156 |
- type: cosine_precision@5
|
157 |
+
value: 0.1844255552508912
|
158 |
name: Cosine Precision@5
|
159 |
- type: cosine_precision@10
|
160 |
+
value: 0.09514669591445023
|
161 |
name: Cosine Precision@10
|
162 |
- type: cosine_recall@1
|
163 |
+
value: 0.780367425281053
|
164 |
name: Cosine Recall@1
|
165 |
- type: cosine_recall@3
|
166 |
+
value: 0.8848368522072937
|
167 |
name: Cosine Recall@3
|
168 |
- type: cosine_recall@5
|
169 |
+
value: 0.9221277762544557
|
170 |
name: Cosine Recall@5
|
171 |
- type: cosine_recall@10
|
172 |
+
value: 0.9514669591445023
|
173 |
name: Cosine Recall@10
|
174 |
- type: cosine_ndcg@10
|
175 |
+
value: 0.8661558392165704
|
176 |
name: Cosine Ndcg@10
|
177 |
- type: cosine_mrr@10
|
178 |
+
value: 0.838656038231032
|
179 |
name: Cosine Mrr@10
|
180 |
- type: cosine_map@100
|
181 |
+
value: 0.8405372438205077
|
182 |
name: Cosine Map@100
|
183 |
- task:
|
184 |
type: information-retrieval
|
|
|
188 |
type: dim_256
|
189 |
metrics:
|
190 |
- type: cosine_accuracy@1
|
191 |
+
value: 0.7754318618042226
|
192 |
name: Cosine Accuracy@1
|
193 |
- type: cosine_accuracy@3
|
194 |
+
value: 0.8804496846723334
|
195 |
name: Cosine Accuracy@3
|
196 |
- type: cosine_accuracy@5
|
197 |
+
value: 0.9169180148066904
|
198 |
name: Cosine Accuracy@5
|
199 |
- type: cosine_accuracy@10
|
200 |
+
value: 0.9468055936386071
|
201 |
name: Cosine Accuracy@10
|
202 |
- type: cosine_precision@1
|
203 |
+
value: 0.7754318618042226
|
204 |
name: Cosine Precision@1
|
205 |
- type: cosine_precision@3
|
206 |
+
value: 0.2934832282241111
|
207 |
name: Cosine Precision@3
|
208 |
- type: cosine_precision@5
|
209 |
+
value: 0.18338360296133807
|
210 |
name: Cosine Precision@5
|
211 |
- type: cosine_precision@10
|
212 |
+
value: 0.09468055936386072
|
213 |
name: Cosine Precision@10
|
214 |
- type: cosine_recall@1
|
215 |
+
value: 0.7754318618042226
|
216 |
name: Cosine Recall@1
|
217 |
- type: cosine_recall@3
|
218 |
+
value: 0.8804496846723334
|
219 |
name: Cosine Recall@3
|
220 |
- type: cosine_recall@5
|
221 |
+
value: 0.9169180148066904
|
222 |
name: Cosine Recall@5
|
223 |
- type: cosine_recall@10
|
224 |
+
value: 0.9468055936386071
|
225 |
name: Cosine Recall@10
|
226 |
- type: cosine_ndcg@10
|
227 |
+
value: 0.8613819477350178
|
228 |
name: Cosine Ndcg@10
|
229 |
- type: cosine_mrr@10
|
230 |
+
value: 0.8338379881703168
|
231 |
name: Cosine Mrr@10
|
232 |
- type: cosine_map@100
|
233 |
+
value: 0.8360735900013385
|
234 |
name: Cosine Map@100
|
235 |
- task:
|
236 |
type: information-retrieval
|
|
|
240 |
type: dim_128
|
241 |
metrics:
|
242 |
- type: cosine_accuracy@1
|
243 |
+
value: 0.7617219632574719
|
244 |
name: Cosine Accuracy@1
|
245 |
- type: cosine_accuracy@3
|
246 |
+
value: 0.871675349602413
|
247 |
name: Cosine Accuracy@3
|
248 |
- type: cosine_accuracy@5
|
249 |
+
value: 0.9117082533589251
|
250 |
name: Cosine Accuracy@5
|
251 |
- type: cosine_accuracy@10
|
252 |
+
value: 0.9418700301617768
|
253 |
name: Cosine Accuracy@10
|
254 |
- type: cosine_precision@1
|
255 |
+
value: 0.7617219632574719
|
256 |
name: Cosine Precision@1
|
257 |
- type: cosine_precision@3
|
258 |
+
value: 0.2905584498674709
|
259 |
name: Cosine Precision@3
|
260 |
- type: cosine_precision@5
|
261 |
+
value: 0.18234165067178504
|
262 |
name: Cosine Precision@5
|
263 |
- type: cosine_precision@10
|
264 |
+
value: 0.09418700301617768
|
265 |
name: Cosine Precision@10
|
266 |
- type: cosine_recall@1
|
267 |
+
value: 0.7617219632574719
|
268 |
name: Cosine Recall@1
|
269 |
- type: cosine_recall@3
|
270 |
+
value: 0.871675349602413
|
271 |
name: Cosine Recall@3
|
272 |
- type: cosine_recall@5
|
273 |
+
value: 0.9117082533589251
|
274 |
name: Cosine Recall@5
|
275 |
- type: cosine_recall@10
|
276 |
+
value: 0.9418700301617768
|
277 |
name: Cosine Recall@10
|
278 |
- type: cosine_ndcg@10
|
279 |
+
value: 0.851649908463093
|
280 |
name: Cosine Ndcg@10
|
281 |
- type: cosine_mrr@10
|
282 |
+
value: 0.8225671458602635
|
283 |
name: Cosine Mrr@10
|
284 |
- type: cosine_map@100
|
285 |
+
value: 0.8248455884524328
|
286 |
name: Cosine Map@100
|
287 |
- task:
|
288 |
type: information-retrieval
|
|
|
292 |
type: dim_64
|
293 |
metrics:
|
294 |
- type: cosine_accuracy@1
|
295 |
+
value: 0.7408829174664108
|
296 |
name: Cosine Accuracy@1
|
297 |
- type: cosine_accuracy@3
|
298 |
+
value: 0.853852481491637
|
299 |
name: Cosine Accuracy@3
|
300 |
- type: cosine_accuracy@5
|
301 |
+
value: 0.8936111872772141
|
302 |
name: Cosine Accuracy@5
|
303 |
- type: cosine_accuracy@10
|
304 |
+
value: 0.9292569234987661
|
305 |
name: Cosine Accuracy@10
|
306 |
- type: cosine_precision@1
|
307 |
+
value: 0.7408829174664108
|
308 |
name: Cosine Precision@1
|
309 |
- type: cosine_precision@3
|
310 |
+
value: 0.28461749383054563
|
311 |
name: Cosine Precision@3
|
312 |
- type: cosine_precision@5
|
313 |
+
value: 0.17872223745544283
|
314 |
name: Cosine Precision@5
|
315 |
- type: cosine_precision@10
|
316 |
+
value: 0.0929256923498766
|
317 |
name: Cosine Precision@10
|
318 |
- type: cosine_recall@1
|
319 |
+
value: 0.7408829174664108
|
320 |
name: Cosine Recall@1
|
321 |
- type: cosine_recall@3
|
322 |
+
value: 0.853852481491637
|
323 |
name: Cosine Recall@3
|
324 |
- type: cosine_recall@5
|
325 |
+
value: 0.8936111872772141
|
326 |
name: Cosine Recall@5
|
327 |
- type: cosine_recall@10
|
328 |
+
value: 0.9292569234987661
|
329 |
name: Cosine Recall@10
|
330 |
- type: cosine_ndcg@10
|
331 |
+
value: 0.8338956659320366
|
332 |
name: Cosine Ndcg@10
|
333 |
- type: cosine_mrr@10
|
334 |
+
value: 0.8033378162525404
|
335 |
name: Cosine Mrr@10
|
336 |
- type: cosine_map@100
|
337 |
+
value: 0.8057702637208689
|
338 |
name: Cosine Map@100
|
339 |
---
|
340 |
|
341 |
+
# Alchemy Embedding - Anudit Nagar
|
342 |
|
343 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5) on the json dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
344 |
|
345 |
## Model Details
|
346 |
|
|
|
350 |
- **Maximum Sequence Length:** 8192 tokens
|
351 |
- **Output Dimensionality:** 768 dimensions
|
352 |
- **Similarity Function:** Cosine Similarity
|
353 |
+
- **Training Dataset:**
|
354 |
+
- json
|
355 |
- **Language:** en
|
356 |
- **License:** apache-2.0
|
357 |
|
|
|
436 |
|
437 |
| Metric | Value |
|
438 |
|:--------------------|:-----------|
|
439 |
+
| cosine_accuracy@1 | 0.782 |
|
440 |
+
| cosine_accuracy@3 | 0.8889 |
|
441 |
+
| cosine_accuracy@5 | 0.9249 |
|
442 |
+
| cosine_accuracy@10 | 0.952 |
|
443 |
+
| cosine_precision@1 | 0.782 |
|
444 |
+
| cosine_precision@3 | 0.2963 |
|
445 |
+
| cosine_precision@5 | 0.185 |
|
446 |
+
| cosine_precision@10 | 0.0952 |
|
447 |
+
| cosine_recall@1 | 0.782 |
|
448 |
+
| cosine_recall@3 | 0.8889 |
|
449 |
+
| cosine_recall@5 | 0.9249 |
|
450 |
+
| cosine_recall@10 | 0.952 |
|
451 |
+
| cosine_ndcg@10 | 0.8676 |
|
452 |
+
| cosine_mrr@10 | 0.8403 |
|
453 |
+
| **cosine_map@100** | **0.8422** |
|
454 |
|
455 |
#### Information Retrieval
|
456 |
* Dataset: `dim_512`
|
|
|
458 |
|
459 |
| Metric | Value |
|
460 |
|:--------------------|:-----------|
|
461 |
+
| cosine_accuracy@1 | 0.7804 |
|
462 |
+
| cosine_accuracy@3 | 0.8848 |
|
463 |
+
| cosine_accuracy@5 | 0.9221 |
|
464 |
+
| cosine_accuracy@10 | 0.9515 |
|
465 |
+
| cosine_precision@1 | 0.7804 |
|
466 |
+
| cosine_precision@3 | 0.2949 |
|
467 |
+
| cosine_precision@5 | 0.1844 |
|
468 |
+
| cosine_precision@10 | 0.0951 |
|
469 |
+
| cosine_recall@1 | 0.7804 |
|
470 |
+
| cosine_recall@3 | 0.8848 |
|
471 |
+
| cosine_recall@5 | 0.9221 |
|
472 |
+
| cosine_recall@10 | 0.9515 |
|
473 |
+
| cosine_ndcg@10 | 0.8662 |
|
474 |
+
| cosine_mrr@10 | 0.8387 |
|
475 |
+
| **cosine_map@100** | **0.8405** |
|
476 |
|
477 |
#### Information Retrieval
|
478 |
* Dataset: `dim_256`
|
479 |
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
480 |
|
481 |
+
| Metric | Value |
|
482 |
+
|:--------------------|:-----------|
|
483 |
+
| cosine_accuracy@1 | 0.7754 |
|
484 |
+
| cosine_accuracy@3 | 0.8804 |
|
485 |
+
| cosine_accuracy@5 | 0.9169 |
|
486 |
+
| cosine_accuracy@10 | 0.9468 |
|
487 |
+
| cosine_precision@1 | 0.7754 |
|
488 |
+
| cosine_precision@3 | 0.2935 |
|
489 |
+
| cosine_precision@5 | 0.1834 |
|
490 |
+
| cosine_precision@10 | 0.0947 |
|
491 |
+
| cosine_recall@1 | 0.7754 |
|
492 |
+
| cosine_recall@3 | 0.8804 |
|
493 |
+
| cosine_recall@5 | 0.9169 |
|
494 |
+
| cosine_recall@10 | 0.9468 |
|
495 |
+
| cosine_ndcg@10 | 0.8614 |
|
496 |
+
| cosine_mrr@10 | 0.8338 |
|
497 |
+
| **cosine_map@100** | **0.8361** |
|
498 |
|
499 |
#### Information Retrieval
|
500 |
* Dataset: `dim_128`
|
|
|
502 |
|
503 |
| Metric | Value |
|
504 |
|:--------------------|:-----------|
|
505 |
+
| cosine_accuracy@1 | 0.7617 |
|
506 |
+
| cosine_accuracy@3 | 0.8717 |
|
507 |
+
| cosine_accuracy@5 | 0.9117 |
|
508 |
+
| cosine_accuracy@10 | 0.9419 |
|
509 |
+
| cosine_precision@1 | 0.7617 |
|
510 |
+
| cosine_precision@3 | 0.2906 |
|
511 |
+
| cosine_precision@5 | 0.1823 |
|
512 |
+
| cosine_precision@10 | 0.0942 |
|
513 |
+
| cosine_recall@1 | 0.7617 |
|
514 |
+
| cosine_recall@3 | 0.8717 |
|
515 |
+
| cosine_recall@5 | 0.9117 |
|
516 |
+
| cosine_recall@10 | 0.9419 |
|
517 |
+
| cosine_ndcg@10 | 0.8516 |
|
518 |
+
| cosine_mrr@10 | 0.8226 |
|
519 |
+
| **cosine_map@100** | **0.8248** |
|
520 |
|
521 |
#### Information Retrieval
|
522 |
* Dataset: `dim_64`
|
|
|
524 |
|
525 |
| Metric | Value |
|
526 |
|:--------------------|:-----------|
|
527 |
+
| cosine_accuracy@1 | 0.7409 |
|
528 |
+
| cosine_accuracy@3 | 0.8539 |
|
529 |
+
| cosine_accuracy@5 | 0.8936 |
|
530 |
+
| cosine_accuracy@10 | 0.9293 |
|
531 |
+
| cosine_precision@1 | 0.7409 |
|
532 |
+
| cosine_precision@3 | 0.2846 |
|
533 |
+
| cosine_precision@5 | 0.1787 |
|
534 |
+
| cosine_precision@10 | 0.0929 |
|
535 |
+
| cosine_recall@1 | 0.7409 |
|
536 |
+
| cosine_recall@3 | 0.8539 |
|
537 |
+
| cosine_recall@5 | 0.8936 |
|
538 |
+
| cosine_recall@10 | 0.9293 |
|
539 |
+
| cosine_ndcg@10 | 0.8339 |
|
540 |
+
| cosine_mrr@10 | 0.8033 |
|
541 |
+
| **cosine_map@100** | **0.8058** |
|
542 |
|
543 |
<!--
|
544 |
## Bias, Risks and Limitations
|
|
|
556 |
|
557 |
### Training Dataset
|
558 |
|
559 |
+
#### json
|
|
|
560 |
|
561 |
+
* Dataset: json
|
562 |
* Size: 32,833 training samples
|
563 |
* Columns: <code>positive</code> and <code>anchor</code>
|
564 |
* Approximate statistics based on the first 1000 samples:
|
|
|
598 |
#### Non-Default Hyperparameters
|
599 |
|
600 |
- `eval_strategy`: epoch
|
601 |
+
- `per_device_train_batch_size`: 24
|
602 |
+
- `per_device_eval_batch_size`: 24
|
603 |
+
- `gradient_accumulation_steps`: 8
|
604 |
+
- `learning_rate`: 2e-05
|
605 |
+
- `num_train_epochs`: 4
|
606 |
- `lr_scheduler_type`: cosine
|
607 |
- `warmup_ratio`: 0.1
|
608 |
- `bf16`: True
|
|
|
616 |
- `do_predict`: False
|
617 |
- `eval_strategy`: epoch
|
618 |
- `prediction_loss_only`: True
|
619 |
+
- `per_device_train_batch_size`: 24
|
620 |
+
- `per_device_eval_batch_size`: 24
|
621 |
- `per_gpu_train_batch_size`: None
|
622 |
- `per_gpu_eval_batch_size`: None
|
623 |
+
- `gradient_accumulation_steps`: 8
|
624 |
- `eval_accumulation_steps`: None
|
625 |
- `torch_empty_cache_steps`: None
|
626 |
+
- `learning_rate`: 2e-05
|
627 |
- `weight_decay`: 0.0
|
628 |
- `adam_beta1`: 0.9
|
629 |
- `adam_beta2`: 0.999
|
630 |
- `adam_epsilon`: 1e-08
|
631 |
- `max_grad_norm`: 1.0
|
632 |
+
- `num_train_epochs`: 4
|
633 |
- `max_steps`: -1
|
634 |
- `lr_scheduler_type`: cosine
|
635 |
- `lr_scheduler_kwargs`: {}
|
|
|
727 |
</details>
|
728 |
|
729 |
### Training Logs
|
730 |
+
| Epoch | Step | Training Loss | dim_128_cosine_map@100 | dim_256_cosine_map@100 | dim_512_cosine_map@100 | dim_64_cosine_map@100 | dim_768_cosine_map@100 |
|
731 |
+
|:----------:|:-------:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:----------------------:|
|
732 |
+
| 0.0584 | 10 | 0.8567 | - | - | - | - | - |
|
733 |
+
| 0.1169 | 20 | 0.6549 | - | - | - | - | - |
|
734 |
+
| 0.1753 | 30 | 0.5407 | - | - | - | - | - |
|
735 |
+
| 0.2337 | 40 | 0.4586 | - | - | - | - | - |
|
736 |
+
| 0.2922 | 50 | 0.3914 | - | - | - | - | - |
|
737 |
+
| 0.3506 | 60 | 0.4104 | - | - | - | - | - |
|
738 |
+
| 0.4091 | 70 | 0.299 | - | - | - | - | - |
|
739 |
+
| 0.4675 | 80 | 0.2444 | - | - | - | - | - |
|
740 |
+
| 0.5259 | 90 | 0.2367 | - | - | - | - | - |
|
741 |
+
| 0.5844 | 100 | 0.2302 | - | - | - | - | - |
|
742 |
+
| 0.6428 | 110 | 0.2356 | - | - | - | - | - |
|
743 |
+
| 0.7012 | 120 | 0.1537 | - | - | - | - | - |
|
744 |
+
| 0.7597 | 130 | 0.2043 | - | - | - | - | - |
|
745 |
+
| 0.8181 | 140 | 0.1606 | - | - | - | - | - |
|
746 |
+
| 0.8766 | 150 | 0.1896 | - | - | - | - | - |
|
747 |
+
| 0.9350 | 160 | 0.1766 | - | - | - | - | - |
|
748 |
+
| 0.9934 | 170 | 0.1259 | - | - | - | - | - |
|
749 |
+
| 0.9993 | 171 | - | 0.8115 | 0.8233 | 0.8321 | 0.7829 | 0.8340 |
|
750 |
+
| 1.0519 | 180 | 0.1661 | - | - | - | - | - |
|
751 |
+
| 1.1103 | 190 | 0.1632 | - | - | - | - | - |
|
752 |
+
| 1.1687 | 200 | 0.1032 | - | - | - | - | - |
|
753 |
+
| 1.2272 | 210 | 0.1037 | - | - | - | - | - |
|
754 |
+
| 1.2856 | 220 | 0.0708 | - | - | - | - | - |
|
755 |
+
| 1.3440 | 230 | 0.0827 | - | - | - | - | - |
|
756 |
+
| 1.4025 | 240 | 0.0505 | - | - | - | - | - |
|
757 |
+
| 1.4609 | 250 | 0.0468 | - | - | - | - | - |
|
758 |
+
| 1.5194 | 260 | 0.0371 | - | - | - | - | - |
|
759 |
+
| 1.5778 | 270 | 0.049 | - | - | - | - | - |
|
760 |
+
| 1.6362 | 280 | 0.0527 | - | - | - | - | - |
|
761 |
+
| 1.6947 | 290 | 0.0316 | - | - | - | - | - |
|
762 |
+
| 1.7531 | 300 | 0.052 | - | - | - | - | - |
|
763 |
+
| 1.8115 | 310 | 0.0298 | - | - | - | - | - |
|
764 |
+
| 1.8700 | 320 | 0.0334 | - | - | - | - | - |
|
765 |
+
| 1.9284 | 330 | 0.0431 | - | - | - | - | - |
|
766 |
+
| 1.9869 | 340 | 0.0316 | - | - | - | - | - |
|
767 |
+
| 1.9985 | 342 | - | 0.8216 | 0.8342 | 0.8397 | 0.8006 | 0.8408 |
|
768 |
+
| 2.0453 | 350 | 0.0275 | - | - | - | - | - |
|
769 |
+
| 2.1037 | 360 | 0.0461 | - | - | - | - | - |
|
770 |
+
| 2.1622 | 370 | 0.0341 | - | - | - | - | - |
|
771 |
+
| 2.2206 | 380 | 0.0323 | - | - | - | - | - |
|
772 |
+
| 2.2790 | 390 | 0.0205 | - | - | - | - | - |
|
773 |
+
| 2.3375 | 400 | 0.0223 | - | - | - | - | - |
|
774 |
+
| 2.3959 | 410 | 0.0189 | - | - | - | - | - |
|
775 |
+
| 2.4543 | 420 | 0.0181 | - | - | - | - | - |
|
776 |
+
| 2.5128 | 430 | 0.0144 | - | - | - | - | - |
|
777 |
+
| 2.5712 | 440 | 0.0179 | - | - | - | - | - |
|
778 |
+
| 2.6297 | 450 | 0.0217 | - | - | - | - | - |
|
779 |
+
| 2.6881 | 460 | 0.016 | - | - | - | - | - |
|
780 |
+
| 2.7465 | 470 | 0.0143 | - | - | - | - | - |
|
781 |
+
| 2.8050 | 480 | 0.0193 | - | - | - | - | - |
|
782 |
+
| 2.8634 | 490 | 0.0183 | - | - | - | - | - |
|
783 |
+
| 2.9218 | 500 | 0.0171 | - | - | - | - | - |
|
784 |
+
| 2.9803 | 510 | 0.0195 | - | - | - | - | - |
|
785 |
+
| 2.9978 | 513 | - | 0.8242 | 0.8350 | 0.8409 | 0.8051 | 0.8413 |
|
786 |
+
| 3.0387 | 520 | 0.0127 | - | - | - | - | - |
|
787 |
+
| 3.0972 | 530 | 0.0261 | - | - | - | - | - |
|
788 |
+
| 3.1556 | 540 | 0.017 | - | - | - | - | - |
|
789 |
+
| 3.2140 | 550 | 0.0198 | - | - | - | - | - |
|
790 |
+
| 3.2725 | 560 | 0.0131 | - | - | - | - | - |
|
791 |
+
| 3.3309 | 570 | 0.0156 | - | - | - | - | - |
|
792 |
+
| 3.3893 | 580 | 0.0107 | - | - | - | - | - |
|
793 |
+
| 3.4478 | 590 | 0.0123 | - | - | - | - | - |
|
794 |
+
| 3.5062 | 600 | 0.0111 | - | - | - | - | - |
|
795 |
+
| 3.5646 | 610 | 0.0112 | - | - | - | - | - |
|
796 |
+
| 3.6231 | 620 | 0.0143 | - | - | - | - | - |
|
797 |
+
| 3.6815 | 630 | 0.013 | - | - | - | - | - |
|
798 |
+
| 3.7400 | 640 | 0.0105 | - | - | - | - | - |
|
799 |
+
| 3.7984 | 650 | 0.0126 | - | - | - | - | - |
|
800 |
+
| 3.8568 | 660 | 0.0118 | - | - | - | - | - |
|
801 |
+
| 3.9153 | 670 | 0.0163 | - | - | - | - | - |
|
802 |
+
| 3.9737 | 680 | 0.0187 | - | - | - | - | - |
|
803 |
+
| **3.9971** | **684** | **-** | **0.8248** | **0.8361** | **0.8405** | **0.8058** | **0.8422** |
|
804 |
|
805 |
* The bold row denotes the saved checkpoint.
|
806 |
|
807 |
### Framework Versions
|
808 |
- Python: 3.12.5
|
809 |
+
- Sentence Transformers: 3.1.1
|
810 |
- Transformers: 4.44.2
|
811 |
+
- PyTorch: 2.4.1
|
812 |
- Accelerate: 0.33.0
|
813 |
- Datasets: 2.21.0
|
814 |
- Tokenizers: 0.19.1
|
|
|
833 |
#### MatryoshkaLoss
|
834 |
```bibtex
|
835 |
@misc{kusupati2024matryoshka,
|
836 |
+
title={Matryoshka Representation Learning},
|
837 |
author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
|
838 |
year={2024},
|
839 |
eprint={2205.13147},
|
|
|
845 |
#### MultipleNegativesRankingLoss
|
846 |
```bibtex
|
847 |
@misc{henderson2017efficient,
|
848 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
849 |
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
850 |
year={2017},
|
851 |
eprint={1705.00652},
|
config_sentence_transformers.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
-
"sentence_transformers": "3.
|
4 |
"transformers": "4.44.2",
|
5 |
-
"pytorch": "2.4.
|
6 |
},
|
7 |
"prompts": {},
|
8 |
"default_prompt_name": null,
|
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.1",
|
4 |
"transformers": "4.44.2",
|
5 |
+
"pytorch": "2.4.1"
|
6 |
},
|
7 |
"prompts": {},
|
8 |
"default_prompt_name": null,
|
model.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 555896347
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50557ba011052514c2e8d048257176d199c9b609b15d5c39dfe8a18383aadb60
|
3 |
size 555896347
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 547119128
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d13bedb560025363d482eb733dc9f18a0bfe5b342490240b9e9faabccf22983
|
3 |
size 547119128
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5496
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d2ca4f4a8289d13a34ff02bc7fb1d07d6772ee0b83178f7a016e3de1e0ba45e
|
3 |
size 5496
|