Mollel's picture
Add new SentenceTransformer model.
ff57059 verified
metadata
language: []
library_name: sentence-transformers
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:1115700
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
base_model: mixedbread-ai/mxbai-embed-large-v1
datasets:
  - Mollel/swahili-n_li-triplet-swh-eng
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
widget:
  - source_sentence: Ndege mwenye mdomo mrefu katikati ya ndege.
    sentences:
      - Panya anayekimbia juu ya gurudumu.
      - Mtu anashindana katika mashindano ya mbio.
      - Ndege anayeruka.
  - source_sentence: >-
      Msichana mchanga mwenye nywele nyeusi anakabili kamera na kushikilia mfuko
      wa karatasi wakati amevaa shati la machungwa na mabawa ya kipepeo yenye
      rangi nyingi.
    sentences:
      - Mwanamke mzee anakataa kupigwa picha.
      - mtu akila na mvulana mdogo kwenye kijia cha jiji
      - Msichana mchanga anakabili kamera.
  - source_sentence: >-
      Wanawake na watoto wameketi nje katika kivuli wakati kikundi cha watoto
      wadogo wameketi ndani katika kivuli.
    sentences:
      - Mwanamke na watoto na kukaa chini.
      - Mwanamke huyo anakimbia.
      - Watu wanasafiri kwa baiskeli.
  - source_sentence: >-
      Mtoto mdogo anaruka mikononi mwa mwanamke aliyevalia suti nyeusi ya
      kuogelea akiwa kwenye dimbwi.
    sentences:
      - >-
        Mtoto akiruka mikononi mwa mwanamke aliyevalia suti ya kuogelea kwenye
        dimbwi.
      - Someone is holding oranges and walking
      - Mama na binti wakinunua viatu.
  - source_sentence: >-
      Mwanamume na mwanamke wachanga waliovaa mikoba wanaweka au kuondoa kitu
      kutoka kwenye mti mweupe wa zamani, huku watu wengine wamesimama au
      wameketi nyuma.
    sentences:
      - tai huruka
      - mwanamume na mwanamke wenye mikoba
      - Wanaume wawili wameketi karibu na mwanamke.
pipeline_tag: sentence-similarity
model-index:
  - name: SentenceTransformer based on mixedbread-ai/mxbai-embed-large-v1
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 768
          type: sts-test-768
        metrics:
          - type: pearson_cosine
            value: 0.7132706238512434
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.7051536841043449
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.6350557885817543
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6244954371574937
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.6378177587771076
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.62660657495158
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5703890363847545
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5603263508842454
            name: Spearman Dot
          - type: pearson_max
            value: 0.7132706238512434
            name: Pearson Max
          - type: spearman_max
            value: 0.7051536841043449
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 512
          type: sts-test-512
        metrics:
          - type: pearson_cosine
            value: 0.7123126668825692
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.703609966898051
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.6388434483972429
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6281398975795567
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.6419247701070586
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.6310772735048756
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5490282729432092
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5413067160939415
            name: Spearman Dot
          - type: pearson_max
            value: 0.7123126668825692
            name: Pearson Max
          - type: spearman_max
            value: 0.703609966898051
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 256
          type: sts-test-256
        metrics:
          - type: pearson_cosine
            value: 0.7077861691807766
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.7000862774499549
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.643288835639384
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6325033715865666
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.6460218727916103
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.6343987601663327
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5115397990320991
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5059807217044437
            name: Spearman Dot
          - type: pearson_max
            value: 0.7077861691807766
            name: Pearson Max
          - type: spearman_max
            value: 0.7000862774499549
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 128
          type: sts-test-128
        metrics:
          - type: pearson_cosine
            value: 0.7028807205576924
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.6967519700533644
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.6497250338362586
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6388633921530281
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.650616035583963
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.6388752538429412
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.473211586813894
            name: Pearson Dot
          - type: spearman_dot
            value: 0.468867985238822
            name: Spearman Dot
          - type: pearson_max
            value: 0.7028807205576924
            name: Pearson Max
          - type: spearman_max
            value: 0.6967519700533644
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 64
          type: sts-test-64
        metrics:
          - type: pearson_cosine
            value: 0.6904004410097948
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.684874855155489
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.6498424787891348
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.6359659710580793
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.6513241092538908
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.6369881684130174
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.42134226096367267
            name: Pearson Dot
          - type: spearman_dot
            value: 0.4179675632105097
            name: Spearman Dot
          - type: pearson_max
            value: 0.6904004410097948
            name: Pearson Max
          - type: spearman_max
            value: 0.684874855155489
            name: Spearman Max

SentenceTransformer based on mixedbread-ai/mxbai-embed-large-v1

This is a sentence-transformers model fine-tuned from mixedbread-ai/mxbai-embed-large-v1 on the Mollel/swahili-n_li-triplet-swh-eng dataset. It maps sentences and paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: mixedbread-ai/mxbai-embed-large-v1
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 1024 dimensions
  • Similarity Function: Cosine Similarity
  • Training Dataset:
    • Mollel/swahili-n_li-triplet-swh-eng

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("Mollel/MultiLinguSwahili-mxbai-embed-large-v1-nli-matryoshka")
# Run inference
sentences = [
    'Mwanamume na mwanamke wachanga waliovaa mikoba wanaweka au kuondoa kitu kutoka kwenye mti mweupe wa zamani, huku watu wengine wamesimama au wameketi nyuma.',
    'mwanamume na mwanamke wenye mikoba',
    'tai huruka',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-test-768)

Metric Value
pearson_cosine 0.7133
spearman_cosine 0.7052
pearson_manhattan 0.6351
spearman_manhattan 0.6245
pearson_euclidean 0.6378
spearman_euclidean 0.6266
pearson_dot 0.5704
spearman_dot 0.5603
pearson_max 0.7133
spearman_max 0.7052

Semantic Similarity (dataset: sts-test-512)

Metric Value
pearson_cosine 0.7123
spearman_cosine 0.7036
pearson_manhattan 0.6388
spearman_manhattan 0.6281
pearson_euclidean 0.6419
spearman_euclidean 0.6311
pearson_dot 0.549
spearman_dot 0.5413
pearson_max 0.7123
spearman_max 0.7036

Semantic Similarity (dataset: sts-test-256)

Metric Value
pearson_cosine 0.7078
spearman_cosine 0.7001
pearson_manhattan 0.6433
spearman_manhattan 0.6325
pearson_euclidean 0.646
spearman_euclidean 0.6344
pearson_dot 0.5115
spearman_dot 0.506
pearson_max 0.7078
spearman_max 0.7001

Semantic Similarity (dataset: sts-test-128)

Metric Value
pearson_cosine 0.7029
spearman_cosine 0.6968
pearson_manhattan 0.6497
spearman_manhattan 0.6389
pearson_euclidean 0.6506
spearman_euclidean 0.6389
pearson_dot 0.4732
spearman_dot 0.4689
pearson_max 0.7029
spearman_max 0.6968

Semantic Similarity (dataset: sts-test-64)

Metric Value
pearson_cosine 0.6904
spearman_cosine 0.6849
pearson_manhattan 0.6498
spearman_manhattan 0.636
pearson_euclidean 0.6513
spearman_euclidean 0.637
pearson_dot 0.4213
spearman_dot 0.418
pearson_max 0.6904
spearman_max 0.6849

Training Details

Training Dataset

Mollel/swahili-n_li-triplet-swh-eng

  • Dataset: Mollel/swahili-n_li-triplet-swh-eng
  • Size: 1,115,700 training samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    column: anchor | positive | negative
    type: string | string | string
    details:
    • anchor — min: 7 tokens, mean: 15.18 tokens, max: 80 tokens
    • positive — min: 5 tokens, mean: 18.53 tokens, max: 52 tokens
    • negative — min: 5 tokens, mean: 17.8 tokens, max: 53 tokens
  • Samples:
    anchor | positive | negative
    A person on a horse jumps over a broken down airplane. | A person is outdoors, on a horse. | A person is at a diner, ordering an omelette.
    Mtu aliyepanda farasi anaruka juu ya ndege iliyovunjika. | Mtu yuko nje, juu ya farasi. | Mtu yuko kwenye mkahawa, akiagiza omelette.
    Children smiling and waving at camera | There are children present | The kids are frowning
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

Mollel/swahili-n_li-triplet-swh-eng

  • Dataset: Mollel/swahili-n_li-triplet-swh-eng
  • Size: 13,168 evaluation samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    column: anchor | positive | negative
    type: string | string | string
    details:
    • anchor — min: 6 tokens, mean: 26.43 tokens, max: 94 tokens
    • positive — min: 5 tokens, mean: 13.37 tokens, max: 65 tokens
    • negative — min: 5 tokens, mean: 14.7 tokens, max: 54 tokens
  • Samples:
    anchor | positive | negative
    Two women are embracing while holding to go packages. | Two woman are holding packages. | The men are fighting outside a deli.
    Wanawake wawili wanakumbatiana huku wakishikilia vifurushi vya kwenda. | Wanawake wawili wanashikilia vifurushi. | Wanaume hao wanapigana nje ya duka la vyakula vitamu.
    Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink. | Two kids in numbered jerseys wash their hands. | Two kids in jackets walk to school.
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • learning_rate: 2e-05
  • num_train_epochs: 1
  • warmup_ratio: 0.1
  • bf16: True
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • prediction_loss_only: True
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • learning_rate: 2e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 1
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: True
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Click to expand
Epoch Step Training Loss sts-test-128_spearman_cosine sts-test-256_spearman_cosine sts-test-512_spearman_cosine sts-test-64_spearman_cosine sts-test-768_spearman_cosine
0.0029 100 9.6293 - - - - -
0.0057 200 8.1059 - - - - -
0.0086 300 8.6054 - - - - -
0.0115 400 6.8896 - - - - -
0.0143 500 6.9096 - - - - -
0.0172 600 6.7797 - - - - -
0.0201 700 6.8013 - - - - -
0.0229 800 7.49 - - - - -
0.0258 900 7.2888 - - - - -
0.0287 1000 7.3862 - - - - -
0.0315 1100 6.8292 - - - - -
0.0344 1200 6.2505 - - - - -
0.0373 1300 4.8736 - - - - -
0.0402 1400 4.7668 - - - - -
0.0430 1500 5.0843 - - - - -
0.0459 1600 3.8507 - - - - -
0.0488 1700 5.1235 - - - - -
0.0516 1800 4.6187 - - - - -
0.0545 1900 3.8704 - - - - -
0.0574 2000 3.3635 - - - - -
0.0602 2100 3.4204 - - - - -
0.0631 2200 3.5258 - - - - -
0.0660 2300 3.6726 - - - - -
0.0688 2400 3.8007 - - - - -
0.0717 2500 3.5593 - - - - -
0.0746 2600 3.3407 - - - - -
0.0774 2700 4.6645 - - - - -
0.0803 2800 4.5431 - - - - -
0.0832 2900 4.0496 - - - - -
0.0860 3000 3.8313 - - - - -
0.0889 3100 3.6324 - - - - -
0.0918 3200 3.3442 - - - - -
0.0946 3300 2.9437 - - - - -
0.0975 3400 2.8352 - - - - -
0.1004 3500 2.8069 - - - - -
0.1033 3600 2.9686 - - - - -
0.1061 3700 2.8355 - - - - -
0.1090 3800 2.9827 - - - - -
0.1119 3900 3.1181 - - - - -
0.1147 4000 4.1636 - - - - -
0.1176 4100 5.4112 - - - - -
0.1205 4200 5.3505 - - - - -
0.1233 4300 3.8779 - - - - -
0.1262 4400 3.7439 - - - - -
0.1291 4500 3.3232 - - - - -
0.1319 4600 3.6257 - - - - -
0.1348 4700 3.8231 - - - - -
0.1377 4800 3.4048 - - - - -
0.1405 4900 3.0996 - - - - -
0.1434 5000 3.386 - - - - -
0.1463 5100 2.8902 - - - - -
0.1491 5200 3.2461 - - - - -
0.1520 5300 2.6888 - - - - -
0.1549 5400 3.2005 - - - - -
0.1577 5500 3.1291 - - - - -
0.1606 5600 2.993 - - - - -
0.1635 5700 3.3405 - - - - -
0.1664 5800 3.3929 - - - - -
0.1692 5900 4.0071 - - - - -
0.1721 6000 3.8775 - - - - -
0.1750 6100 4.0725 - - - - -
0.1778 6200 4.3434 - - - - -
0.1807 6300 4.0734 - - - - -
0.1836 6400 3.805 - - - - -
0.1864 6500 3.9273 - - - - -
0.1893 6600 3.9514 - - - - -
0.1922 6700 3.8316 - - - - -
0.1950 6800 3.2888 - - - - -
0.1979 6900 3.4367 - - - - -
0.2008 7000 3.0205 - - - - -
0.2036 7100 3.404 - - - - -
0.2065 7200 3.225 - - - - -
0.2094 7300 3.8446 - - - - -
0.2122 7400 3.2551 - - - - -
0.2151 7500 3.35 - - - - -
0.2180 7600 3.5524 - - - - -
0.2208 7700 3.7775 - - - - -
0.2237 7800 3.2797 - - - - -
0.2266 7900 3.96 - - - - -
0.2294 8000 3.7124 - - - - -
0.2323 8100 3.2713 - - - - -
0.2352 8200 3.8838 - - - - -
0.2381 8300 3.3932 - - - - -
0.2409 8400 3.3798 - - - - -
0.2438 8500 3.2386 - - - - -
0.2467 8600 3.1264 - - - - -
0.2495 8700 3.9248 - - - - -
0.2524 8800 3.5402 - - - - -
0.2553 8900 3.688 - - - - -
0.2581 9000 4.0903 - - - - -
0.2610 9100 4.4358 - - - - -
0.2639 9200 4.1334 - - - - -
0.2667 9300 3.4894 - - - - -
0.2696 9400 4.0032 - - - - -
0.2725 9500 4.1421 - - - - -
0.2753 9600 3.6995 - - - - -
0.2782 9700 3.8307 - - - - -
0.2811 9800 3.7448 - - - - -
0.2839 9900 3.6962 - - - - -
0.2868 10000 3.3733 - - - - -
0.2897 10100 3.4597 - - - - -
0.2925 10200 3.6834 - - - - -
0.2954 10300 3.7873 - - - - -
0.2983 10400 3.1388 - - - - -
0.3012 10500 3.9492 - - - - -
0.3040 10600 3.5991 - - - - -
0.3069 10700 4.2448 - - - - -
0.3098 10800 3.92 - - - - -
0.3126 10900 3.8442 - - - - -
0.3155 11000 4.3227 - - - - -
0.3184 11100 3.6447 - - - - -
0.3212 11200 3.8106 - - - - -
0.3241 11300 3.3499 - - - - -
0.3270 11400 3.8586 - - - - -
0.3298 11500 3.4284 - - - - -
0.3327 11600 3.2439 - - - - -
0.3356 11700 3.6645 - - - - -
0.3384 11800 3.9315 - - - - -
0.3413 11900 3.6439 - - - - -
0.3442 12000 3.6706 - - - - -
0.3470 12100 3.5084 - - - - -
0.3499 12200 3.9352 - - - - -
0.3528 12300 3.7615 - - - - -
0.3556 12400 3.7642 - - - - -
0.3585 12500 3.8085 - - - - -
0.3614 12600 3.411 - - - - -
0.3643 12700 3.8521 - - - - -
0.3671 12800 3.5473 - - - - -
0.3700 12900 3.5322 - - - - -
0.3729 13000 3.1496 - - - - -
0.3757 13100 3.5285 - - - - -
0.3786 13200 4.4428 - - - - -
0.3815 13300 3.4391 - - - - -
0.3843 13400 3.6457 - - - - -
0.3872 13500 3.2051 - - - - -
0.3901 13600 3.3738 - - - - -
0.3929 13700 3.5465 - - - - -
0.3958 13800 3.5853 - - - - -
0.3987 13900 3.297 - - - - -
0.4015 14000 3.3994 - - - - -
0.4044 14100 3.542 - - - - -
0.4073 14200 3.8516 - - - - -
0.4101 14300 3.6002 - - - - -
0.4130 14400 3.7251 - - - - -
0.4159 14500 3.4421 - - - - -
0.4187 14600 3.365 - - - - -
0.4216 14700 3.5327 - - - - -
0.4245 14800 3.1557 - - - - -
0.4274 14900 3.7096 - - - - -
0.4302 15000 3.9073 - - - - -
0.4331 15100 3.2662 - - - - -
0.4360 15200 3.3979 - - - - -
0.4388 15300 3.1515 - - - - -
0.4417 15400 3.247 - - - - -
0.4446 15500 3.3723 - - - - -
0.4474 15600 3.6837 - - - - -
0.4503 15700 3.4302 - - - - -
0.4532 15800 3.8231 - - - - -
0.4560 15900 3.1679 - - - - -
0.4589 16000 3.2766 - - - - -
0.4618 16100 3.3 - - - - -
0.4646 16200 3.557 - - - - -
0.4675 16300 3.5876 - - - - -
0.4704 16400 3.0928 - - - - -
0.4732 16500 2.9105 - - - - -
0.4761 16600 3.254 - - - - -
0.4790 16700 3.8005 - - - - -
0.4818 16800 3.1539 - - - - -
0.4847 16900 3.0174 - - - - -
0.4876 17000 3.4317 - - - - -
0.4904 17100 3.6292 - - - - -
0.4933 17200 3.7037 - - - - -
0.4962 17300 3.5144 - - - - -
0.4991 17400 3.7012 - - - - -
0.5019 17500 3.2587 - - - - -
0.5048 17600 3.1335 - - - - -
0.5077 17700 3.4027 - - - - -
0.5105 17800 3.6637 - - - - -
0.5134 17900 3.1682 - - - - -
0.5163 18000 3.2303 - - - - -
0.5191 18100 3.2155 - - - - -
0.5220 18200 3.431 - - - - -
0.5249 18300 3.1019 - - - - -
0.5277 18400 3.5245 - - - - -
0.5306 18500 3.1072 - - - - -
0.5335 18600 2.9673 - - - - -
0.5363 18700 3.0401 - - - - -
0.5392 18800 3.0617 - - - - -
0.5421 18900 3.6658 - - - - -
0.5449 19000 3.5137 - - - - -
0.5478 19100 3.5897 - - - - -
0.5507 19200 2.8309 - - - - -
0.5535 19300 3.7047 - - - - -
0.5564 19400 3.3343 - - - - -
0.5593 19500 3.3689 - - - - -
0.5622 19600 3.1783 - - - - -
0.5650 19700 3.6135 - - - - -
0.5679 19800 3.5106 - - - - -
0.5708 19900 3.8416 - - - - -
0.5736 20000 3.1559 - - - - -
0.5765 20100 3.2931 - - - - -
0.5794 20200 3.2411 - - - - -
0.5822 20300 3.5898 - - - - -
0.5851 20400 3.2916 - - - - -
0.5880 20500 3.619 - - - - -
0.5908 20600 3.8023 - - - - -
0.5937 20700 3.1023 - - - - -
0.5966 20800 3.2682 - - - - -
0.5994 20900 2.9783 - - - - -
0.6023 21000 3.1373 - - - - -
0.6052 21100 3.5358 - - - - -
0.6080 21200 3.2374 - - - - -
0.6109 21300 3.6793 - - - - -
0.6138 21400 3.388 - - - - -
0.6166 21500 3.1295 - - - - -
0.6195 21600 3.7971 - - - - -
0.6224 21700 3.4638 - - - - -
0.6253 21800 3.1254 - - - - -
0.6281 21900 3.705 - - - - -
0.6310 22000 2.9319 - - - - -
0.6339 22100 3.6908 - - - - -
0.6367 22200 3.3938 - - - - -
0.6396 22300 3.389 - - - - -
0.6425 22400 2.9946 - - - - -
0.6453 22500 3.9109 - - - - -
0.6482 22600 3.4698 - - - - -
0.6511 22700 3.1229 - - - - -
0.6539 22800 3.3769 - - - - -
0.6568 22900 3.1849 - - - - -
0.6597 23000 3.4464 - - - - -
0.6625 23100 2.9192 - - - - -
0.6654 23200 3.0796 - - - - -
0.6683 23300 3.4603 - - - - -
0.6711 23400 3.6775 - - - - -
0.6740 23500 3.5132 - - - - -
0.6769 23600 3.7764 - - - - -
0.6797 23700 3.0643 - - - - -
0.6826 23800 3.1545 - - - - -
0.6855 23900 2.997 - - - - -
0.6883 24000 3.1385 - - - - -
0.6912 24100 3.3879 - - - - -
0.6941 24200 3.5442 - - - - -
0.6970 24300 3.3687 - - - - -
0.6998 24400 3.4195 - - - - -
0.7027 24500 3.4057 - - - - -
0.7056 24600 3.2503 - - - - -
0.7084 24700 3.3703 - - - - -
0.7113 24800 3.0839 - - - - -
0.7142 24900 3.11 - - - - -
0.7170 25000 3.1105 - - - - -
0.7199 25100 2.8735 - - - - -
0.7228 25200 3.0287 - - - - -
0.7256 25300 3.2992 - - - - -
0.7285 25400 3.2015 - - - - -
0.7314 25500 3.3135 - - - - -
0.7342 25600 3.1618 - - - - -
0.7371 25700 3.5939 - - - - -
0.7400 25800 2.9016 - - - - -
0.7428 25900 3.2528 - - - - -
0.7457 26000 3.5005 - - - - -
0.7486 26100 3.2494 - - - - -
0.7514 26200 2.618 - - - - -
0.7543 26300 4.3413 - - - - -
0.7572 26400 4.0215 - - - - -
0.7601 26500 3.6406 - - - - -
0.7629 26600 3.6815 - - - - -
0.7658 26700 3.6911 - - - - -
0.7687 26800 3.3901 - - - - -
0.7715 26900 3.7262 - - - - -
0.7744 27000 3.3099 - - - - -
0.7773 27100 3.2131 - - - - -
0.7801 27200 3.1818 - - - - -
0.7830 27300 3.3306 - - - - -
0.7859 27400 3.4347 - - - - -
0.7887 27500 3.1169 - - - - -
0.7916 27600 3.2788 - - - - -
0.7945 27700 3.3876 - - - - -
0.7973 27800 3.0329 - - - - -
0.8002 27900 2.9935 - - - - -
0.8031 28000 3.0313 - - - - -
0.8059 28100 3.0293 - - - - -
0.8088 28200 3.0225 - - - - -
0.8117 28300 2.9378 - - - - -
0.8145 28400 2.8588 - - - - -
0.8174 28500 3.0936 - - - - -
0.8203 28600 2.9192 - - - - -
0.8232 28700 3.0259 - - - - -
0.8260 28800 2.76 - - - - -
0.8289 28900 3.0673 - - - - -
0.8318 29000 2.9333 - - - - -
0.8346 29100 2.9847 - - - - -
0.8375 29200 2.9882 - - - - -
0.8404 29300 2.9578 - - - - -
0.8432 29400 2.8535 - - - - -
0.8461 29500 3.012 - - - - -
0.8490 29600 2.6693 - - - - -
0.8518 29700 2.9026 - - - - -
0.8547 29800 2.7965 - - - - -
0.8576 29900 2.8402 - - - - -
0.8604 30000 2.6286 - - - - -
0.8633 30100 2.6588 - - - - -
0.8662 30200 2.6185 - - - - -
0.8690 30300 2.785 - - - - -
0.8719 30400 2.7637 - - - - -
0.8748 30500 2.8271 - - - - -
0.8776 30600 2.6788 - - - - -
0.8805 30700 2.5934 - - - - -
0.8834 30800 2.7782 - - - - -
0.8863 30900 2.7925 - - - - -
0.8891 31000 2.6091 - - - - -
0.8920 31100 2.7123 - - - - -
0.8949 31200 2.6067 - - - - -
0.8977 31300 2.65 - - - - -
0.9006 31400 2.7695 - - - - -
0.9035 31500 2.7075 - - - - -
0.9063 31600 2.5539 - - - - -
0.9092 31700 2.5283 - - - - -
0.9121 31800 2.7156 - - - - -
0.9149 31900 2.4318 - - - - -
0.9178 32000 2.7335 - - - - -
0.9207 32100 2.4435 - - - - -
0.9235 32200 2.6529 - - - - -
0.9264 32300 2.568 - - - - -
0.9293 32400 2.5639 - - - - -
0.9321 32500 2.6727 - - - - -
0.9350 32600 2.5063 - - - - -
0.9379 32700 2.5447 - - - - -
0.9407 32800 2.5767 - - - - -
0.9436 32900 2.5155 - - - - -
0.9465 33000 2.4016 - - - - -
0.9493 33100 2.7624 - - - - -
0.9522 33200 2.5887 - - - - -
0.9551 33300 2.5945 - - - - -
0.9580 33400 2.4295 - - - - -
0.9608 33500 2.6082 - - - - -
0.9637 33600 2.5034 - - - - -
0.9666 33700 2.5149 - - - - -
0.9694 33800 2.5311 - - - - -
0.9723 33900 2.6413 - - - - -
0.9752 34000 2.6304 - - - - -
0.9780 34100 2.5159 - - - - -
0.9809 34200 2.701 - - - - -
0.9838 34300 2.3928 - - - - -
0.9866 34400 2.5428 - - - - -
0.9895 34500 2.4652 - - - - -
0.9924 34600 2.7281 - - - - -
0.9952 34700 2.4693 - - - - -
0.9981 34800 2.4129 - - - - -
1.0 34866 - 0.6968 0.7001 0.7036 0.6849 0.7052

Framework Versions

  • Python: 3.11.9
  • Sentence Transformers: 3.0.1
  • Transformers: 4.40.1
  • PyTorch: 2.3.0+cu121
  • Accelerate: 0.29.3
  • Datasets: 2.19.0
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}