|
--- |
|
tags: |
|
- mteb |
|
model-index: |
|
- name: xiaobu-embedding |
|
results: |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/AFQMC |
|
name: MTEB AFQMC |
|
config: default |
|
split: validation |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 49.37874132528482 |
|
- type: cos_sim_spearman |
|
value: 54.84722470052176 |
|
- type: euclidean_pearson |
|
value: 53.0495882931575 |
|
- type: euclidean_spearman |
|
value: 54.847727301700665 |
|
- type: manhattan_pearson |
|
value: 53.0632140838278 |
|
- type: manhattan_spearman |
|
value: 54.8744258024692 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/ATEC |
|
name: MTEB ATEC |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 48.15992903013723 |
|
- type: cos_sim_spearman |
|
value: 55.13198035464577 |
|
- type: euclidean_pearson |
|
value: 55.435876753245715 |
|
- type: euclidean_spearman |
|
value: 55.13215936702871 |
|
- type: manhattan_pearson |
|
value: 55.41429518223402 |
|
- type: manhattan_spearman |
|
value: 55.13363087679285 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (zh) |
|
config: zh |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 46.722 |
|
- type: f1 |
|
value: 45.039340641893205 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/BQ |
|
name: MTEB BQ |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 63.517830355554224 |
|
- type: cos_sim_spearman |
|
value: 65.57007801018649 |
|
- type: euclidean_pearson |
|
value: 64.05153340906585 |
|
- type: euclidean_spearman |
|
value: 65.5696865661119 |
|
- type: manhattan_pearson |
|
value: 63.95710619755406 |
|
- type: manhattan_spearman |
|
value: 65.48565785379489 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: C-MTEB/CLSClusteringP2P |
|
name: MTEB CLSClusteringP2P |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: v_measure |
|
value: 43.24046498507819 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: C-MTEB/CLSClusteringS2S |
|
name: MTEB CLSClusteringS2S |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: v_measure |
|
value: 41.22618199372116 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: C-MTEB/CMedQAv1-reranking |
|
name: MTEB CMedQAv1 |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map |
|
value: 87.12213224673621 |
|
- type: mrr |
|
value: 89.57150793650794 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: C-MTEB/CMedQAv2-reranking |
|
name: MTEB CMedQAv2 |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: map |
|
value: 87.57290061886421 |
|
- type: mrr |
|
value: 90.19202380952382 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/CmedqaRetrieval |
|
name: MTEB CmedqaRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 25.22 |
|
- type: map_at_10 |
|
value: 37.604 |
|
- type: map_at_100 |
|
value: 39.501 |
|
- type: map_at_1000 |
|
value: 39.614 |
|
- type: map_at_3 |
|
value: 33.378 |
|
- type: map_at_5 |
|
value: 35.774 |
|
- type: mrr_at_1 |
|
value: 38.385000000000005 |
|
- type: mrr_at_10 |
|
value: 46.487 |
|
- type: mrr_at_100 |
|
value: 47.504999999999995 |
|
- type: mrr_at_1000 |
|
value: 47.548 |
|
- type: mrr_at_3 |
|
value: 43.885999999999996 |
|
- type: mrr_at_5 |
|
value: 45.373000000000005 |
|
- type: ndcg_at_1 |
|
value: 38.385000000000005 |
|
- type: ndcg_at_10 |
|
value: 44.224999999999994 |
|
- type: ndcg_at_100 |
|
value: 51.637 |
|
- type: ndcg_at_1000 |
|
value: 53.55799999999999 |
|
- type: ndcg_at_3 |
|
value: 38.845 |
|
- type: ndcg_at_5 |
|
value: 41.163 |
|
- type: precision_at_1 |
|
value: 38.385000000000005 |
|
- type: precision_at_10 |
|
value: 9.812 |
|
- type: precision_at_100 |
|
value: 1.58 |
|
- type: precision_at_1000 |
|
value: 0.183 |
|
- type: precision_at_3 |
|
value: 21.88 |
|
- type: precision_at_5 |
|
value: 15.974 |
|
- type: recall_at_1 |
|
value: 25.22 |
|
- type: recall_at_10 |
|
value: 54.897 |
|
- type: recall_at_100 |
|
value: 85.469 |
|
- type: recall_at_1000 |
|
value: 98.18599999999999 |
|
- type: recall_at_3 |
|
value: 38.815 |
|
- type: recall_at_5 |
|
value: 45.885 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: C-MTEB/CMNLI |
|
name: MTEB Cmnli |
|
config: default |
|
split: validation |
|
revision: None |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 83.22309079975948 |
|
- type: cos_sim_ap |
|
value: 89.94833400328307 |
|
- type: cos_sim_f1 |
|
value: 84.39319055464031 |
|
- type: cos_sim_precision |
|
value: 79.5774647887324 |
|
- type: cos_sim_recall |
|
value: 89.82931961655366 |
|
- type: dot_accuracy |
|
value: 83.22309079975948 |
|
- type: dot_ap |
|
value: 89.95618559578415 |
|
- type: dot_f1 |
|
value: 84.41173239591345 |
|
- type: dot_precision |
|
value: 79.61044343141317 |
|
- type: dot_recall |
|
value: 89.82931961655366 |
|
- type: euclidean_accuracy |
|
value: 83.23511725796753 |
|
- type: euclidean_ap |
|
value: 89.94836342787318 |
|
- type: euclidean_f1 |
|
value: 84.40550133096718 |
|
- type: euclidean_precision |
|
value: 80.29120067524794 |
|
- type: euclidean_recall |
|
value: 88.9642272620996 |
|
- type: manhattan_accuracy |
|
value: 83.23511725796753 |
|
- type: manhattan_ap |
|
value: 89.9450103956978 |
|
- type: manhattan_f1 |
|
value: 84.44444444444444 |
|
- type: manhattan_precision |
|
value: 80.09647651006712 |
|
- type: manhattan_recall |
|
value: 89.29155950432546 |
|
- type: max_accuracy |
|
value: 83.23511725796753 |
|
- type: max_ap |
|
value: 89.95618559578415 |
|
- type: max_f1 |
|
value: 84.44444444444444 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/CovidRetrieval |
|
name: MTEB CovidRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 76.87 |
|
- type: map_at_10 |
|
value: 84.502 |
|
- type: map_at_100 |
|
value: 84.615 |
|
- type: map_at_1000 |
|
value: 84.617 |
|
- type: map_at_3 |
|
value: 83.127 |
|
- type: map_at_5 |
|
value: 83.99600000000001 |
|
- type: mrr_at_1 |
|
value: 77.02799999999999 |
|
- type: mrr_at_10 |
|
value: 84.487 |
|
- type: mrr_at_100 |
|
value: 84.59299999999999 |
|
- type: mrr_at_1000 |
|
value: 84.59400000000001 |
|
- type: mrr_at_3 |
|
value: 83.193 |
|
- type: mrr_at_5 |
|
value: 83.994 |
|
- type: ndcg_at_1 |
|
value: 77.134 |
|
- type: ndcg_at_10 |
|
value: 87.68599999999999 |
|
- type: ndcg_at_100 |
|
value: 88.17099999999999 |
|
- type: ndcg_at_1000 |
|
value: 88.21 |
|
- type: ndcg_at_3 |
|
value: 84.993 |
|
- type: ndcg_at_5 |
|
value: 86.519 |
|
- type: precision_at_1 |
|
value: 77.134 |
|
- type: precision_at_10 |
|
value: 9.841999999999999 |
|
- type: precision_at_100 |
|
value: 1.006 |
|
- type: precision_at_1000 |
|
value: 0.101 |
|
- type: precision_at_3 |
|
value: 30.313000000000002 |
|
- type: precision_at_5 |
|
value: 18.945999999999998 |
|
- type: recall_at_1 |
|
value: 76.87 |
|
- type: recall_at_10 |
|
value: 97.418 |
|
- type: recall_at_100 |
|
value: 99.579 |
|
- type: recall_at_1000 |
|
value: 99.895 |
|
- type: recall_at_3 |
|
value: 90.227 |
|
- type: recall_at_5 |
|
value: 93.888 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/DuRetrieval |
|
name: MTEB DuRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 25.941 |
|
- type: map_at_10 |
|
value: 78.793 |
|
- type: map_at_100 |
|
value: 81.57799999999999 |
|
- type: map_at_1000 |
|
value: 81.626 |
|
- type: map_at_3 |
|
value: 54.749 |
|
- type: map_at_5 |
|
value: 69.16 |
|
- type: mrr_at_1 |
|
value: 90.45 |
|
- type: mrr_at_10 |
|
value: 93.406 |
|
- type: mrr_at_100 |
|
value: 93.453 |
|
- type: mrr_at_1000 |
|
value: 93.45700000000001 |
|
- type: mrr_at_3 |
|
value: 93.10000000000001 |
|
- type: mrr_at_5 |
|
value: 93.27499999999999 |
|
- type: ndcg_at_1 |
|
value: 90.45 |
|
- type: ndcg_at_10 |
|
value: 86.44500000000001 |
|
- type: ndcg_at_100 |
|
value: 89.28399999999999 |
|
- type: ndcg_at_1000 |
|
value: 89.739 |
|
- type: ndcg_at_3 |
|
value: 85.62100000000001 |
|
- type: ndcg_at_5 |
|
value: 84.441 |
|
- type: precision_at_1 |
|
value: 90.45 |
|
- type: precision_at_10 |
|
value: 41.19 |
|
- type: precision_at_100 |
|
value: 4.761 |
|
- type: precision_at_1000 |
|
value: 0.48700000000000004 |
|
- type: precision_at_3 |
|
value: 76.583 |
|
- type: precision_at_5 |
|
value: 64.68 |
|
- type: recall_at_1 |
|
value: 25.941 |
|
- type: recall_at_10 |
|
value: 87.443 |
|
- type: recall_at_100 |
|
value: 96.54 |
|
- type: recall_at_1000 |
|
value: 98.906 |
|
- type: recall_at_3 |
|
value: 56.947 |
|
- type: recall_at_5 |
|
value: 73.714 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/EcomRetrieval |
|
name: MTEB EcomRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 52.900000000000006 |
|
- type: map_at_10 |
|
value: 63.144 |
|
- type: map_at_100 |
|
value: 63.634 |
|
- type: map_at_1000 |
|
value: 63.644999999999996 |
|
- type: map_at_3 |
|
value: 60.817 |
|
- type: map_at_5 |
|
value: 62.202 |
|
- type: mrr_at_1 |
|
value: 52.900000000000006 |
|
- type: mrr_at_10 |
|
value: 63.144 |
|
- type: mrr_at_100 |
|
value: 63.634 |
|
- type: mrr_at_1000 |
|
value: 63.644999999999996 |
|
- type: mrr_at_3 |
|
value: 60.817 |
|
- type: mrr_at_5 |
|
value: 62.202 |
|
- type: ndcg_at_1 |
|
value: 52.900000000000006 |
|
- type: ndcg_at_10 |
|
value: 68.042 |
|
- type: ndcg_at_100 |
|
value: 70.417 |
|
- type: ndcg_at_1000 |
|
value: 70.722 |
|
- type: ndcg_at_3 |
|
value: 63.287000000000006 |
|
- type: ndcg_at_5 |
|
value: 65.77 |
|
- type: precision_at_1 |
|
value: 52.900000000000006 |
|
- type: precision_at_10 |
|
value: 8.34 |
|
- type: precision_at_100 |
|
value: 0.9450000000000001 |
|
- type: precision_at_1000 |
|
value: 0.097 |
|
- type: precision_at_3 |
|
value: 23.467 |
|
- type: precision_at_5 |
|
value: 15.28 |
|
- type: recall_at_1 |
|
value: 52.900000000000006 |
|
- type: recall_at_10 |
|
value: 83.39999999999999 |
|
- type: recall_at_100 |
|
value: 94.5 |
|
- type: recall_at_1000 |
|
value: 96.89999999999999 |
|
- type: recall_at_3 |
|
value: 70.39999999999999 |
|
- type: recall_at_5 |
|
value: 76.4 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: C-MTEB/IFlyTek-classification |
|
name: MTEB IFlyTek |
|
config: default |
|
split: validation |
|
revision: None |
|
metrics: |
|
- type: accuracy |
|
value: 49.74220854174683 |
|
- type: f1 |
|
value: 38.01399980618159 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: C-MTEB/JDReview-classification |
|
name: MTEB JDReview |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: accuracy |
|
value: 86.73545966228893 |
|
- type: ap |
|
value: 55.72394235169542 |
|
- type: f1 |
|
value: 81.58550390953492 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/LCQMC |
|
name: MTEB LCQMC |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 69.96711977441642 |
|
- type: cos_sim_spearman |
|
value: 75.54747609685569 |
|
- type: euclidean_pearson |
|
value: 74.62663478056035 |
|
- type: euclidean_spearman |
|
value: 75.54761576699639 |
|
- type: manhattan_pearson |
|
value: 74.60983904582241 |
|
- type: manhattan_spearman |
|
value: 75.52758938061503 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: C-MTEB/Mmarco-reranking |
|
name: MTEB MMarcoReranking |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map |
|
value: 28.076927649720986 |
|
- type: mrr |
|
value: 26.98015873015873 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/MMarcoRetrieval |
|
name: MTEB MMarcoRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 65.58 |
|
- type: map_at_10 |
|
value: 74.763 |
|
- type: map_at_100 |
|
value: 75.077 |
|
- type: map_at_1000 |
|
value: 75.091 |
|
- type: map_at_3 |
|
value: 72.982 |
|
- type: map_at_5 |
|
value: 74.155 |
|
- type: mrr_at_1 |
|
value: 67.822 |
|
- type: mrr_at_10 |
|
value: 75.437 |
|
- type: mrr_at_100 |
|
value: 75.702 |
|
- type: mrr_at_1000 |
|
value: 75.715 |
|
- type: mrr_at_3 |
|
value: 73.91799999999999 |
|
- type: mrr_at_5 |
|
value: 74.909 |
|
- type: ndcg_at_1 |
|
value: 67.822 |
|
- type: ndcg_at_10 |
|
value: 78.472 |
|
- type: ndcg_at_100 |
|
value: 79.891 |
|
- type: ndcg_at_1000 |
|
value: 80.262 |
|
- type: ndcg_at_3 |
|
value: 75.138 |
|
- type: ndcg_at_5 |
|
value: 77.094 |
|
- type: precision_at_1 |
|
value: 67.822 |
|
- type: precision_at_10 |
|
value: 9.474 |
|
- type: precision_at_100 |
|
value: 1.019 |
|
- type: precision_at_1000 |
|
value: 0.105 |
|
- type: precision_at_3 |
|
value: 28.281 |
|
- type: precision_at_5 |
|
value: 18.017 |
|
- type: recall_at_1 |
|
value: 65.58 |
|
- type: recall_at_10 |
|
value: 89.18599999999999 |
|
- type: recall_at_100 |
|
value: 95.64399999999999 |
|
- type: recall_at_1000 |
|
value: 98.541 |
|
- type: recall_at_3 |
|
value: 80.455 |
|
- type: recall_at_5 |
|
value: 85.063 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (zh-CN) |
|
config: zh-CN |
|
split: test |
|
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
|
metrics: |
|
- type: accuracy |
|
value: 72.86819098856758 |
|
- type: f1 |
|
value: 70.25369778283451 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (zh-CN) |
|
config: zh-CN |
|
split: test |
|
revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
|
metrics: |
|
- type: accuracy |
|
value: 75.46738399462004 |
|
- type: f1 |
|
value: 75.02466838130249 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/MedicalRetrieval |
|
name: MTEB MedicalRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 53.300000000000004 |
|
- type: map_at_10 |
|
value: 60.072 |
|
- type: map_at_100 |
|
value: 60.618 |
|
- type: map_at_1000 |
|
value: 60.659 |
|
- type: map_at_3 |
|
value: 58.550000000000004 |
|
- type: map_at_5 |
|
value: 59.425 |
|
- type: mrr_at_1 |
|
value: 53.5 |
|
- type: mrr_at_10 |
|
value: 60.187999999999995 |
|
- type: mrr_at_100 |
|
value: 60.73499999999999 |
|
- type: mrr_at_1000 |
|
value: 60.775999999999996 |
|
- type: mrr_at_3 |
|
value: 58.667 |
|
- type: mrr_at_5 |
|
value: 59.541999999999994 |
|
- type: ndcg_at_1 |
|
value: 53.300000000000004 |
|
- type: ndcg_at_10 |
|
value: 63.376999999999995 |
|
- type: ndcg_at_100 |
|
value: 66.24600000000001 |
|
- type: ndcg_at_1000 |
|
value: 67.408 |
|
- type: ndcg_at_3 |
|
value: 60.211000000000006 |
|
- type: ndcg_at_5 |
|
value: 61.781 |
|
- type: precision_at_1 |
|
value: 53.300000000000004 |
|
- type: precision_at_10 |
|
value: 7.380000000000001 |
|
- type: precision_at_100 |
|
value: 0.877 |
|
- type: precision_at_1000 |
|
value: 0.097 |
|
- type: precision_at_3 |
|
value: 21.667 |
|
- type: precision_at_5 |
|
value: 13.76 |
|
- type: recall_at_1 |
|
value: 53.300000000000004 |
|
- type: recall_at_10 |
|
value: 73.8 |
|
- type: recall_at_100 |
|
value: 87.7 |
|
- type: recall_at_1000 |
|
value: 97.0 |
|
- type: recall_at_3 |
|
value: 65.0 |
|
- type: recall_at_5 |
|
value: 68.8 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: C-MTEB/MultilingualSentiment-classification |
|
name: MTEB MultilingualSentiment |
|
config: default |
|
split: validation |
|
revision: None |
|
metrics: |
|
- type: accuracy |
|
value: 76.27666666666667 |
|
- type: f1 |
|
value: 76.31280038435165 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: C-MTEB/OCNLI |
|
name: MTEB Ocnli |
|
config: default |
|
split: validation |
|
revision: None |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 78.72225230102869 |
|
- type: cos_sim_ap |
|
value: 80.63941899467723 |
|
- type: cos_sim_f1 |
|
value: 80.52190121155638 |
|
- type: cos_sim_precision |
|
value: 72.06005004170142 |
|
- type: cos_sim_recall |
|
value: 91.23548046462513 |
|
- type: dot_accuracy |
|
value: 78.72225230102869 |
|
- type: dot_ap |
|
value: 80.63913939812744 |
|
- type: dot_f1 |
|
value: 80.51948051948052 |
|
- type: dot_precision |
|
value: 71.7948717948718 |
|
- type: dot_recall |
|
value: 91.65786694825766 |
|
- type: euclidean_accuracy |
|
value: 78.72225230102869 |
|
- type: euclidean_ap |
|
value: 80.64403797436798 |
|
- type: euclidean_f1 |
|
value: 80.52190121155638 |
|
- type: euclidean_precision |
|
value: 72.06005004170142 |
|
- type: euclidean_recall |
|
value: 91.23548046462513 |
|
- type: manhattan_accuracy |
|
value: 78.18083378451544 |
|
- type: manhattan_ap |
|
value: 80.5241189302444 |
|
- type: manhattan_f1 |
|
value: 80.43478260869566 |
|
- type: manhattan_precision |
|
value: 72.7972626176219 |
|
- type: manhattan_recall |
|
value: 89.86272439281943 |
|
- type: max_accuracy |
|
value: 78.72225230102869 |
|
- type: max_ap |
|
value: 80.64403797436798 |
|
- type: max_f1 |
|
value: 80.52190121155638 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: C-MTEB/OnlineShopping-classification |
|
name: MTEB OnlineShopping |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: accuracy |
|
value: 92.49000000000001 |
|
- type: ap |
|
value: 90.66330807324402 |
|
- type: f1 |
|
value: 92.48245049107115 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/PAWSX |
|
name: MTEB PAWSX |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 33.6275431596535 |
|
- type: cos_sim_spearman |
|
value: 37.865700050451494 |
|
- type: euclidean_pearson |
|
value: 38.1050665279388 |
|
- type: euclidean_spearman |
|
value: 37.864125056066364 |
|
- type: manhattan_pearson |
|
value: 38.11206873232881 |
|
- type: manhattan_spearman |
|
value: 37.852977098473936 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/QBQTC |
|
name: MTEB QBQTC |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 32.137955501231104 |
|
- type: cos_sim_spearman |
|
value: 33.68610910423116 |
|
- type: euclidean_pearson |
|
value: 32.155444753547926 |
|
- type: euclidean_spearman |
|
value: 33.685799252964124 |
|
- type: manhattan_pearson |
|
value: 32.14490855334317 |
|
- type: manhattan_spearman |
|
value: 33.656549820048554 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (zh) |
|
config: zh |
|
split: test |
|
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 63.63884916818661 |
|
- type: cos_sim_spearman |
|
value: 64.3217581571435 |
|
- type: euclidean_pearson |
|
value: 63.475760085926055 |
|
- type: euclidean_spearman |
|
value: 64.31638169371887 |
|
- type: manhattan_pearson |
|
value: 64.39677572604752 |
|
- type: manhattan_spearman |
|
value: 64.85585019406021 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: C-MTEB/STSB |
|
name: MTEB STSB |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 79.74698333415277 |
|
- type: cos_sim_spearman |
|
value: 81.1850043859317 |
|
- type: euclidean_pearson |
|
value: 80.94512578669881 |
|
- type: euclidean_spearman |
|
value: 81.18825478390181 |
|
- type: manhattan_pearson |
|
value: 80.88114336824758 |
|
- type: manhattan_spearman |
|
value: 81.12266715583868 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: C-MTEB/T2Reranking |
|
name: MTEB T2Reranking |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map |
|
value: 66.59971552953814 |
|
- type: mrr |
|
value: 76.42177408088038 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/T2Retrieval |
|
name: MTEB T2Retrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 28.825 |
|
- type: map_at_10 |
|
value: 77.48899999999999 |
|
- type: map_at_100 |
|
value: 81.144 |
|
- type: map_at_1000 |
|
value: 81.216 |
|
- type: map_at_3 |
|
value: 55.435 |
|
- type: map_at_5 |
|
value: 67.496 |
|
- type: mrr_at_1 |
|
value: 91.377 |
|
- type: mrr_at_10 |
|
value: 94.062 |
|
- type: mrr_at_100 |
|
value: 94.122 |
|
- type: mrr_at_1000 |
|
value: 94.123 |
|
- type: mrr_at_3 |
|
value: 93.709 |
|
- type: mrr_at_5 |
|
value: 93.932 |
|
- type: ndcg_at_1 |
|
value: 91.377 |
|
- type: ndcg_at_10 |
|
value: 85.44800000000001 |
|
- type: ndcg_at_100 |
|
value: 89.11099999999999 |
|
- type: ndcg_at_1000 |
|
value: 89.752 |
|
- type: ndcg_at_3 |
|
value: 87.262 |
|
- type: ndcg_at_5 |
|
value: 85.668 |
|
- type: precision_at_1 |
|
value: 91.377 |
|
- type: precision_at_10 |
|
value: 41.525 |
|
- type: precision_at_100 |
|
value: 4.989 |
|
- type: precision_at_1000 |
|
value: 0.516 |
|
- type: precision_at_3 |
|
value: 75.452 |
|
- type: precision_at_5 |
|
value: 62.785000000000004 |
|
- type: recall_at_1 |
|
value: 28.825 |
|
- type: recall_at_10 |
|
value: 84.202 |
|
- type: recall_at_100 |
|
value: 95.768 |
|
- type: recall_at_1000 |
|
value: 98.791 |
|
- type: recall_at_3 |
|
value: 57.284 |
|
- type: recall_at_5 |
|
value: 71.071 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: C-MTEB/TNews-classification |
|
name: MTEB TNews |
|
config: default |
|
split: validation |
|
revision: None |
|
metrics: |
|
- type: accuracy |
|
value: 52.160000000000004 |
|
- type: f1 |
|
value: 50.49492950548829 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: C-MTEB/ThuNewsClusteringP2P |
|
name: MTEB ThuNewsClusteringP2P |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: v_measure |
|
value: 70.06019845009966 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: C-MTEB/ThuNewsClusteringS2S |
|
name: MTEB ThuNewsClusteringS2S |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: v_measure |
|
value: 63.9370959228245 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: C-MTEB/VideoRetrieval |
|
name: MTEB VideoRetrieval |
|
config: default |
|
split: dev |
|
revision: None |
|
metrics: |
|
- type: map_at_1 |
|
value: 60.0 |
|
- type: map_at_10 |
|
value: 69.362 |
|
- type: map_at_100 |
|
value: 69.819 |
|
- type: map_at_1000 |
|
value: 69.833 |
|
- type: map_at_3 |
|
value: 67.783 |
|
- type: map_at_5 |
|
value: 68.71300000000001 |
|
- type: mrr_at_1 |
|
value: 60.0 |
|
- type: mrr_at_10 |
|
value: 69.362 |
|
- type: mrr_at_100 |
|
value: 69.819 |
|
- type: mrr_at_1000 |
|
value: 69.833 |
|
- type: mrr_at_3 |
|
value: 67.783 |
|
- type: mrr_at_5 |
|
value: 68.71300000000001 |
|
- type: ndcg_at_1 |
|
value: 60.0 |
|
- type: ndcg_at_10 |
|
value: 73.59400000000001 |
|
- type: ndcg_at_100 |
|
value: 75.734 |
|
- type: ndcg_at_1000 |
|
value: 76.049 |
|
- type: ndcg_at_3 |
|
value: 70.33 |
|
- type: ndcg_at_5 |
|
value: 72.033 |
|
- type: precision_at_1 |
|
value: 60.0 |
|
- type: precision_at_10 |
|
value: 8.67 |
|
- type: precision_at_100 |
|
value: 0.9650000000000001 |
|
- type: precision_at_1000 |
|
value: 0.099 |
|
- type: precision_at_3 |
|
value: 25.900000000000002 |
|
- type: precision_at_5 |
|
value: 16.38 |
|
- type: recall_at_1 |
|
value: 60.0 |
|
- type: recall_at_10 |
|
value: 86.7 |
|
- type: recall_at_100 |
|
value: 96.5 |
|
- type: recall_at_1000 |
|
value: 98.9 |
|
- type: recall_at_3 |
|
value: 77.7 |
|
- type: recall_at_5 |
|
value: 81.89999999999999 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: C-MTEB/waimai-classification |
|
name: MTEB Waimai |
|
config: default |
|
split: test |
|
revision: None |
|
metrics: |
|
- type: accuracy |
|
value: 88.36 |
|
- type: ap |
|
value: 73.25144216855439 |
|
- type: f1 |
|
value: 86.75076261442027 |
|
--- |
|
# xiaobu-embedding |
|
|
|
模型:基于GTE模型[1]多任务微调。 |
|
数据:闲聊类Query-Query、知识类Query-Doc、BGE开源Query-Doc[2];清洗正例,挖掘中等难度负例;累计6M(数据质量比数量更重要)。 |
|
|
|
## Usage (Sentence-Transformers) |
|
|
|
```bash |
|
pip install -U sentence-transformers |
|
``` |
|
相似度计算: |
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
sentences_1 = ["样例数据-1", "样例数据-2"] |
|
sentences_2 = ["样例数据-3", "样例数据-4"] |
|
model = SentenceTransformer('lier007/xiaobu-embedding') |
|
embeddings_1 = model.encode(sentences_1, normalize_embeddings=True) |
|
embeddings_2 = model.encode(sentences_2, normalize_embeddings=True) |
|
similarity = embeddings_1 @ embeddings_2.T |
|
print(similarity) |
|
``` |
|
|
|
## Evaluation |
|
参考BGE中文C-MTEB评估[2] |
|
|
|
## Finetune |
|
参考BGE微调模块[2] |
|
|
|
## Reference |
|
1. https://huggingface.co/thenlper/gte-large-zh |
|
2. https://github.com/FlagOpen/FlagEmbedding |