
Commit

Add BuiltBench results (a benchmark proposed for the engineering domain) (#110)

* Add BuiltBench results (related mteb PR: embeddings-benchmark/mteb#1911)

* add initial results for proposed tasks
* update paths.json

* Update model_meta files modified in BuiltBench PR: #110

* rollback paths.json (see PR: #110)
mehrzadshm authored Feb 7, 2025
1 parent e70c872 commit 619836b
Showing 78 changed files with 4,008 additions and 13 deletions.
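The result files added in this commit follow the standard mteb output format (one JSON file per task per model). As a rough sketch of how such files are produced — this assumes the usual mteb Python API and is not taken from the commit itself — one would run the new BuiltBench tasks against a model and point the run at an output folder:

```python
# Minimal sketch, assuming the standard mteb (>= 1.31) Python API; the exact
# invocation used to generate the files in this commit is not part of the diff.
import mteb

# The four BuiltBench tasks introduced in the related mteb PR
# (embeddings-benchmark/mteb#1911).
tasks = mteb.get_tasks(tasks=[
    "BuiltBenchClusteringP2P",
    "BuiltBenchClusteringS2S",
    "BuiltBenchReranking",
    "BuiltBenchRetrieval",
])

# One of the models whose results appear in this commit.
model = mteb.get_model("BAAI/bge-base-en-v1.5")

evaluation = mteb.MTEB(tasks=tasks)
# Writes one result JSON per task (like the files below) under the output folder.
evaluation.run(model, output_folder="results")
```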
@@ -0,0 +1,42 @@
{
"dataset_revision": "919bb71053e9de62a68998161ce4f0cee8f786fb",
"task_name": "BuiltBenchClusteringP2P",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"v_measure": 0.517757,
"v_measure_std": 0.158396,
"v_measures": [
0.388885,
0.517974,
0.461875,
0.342251,
0.389993,
0.517478,
0.549504,
0.523524,
0.732158,
0.08921,
0.635176,
0.799711,
0.633224,
0.46089,
0.409979,
0.495442,
0.531105,
0.582254,
0.498222,
0.796294
],
"main_score": 0.517757,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 30.256697416305542,
"kg_co2_emissions": null
}
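A note on the clustering schema above: v_measure is the mean of the per-run v_measures, v_measure_std their standard deviation, and main_score mirrors v_measure. A small self-contained check, where the file path is a hypothetical local copy of the result file shown above:

```python
# Sanity-check sketch for mteb clustering result files; the path below is a
# hypothetical local copy of the BuiltBenchClusteringP2P result shown above.
import json
import statistics

with open("BuiltBenchClusteringP2P.json") as f:
    run = json.load(f)["scores"]["test"][0]

mean = statistics.mean(run["v_measures"])
std = statistics.pstdev(run["v_measures"])  # population std dev of the per-run scores

print(round(mean, 6))  # 0.517757 for the file above, equal to run["v_measure"]
print(round(std, 6))   # ~0.158396 for the file above, matching run["v_measure_std"]
print(run["main_score"] == run["v_measure"])  # True
```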
@@ -0,0 +1,40 @@
{
"dataset_revision": "1aaeb2ece89ea0a8c64e215c95c4cfaf7e891149",
"task_name": "BuiltBenchClusteringS2S",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"v_measure": 0.430005,
"v_measure_std": 0.103333,
"v_measures": [
0.461463,
0.547957,
0.400753,
0.367827,
0.288828,
0.443991,
0.424877,
0.560499,
0.389418,
0.440851,
0.470388,
0.608706,
0.54972,
0.48226,
0.241198,
0.287907,
0.278523,
0.49492
],
"main_score": 0.430005,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 5.229267597198486,
"kg_co2_emissions": null
}
@@ -0,0 +1,26 @@
{
"dataset_revision": "fd33b0b3454deb256be06a57e8147b32ba078ff9",
"task_name": "BuiltBenchReranking",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"map": 0.633789,
"mrr": 0.800539,
"nAUC_map_max": 0.385108,
"nAUC_map_std": 0.61852,
"nAUC_map_diff1": 0.033195,
"nAUC_mrr_max": 0.368781,
"nAUC_mrr_std": 0.489594,
"nAUC_mrr_diff1": 0.137947,
"main_score": 0.633789,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 18.513643503189087,
"kg_co2_emissions": null
}
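For the reranking file above, main_score is the MAP value; mrr and the nAUC_* fields are reported alongside it. Analogous to the clustering check earlier, a one-off verification against a hypothetical local copy of the file:

```python
# Sketch: in mteb reranking results the main_score equals "map".
import json

with open("BuiltBenchReranking.json") as f:  # hypothetical local copy of the file above
    run = json.load(f)["scores"]["test"][0]

assert run["main_score"] == run["map"]  # 0.633789 in the file above
print(run["map"], run["mrr"])
```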
@@ -0,0 +1,158 @@
{
"dataset_revision": "ae611238a58dae85f3130563fe9f9e995444a8d6",
"task_name": "BuiltBenchRetrieval",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"ndcg_at_1": 0.56886,
"ndcg_at_3": 0.58011,
"ndcg_at_5": 0.59022,
"ndcg_at_10": 0.61648,
"ndcg_at_20": 0.6495,
"ndcg_at_100": 0.70197,
"ndcg_at_1000": 0.72556,
"map_at_1": 0.28683,
"map_at_3": 0.40405,
"map_at_5": 0.44666,
"map_at_10": 0.49715,
"map_at_20": 0.53351,
"map_at_100": 0.56928,
"map_at_1000": 0.57555,
"recall_at_1": 0.28683,
"recall_at_3": 0.44487,
"recall_at_5": 0.52117,
"recall_at_10": 0.63682,
"recall_at_20": 0.7531,
"recall_at_100": 0.91812,
"recall_at_1000": 0.99875,
"precision_at_1": 0.56886,
"precision_at_3": 0.41018,
"precision_at_5": 0.33832,
"precision_at_10": 0.25,
"precision_at_20": 0.17979,
"precision_at_100": 0.06186,
"precision_at_1000": 0.0082,
"mrr_at_1": 0.568862,
"mrr_at_3": 0.641717,
"mrr_at_5": 0.655639,
"mrr_at_10": 0.664422,
"mrr_at_20": 0.668248,
"mrr_at_100": 0.669901,
"mrr_at_1000": 0.670052,
"nauc_ndcg_at_1_max": 0.657615,
"nauc_ndcg_at_1_std": -0.042279,
"nauc_ndcg_at_1_diff1": 0.460099,
"nauc_ndcg_at_3_max": 0.614084,
"nauc_ndcg_at_3_std": -0.076575,
"nauc_ndcg_at_3_diff1": 0.39682,
"nauc_ndcg_at_5_max": 0.610741,
"nauc_ndcg_at_5_std": -0.092122,
"nauc_ndcg_at_5_diff1": 0.417567,
"nauc_ndcg_at_10_max": 0.624393,
"nauc_ndcg_at_10_std": -0.077477,
"nauc_ndcg_at_10_diff1": 0.40211,
"nauc_ndcg_at_20_max": 0.634024,
"nauc_ndcg_at_20_std": -0.047231,
"nauc_ndcg_at_20_diff1": 0.416927,
"nauc_ndcg_at_100_max": 0.641933,
"nauc_ndcg_at_100_std": 0.002091,
"nauc_ndcg_at_100_diff1": 0.421078,
"nauc_ndcg_at_1000_max": 0.62616,
"nauc_ndcg_at_1000_std": -0.029943,
"nauc_ndcg_at_1000_diff1": 0.408794,
"nauc_map_at_1_max": 0.46745,
"nauc_map_at_1_std": -0.181416,
"nauc_map_at_1_diff1": 0.54144,
"nauc_map_at_3_max": 0.551756,
"nauc_map_at_3_std": -0.170018,
"nauc_map_at_3_diff1": 0.494867,
"nauc_map_at_5_max": 0.575965,
"nauc_map_at_5_std": -0.156605,
"nauc_map_at_5_diff1": 0.483961,
"nauc_map_at_10_max": 0.600074,
"nauc_map_at_10_std": -0.126368,
"nauc_map_at_10_diff1": 0.453,
"nauc_map_at_20_max": 0.619331,
"nauc_map_at_20_std": -0.100292,
"nauc_map_at_20_diff1": 0.447259,
"nauc_map_at_100_max": 0.632346,
"nauc_map_at_100_std": -0.061719,
"nauc_map_at_100_diff1": 0.434954,
"nauc_map_at_1000_max": 0.627738,
"nauc_map_at_1000_std": -0.063215,
"nauc_map_at_1000_diff1": 0.430271,
"nauc_recall_at_1_max": 0.46745,
"nauc_recall_at_1_std": -0.181416,
"nauc_recall_at_1_diff1": 0.54144,
"nauc_recall_at_3_max": 0.506602,
"nauc_recall_at_3_std": -0.184331,
"nauc_recall_at_3_diff1": 0.45311,
"nauc_recall_at_5_max": 0.52077,
"nauc_recall_at_5_std": -0.176895,
"nauc_recall_at_5_diff1": 0.447712,
"nauc_recall_at_10_max": 0.515653,
"nauc_recall_at_10_std": -0.124786,
"nauc_recall_at_10_diff1": 0.371061,
"nauc_recall_at_20_max": 0.534895,
"nauc_recall_at_20_std": -0.040026,
"nauc_recall_at_20_diff1": 0.368425,
"nauc_recall_at_100_max": 0.639816,
"nauc_recall_at_100_std": 0.308272,
"nauc_recall_at_100_diff1": 0.395279,
"nauc_recall_at_1000_max": 0.896105,
"nauc_recall_at_1000_std": 0.781033,
"nauc_recall_at_1000_diff1": 0.267489,
"nauc_precision_at_1_max": 0.657615,
"nauc_precision_at_1_std": -0.042279,
"nauc_precision_at_1_diff1": 0.460099,
"nauc_precision_at_3_max": 0.364233,
"nauc_precision_at_3_std": 0.074636,
"nauc_precision_at_3_diff1": 0.02891,
"nauc_precision_at_5_max": 0.236649,
"nauc_precision_at_5_std": 0.108568,
"nauc_precision_at_5_diff1": -0.066509,
"nauc_precision_at_10_max": 0.122793,
"nauc_precision_at_10_std": 0.158997,
"nauc_precision_at_10_diff1": -0.175302,
"nauc_precision_at_20_max": 0.029816,
"nauc_precision_at_20_std": 0.199538,
"nauc_precision_at_20_diff1": -0.201348,
"nauc_precision_at_100_max": -0.116511,
"nauc_precision_at_100_std": 0.214691,
"nauc_precision_at_100_diff1": -0.239711,
"nauc_precision_at_1000_max": -0.224998,
"nauc_precision_at_1000_std": 0.082654,
"nauc_precision_at_1000_diff1": -0.27228,
"nauc_mrr_at_1_max": 0.657615,
"nauc_mrr_at_1_std": -0.042279,
"nauc_mrr_at_1_diff1": 0.460099,
"nauc_mrr_at_3_max": 0.647651,
"nauc_mrr_at_3_std": -0.031469,
"nauc_mrr_at_3_diff1": 0.421722,
"nauc_mrr_at_5_max": 0.653044,
"nauc_mrr_at_5_std": -0.03276,
"nauc_mrr_at_5_diff1": 0.427097,
"nauc_mrr_at_10_max": 0.651649,
"nauc_mrr_at_10_std": -0.025527,
"nauc_mrr_at_10_diff1": 0.425706,
"nauc_mrr_at_20_max": 0.650775,
"nauc_mrr_at_20_std": -0.024984,
"nauc_mrr_at_20_diff1": 0.425565,
"nauc_mrr_at_100_max": 0.651302,
"nauc_mrr_at_100_std": -0.026192,
"nauc_mrr_at_100_diff1": 0.42767,
"nauc_mrr_at_1000_max": 0.651396,
"nauc_mrr_at_1000_std": -0.02623,
"nauc_mrr_at_1000_diff1": 0.42765,
"main_score": 0.61648,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 21.189420461654663,
"kg_co2_emissions": null
}
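In the retrieval file above, metrics are keyed as "<metric>_at_<k>" for cutoffs 1, 3, 5, 10, 20, 100 and 1000, and main_score is nDCG@10. A short sketch, again against a hypothetical local copy, for collecting a metric across cutoffs:

```python
# Sketch: read per-cutoff retrieval metrics from an mteb result file.
import json

with open("BuiltBenchRetrieval.json") as f:  # hypothetical local copy of the file above
    run = json.load(f)["scores"]["test"][0]

cutoffs = [1, 3, 5, 10, 20, 100, 1000]
for metric in ("ndcg", "map", "recall", "precision", "mrr"):
    print(metric, {k: run[f"{metric}_at_{k}"] for k in cutoffs})

# For retrieval tasks main_score is nDCG@10 (0.61648 in the file above).
assert run["main_score"] == run["ndcg_at_10"]
```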
@@ -1 +1 @@
{"name": "BAAI/bge-base-en-v1.5", "revision": "a5beb1e3e68b9ab74eb54cfd186867f64f240e1a", "release_date": "2023-09-11", "languages": ["eng_Latn"], "n_parameters": 438000000, "max_tokens": 512.0, "embed_dim": 768, "license": "mit", "open_weights": true, "public_training_code": null, "framework": ["Sentence Transformers", "PyTorch"], "reference": "https://huggingface.co/BAAI/bge-base-en-v1.5", "similarity_fn_name": "cosine", "use_instructions": true, "training_datasets": {"NQ": ["test"], "NQHardNegatives": ["test"], "AmazonReviewsClassification": ["validation", "test"], "MLQARetrieval": ["validation", "test"]}, "adapted_from": null, "superseded_by": null, "loader": "sentence_transformers_loader"}
{"name": "BAAI/bge-base-en-v1.5", "revision": "a5beb1e3e68b9ab74eb54cfd186867f64f240e1a", "release_date": "2023-09-11", "languages": ["eng_Latn"], "n_parameters": 109000000, "max_tokens": 512.0, "embed_dim": 768, "license": "mit", "open_weights": true, "public_training_code": null, "public_training_data": "https://data.baai.ac.cn/details/BAAI-MTP", "framework": ["Sentence Transformers", "PyTorch"], "reference": "https://huggingface.co/BAAI/bge-base-en-v1.5", "similarity_fn_name": "cosine", "use_instructions": true, "training_datasets": {"NQ": ["test"], "NQ-NL": ["test"], "NQHardNegatives": ["test"], "AmazonReviewsClassification": ["validation", "test"], "MLQARetrieval": ["validation", "test"]}, "adapted_from": null, "superseded_by": null, "modalities": ["text"], "loader": "sentence_transformers_loader"}
@@ -0,0 +1,42 @@
{
"dataset_revision": "919bb71053e9de62a68998161ce4f0cee8f786fb",
"task_name": "BuiltBenchClusteringP2P",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"v_measure": 0.524083,
"v_measure_std": 0.131864,
"v_measures": [
0.363593,
0.502413,
0.45769,
0.468297,
0.386542,
0.404564,
0.623353,
0.675208,
0.706091,
0.386532,
0.490226,
0.799711,
0.537188,
0.424919,
0.35574,
0.510183,
0.73475,
0.403612,
0.603789,
0.647252
],
"main_score": 0.524083,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 94.42164897918701,
"kg_co2_emissions": null
}
@@ -0,0 +1,40 @@
{
"dataset_revision": "1aaeb2ece89ea0a8c64e215c95c4cfaf7e891149",
"task_name": "BuiltBenchClusteringS2S",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"v_measure": 0.466924,
"v_measure_std": 0.126581,
"v_measures": [
0.301491,
0.545938,
0.460741,
0.528819,
0.341869,
0.437427,
0.380774,
0.589232,
0.499215,
0.655186,
0.575787,
0.741065,
0.538618,
0.473277,
0.253986,
0.32315,
0.348168,
0.409886
],
"main_score": 0.466924,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 14.017496347427368,
"kg_co2_emissions": null
}
@@ -0,0 +1,26 @@
{
"dataset_revision": "fd33b0b3454deb256be06a57e8147b32ba078ff9",
"task_name": "BuiltBenchReranking",
"mteb_version": "1.31.5",
"scores": {
"test": [
{
"map": 0.654691,
"mrr": 0.851443,
"nAUC_map_max": 0.415067,
"nAUC_map_std": 0.442405,
"nAUC_map_diff1": 0.053061,
"nAUC_mrr_max": 0.301693,
"nAUC_mrr_std": 0.138653,
"nAUC_mrr_diff1": 0.11613,
"main_score": 0.654691,
"hf_subset": "default",
"languages": [
"eng-Latn"
]
}
]
},
"evaluation_time": 59.27251124382019,
"kg_co2_emissions": null
}