Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add datasets for a benchmark newly introduced for "Engineering" domain #1911

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"Blog",
"Constructed",
"Encyclopaedic",
"Engineering",
"Fiction",
"Government",
"Legal",
Expand Down Expand Up @@ -193,6 +194,7 @@
"cc-by-nc-sa-3.0",
"cc-by-nc-sa-4.0",
"cc-by-nc-nd-4.0",
"cc-by-nd-4.0",
"openrail",
"openrail++",
"odc-by",
Expand Down
21 changes: 21 additions & 0 deletions mteb/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1434,3 +1434,24 @@ def load_results(
url={https://arxiv.org/abs/2412.08329},
}""",
)

BUILT_MTEB = Benchmark(
name="BuiltBench(eng)",
tasks=get_tasks(
tasks=[
"BuiltBenchClusteringP2P",
"BuiltBenchClusteringS2S",
"BuiltBenchRetrieval",
"BuiltBenchReranking",
],
),
description="\"Built-Bench\" is an ongoing effort aimed at evaluating text embedding models in the context of buit asset management, spanning over various dicsiplines such as architeture, engineering, constrcution, and operations management of the built environment.",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tiny typo

Suggested change
description="\"Built-Bench\" is an ongoing effort aimed at evaluating text embedding models in the context of buit asset management, spanning over various dicsiplines such as architeture, engineering, constrcution, and operations management of the built environment.",
description="\"Built-Bench\" is an ongoing effort aimed at evaluating text embedding models in the context of built asset management, spanning over various disciplines such as architecture, engineering, construction, and operations management of the built environment.",

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sharp eyes!

reference="https://arxiv.org/abs/2411.12056",
citation="""@article{shahinmoghadam2024benchmarking,
title={Benchmarking pre-trained text embedding models in aligning built asset information},
author={Shahinmoghadam, Mehrzad and Motamedi, Ali},
journal={arXiv preprint arXiv:2411.12056},
year={2024}
}""",
mehrzadshm marked this conversation as resolved.
Show resolved Hide resolved
contacts=["mehrzadshm"],
)
2 changes: 2 additions & 0 deletions mteb/tasks/Clustering/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from .eng.BigPatentClustering import *
from .eng.BiorxivClusteringP2P import *
from .eng.BiorxivClusteringS2S import *
from .eng.BuiltBenchClusteringP2P import *
from .eng.BuiltBenchClusteringS2S import *
from .eng.MedrxivClusteringP2P import *
from .eng.MedrxivClusteringS2S import *
from .eng.RedditClustering import *
Expand Down
36 changes: 36 additions & 0 deletions mteb/tasks/Clustering/eng/BuiltBenchClusteringP2P.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskClustering import AbsTaskClustering
from mteb.abstasks.TaskMetadata import TaskMetadata


class BuiltBenchClusteringP2P(AbsTaskClustering):
    """BuiltBench clustering task over built asset item descriptions.

    The class only declares ``metadata``; it is registered with category
    ``p2p``, evaluated on the ``test`` split in English, and scored with
    ``v_measure``.
    """

    metadata = TaskMetadata(
        # --- Identity and provenance ---
        name="BuiltBenchClusteringP2P",
        description="Clustering of built asset item descriptions based on categories identified within industry classification systems such as IFC, Uniclass, etc.",
        reference="https://arxiv.org/abs/2411.12056",
        bibtex_citation="""@article{shahinmoghadam2024benchmarking,
title={Benchmarking pre-trained text embedding models in aligning built asset information},
author={Shahinmoghadam, Mehrzad and Motamedi, Ali},
journal={arXiv preprint arXiv:2411.12056},
year={2024}
}""",
        # --- Dataset location and the exact revision to load ---
        dataset={
            "revision": "919bb71053e9de62a68998161ce4f0cee8f786fb",
            "path": "mehrzad-shahin/BuiltBench-clustering-p2p",
        },
        license="cc-by-nd-4.0",
        date=("2024-06-01", "2024-11-30"),
        # --- Task definition ---
        type="Clustering",
        category="p2p",
        task_subtypes=["Thematic clustering"],
        modalities=["text"],
        domains=["Engineering", "Written"],
        # --- Evaluation setup ---
        eval_langs=["eng-Latn"],
        eval_splits=["test"],
        main_score="v_measure",
        # --- How the data was produced ---
        sample_creation="created",
        annotations_creators="derived",
        dialect=[],
        # --- Instruction text attached to the task ---
        prompt="Identify the category of the built asset entities based on the entity description",
    )
36 changes: 36 additions & 0 deletions mteb/tasks/Clustering/eng/BuiltBenchClusteringS2S.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from mteb.abstasks.AbsTaskClustering import AbsTaskClustering
from mteb.abstasks.TaskMetadata import TaskMetadata


class BuiltBenchClusteringS2S(AbsTaskClustering):
    """BuiltBench clustering task over built asset names/titles.

    The class only declares ``metadata``; it is registered with category
    ``s2s``, evaluated on the ``test`` split in English, and scored with
    ``v_measure``.
    """

    metadata = TaskMetadata(
        # --- Identity and provenance ---
        name="BuiltBenchClusteringS2S",
        description="Clustering of built asset names/titles based on categories identified within industry classification systems such as IFC, Uniclass, etc.",
        reference="https://arxiv.org/abs/2411.12056",
        bibtex_citation="""@article{shahinmoghadam2024benchmarking,
title={Benchmarking pre-trained text embedding models in aligning built asset information},
author={Shahinmoghadam, Mehrzad and Motamedi, Ali},
journal={arXiv preprint arXiv:2411.12056},
year={2024}
}""",
        # --- Dataset location and the exact revision to load ---
        dataset={
            "revision": "1aaeb2ece89ea0a8c64e215c95c4cfaf7e891149",
            "path": "mehrzad-shahin/BuiltBench-clustering-s2s",
        },
        license="cc-by-nd-4.0",
        date=("2024-06-01", "2024-11-30"),
        # --- Task definition ---
        type="Clustering",
        category="s2s",
        task_subtypes=["Thematic clustering"],
        modalities=["text"],
        domains=["Engineering", "Written"],
        # --- Evaluation setup ---
        eval_langs=["eng-Latn"],
        eval_splits=["test"],
        main_score="v_measure",
        # --- How the data was produced ---
        sample_creation="created",
        annotations_creators="derived",
        dialect=[],
        # --- Instruction text attached to the task ---
        prompt="Identify the category of the built asset entities based on the names or titles",
    )
1 change: 1 addition & 0 deletions mteb/tasks/Reranking/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .ara.NamaaMrTydiReranking import *
from .eng.AskUbuntuDupQuestions import *
from .eng.BuiltBenchReranking import *
from .eng.MindSmallReranking import *
from .eng.SciDocsReranking import *
from .eng.StackOverflowDupQuestions import *
Expand Down
39 changes: 39 additions & 0 deletions mteb/tasks/Reranking/eng/BuiltBenchReranking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking


class BuiltBenchReranking(AbsTaskReranking):
    """BuiltBench reranking task over built asset entity type/class descriptions.

    The class only declares ``metadata``; it is registered with category
    ``p2p``, evaluated on the ``test`` split in English, and scored with
    ``map``.
    """

    metadata = TaskMetadata(
        name="BuiltBenchReranking",
        description="Reranking of built asset entity type/class descriptions given a query describing an entity as represented in well-established industry classification systems such as Uniclass, IFC, etc.",
        reference="https://arxiv.org/abs/2411.12056",
        # Dataset location and the exact revision to load.
        dataset={
            "path": "mehrzad-shahin/BuiltBench-reranking",
            "revision": "fd33b0b3454deb256be06a57e8147b32ba078ff9",
        },
        type="Reranking",
        category="p2p",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="map",
        date=("2024-06-01", "2024-11-30"),
        domains=["Engineering", "Written"],
        task_subtypes=[],
        license="cc-by-nd-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        bibtex_citation="""@article{shahinmoghadam2024benchmarking,
title={Benchmarking pre-trained text embedding models in aligning built asset information},
author={Shahinmoghadam, Mehrzad and Motamedi, Ali},
journal={arXiv preprint arXiv:2411.12056},
year={2024}
}""",
        # Query-side instruction text; spelling matters because the string
        # is part of the task's runtime metadata, not a comment.
        prompt={
            "query": "Given a query, retrieve relevant entity descriptions from built asset classification systems such as IFC and Uniclass"
        },
    )
1 change: 1 addition & 0 deletions mteb/tasks/Retrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from .eng.ARCChallengeRetrieval import *
from .eng.ArguAnaRetrieval import *
from .eng.BrightRetrieval import *
from .eng.BuiltBenchRetrieval import *
from .eng.ChemHotpotQARetrieval import *
from .eng.ChemNQRetrieval import *
from .eng.ClimateFEVERRetrieval import *
Expand Down
39 changes: 39 additions & 0 deletions mteb/tasks/Retrieval/eng/BuiltBenchRetrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class BuiltBenchRetrieval(AbsTaskRetrieval):
    """BuiltBench retrieval task over built asset entity type/class descriptions.

    The class only declares ``metadata``; it is registered with category
    ``p2p``, evaluated on the ``test`` split in English, and scored with
    ``ndcg_at_10``.
    """

    metadata = TaskMetadata(
        name="BuiltBenchRetrieval",
        description="Retrieval of built asset entity type/class descriptions given a query describing an entity as represented in well-established industry classification systems such as Uniclass, IFC, etc.",
        reference="https://arxiv.org/abs/2411.12056",
        # Dataset location and the exact revision to load.
        dataset={
            "path": "mehrzad-shahin/BuiltBench-retrieval",
            "revision": "ae611238a58dae85f3130563fe9f9e995444a8d6",
        },
        type="Retrieval",
        category="p2p",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="ndcg_at_10",
        date=("2024-06-01", "2024-11-30"),
        domains=["Engineering", "Written"],
        task_subtypes=["Question answering"],
        license="cc-by-nd-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        bibtex_citation="""@article{shahinmoghadam2024benchmarking,
title={Benchmarking pre-trained text embedding models in aligning built asset information},
author={Shahinmoghadam, Mehrzad and Motamedi, Ali},
journal={arXiv preprint arXiv:2411.12056},
year={2024}
}""",
        # Query-side instruction text; spelling matters because the string
        # is part of the task's runtime metadata, not a comment.
        prompt={
            "query": "Given a query, retrieve relevant entity descriptions from built asset classification systems such as IFC and Uniclass"
        },
    )
Loading