Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated get_by_label() so that it now accepts label, name and full iri #582

Merged
merged 12 commits into from
Apr 15, 2023
Merged
8 changes: 7 additions & 1 deletion ontopy/excelparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ontopy import get_ontology
from ontopy.utils import EMMOntoPyException, NoSuchLabelError
from ontopy.utils import ReadCatalogError, read_catalog
from ontopy.ontology import LabelDefinitionError
from ontopy.manchester import evaluate
import owlready2 # pylint: disable=C0411

Expand Down Expand Up @@ -276,7 +277,12 @@ def create_ontology_from_pandas( # pylint:disable=too-many-locals,too-many-bran
if not parents:
parents = [owlready2.Thing]

concept = onto.new_entity(name, parents)
try:
concept = onto.new_entity(name, parents)
except LabelDefinitionError:
concepts_with_errors["wrongly_defined"].append(name)
continue

added_rows.add(index)
# Add elucidation
try:
Expand Down
135 changes: 85 additions & 50 deletions ontopy/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,20 +186,28 @@ def __init__(self, *args, **kwargs):
doc="Whether to include imported ontologies in dir() listing.",
)

# Other settings
_colon_in_name = False
colon_in_name = property(
fget=lambda self: self._colon_in_name,
fset=lambda self, v: setattr(self, "_colon_in_name", bool(v)),
doc="Whether to accept colon in name-part of IRI. "
"If true, the name cannot be prefixed.",
)

def __dir__(self):
set_dir = set(super().__dir__())
dirset = set(super().__dir__())
lst = list(self.get_entities(imported=self._dir_imported))
if self._dir_preflabel:
set_dir.update(
dirset.update(
_.prefLabel.first() for _ in lst if hasattr(_, "prefLabel")
)
if self._dir_label:
set_dir.update(_.label.first() for _ in lst if hasattr(_, "label"))
dirset.update(_.label.first() for _ in lst if hasattr(_, "label"))
if self._dir_name:
set_dir.update(_.name for _ in lst if hasattr(_, "name"))

set_dir.difference_update({None}) # get rid of possible None
return sorted(set_dir)
dirset.update(_.name for _ in lst if hasattr(_, "name"))
dirset.difference_update({None}) # get rid of possible None
return sorted(dirset)

def __getitem__(self, name):
item = super().__getitem__(name)
Expand Down Expand Up @@ -257,7 +265,12 @@ def get_unabbreviated_triples(
)

def get_by_label(
self, label: str, label_annotations: str = None, prefix: str = None
self,
label: str,
label_annotations: str = None,
prefix: str = None,
imported: bool = True,
colon_in_name: bool = None,
):
"""Returns entity with label annotation `label`.

Expand All @@ -272,50 +285,52 @@ def get_by_label(
the base iri of an ontology (with trailing slash (/) or hash
(#) stripped off). The search for a matching label will be
limited to this namespace.
imported: Whether to also look for `label` in imported ontologies.
colon_in_name: Whether to accept colon (:) in name-part of IRI.
Defaults to the `colon_in_name` property of `self`.
A true value cannot be combined with `prefix`.

If several entities have the same label, only the one which is
found first is returned. Use get_by_label_all() to get all matches.

A NoSuchLabelError is raised if `label` cannot be found.
Note, if different prefixes are provided in the label and via
the `prefix` argument a warning will be issued and the
`prefix` argument will take precedence.

Note
----
The current implementation also supports "*" as a wildcard
matching any number of characters. This may change in the future.
A NoSuchLabelError is raised if `label` cannot be found.
"""
# pylint: disable=too-many-arguments,too-many-branches
# pylint: disable=too-many-arguments,too-many-branches,invalid-name
francescalb marked this conversation as resolved.
Show resolved Hide resolved
if not isinstance(label, str):
raise TypeError(
f"Invalid label definition, must be a string: {label!r}"
)
if " " in label:
raise ValueError(
f"Invalid label definition, {label!r} contains spaces."
)

if self._label_annotations is None:
for iri in DEFAULT_LABEL_ANNOTATIONS:
try:
self.add_label_annotation(iri)
except ValueError:
pass

splitlabel = label.split(":", 1)
if len(splitlabel) > 2:
raise ValueError(
f"Invalid label definition, {label!r}"
" contains more than one ':' ."
"The string before ':' indicates the prefix. "
"The string after ':' indicates the label."
)
francescalb marked this conversation as resolved.
Show resolved Hide resolved
if len(splitlabel) == 2:
label = splitlabel[1]
if prefix and prefix != splitlabel[0]:
warnings.warn(
f"Prefix given both as argument ({prefix}) "
f"and in label ({splitlabel[0]}). "
"Prefix given in label takes presendence "
if colon_in_name is None:
colon_in_name = self._colon_in_name
if colon_in_name:
if prefix:
raise ValueError(
"`prefix` cannot be combined with `colon_in_name`"
)
prefix = splitlabel[0]
else:
splitlabel = label.split(":", 1)
if len(splitlabel) == 2 and not splitlabel[1].startswith("//"):
label = splitlabel[1]
if prefix and prefix != splitlabel[0]:
warnings.warn(
f"Prefix given both as argument ({prefix}) "
f"and in label ({splitlabel[0]}). "
"Prefix given in argument takes presendence "
)
if not prefix:
prefix = splitlabel[0]

if prefix:
entitylist = self.get_by_label_all(
Expand All @@ -327,36 +342,56 @@ def get_by_label(
return entitylist[0]

raise NoSuchLabelError(
f"No label annotations matches {label!r} with prefix "
f"No label annotations matches {label!r} with prefix "
f"{prefix!r}"
)
# if label in self._namespaces:
# return self._namespaces[label]

if label_annotations is None:
annotations = (a.name for a in self.label_annotations)
else:
annotations = (
a.name if hasattr(a, "storid") else a for a in label_annotations
)
for key in annotations:
entity = self.search_one(**{key: label})
if entity:
return entity
# Label is a full IRI
entity = self.world[label]
if entity:
return entity

# First entity with matching label annotation
annotation_ids = (
(self._abbreviate(ann, False) for ann in label_annotations)
if label_annotations
else (ann.storid for ann in self.label_annotations)
)
get_triples = (
self.world._get_data_triples_spod_spod
if imported
else self._get_data_triples_spod_spod
)
for annotation_id in annotation_ids:
for s, _, _, _ in get_triples(None, annotation_id, label, None):
return self.world[self._unabbreviate(s)]

# Special labels
if self._special_labels and label in self._special_labels:
return self._special_labels[label]

# Check if label is a name under base_iri
entity = self.world[self.base_iri + label]
if entity:
return entity

raise NoSuchLabelError(f"No label annotations matches {label!r}")
# Check if label is a name in any namespace
for namespace in self._namespaces.keys():
entity = self.world[namespace + label]
if entity:
return entity

raise NoSuchLabelError(f"No label annotations matches '{label}'")

def get_by_label_all(self, label, label_annotations=None, prefix=None):
"""Like get_by_label(), but returns a list with all matching labels.

Returns an empty list if no matches could be found.

Note
----
The current implementation also supports "*" as a wildcard
matching any number of characters. This may change in the future.
"""
if not isinstance(label, str):
raise TypeError(
Expand Down Expand Up @@ -1582,7 +1617,7 @@ def new_entity(

Throws exception if name consists of more than one word.
"""
if len(name.split(" ")) > 1:
if " " in name:
raise LabelDefinitionError(
f"Error in label name definition '{name}': "
f"Label consists of more than one word."
Expand Down Expand Up @@ -1684,7 +1719,7 @@ def _get_unabbreviated_triples(
_unabbreviate(self, p, blank=blank),
_unabbreviate(self, o, blank=blank),
)
for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=""):
for s, p, o, d in self._get_data_triples_spod_spod(*abb, d=None):
yield (
_unabbreviate(self, s, blank=blank),
_unabbreviate(self, p, blank=blank),
Expand Down
28 changes: 28 additions & 0 deletions tests/test_dir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Test how the `dir_*` settings of an ontology affect `dir(onto)`.

The script toggles the `dir_imported`, `dir_preflabel`, `dir_label` and
`dir_name` properties and checks which identifiers show up in the listing.
"""
from pathlib import Path

from ontopy import get_ontology


# Directory containing this test file; test data is located relative to it.
thisdir = Path(__file__).resolve().parent

onto = get_ontology(
thisdir / "test_excelparser/imported_onto/ontology.ttl"
).load()

# With every dir_* option switched off, "TestClass2" must not be listed.
onto.dir_imported = False
onto.dir_preflabel = False
onto.dir_label = False
onto.dir_name = False
assert "TestClass2" not in dir(onto)

# Enable listing by prefLabel and include imported ontologies.
# NOTE(review): presumably "TestClass2" is a prefLabel and "testclass" /
# "testclass2" are entity names in the test ontology -- confirm against
# the ttl file.
onto.dir_imported = True
onto.dir_preflabel = True
assert onto._dir_imported  # the public setter must update the private flag
assert onto.TestClass2  # attribute access resolves the entity
assert "TestClass2" in dir(onto)
assert "testclass" not in dir(onto)
assert "testclass2" not in dir(onto)

# Additionally enabling dir_name makes the lower-case identifiers appear
# while "TestClass2" stays listed.
onto.dir_name = True
assert "TestClass2" in dir(onto)
assert "testclass" in dir(onto)
assert "testclass2" in dir(onto)
13 changes: 13 additions & 0 deletions tests/test_get_by_label.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Test that entities can be looked up by name, by full IRI and by label."""
from ontopy import get_ontology


# Loading emmo-inferred where everything is squashed into one ontology
emmo = get_ontology().load()
# Item lookup must resolve both the entity name and the full IRI to the
# same entity as attribute access does.
assert emmo[emmo.Atom.name] == emmo.Atom
assert emmo[emmo.Atom.iri] == emmo.Atom

# Load an ontology with imported sub-ontologies
# NOTE(review): the source is a remote URL, so this presumably requires
# network access when the test runs -- confirm for offline CI.
onto = get_ontology(
"https://raw.githubusercontent.com/BIG-MAP/BattINFO/master/battinfo.ttl"
).load()
# Attribute access must find the entity whose prefLabel is "Electrolyte".
assert onto.Electrolyte.prefLabel.first() == "Electrolyte"