Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issues with EMMO #520

Merged
merged 12 commits into from
Sep 24, 2020
17 changes: 12 additions & 5 deletions osp/core/ontology/attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
logger = logging.getLogger(__name__)


BLACKLIST = {rdflib.OWL.bottomDataProperty, rdflib.OWL.topDataProperty}


class OntologyAttribute(OntologyEntity):
"""An attribute defined in the ontology."""

Expand Down Expand Up @@ -45,12 +48,14 @@ def datatype(self):
RuntimeError: More than one datatype associated with the attribute.
# TODO should be allowed
"""
blacklist = [rdflib.RDFS.Literal]
superclasses = self.superclasses
datatypes = set()
for superclass in superclasses:
triple = (superclass.iri, rdflib.RDFS.range, None)
for _, _, o in self.namespace._graph.triples(triple):
datatypes.add(o)
if o not in blacklist:
datatypes.add(o)
if len(datatypes) == 1:
return datatypes.pop()
if len(datatypes) == 0:
Expand Down Expand Up @@ -81,17 +86,19 @@ def convert_to_basic_type(self, value):
return convert_from(value, self.datatype)

def _direct_superclasses(self):
return self._directly_connected(rdflib.RDFS.subPropertyOf)
return self._directly_connected(rdflib.RDFS.subPropertyOf,
blacklist=BLACKLIST)

def _direct_subclasses(self):
return self._directly_connected(rdflib.RDFS.subPropertyOf,
inverse=True)
inverse=True, blacklist=BLACKLIST)

def _superclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subPropertyOf)
yield from self._transitive_hull(rdflib.RDFS.subPropertyOf,
blacklist=BLACKLIST)

def _subclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subPropertyOf,
inverse=True)
inverse=True, blacklist=BLACKLIST)
7 changes: 6 additions & 1 deletion osp/core/ontology/docs/cuba.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,9 @@ cuba:Class a owl:Class ;
rdfs:isDefinedBy "The root of the ontology." .



cuba:Class rdfs:subClassOf owl:Thing .
cuba:relationship rdfs:subPropertyOf owl:topObjectProperty .
cuba:attribute rdfs:subPropertyOf owl:topDataProperty .
owl:Thing rdfs:subClassOf cuba:Class .
owl:topObjectProperty rdfs:subPropertyOf cuba:relationship .
owl:topDataProperty rdfs:subPropertyOf cuba:attribute .
83 changes: 60 additions & 23 deletions osp/core/ontology/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,55 +170,92 @@ def _superclasses(self):
def _subclasses(self):
pass

def _transitive_hull(self, predicate_iri, inverse=False):
def _transitive_hull(self, predicate_iri, inverse=False, blacklist=()):
"""Get all the entities connected with the given predicate.

Args:
predicate_iri (URIRef): The IRI of the predicate
inverse (bool, optional): Use the inverse instead.
Defaults to False.
blacklist (collection): A collection of IRIs not to return.

Yields:
OntologyEntity: The connected entities
"""
result = {self.iri}
visited = {self.iri}
frontier = {self.iri}
while frontier:
current = frontier.pop()
triple = (current, predicate_iri, None)
if inverse:
triple = (None, predicate_iri, current)
for x in self.namespace._graph.triples(triple):
o = x[0 if inverse else 2]
if o not in result and not isinstance(o, rdflib.BNode) \
and not str(o).startswith((str(rdflib.RDF),
str(rdflib.RDFS),
str(rdflib.OWL))):
frontier.add(o)
result.add(o)
yield self.namespace._namespace_registry.from_iri(o)
yield from self._directly_connected(predicate_iri=predicate_iri,
inverse=inverse,
blacklist=blacklist,
_frontier=frontier,
_visited=visited,
_iri=current)

def _special_cases(self, triple):
    """Replace patterns whose superclass statements are often omitted.

    Ontologies frequently leave out the statements that connect an entity
    to the top or bottom of its hierarchy. A pattern that asks for
    everything below a top entity, or everything above a bottom entity,
    is therefore replaced by a pattern that matches on ``rdf:type``
    instead.

    Note that every replacement pattern has ``None`` in the subject
    position, also when the given pattern had ``None`` in the object
    position.

    Args:
        triple (Tuple[rdflib.term]): A triple pattern to possibly replace.

    Returns:
        Tuple[rdflib.term]: The replacement pattern, or the given triple
            if it is not one of the special cases.
    """
    # (predicate, top entity, bottom entity, rdf:type of the members)
    hierarchies = (
        (rdflib.RDFS.subClassOf,
         rdflib.OWL.Thing, rdflib.OWL.Nothing,
         rdflib.OWL.Class),
        (rdflib.RDFS.subPropertyOf,
         rdflib.OWL.topObjectProperty, rdflib.OWL.bottomObjectProperty,
         rdflib.OWL.ObjectProperty),
        # The OWL term for data properties is owl:DatatypeProperty;
        # owl:DataProperty does not exist and would never match.
        (rdflib.RDFS.subPropertyOf,
         rdflib.OWL.topDataProperty, rdflib.OWL.bottomDataProperty,
         rdflib.OWL.DatatypeProperty),
    )
    for predicate, top, bottom, entity_type in hierarchies:
        below_top = (None, predicate, top)
        above_bottom = (bottom, predicate, None)
        if triple == below_top or triple == above_bottom:
            return (None, rdflib.RDF.type, entity_type)
    return triple

def _directly_connected(self, predicate_iri, inverse=False, blacklist=(),
_frontier=None, _visited=None, _iri=None):
"""Get all the entities directly connected with the given predicate.

Args:
predicate_iri (URIRef): The IRI of the predicate
inverse (bool, optional): Use the inverse instead.
Defaults to False.
blacklist (collection): A collection of IRIs not to return.
Others: Helper for _transitive_hull method.

Yields:
OntologyEntity: The connected entities
"""
triple = (self.iri, predicate_iri, None)
triple = (_iri or self.iri, predicate_iri, None)
if inverse:
triple = (None, predicate_iri, self.iri)
triple = (None, predicate_iri, _iri or self.iri)

if predicate_iri in [rdflib.RDFS.subClassOf,
rdflib.RDFS.subPropertyOf]:
triple = self._special_cases(triple)
for x in self.namespace._graph.triples(triple):
o = x[0 if inverse else 2]
if not isinstance(o, rdflib.BNode) \
and not str(o).startswith((str(rdflib.RDF),
str(rdflib.RDFS),
str(rdflib.OWL))):
yield self.namespace._namespace_registry.from_iri(o)
o = x[0 if triple[0] is None else 2]
if _visited and o in _visited:
continue
if not isinstance(o, rdflib.BNode):
if _visited is not None:
_visited.add(o)
if _frontier is not None:
_frontier.add(o)
if o not in blacklist:
yield self.namespace._namespace_registry.from_iri(o)

def __hash__(self):
"""Make the entity hashable."""
Expand Down
3 changes: 2 additions & 1 deletion osp/core/ontology/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ def __getitem__(self, label):
if isinstance(label, tuple):
label = rdflib.term.Literal(label[0], lang=label[1])
result = list()
for s, p, o in self._graph.triples((None, rdflib.RDFS.label, label)):
pattern = (None, rdflib.SKOS.prefLabel, label)
for s, p, o in self._graph.triples(pattern):
if str(s).startswith(self._iri): # TODO more efficient
name = str(s)[len(self._iri):]
result.append(
Expand Down
3 changes: 2 additions & 1 deletion osp/core/ontology/namespace_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ def _get_entity_name(self, entity_iri, ns_iri):
str: The name of the entity with the given IRI
"""
if self._get_reference_by_label(ns_iri):
return self._graph.value(entity_iri, rdflib.RDFS.label).toPython()
return self._graph.value(entity_iri,
rdflib.SKOS.prefLabel).toPython()
return entity_iri[len(ns_iri):]

def clear(self):
Expand Down
79 changes: 55 additions & 24 deletions osp/core/ontology/oclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

logger = logging.getLogger(__name__)

BLACKLIST = {rdflib.OWL.Nothing, rdflib.OWL.Thing,
rdflib.OWL.NamedIndividual}


class OntologyClass(OntologyEntity):
"""A class defined in the ontology."""
Expand Down Expand Up @@ -34,7 +37,10 @@ def attributes(self):
"""
attributes = dict()
for superclass in self.superclasses:
attributes.update(self._get_attributes(superclass.iri))
for attr, v in self._get_attributes(superclass.iri).items():
x = attributes.get(attr, (None, None, None))
x = (x[0] or v[0], x[1] or v[1], x[2] or v[2])
attributes[attr] = x
return attributes

@property
Expand All @@ -57,28 +63,49 @@ def _get_attributes(self, iri):
"""
graph = self._namespace_registry._graph
attributes = dict()

blacklist = [rdflib.OWL.topDataProperty, rdflib.OWL.bottomDataProperty]
# Case 1: domain of Datatype
triple = (None, rdflib.RDFS.domain, iri)
for a_iri, _, _ in self.namespace._graph.triples(triple):
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
if triple in graph \
and not isinstance(a_iri, rdflib.BNode):
a = self._namespace_registry.from_iri(a_iri)
attributes[a] = self._get_default(a_iri, iri)
if triple not in graph or isinstance(a_iri, rdflib.BNode) \
or a_iri in blacklist:
continue
a = self.namespace._namespace_registry.from_iri(a_iri)
default = self._get_default(a_iri, iri)
attributes[a] = (default, False, None)

# Case 2: restrictions
triple = (iri, rdflib.RDFS.subClassOf, None)
for _, _, o in self.namespace._graph.triples(triple):
if (o, rdflib.RDF.type, rdflib.OWL.Restriction) in graph:
a_iri = graph.value(o, rdflib.OWL.onProperty)
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
if triple in graph \
and not isinstance(a_iri, rdflib.BNode):
a = self._namespace_registry.from_iri(a_iri)
attributes[a] = self._get_default(a_iri, iri)
if (o, rdflib.RDF.type, rdflib.OWL.Restriction) not in graph:
continue
a_iri = graph.value(o, rdflib.OWL.onProperty)
triple = (a_iri, rdflib.RDF.type, rdflib.OWL.DatatypeProperty)
if triple not in graph or isinstance(a_iri, rdflib.BNode):
continue
a = self.namespace._namespace_registry.from_iri(a_iri)
default = self._get_default(a_iri, iri)
dt, obligatory = self._get_datatype_for_restriction(o)
obligatory = default is None and obligatory
attributes[a] = (self._get_default(a_iri, iri), obligatory, dt)

# TODO more cases
return attributes

def _get_datatype_for_restriction(self, r):
    """Get the datatype and obligatoriness stated by a restriction.

    The restriction is obligatory if it has an ``owl:someValuesFrom``
    value, or if it has an ``owl:cardinality`` or ``owl:minCardinality``
    whose value is not zero.

    Args:
        r (rdflib.term.Node): The node of the restriction in the graph.

    Returns:
        Tuple[rdflib.term.Node, bool]: The value of
            ``owl:someValuesFrom`` (falling back to ``owl:allValuesFrom``,
            None if neither is present) and whether the restriction is
            obligatory.
    """
    g = self.namespace._graph

    dt = g.value(r, rdflib.OWL.someValuesFrom)
    obligatory = dt is not None
    dt = dt or g.value(r, rdflib.OWL.allValuesFrom)

    # Look the bounds up in the graph. Comparing the (subject, predicate)
    # tuple itself with 0 is always unequal and would mark every
    # restriction as obligatory.
    for predicate in (rdflib.OWL.cardinality, rdflib.OWL.minCardinality):
        if obligatory:
            break
        bound = g.value(r, predicate)
        # Convert to a Python value first: an rdflib Literal never
        # compares equal to a plain int.
        obligatory = bound is not None and bound.toPython() != 0
    return dt, obligatory

def _get_default(self, attribute_iri, superclass_iri):
"""Get the default of the attribute with the given iri.

Expand Down Expand Up @@ -139,7 +166,7 @@ def _get_attributes_values(self, kwargs, _force):
"""
kwargs = dict(kwargs)
attributes = dict()
for attribute, default in self.attributes.items():
for attribute, (default, obligatory, dt) in self.attributes.items():
if attribute.argname in kwargs:
attributes[attribute] = kwargs[attribute.argname]
del kwargs[attribute.argname]
Expand All @@ -155,33 +182,37 @@ def _get_attributes_values(self, kwargs, _force):
f"to be ALL_CAPS. You can use the yaml2camelcase "
f"commandline tool to transform entity names to CamelCase."
)
else:
elif not _force and obligatory:
raise TypeError("Missing keyword argument: %s" %
attribute.argname)
elif default is not None:
attributes[attribute] = default

# Check validity of arguments
if _force:
return {k: v for k, v in attributes.items() if v is not None}
if kwargs:
if not _force and kwargs:
raise TypeError("Unexpected keyword arguments: %s"
% kwargs.keys())
missing = [k.argname for k, v in attributes.items() if v is None]
if missing:
raise TypeError("Missing keyword arguments: %s" % missing)
return attributes

def _direct_superclasses(self):
return self._directly_connected(rdflib.RDFS.subClassOf)
return self._directly_connected(rdflib.RDFS.subClassOf,
blacklist=BLACKLIST)

def _direct_subclasses(self):
return self._directly_connected(rdflib.RDFS.subClassOf, inverse=True)
return self._directly_connected(rdflib.RDFS.subClassOf,
inverse=True, blacklist=BLACKLIST)

def _superclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subClassOf)
yield from self._transitive_hull(
rdflib.RDFS.subClassOf,
blacklist=BLACKLIST)

def _subclasses(self):
yield self
yield from self._transitive_hull(rdflib.RDFS.subClassOf, inverse=True)
yield from self._transitive_hull(
rdflib.RDFS.subClassOf, inverse=True,
blacklist=BLACKLIST)

def __call__(self, uid=None, session=None, _force=False, **kwargs):
"""Create a Cuds object from this ontology class.
Expand Down
9 changes: 5 additions & 4 deletions osp/core/ontology/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,20 @@ def _parse_rdf(self, **kwargs):
self.graph.parse(rdf_file, format=file_format)
default_rels = dict()
reference_styles = dict()
namespace_iris = set()
for namespace, iri in namespaces.items():
if not (
iri.endswith("#") or iri.endswith("/")
):
iri += "#"
namespace_iris.add(iri)
logger.info(f"You can now use `from osp.core.namespaces import "
f"{namespace}`.")
self.graph.bind(namespace, rdflib.URIRef(iri))
default_rels[iri] = default_rel
reference_styles[iri] = reference_style

self._check_namespaces()
self._check_namespaces(namespace_iris)
self._add_cuba_triples(active_rels)
self._add_default_rel_triples(default_rels)
self._add_reference_style_triples(reference_styles)
Expand Down Expand Up @@ -327,9 +329,8 @@ def _add_reference_style_triples(self, reference_styles):
rdflib.Literal(True)
))

def _check_namespaces(self):
namespaces = set(x for _, x in self.graph.namespaces()
if not x.startswith("http://www.w3.org/"))
def _check_namespaces(self, namespace_iris):
namespaces = set(namespace_iris)
for s, p, o in self.graph:
pop = None
for ns in namespaces:
Expand Down
Loading