diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 54d8e3811..8a5f81312 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -8,6 +8,7 @@ set(py_sources mappings.py datamodel.py rdf.py + dataset.py quantity.py testutils.py ) diff --git a/bindings/python/dataset.py b/bindings/python/dataset.py new file mode 100644 index 000000000..0ee2ef51d --- /dev/null +++ b/bindings/python/dataset.py @@ -0,0 +1,576 @@ +"""Module for representing DLite data models and instances with rdflib. + +DLite data models are represented as EMMO datasets. + +NOTE: This module depends on Tripper. +""" +import json +import re +import warnings +from collections import defaultdict +from typing import TYPE_CHECKING +from uuid import uuid4 + +from tripper import Literal, Namespace, Triplestore +from tripper import MAP, OTEIO, OWL, RDF, RDFS, SKOS, XSD +from tripper.utils import en +from tripper.errors import NoSuchIRIError + +import dlite + +if TYPE_CHECKING: # pragma: no cover + from typing import List, Optional, Sequence, Tuple + + # A triple with literal objects in n3 notation + Triple = Tuple[str, str, str] + + +# XXX TODO - Make a local cache of EMMO such that we only download it once +TS_EMMO = Triplestore("rdflib") +TS_EMMO.parse("https://w3id.org/emmo/1.0.0-rc1") + +EMMO_VERSIONIRI = TS_EMMO.value("https://w3id.org/emmo", OWL.versionIRI) + +EMMO = Namespace( + iri="https://w3id.org/emmo#", + label_annotations=True, + check=True, + triplestore=TS_EMMO, +) + +EMMO_TYPES = { + "blob": "BinaryData", + "bool": "BooleanData", + "int": "IntegerData", + "int8": "ByteData", + "int16": "ShortData", + "int32": "IntData", + "int64": "LongData", + "uint": "NonNegativeIntegerData", + "uint8": "UnsignedByteData", + "uint16": "UnsignedShortData", + "uint32": "UnsignedIntData", + "uint64": "UnsignedLongData", + "float": "FloatingPointData", + "float32": "FloatData", + "float64": "DoubleData", + "string": "StringData", + "ref": "DataSet", +
#"dimension": "Dimension", + #"property": "Datum", + #"relation": NotImplemented, +} + +# Maps unit names to IRIs +unit_cache = {} + + +class MissingUnitError(ValueError): + "Unit not found in ontology." + +class UnsupportedTypeError(TypeError, NotImplementedError): + "The given type is not supported." + +class KBError(ValueError): + "Missing or inconsistent data in knowledge base." + + +def _string(s): + """Return `s` as a literal string.""" + return Literal(s, datatype="xsd:string") + + +def title(s): + """Capitalise first letter in `s`. An empty string is returned unchanged.""" + return s[:1].upper() + s[1:] + + +def dlite2emmotype(dlitetype): + """Convert a DLite type string to an `(EMMO class label, size)` tuple. The bit width of int/uint/float types is converted to an integer number of bytes.""" + dtype, ssize = re.match("([a-zA-Z]+)([0-9]*)", dlitetype).groups() + size = int(ssize) if ssize else None + if size and dtype in ("int", "uint", "float"): + size //= 8 + if dlitetype in EMMO_TYPES: + emmotype = EMMO_TYPES[dlitetype] + elif dtype in EMMO_TYPES: + emmotype = EMMO_TYPES[dtype] + else: + raise UnsupportedTypeError(dlitetype) + return emmotype, size + + +def emmo2dlitetype(emmotype, size=None): + """Convert EMMO type and size to dlite type.""" + dlitetypes = [k for k, v in EMMO_TYPES.items() if v == emmotype] + if not dlitetypes: + raise UnsupportedTypeError(emmotype) + dlitetype, = dlitetypes + typeno = dlite.to_typenumber(dlitetype.rstrip("0123456789")) + if size: + return dlite.to_typename(typeno, int(size)) + return dlite.to_typename(typeno) + + +def get_shape(ts, dimiri, dimensions=None, mappings=None, uri=None): + """Returns a shape list for a datum whose first dimension is `dimiri`. + + If `dimensions` is given, it should be a list that will be updated + with new dimensions. + + If `mappings` and `uri` are given, then `mappings` should be a + list that will be updated with new mappings. `uri` should be the + URI of the data model.
+ """ + shape = [] + while dimiri: + mapsto = [] + nextiri = label = descr = None + for pred, obj in ts.predicate_objects(dimiri): + if pred == EMMO.hasNext: + nextiri = obj + elif pred == EMMO.hasSymbolValue: + label = str(obj) + elif pred == EMMO.elucidation: + descr = str(obj) + elif pred == RDF.type and obj not in (EMMO.Dimension,): + mapsto.append(obj) + if not label: + raise KBError("dimension has no symbol value:", dimiri) + if dimensions is not None: + if not descr: + raise KBError("dimension has no elucidation:", dimiri) + dimensions.append(dlite.Dimension(label, descr)) + shape.append(label) + if mappings is not None and uri: + for obj in mapsto: + mappings.append((f"{uri}#{label}", MAP.mapsTo, obj)) + dimiri = nextiri + return shape + + +def dimensional_string(unit_iri): + """Return the inferred dimensional string of the given unit IRI. Returns + None if no dimensional string can be inferred.""" + raise NotImplementedError() + + +def get_unit_symbol(iri): + """Return the unit symbol for the unit with the given `iri`.""" + symbol = TS_EMMO.value(iri, EMMO.unitSymbol) + if symbol: + return str(symbol) + for r in TS_EMMO.restrictions(iri, EMMO.hasSymbolValue, type="value"): + symbol = r["value"] + if symbol: + return str(symbol) + raise KBError("No symbol value is defined for unit:", iri) + + +def get_unit_iri(unit): + """Returns the IRI for the given unit.""" + if not unit_cache: + ts = TS_EMMO + for predicate in (EMMO.unitSymbol, EMMO.ucumCode, EMMO.uneceCommonCode): + for s, _, o in ts.triples(predicate=predicate): + if o.value in unit_cache and predicate == EMMO.unitSymbol: + warnings.warn( + f"more than one unit with symbol '{o.value}': " + f"{unit_cache[o.value]}" + ) + else: + unit_cache[o.value] = s + for o in ts.objects(s, SKOS.prefLabel): + unit_cache[o.value] = s + for o in ts.objects(s, SKOS.altLabel): + if o.value not in unit_cache: + unit_cache[o.value] = s + + for r, _, o in ts.triples(predicate=OWL.hasValue): + if ( + ts.has(r, RDF.type, OWL.Restriction) and + ts.has(r, OWL.onProperty,
EMMO.hasSymbolValue) + ): + s = ts.value(predicate=RDFS.subClassOf, object=r) + unit_cache[o.value] = s + + if unit in unit_cache: + return unit_cache[unit] + + raise MissingUnitError(unit) + + +def metadata_to_rdf( + meta: dlite.Metadata, + iri: "Optional[str]" = None, + mappings: "Sequence[Triple]" = (), +) -> "List[Triple]": + """Serialise DLite metadata to RDF. + + Arguments: + meta: Metadata to serialise. + iri: IRI of the dataset in the triplestore. Defaults to `meta.uri`. + mappings: Sequence of mappings of properties to ontological concepts. + + Returns: + A list of RDF triples. Literal objects are encoded in n3 notation. + """ + # Create lookup table + dct = meta.asdict() + + # For adding mappings + maps = defaultdict(list) + for s, p, o in mappings: + uri = str(s).rstrip("/#") + if p == MAP.mapsTo: + name = str(s).split("#", 1)[-1] + prep = RDF.type if name in meta.dimnames() else RDFS.subClassOf + else: + prep = p + maps[uri].append((prep, o)) + + def addmap(uri, iri): + """Add mapping relation to triples.""" + for p, o in maps[uri.rstrip("/#")]: + if p in (RDF.type, RDFS.subClassOf): + triples.append((iri, p, o)) + else: + restriction_iri = f"_:restriction_map_{iri}_{uuid4()}" + triples.extend([ + (iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, p), + (restriction_iri, OWL.someValuesFrom, o), + ]) + + # Dimension descriptions + dim_descr = {d.name: d.description for d in meta.properties['dimensions']} + + # Start populating triples + triples = [] + + # Add datamodel (emmo:DataSet) + if iri is None: + iri = meta.uri + iri = str(iri).rstrip("#/") + triples.extend([ + (iri, RDF.type, OWL.Class), + (iri, RDFS.subClassOf, EMMO.DataSet), + (iri, SKOS.prefLabel, en(title(meta.name))), + (iri, OTEIO.hasURI, Literal(meta.uri, datatype=XSD.anyURI)), + ]) + addmap(meta.uri, iri) + + if "description" in dct: + triples.append((iri, EMMO.elucidation, en(dct["description"]))) + + # Add 
properties (emmo:Datum) + for prop in meta.properties["properties"]: + prop_id = f"{meta.uri}#{prop.name}" + prop_iri = f"{iri}#{prop.name}" + addmap(prop_id, prop_iri) + restriction_iri = f"_:restriction_{prop_iri}" + prop_name = f"{prop.name[0].upper()}{prop.name[1:]}" + triples.extend([ + (iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasDatum), + (restriction_iri, OWL.onClass, prop_iri), + (restriction_iri, OWL.qualifiedCardinality, + Literal(1, datatype=XSD.nonNegativeInteger)), + (prop_iri, RDF.type, OWL.Class), + (prop_iri, RDFS.subClassOf, EMMO.Datum), + (prop_iri, SKOS.prefLabel, en(prop_name)), + ]) + + emmotype, size = dlite2emmotype(prop.type) + if prop.ndims: + restriction_iri = f"_:restriction_type_{prop_iri}" + triples.extend([ + (prop_iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasScalarData), + (restriction_iri, OWL.someValuesFrom, EMMO[emmotype]), + ]) + else: + triples.append((prop_iri, RDFS.subClassOf, EMMO[emmotype])) + if size: + sizeval = Literal(size, datatype=XSD.nonNegativeInteger) + triples.append((prop_iri, OTEIO.datasize, sizeval)) + + if prop.shape.size: + restriction_iri = f"_:restriction_{prop_iri}_shape" + triples.extend([ + (prop_iri, RDFS.subClassOf, EMMO.Array), + (prop_iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasDimension), + ]) + for i, dim in enumerate(prop.shape): + dim_iri = f"{iri}#{prop.name}_dimension{i}" + addmap(f"{meta.uri}#{dim}", dim_iri) + triples.extend([ + (dim_iri, RDF.type, EMMO.Dimension), + (dim_iri, EMMO.hasSymbolValue, + Literal(dim, datatype=XSD.string)), + (dim_iri, EMMO.elucidation, en(dim_descr[dim])), + (dim_iri, SKOS.prefLabel, en(f"{prop.name}_dimension{i}")), + ]) + if i == 0: + triples.append((restriction_iri, OWL.hasValue, dim_iri)) + else: + 
triples.append((source_iri, EMMO.hasNext, dim_iri)) + source_iri = dim_iri + + if prop.unit: + unit_iri = get_unit_iri(prop.unit) + if unit_iri: + restriction_iri = f"_:restriction_{prop_iri}_unit" + triples.extend([ + (prop_iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasMeasurementUnit), + (restriction_iri, OWL.onClass, unit_iri), + (restriction_iri, OWL.qualifiedCardinality, + Literal(1, datatype=XSD.nonNegativeInteger)), + ]) + + if prop.description: + triples.append((prop_iri, EMMO.elucidation, en(prop.description))) + + return triples + + +def add_dataset( + ts: Triplestore, + meta: dlite.Metadata, + iri: "Optional[str]" = None, + mappings: "Sequence[Triple]" = (), +) -> str: + """Save DLite metadata as an EMMO dataset to a triplestore. + + Arguments: + ts: Triplestore to save to. + meta: DLite metadata to save. + iri: IRI of the dataset in the triplestore. Defaults to `meta.uri`. + mappings: Sequence of mappings of properties to ontological concepts. + + Returns: + IRI of the saved dataset. + """ + if not meta.is_meta: + raise TypeError( + f"Expected data model, got instance: {meta.uri or meta.uuid}" + ) + + if iri is None: + iri = meta.uri + iri = str(iri).rstrip("#/") + + ts.add_triples(metadata_to_rdf(meta, iri=iri, mappings=mappings)) + + used_namespaces = {"emmo": EMMO, "oteio": OTEIO} + for prefix, ns in used_namespaces.items(): + if prefix not in ts.namespaces: + ts.bind(prefix, ns) + + return iri + + +def get_dataset( + ts: Triplestore, + iri: str, + uri: "Optional[str]" = None, +) -> "Tuple[dlite.Metadata, List[Triple]]": + """Load dataset from triplestore. + + Arguments: + ts: Triplestore to load from. + iri: IRI of the dataset to load. + uri: URI of the DLite datamodel to load. The default is inferred + from `iri`. + + Returns: + A `(meta, mappings)` tuple, where `meta` is a DLite metadata and + `mappings` is a list of triples.
+ """ + if uri is None: + uri = str(ts.value(iri, OTEIO.hasURI, default=str(iri).rstrip("/#"))) + + emmotypes = {EMMO[v]: v for v in EMMO_TYPES.values()} + + mappings = [] + dimensions = [] + properties = [] + description = "" + datum_iris = [] + + for prop, obj in ts.predicate_objects(iri): + if prop == RDFS.subClassOf: + po = set(ts.predicate_objects(obj)) + if (RDF.type, OWL.Restriction) in po: + d = dict(po) + onprop = d.get(OWL.onProperty) + oncls = d.get(OWL.onClass) + someval = d.get(OWL.someValuesFrom) + if (OWL.onProperty, EMMO.hasDatum) in po: + datum_iris.append(oncls or someval) + elif onprop and oncls: + mappings.append((uri, onprop, oncls)) + elif onprop and someval: + mappings.append((uri, onprop, someval)) + elif obj not in (EMMO.DataSet, ): + mappings.append((uri, MAP.mapsTo, obj)) + elif prop == EMMO.elucidation: + description = str(obj) + + for datum_iri in datum_iris: + label = emmotype = size = None + unit = descr = "" + shape = [] + maps = [] + for pred, obj in ts.predicate_objects(datum_iri): + if pred == SKOS.prefLabel: + label = str(obj) + elif pred == EMMO.elucidation: + descr = str(obj) + elif pred == OTEIO.datasize: + size = int(obj) + elif pred == RDFS.subClassOf: + if obj in emmotypes: + emmotype = emmotypes[obj] + else: + po = dict(ts.predicate_objects(obj)) + if po.get(RDF.type) == OWL.Restriction: + onprop = po.get(OWL.onProperty) + oncls = po.get(OWL.onClass) + onval = po.get(OWL.hasValue) + someval = po.get(OWL.someValuesFrom) + if onprop == EMMO.hasMeasurementUnit: + unit = get_unit_symbol(oncls) + elif onprop == EMMO.hasScalarData: + emmotype = emmotypes[someval] + elif onprop == EMMO.hasDimension: + shape = get_shape( + ts, onval, dimensions, mappings, uri + ) + else: + maps.append((onprop, oncls or someval)) + else: + maps.append((MAP.mapsTo, obj)) + if not label: + raise KBError("missing preferred label on datum:", datum_iri) + if not emmotype: + raise KBError("missing type on datum:", datum_iri) + for pred, obj in maps: + if pred
and obj and obj not in (OWL.Class, EMMO.Datum, EMMO.Array): + mappings.append((f"{uri}#{label}", pred, obj)) + + dlitetype = emmo2dlitetype(emmotype, size) + properties.append(dlite.Property( + name=label, type=dlitetype, shape=shape, unit=unit, description=descr)) + + meta = dlite.Metadata(uri, dimensions, properties, description) + + return meta, mappings + + +def add_data( + ts: Triplestore, + inst: dlite.Instance, + iri: "Optional[str]" = None, + mappings: "Sequence[Triple]" = (), +) -> str: + """Save DLite instance as an EMMO dataset to a triplestore. + + Data instances are represented as individuals of the corresponding + EMMO DataSet. The corresponding metadata is also stored if it does + not already exist in the triplestore. + + Arguments: + ts: Triplestore to save to. + inst: DLite instance to save. + iri: IRI of the dataset in the triplestore. The default is the + metadata IRI followed by a slash and the instance URI or UUID. + mappings: Sequence of mappings of properties to ontological concepts. Only used when `inst` is itself metadata. + + Returns: + IRI of the saved dataset. + """ + if inst.is_meta: + return add_dataset(ts, inst, iri, mappings) + + metairi = ts.value( + predicate=OTEIO.hasURI, + object=Literal(inst.meta.uri, datatype=XSD.anyURI), + ) + if not metairi: + metairi = add_dataset(ts, inst.meta) + + if not iri: + iri = f"{metairi}/{inst.uri or inst.uuid}" + + triples = [] + triples.extend([ + (iri, RDF.type, metairi), + (iri, OTEIO.hasUUID, inst.uuid), + (iri, RDF.value, Literal(inst.asjson(), datatype=RDF.JSON)), + ]) + if inst.uri: + triples.append((iri, OTEIO.hasURI, inst.uri)) + + ts.add_triples(triples) + + used_namespaces = {"oteio": OTEIO} + for prefix, ns in used_namespaces.items(): + if prefix not in ts.namespaces: + ts.bind(prefix, ns) + + return iri + + +def get_data( + ts: Triplestore, + iri: str, +) -> "Tuple[dlite.Instance, List[Triple]]": + """Load data instance from triplestore. + + Arguments: + ts: Triplestore to load from. + iri: IRI of the data instance to load.
+ + Returns: + A `(inst, mappings)` tuple, where `inst` is the loaded DLite + instance and `mappings` is a list of triples. + """ + mappings = [] + + # Bypass the triplestore if the instance is already in cache + try: + return dlite.get_instance(iri, check_storages=False), mappings + except dlite.DLiteMissingInstanceError: + pass + + metairi = ts.value(iri, RDF.type, default=None) + + if not metairi: + # `iri` does not correspond to a data instance, check for metadata + if ts.has(iri, RDFS.subClassOf, EMMO.DataSet): + return get_dataset(ts, iri) + raise KBError( + f"Cannot find neither a data instance nor metadata with IRI: {iri}" + ) + + if not dlite.has_instance(metairi, check_storages=False): + meta, maps = get_dataset(ts, metairi) + mappings.extend(maps) + else: + meta = dlite.get_instance(metairi, check_storages=False) + + jsondata = ts.value(iri, RDF.value) + if not jsondata: + raise KBError(f"cannot find JSON value for IRI: {iri}") + + inst = dlite.Instance.from_json(str(jsondata)) + + return inst, mappings diff --git a/bindings/python/dlite-type.i b/bindings/python/dlite-type.i index 794740034..15327557b 100644 --- a/bindings/python/dlite-type.i +++ b/bindings/python/dlite-type.i @@ -11,9 +11,11 @@ char *to_typename(int type, int size) { char *s; - if (size < 0) return dlite_err(1, "size must be non-negative"), NULL; if (!(s = malloc(16))) return NULL; - if (dlite_type_set_typename(type, size, s, 16)) { + if (size < 0) { + free(s); /* release the 16-byte buffer before strdup() replaces it */ + s = strdup(dlite_type_get_dtypename(type)); + } else if (dlite_type_set_typename(type, size, s, 16)) { free(s); return NULL; } @@ -43,10 +45,27 @@ enum _DLiteType { %apply int *OUTPUT { int *type, int *size }; +%feature( + "docstring", + "Returns type number and size from given type name."
+) from_typename; status_t from_typename(const char *typename, int *type, int *size); %newobject to_typename; -char *to_typename(int type, int size); +%feature( + "docstring", + "Returns type name for given type number and size. " + "If `size` is negative, only the name of `type` is returned." +) to_typename; +char *to_typename(int type, int size=-1); + +%feature( + "docstring", + "Returns DLite type number corresponding to `dtypename`." +) dlite_type_get_dtype; +%rename(to_typenumber) dlite_type_get_dtype; +int dlite_type_get_dtype(const char *dtypename); + %rename(get_alignment) dlite_type_get_alignment; size_t dlite_type_get_alignment(int type, size_t size); diff --git a/bindings/python/tests/CMakeLists.txt b/bindings/python/tests/CMakeLists.txt index a9746189f..cbeaeb69a 100644 --- a/bindings/python/tests/CMakeLists.txt +++ b/bindings/python/tests/CMakeLists.txt @@ -25,6 +25,8 @@ set(tests test_pydantic test_relation test_iri + test_dataset1_save + test_dataset2_load ) foreach(test ${tests}) diff --git a/bindings/python/tests/entities/FluidData.json b/bindings/python/tests/entities/FluidData.json new file mode 100644 index 000000000..fcbb29841 --- /dev/null +++ b/bindings/python/tests/entities/FluidData.json @@ -0,0 +1,20 @@ +{ + "uri": "http://onto-ns.org/meta/dlite/0.1/FluidData", + "description": "A dataset describing a fluid.", + "dimensions": { + "ntimes": "The number of times the measurements are performed.", + "npositions": "The number of positions the measurements are performed." + }, + "properties": { + "LJPotential": { + "type": "string", + "description": "Reference to Lennart-Jones potential." + }, + "TemperatureField": { + "type": "float64", + "shape": ["ntimes", "npositions"], + "unit": "°C", + "description": "Array of measured temperatures."
+ } + } +} diff --git a/bindings/python/tests/test_dataset1_save.py b/bindings/python/tests/test_dataset1_save.py new file mode 100644 index 000000000..6862cd698 --- /dev/null +++ b/bindings/python/tests/test_dataset1_save.py @@ -0,0 +1,109 @@ +from pathlib import Path + +try: + from tripper import DCTERMS, MAP, OWL, RDF, RDFS, XSD, Triplestore + from tripper.utils import en +except ModuleNotFoundError: + import sys + sys.exit(44) + +import dlite +from dlite.dataset import add_dataset, add_data +from dlite.dataset import EMMO, EMMO_VERSIONIRI +from dlite.testutils import raises + + +thisdir = Path(__file__).absolute().parent +outdir = thisdir / "output" +indir = thisdir / "input" +entitydir = thisdir / "entities" +dlite.storage_path.append(entitydir / "*.json") +dlite.storage_path.append(indir / "*.json") + + +# Test help functions +# =================== +from dlite.dataset import MissingUnitError, get_unit_iri + +assert get_unit_iri("Kelvin") == "https://w3id.org/emmo#Kelvin" +assert get_unit_iri("K") == "https://w3id.org/emmo#Kelvin" +assert get_unit_iri("°C") == "https://w3id.org/emmo#DegreeCelsius" +assert get_unit_iri("m/s") == "https://w3id.org/emmo#MetrePerSecond" + +with raises(MissingUnitError): + get_unit_iri("Atom") + +with raises(MissingUnitError): + # Because prefixed units are not in EMMO by default + # They can be including by importing https://w3id.org/emmo/1.0.0-rc1/disciplines/units/prefixedunits + get_unit_iri("cm") + + +# To be fixed in issue https://github.com/SINTEF/dlite/issues/878 +#from dlite.dataset import TS_EMMO +#TS_EMMO.parse("https://w3id.org/emmo/1.0.0-rc1/disciplines/units/prefixedunits", format="turtle") +#assert get_unit_iri("mm") == "https://w3id.org/emmo#MilliMetre" + + + +# Test serialising Metadata as an EMMO dataset +# ============================================ +Fluid = dlite.get_instance("http://onto-ns.org/meta/dlite/0.1/FluidData") + +assert Fluid.get_hash() == ( + 
'4739a3820ced457d07447c8916112021a0fbda9cbc97758e40b67369e34c00b4' +) + +ts = Triplestore(backend="rdflib") +EX = ts.bind("ex", "https://w3id.org/emmo/application/ex/0.2/") +FLUID = ts.bind("fluid", "http://onto-ns.org/meta/dlite/0.1/FluidData#") + +mappings = [ + (FLUID, EMMO.isDescriptionFor, EMMO.Fluid), + (FLUID.LJPotential, MAP.mapsTo, EMMO.String), + (FLUID.LJPotential, EMMO.isDescriptionFor, EMMO.MolecularEntity), + (FLUID.TemperatureField, MAP.mapsTo, EMMO.ThermodynamicTemperature), + (FLUID.ntimes, MAP.mapsTo, EMMO.Time), + (FLUID.npositions, MAP.mapsTo, EMMO.Position), +] +#add_dataset(ts, chem.meta, base_iri=base_iri) +add_dataset(ts, Fluid, iri=EX.FluidData, mappings=mappings) + + +# Test serialising data instances to KB +# ===================================== + +# Create instances +fluid1 = Fluid(dimensions={"ntimes":2, "npositions": 3}, id="fluid1") +fluid1.LJPotential = "WaterPot" +fluid1.TemperatureField = [[20., 24., 28.], [22, 26, 29]] + +uuid2 = dlite.get_uuid("fluid2") # just to ensure persistent uuid... 
+fluid2 = Fluid(dimensions={"ntimes":2, "npositions": 4}, id=uuid2) +fluid2.LJPotential = "AcetonePot" +fluid2.TemperatureField = [[20., 24., 28., 32.], [22, 26, 30, 34]] + +assert fluid1.get_hash() == ( + "412b7387f8c13c9d1aaa65ca21d59957be5635b41c7c3851b268de508817f7f8" +) +assert fluid2.get_hash() == ( + "c4289ff03f880526fc0f87038302673e44101c2b648be2c57a4db84fe6779f67" +) + +add_data(ts, fluid1) +add_data(ts, fluid2) + + + +# Add ontology and save to file +# ============================= + +# Make our ex: namespace an EMMO application ontology in the triplestore +iri = str(EX).rstrip("/#") +ts.add_triples([ + (iri, RDF.type, OWL.Ontology), + (iri, DCTERMS.title, en("Test application ontology with a dataset.")), + (iri, OWL.imports, EMMO_VERSIONIRI), +]) + +ts.serialize(outdir / "dataset.ttl") diff --git a/bindings/python/tests/test_dataset2_load.py b/bindings/python/tests/test_dataset2_load.py new file mode 100644 index 000000000..7ff879944 --- /dev/null +++ b/bindings/python/tests/test_dataset2_load.py @@ -0,0 +1,74 @@ +from pathlib import Path + +try: + from tripper import MAP, Triplestore +except ModuleNotFoundError: + import sys + sys.exit(44) + +import dlite +from dlite.dataset import EMMO, get_dataset, get_data +from dlite.testutils import raises + + +thisdir = Path(__file__).absolute().parent +outdir = thisdir / "output" +indir = thisdir / "input" +entitydir = thisdir / "entities" + + +# Test load Metadata from triplestore +# =================================== + +ts = Triplestore(backend="rdflib") +ts.parse(outdir / "dataset.ttl") +EX = ts.namespaces["ex"] +FLUID = ts.bind("fluid", "http://onto-ns.org/meta/dlite/0.1/FluidData#") + +Fluid, mappings = get_dataset(ts, iri=EX.FluidData) + +# Check that the loaded datamodel looks as expected +assert Fluid.uri == str(FLUID).rstrip("#") +assert Fluid.dimnames() == ["ntimes", "npositions"] +assert len(Fluid.props) == 2 +assert Fluid.props["TemperatureField"].unit == "°C" + +# Check that we get the exact same 
hash as in test_dataset1_save.py +assert Fluid.get_hash() == ( + '4739a3820ced457d07447c8916112021a0fbda9cbc97758e40b67369e34c00b4' +) + +# Check that we get the exact same mappings as provided +assert set(mappings) == { + (Fluid.uri, EMMO.isDescriptionFor, EMMO.Fluid), + (FLUID.LJPotential, MAP.mapsTo, EMMO.String), + (FLUID.LJPotential, EMMO.isDescriptionFor, EMMO.MolecularEntity), + (FLUID.TemperatureField, MAP.mapsTo, EMMO.ThermodynamicTemperature), + (FLUID.ntimes, MAP.mapsTo, EMMO.Time), + (FLUID.npositions, MAP.mapsTo, EMMO.Position), +} + + +# Test load data instances from triplestore +# ========================================= + +uuid2 = dlite.get_uuid("fluid2") # persistent uuid... +fluid1, mappings1 = get_data(ts, iri=f"{EX.FluidData}/fluid1") +fluid2, mappings2 = get_data(ts, iri=f"{EX.FluidData}/{uuid2}") + +print("---------") +print(fluid1) + +assert fluid1.meta == Fluid +assert fluid1.uri == "fluid1" +assert fluid2.meta == Fluid +assert fluid2.uri is None + +# Check that the instances have the exact same hash values as +# when they were created +assert fluid1.get_hash() == ( + "412b7387f8c13c9d1aaa65ca21d59957be5635b41c7c3851b268de508817f7f8" +) +assert fluid2.get_hash() == ( + "c4289ff03f880526fc0f87038302673e44101c2b648be2c57a4db84fe6779f67" +) diff --git a/doc/_static/dataset-v2.svg b/doc/_static/dataset-v2.svg new file mode 100644 index 000000000..96ff13340 --- /dev/null +++ b/doc/_static/dataset-v2.svg @@ -0,0 +1,4 @@ + + + +
Description of the semantic
meaning of array dimensions
Description of the semantic...
emmo:DataSet
emmo:DataSet
emmo:Material
emmo:Material
emmo:Fluid
emmo:Fluid
isDescriptionFor some
isDescriptionFor some
_:MyFluidDataSet
_:MyFluidDataSet
_:TemperatureField
_:TemperatureField
_:LJPotential
_:LJPotential
hasDatum exactly 1
hasDatum exactly 1
hasDatum exactly 1
hasDatum exactly 1
emmo:Datum
emmo:Datum
emmo:MolecularEntity
emmo:MolecularEntity
isConventionalFor some
isConventionalFor some
LJDataURI
LJDataURI
LJPotentialData
LJPotentialData
emmo:StringData
emmo:StringData
isDescriptionFor some
isDescriptionFor some
emmo:DataSet
emmo:DataSet
...
...
prefLabel
prefLabel
elucidation
elucidation
LJPotential
LJPotential
emmo:Temperature
emmo:Temperature
emmo:Array
emmo:Array
emmo:Kelvin
emmo:Kelvin
The temperature of a fluid measured at a set of times and positions.
The temperature of a...
prefLabel
prefLabel
elucidation
elucidation
TemperatureField
TemperatureField
hasMeasurementUnit exactly 1
hasMeasurementUnit exactly 1
...
...
hasDimension value
hasDimension value
hasNext
hasNext
_:dimension1
_:dimension1
_:dimension2
_:dimension2
elucidation
elucidation
...
...
elucidation
elucidation
emmo:Dimension
emmo:Dimension
emmo:Position
emmo:Position
emmo:Time
emmo:Time
FluidData
FluidData
prefLabel
prefLabel
elucidation
elucidation
...
...
...
...
prefLabel
prefLabel
...
...
prefLabel
prefLabel
Legend
Legend
Generated individual
Generated individual
Generated annotation
Generated annotation
Generated class (blank node)
Generated class (bla...
Existing class
Existing class
emmo:Sign
emmo:Sign
emmo:Symbolic
emmo:Symbolic
emmo:DoubleData
emmo:DoubleData
hasScalarData some
hasScalarData some
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/doc/_static/dataset.svg b/doc/_static/dataset.svg new file mode 100644 index 000000000..52017f4ef --- /dev/null +++ b/doc/_static/dataset.svg @@ -0,0 +1,4 @@ + + + +
DataSet
DataSet
LJPotentialData
LJPotentialData
MolecularEntity
MolecularEntity
LJDataUri
LJDataUri
String
String
...
...
Temperature
Temperature
Array
Array
Property
Property
Kelvin
Kelvin
...
...
TemperatureFieldShape
TemperatureFieldShape
emmo:Shape
emmo:Shape
isConventionalFor some
isConventionalFor some
Datum
Datum
prefLabel
prefLabel
LJPotential
LJPotential
prefLabel
prefLabel
TemperatureField
TemperatureField
hasMeasurementUnit exactly 1
hasMeasurementUnit exactly 1
elucidation
elucidation
hasShape exactly 1
hasShape exactly 1
elucidation
elucidation
isDescriptionFor some
isDescriptionFor some
FluidDataSet
FluidDataSet
emmo:Fluid
emmo:Fluid
emmo:Material
emmo:Material
hasDatum exactly 1
hasDatum exactly 1
hasDatum exactly 1
hasDatum exactly 1
emmo:DataSet
emmo:DataSet
isDescriptionFor some
isDescriptionFor some
emmo:Description
emmo:Description
"descr1..."@en
"descr1..."@en
...
...
elucidation
elucidation
emmo:Sequence
emmo:Sequence
emmo:Dimension
emmo:Dimension
hasBeginTile value
hasBeginTile value
hasNext
hasNext
_:dimension1
_:dimension1
_:dimension2
_:dimension2
elucidation
elucidation
"descr1..."@en
"descr1..."@en
emmo:Variable
emmo:Variable
emmo:Symbol
emmo:Symbol
"nx"^^xsd:string
"nx"^^xsd:string
hasSymbolValue
hasSymbolValue
"ny"^^xsd:string
"ny"^^xsd:string
hasSymbolValue
hasSymbolValue
"LJPotential"@en
"LJPotential"@en
"TemperatureField"@en
"TemperatureField"@en
elucidation
elucidation
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/examples/TEM_data/requirements.txt b/examples/TEM_data/requirements.txt index 2694c6710..caaef90ea 100644 --- a/examples/TEM_data/requirements.txt +++ b/examples/TEM_data/requirements.txt @@ -1,5 +1,6 @@ dlite-python>=0.4.1,<1 -tripper>=0.2.7,<1 +#tripper>=0.2.16,<1 +tripper @ git+https://github.com/EMMC-ASBL/tripper.git@fix-for-rdflib-literals rdflib>=6,<8 #oteapi-dlite>=0.1.5,<1 -e git+https://github.com/EMMC-ASBL/oteapi-dlite.git@fa1b820383eb54a1c37f32f7b8ac9406b556dace#egg=oteapi_dlite diff --git a/examples/dataset/README.md b/examples/dataset/README.md new file mode 100644 index 000000000..29611ee82 --- /dev/null +++ b/examples/dataset/README.md @@ -0,0 +1,49 @@ +Representing DLite datamodels as EMMO Datasets +============================================== +The intention of this example is to show how to use the +`dlite.dataset` module to serialise and deserialise DLite datamodels +and instances to and from an EMMO-based RDF representation. + +![EMMO-based representation of a datamodel.](https://raw.githubusercontent.com/SINTEF/dlite/652-serialise-data-models-to-tbox/doc/_static/dataset-v2.svg) + +The figure above shows how the following simple [`FluidData`] +datamodel is represented with EMMO. + +```yaml +uri: http://onto-ns.org/meta/dlite/0.1/FluidData +meta: http://onto-ns.com/meta/0.3/EntitySchema +description: A dataset describing a fluid. +dimensions: + ntimes: The number of times the measurements are performed. + npositions: The number of positions the measurements are performed. +properties: + LJPotential: + type: string + description: Reference to Lennart-Jones potential. + TemperatureField: + type: float64 + shape: [ntimes, npositions] + unit: "°C" + description: Array of measured temperatures.
+``` + +The datamodel is semantically enhanced using the following mappings: + ```python + mappings = [ + (FLUID, EMMO.isDescriptionFor, EMMO.Fluid), + (FLUID.LJPotential, MAP.mapsTo, EMMO.String), + (FLUID.LJPotential, EMMO.isDescriptionFor, EMMO.MolecularEntity), + (FLUID.TemperatureField, MAP.mapsTo, EMMO.ThermodynamicTemperature), + (FLUID.ntimes, MAP.mapsTo, EMMO.Time), + (FLUID.npositions, MAP.mapsTo, EMMO.Position), + ] + ``` + +Some comments: +- Note the use of `emmo:isDescriptionFor` relations in the mappings. They keep their predicate and are stored as `owl:someValuesFrom` restrictions in the triplestore. + +- The `map:mapsTo` relations are translated to `rdfs:subClassOf` (or `rdf:type` for dimensions) when serialised to the triplestore. + + + +[`FluidData`]: https://github.com/SINTEF/dlite/blob/652-serialise-data-models-to-tbox/examples/dataset/datamodels/FluidData.json diff --git a/examples/dataset/data/.gitignore b/examples/dataset/data/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/examples/dataset/datamodels/FluidData.json b/examples/dataset/datamodels/FluidData.json new file mode 100644 index 000000000..fcbb29841 --- /dev/null +++ b/examples/dataset/datamodels/FluidData.json @@ -0,0 +1,20 @@ +{ + "uri": "http://onto-ns.org/meta/dlite/0.1/FluidData", + "description": "A dataset describing a fluid.", + "dimensions": { + "ntimes": "The number of times the measurements are performed.", + "npositions": "The number of positions the measurements are performed." + }, + "properties": { + "LJPotential": { + "type": "string", + "description": "Reference to Lennart-Jones potential." + }, + "TemperatureField": { + "type": "float64", + "shape": ["ntimes", "npositions"], + "unit": "°C", + "description": "Array of measured temperatures."
+ } + } +} diff --git a/requirements_full.txt b/requirements_full.txt index 98cdb8504..c8a4e2fed 100644 --- a/requirements_full.txt +++ b/requirements_full.txt @@ -8,7 +8,9 @@ tables>=3.8,<5.0 rdflib>=4.2.1,<8 pint>=0.15,<1 pymongo>=4.4.0,<5 -tripper>=0.2.16,<0.3.1 + +#tripper>=0.2.16,<0.3.1 +tripper @ git+https://github.com/EMMC-ASBL/tripper.git@master requests>=2.10,<3 jinja2>=3.0,<4 diff --git a/requirements_mappings.txt b/requirements_mappings.txt index 8bae142f6..10c485a2b 100644 --- a/requirements_mappings.txt +++ b/requirements_mappings.txt @@ -1,5 +1,6 @@ # Requirements for property mappings -tripper>=0.2.16,<0.3.1 +#tripper>=0.2.16,<0.3.1 +tripper @ git+https://github.com/EMMC-ASBL/tripper.git@master pint>=0.15,<1