diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 54d8e3811..8a5f81312 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -8,6 +8,7 @@ set(py_sources mappings.py datamodel.py rdf.py + dataset.py quantity.py testutils.py ) diff --git a/bindings/python/dataset.py b/bindings/python/dataset.py new file mode 100644 index 000000000..0ee2ef51d --- /dev/null +++ b/bindings/python/dataset.py @@ -0,0 +1,576 @@ +"""Module for representing DLite data models and instances with rdflib. + +DLite data models are represented as EMMO datasets. + +NOTE: This module depends on Tripper. +""" +import json +import re +import warnings +from collections import defaultdict +from typing import TYPE_CHECKING +from uuid import uuid4 + +from tripper import Literal, Namespace, Triplestore +from tripper import MAP, OTEIO, OWL, RDF, RDFS, SKOS, XSD +from tripper.utils import en +from tripper.errors import NoSuchIRIError + +import dlite + +if TYPE_CHECKING: # pragma: no cover + from typings import List, Optional, Sequence, Tuple + + # A triple with literal objects in n3 notation + Triple = Sequence[str, str, str] + + +# XXX TODO - Make a local cache of EMMO such that we only download it once +TS_EMMO = Triplestore("rdflib") +TS_EMMO.parse("https://w3id.org/emmo/1.0.0-rc1") + +EMMO_VERSIONIRI = TS_EMMO.value("https://w3id.org/emmo", OWL.versionIRI) + +EMMO = Namespace( + iri="https://w3id.org/emmo#", + label_annotations=True, + check=True, + triplestore=TS_EMMO, +) + +EMMO_TYPES = { + "blob": "BinaryData", + "bool": "BooleanData", + "int": "IntegerData", + "int8": "ByteData", + "int16": "ShortData", + "int32": "IntData", + "int64": "LongData", + "uint": "NonNegativeIntegerData", + "uint8": "UnsignedByteData", + "uint16": "UnsignedShortData", + "uint32": "UnsignedIntData", + "uint64": "UnsignedLongData", + "float": "FloatingPointData", + "float32": "FloatData", + "float64": "DoubleData", + "string": "StringData", + "ref": "DataSet", + #"dimension": "Dimension", + #"property": "Datum", + #"relation": NotImplemented, +} + +# Maps unit names to IRIs +unit_cache = {} + + +class MissingUnitError(ValueError): + "Unit not found in ontology." + +class UnsupportedTypeError(TypeError, NotImplementedError): + "The given type is not supported." + +class KBError(ValueError): + "Missing or inconsistent data in knowledge base." 
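+
+# Example of how the EMMO namespace above and the dlite2emmotype()/
+# emmo2dlitetype() helpers defined below fit together (illustrative only;
+# the exact EMMO IRI is elided):
+#
+#     >>> from dlite.dataset import EMMO, dlite2emmotype, emmo2dlitetype
+#     >>> EMMO.DataSet                     # labels resolve to full EMMO IRIs
+#     'https://w3id.org/emmo#EMMO_...'
+#     >>> dlite2emmotype("float64")        # DLite type -> (EMMO label, size)
+#     ('DoubleData', 8.0)
+#     >>> emmo2dlitetype("DoubleData", 8)  # ...and back again
+#     'float64'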
+ + +def _string(s): + """Return `s` as a literal string.""" + return Literal(s, datatype="xsd:string") + + +def title(s): + """Capitalise first letter in `s`.""" + return s[0].upper() + s[1:] + + +def dlite2emmotype(dlitetype): + """Convert a DLite type string to corresponding EMMO class label.""" + dtype, ssize = re.match("([a-zA-Z]+)([0-9]*)", dlitetype).groups() + size = int(ssize) if ssize else None + if size and dtype in ("int", "uint", "float"): + size /= 8 + if dlitetype in EMMO_TYPES: + emmotype = EMMO_TYPES[dlitetype] + elif dtype in EMMO_TYPES: + emmotype = EMMO_TYPES[dtype] + else: + raise UnsupportedTypeError(dlitetype) + return emmotype, size + + +def emmo2dlitetype(emmotype, size=None): + """Convert EMMO type and size to dlite type.""" + dlitetypes = [k for k, v in EMMO_TYPES.items() if v == emmotype] + if not dlitetypes: + raise UnsupportedTypeError(emmotype) + dlitetype, = dlitetypes + typeno = dlite.to_typenumber(dlitetype.rstrip("0123456789")) + if size: + return dlite.to_typename(typeno, int(size)) + return dlite.to_typename(typeno) + + +def get_shape(ts, dimiri, dimensions=None, mappings=None, uri=None): + """Returns a shape list for a datum who's first dimension is `dimiri`. + + If `dimensions` is given, it should be a list that will be updated + with new dimensions. + + If `mappings` and `uri` are given, then `mappings` should be a + list that will be updated with new mappings. `uri` should be the + URI of the data model. + """ + shape = [] + while dimiri: + mapsto = [] + next = label = descr = None + for pred, obj in ts.predicate_objects(dimiri): + if pred == EMMO.hasNext: + next = obj + elif pred == EMMO.hasSymbolValue: + label = str(obj) + elif pred == EMMO.elucidation: + descr = str(obj) + elif pred == RDF.type and obj not in (EMMO.Dimension,): + mapsto.append(obj) + if not label: + raise KBError("dimension has no prefLabel:", dimiri) + if dimensions is not None: + if not descr: + raise KBError("dimension has no elucidation:", dimiri) + dimensions.append(dlite.Dimension(label, descr)) + shape.append(label) + if mappings and uri: + for obj in mapsto: + mappings.append((f"{uri}#{label}", MAP.mapsTo, obj)) + dimiri = next + return shape + + +def dimensional_string(unit_iri): + """Return the inferred dimensional string of the given unit IRI. 
Returns + None if no dimensional string can be inferred.""" + raise NotImplementedError() + + +def get_unit_symbol(iri): + """Return the unit symbol for .""" + symbol = TS_EMMO.value(iri, EMMO.unitSymbol) + if symbol: + return str(symbol) + for r in TS_EMMO.restrictions(iri, EMMO.hasSymbolValue, type="value"): + symbol = r["value"] + if symbol: + return str(symbol) + raise KBError("No symbol value is defined for unit:", iri) + + +def get_unit_iri(unit): + """Returns the IRI for the given unit.""" + if not unit_cache: + ts = TS_EMMO + for predicate in (EMMO.unitSymbol, EMMO.ucumCode, EMMO.uneceCommonCode): + for s, _, o in ts.triples(predicate=predicate): + if o.value in unit_cache and predicate == EMMO.unitSymbol: + warnings.warn( + f"more than one unit with symbol '{o.value}': " + f"{unit_cache[o.value]}" + ) + else: + unit_cache[o.value] = s + for o in ts.objects(s, SKOS.prefLabel): + unit_cache[o.value] = s + for o in ts.objects(s, SKOS.altLabel): + if o.value not in unit_cache: + unit_cache[o.value] = s + + for r, _, o in ts.triples(predicate=OWL.hasValue): + if ( + ts.has(r, RDF.type, OWL.Restriction) and + ts.has(r, OWL.onProperty, EMMO.hasSymbolValue) + ): + s = ts.value(predicate=RDFS.subClassOf, object=r) + unit_cache[o.value] = s + + if unit in unit_cache: + return unit_cache[unit] + + raise MissingUnitError(unit) + + +def metadata_to_rdf( + meta: dlite.Metadata, + iri: "Optional[str]" = None, + mappings: "Sequence[Triple]" = (), +) -> "List[Triple]": + """Serialise DLite metadata to RDF. + + Arguments: + meta: Metadata to serialise. + iri: IRI of the dataset in the triplestore. Defaults to `meta.uri`. + mappings: Sequence of mappings of properties to ontological concepts. + + Returns: + A list of RDF triples. Literal objects are encoded in n3 notation. 
+ """ + # Create lookup table + dct = meta.asdict() + + # For adding mappings + maps = defaultdict(list) + for s, p, o in mappings: + uri = str(s).rstrip("/#") + if p == MAP.mapsTo: + name = str(s).split("#", 1)[-1] + prep = RDF.type if name in meta.dimnames() else RDFS.subClassOf + else: + prep = p + maps[uri].append((prep, o)) + + def addmap(uri, iri): + """Add mapping relation to triples.""" + for p, o in maps[uri.rstrip("/#")]: + if p in (RDF.type, RDFS.subClassOf): + triples.append((iri, p, o)) + else: + restriction_iri = f"_:restriction_map_{iri}_{uuid4()}" + triples.extend([ + (iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, p), + (restriction_iri, OWL.someValuesFrom, o), + ]) + + # Dimension descriptions + dim_descr = {d.name: d.description for d in meta.properties['dimensions']} + + # Start populating triples + triples = [] + + # Add datamodel (emmo:DataSet) + if iri is None: + iri = meta.uri + iri = str(iri).rstrip("#/") + triples.extend([ + (iri, RDF.type, OWL.Class), + (iri, RDFS.subClassOf, EMMO.DataSet), + (iri, SKOS.prefLabel, en(title(meta.name))), + (iri, OTEIO.hasURI, Literal(meta.uri, datatype=XSD.anyURI)), + ]) + addmap(meta.uri, iri) + + if "description" in dct: + triples.append((iri, EMMO.elucidation, en(dct["description"]))) + + # Add properties (emmo:Datum) + for prop in meta.properties["properties"]: + prop_id = f"{meta.uri}#{prop.name}" + prop_iri = f"{iri}#{prop.name}" + addmap(prop_id, prop_iri) + restriction_iri = f"_:restriction_{prop_iri}" + prop_name = f"{prop.name[0].upper()}{prop.name[1:]}" + triples.extend([ + (iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasDatum), + (restriction_iri, OWL.onClass, prop_iri), + (restriction_iri, OWL.qualifiedCardinality, + Literal(1, datatype=XSD.nonNegativeInteger)), + (prop_iri, RDF.type, OWL.Class), + (prop_iri, RDFS.subClassOf, EMMO.Datum), + (prop_iri, SKOS.prefLabel, en(prop_name)), + ]) + + emmotype, size = dlite2emmotype(prop.type) + if prop.ndims: + restriction_iri = f"_:restriction_type_{prop_iri}" + triples.extend([ + (prop_iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasScalarData), + (restriction_iri, OWL.someValuesFrom, EMMO[emmotype]), + ]) + else: + triples.append((prop_iri, RDFS.subClassOf, EMMO[emmotype])) + if size: + sizeval = Literal(size, datatype=XSD.nonNegativeInteger) + triples.append((prop_iri, OTEIO.datasize, sizeval)) + + if prop.shape.size: + restriction_iri = f"_:restriction_{prop_iri}_shape" + triples.extend([ + (prop_iri, RDFS.subClassOf, EMMO.Array), + (prop_iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasDimension), + ]) + for i, dim in enumerate(prop.shape): + dim_iri = f"{iri}#{prop.name}_dimension{i}" + addmap(f"{meta.uri}#{dim}", dim_iri) + triples.extend([ + (dim_iri, RDF.type, EMMO.Dimension), + (dim_iri, EMMO.hasSymbolValue, + Literal(dim, datatype=XSD.string)), + (dim_iri, EMMO.elucidation, en(dim_descr[dim])), + (dim_iri, SKOS.prefLabel, en(f"{prop.name}_dimension{i}")), + ]) + if i == 0: + triples.append((restriction_iri, OWL.hasValue, dim_iri)) + else: + triples.append((source_iri, EMMO.hasNext, dim_iri)) + source_iri = dim_iri + + if prop.unit: + unit_iri = get_unit_iri(prop.unit) + if unit_iri: + restriction_iri = f"_:restriction_{prop_iri}_unit" + 
triples.extend([ + (prop_iri, RDFS.subClassOf, restriction_iri), + (restriction_iri, RDF.type, OWL.Restriction), + (restriction_iri, OWL.onProperty, EMMO.hasMeasurementUnit), + (restriction_iri, OWL.onClass, unit_iri), + (restriction_iri, OWL.qualifiedCardinality, + Literal(1, datatype=XSD.nonNegativeInteger)), + ]) + + if prop.description: + triples.append((prop_iri, EMMO.elucidation, en(prop.description))) + + return triples + + +def add_dataset( + ts: Triplestore, + meta: dlite.Metadata, + iri: "Optional[str]" = None, + mappings: "Sequence[Triple]" = (), +) -> str: + """Save DLite metadata as an EMMO dataset to a triplestore. + + Arguments: + ts: Triplestore to save to. + meta: DLite metadata to save. + iri: IRI of the dataset in the triplestore. Defaults to `meta.uri`. + mappings: Sequence of mappings of properties to ontological concepts. + + Returns: + IRI of the saved dataset. + """ + if not meta.is_meta: + raise TypeError( + "Expected data model, got instance: {meta.uri or meta.uuid}" + ) + + if iri is None: + iri = meta.uri + iri = str(iri).rstrip("#/") + + ts.add_triples(metadata_to_rdf(meta, iri=iri, mappings=mappings)) + + used_namespaces = {"emmo": EMMO, "oteio": OTEIO} + for prefix, ns in used_namespaces.items(): + if prefix not in ts.namespaces: + ts.bind(prefix, ns) + + return iri + + +def get_dataset( + ts: Triplestore, + iri: str, + uri: "Optional[str]" = None, +) -> "Tuple[dlite.Metadata, List[Triple]]": + """Load dataset from triplestore. + + Arguments: + ts: Triplestore to load from. + iri: IRI of the dataset to load. + uri: URI of the DLite datamodel to load. The defaults is inferred + from `iri`. + + Returns: + A `(meta, mappings)` tuple, where `meta` is a DLite metadata and + `mappings` is a list of triples. + """ + if uri is None: + uri = str(ts.value(iri, OTEIO.hasURI, default=str(iri).rstrip("/#"))) + + emmotypes = {EMMO[v]: v for v in EMMO_TYPES.values()} + + mappings = [] + dimensions = [] + properties = [] + description = "" + datum_iris = [] + + for prop, obj in ts.predicate_objects(iri): + if prop == RDFS.subClassOf: + po = set(ts.predicate_objects(obj)) + if (RDF.type, OWL.Restriction) in po: + d = dict(po) + onprop = d.get(OWL.onProperty) + oncls = d.get(OWL.onClass) + someval = d.get(OWL.someValuesFrom) + if (OWL.onProperty, EMMO.hasDatum) in po: + datum_iris.append(oncls or someval) + elif onprop and oncls: + mappings.append((uri, onprop, oncls)) + elif onprop and someval: + mappings.append((uri, onprop, someval)) + elif obj not in (EMMO.DataSet, ): + mappings.append((uri, MAP.mapsTo, obj)) + elif prop == EMMO.elucidation: + description = str(obj) + + for datum_iri in datum_iris: + label = emmotype = size = None + unit = descr = "" + shape = [] + maps = [] + for pred, obj in ts.predicate_objects(datum_iri): + if pred == SKOS.prefLabel: + label = str(obj) + elif pred == EMMO.elucidation: + descr = str(obj) + elif pred == OTEIO.datasize: + size = int(obj) + elif RDFS.subClassOf: + if obj in emmotypes: + emmotype = emmotypes[obj] + else: + po = dict(ts.predicate_objects(obj)) + if po.get(RDF.type) == OWL.Restriction: + onprop = po.get(OWL.onProperty) + oncls = po.get(OWL.onClass) + onval = po.get(OWL.hasValue) + someval = po.get(OWL.someValuesFrom) + if onprop == EMMO.hasMeasurementUnit: + unit = get_unit_symbol(oncls) + elif onprop == EMMO.hasScalarData: + emmotype = emmotypes[someval] + elif onprop == EMMO.hasDimension: + shape = get_shape( + ts, onval, dimensions, mappings, uri + ) + else: + maps.append((onprop, oncls or someval)) + else: + 
maps.append((MAP.mapsTo, obj)) + if not label: + raise KBError("missing preferred label on datum:", datum_iri) + if not emmotype: + raise KBError("missing type on datum:", datum_iri) + for pred, obj in maps: + if pred and obj and obj not in (OWL.Class, EMMO.Datum, EMMO.Array): + mappings.append((f"{uri}#{label}", pred, obj)) + + dlitetype = emmo2dlitetype(emmotype, size) + properties.append(dlite.Property( + name=label, type=dlitetype, shape=shape, unit=unit, description=descr)) + + meta = dlite.Metadata(uri, dimensions, properties, description) + + return meta, mappings + + +def add_data( + ts: Triplestore, + inst: dlite.Instance, + iri: "Optional[str]" = None, + mappings: "Sequence[Triple]" = (), +) -> str: + """Save DLite instance as an EMMO dataset to a triplestore. + + Data instances are represented as individuals of the corresponding + EMMO DataSet. The corresponding metadata is also stored if it not + already exists in the triplestore. + + Arguments: + ts: Triplestore to save to. + inst: DLite instance to save. + iri: IRI of the dataset in the triplestore. The default is the + metadata IRI prepended with a slash and the UUID. + mappings: Sequence of mappings of properties to ontological concepts. + + Returns: + IRI of the saved dataset. + """ + if inst.is_meta: + return add_dataset(ts, inst, iri, mappings) + + metairi = ts.value( + predicate=OTEIO.hasURI, + object=Literal(inst.meta.uri, datatype=XSD.anyURI), + ) + if not metairi: + metairi = add_dataset(ts, inst.meta) + + if not iri: + iri = f"{metairi}/{inst.uri or inst.uuid}" + + triples = [] + triples.extend([ + (iri, RDF.type, metairi), + (iri, OTEIO.hasUUID, inst.uuid), + (iri, RDF.value, Literal(inst.asjson(), datatype=RDF.JSON)), + ]) + if inst.uri: + triples.append((iri, OTEIO.hasURI, inst.uri)) + + ts.add_triples(triples) + + used_namespaces = {"oteio": OTEIO} + for prefix, ns in used_namespaces.items(): + if prefix not in ts.namespaces: + ts.bind(prefix, ns) + + return iri + + +def get_data( + ts: Triplestore, + iri: str, +) -> "Tuple[dlite.Metadata, List[Triple]]": + """Load dataset from triplestore. + + Arguments: + ts: Triplestore to load from. + iri: IRI of the dataset to load. + + Returns: + A `(meta, mappings)` tuple, where `meta` is a DLite metadata and + `mappings` is a list of triples. 
+ """ + mappings = [] + + # Bypass the triplestore if the instance is already in cache + try: + return dlite.get_instance(iri, check_storages=False), mappings + except dlite.DLiteMissingInstanceError: + pass + + metairi = ts.value(iri, RDF.type, default=None) + + if not metairi: + # `iri` does not correspond to a data instance, check for metadata + if ts.has(iri, RDFS.subClassOf, EMMO.DataSet): + return get_dataset(ts, iri), mappings + raise KBError( + f"Cannot find neither a data instance nor metadata with IRI: {iri}" + ) + + if not dlite.has_instance(metairi, check_storages=False): + meta, maps = get_dataset(ts, metairi) + mappings.extend(maps) + else: + meta = dlite.get_instance(metairi, check_storages=False) + + json = ts.value(iri, RDF.value) + if not json: + raise KBError(f"cannot find JSON value for IRI: {iri}") + + inst = dlite.Instance.from_json(str(json)) + + return inst, mappings diff --git a/bindings/python/dlite-type.i b/bindings/python/dlite-type.i index 794740034..15327557b 100644 --- a/bindings/python/dlite-type.i +++ b/bindings/python/dlite-type.i @@ -11,9 +11,11 @@ char *to_typename(int type, int size) { char *s; - if (size < 0) return dlite_err(1, "size must be non-negative"), NULL; if (!(s = malloc(16))) return NULL; - if (dlite_type_set_typename(type, size, s, 16)) { + if (size < 0) { + //s = strdup(dlite_type_get_enum_name(type)); + s = strdup(dlite_type_get_dtypename(type)); + } else if (dlite_type_set_typename(type, size, s, 16)) { free(s); return NULL; } @@ -43,10 +45,27 @@ enum _DLiteType { %apply int *OUTPUT { int *type, int *size }; +%feature( + "docstring", + "Returns type number and size from given type name." +) from_typename; status_t from_typename(const char *typename, int *type, int *size); %newobject to_typename; -char *to_typename(int type, int size); +%feature( + "docstring", + "Returns type name for given type number and size. " + "If `size` is negative, only the name of `type` is returned." +) to_typename; +char *to_typename(int type, int size=-1); + +%feature( + "docstring", + "Returns DLite type number corresponding to `dtypename`." +) dlite_type_get_dtype; +%rename(to_typenumber) dlite_type_get_dtype; +int dlite_type_get_dtype(const char *dtypename); + %rename(get_alignment) dlite_type_get_alignment; size_t dlite_type_get_alignment(int type, size_t size); diff --git a/bindings/python/tests/CMakeLists.txt b/bindings/python/tests/CMakeLists.txt index a9746189f..cbeaeb69a 100644 --- a/bindings/python/tests/CMakeLists.txt +++ b/bindings/python/tests/CMakeLists.txt @@ -25,6 +25,8 @@ set(tests test_pydantic test_relation test_iri + test_dataset1_save + test_dataset2_load ) foreach(test ${tests}) diff --git a/bindings/python/tests/entities/FluidData.json b/bindings/python/tests/entities/FluidData.json new file mode 100644 index 000000000..fcbb29841 --- /dev/null +++ b/bindings/python/tests/entities/FluidData.json @@ -0,0 +1,20 @@ +{ + "uri": "http://onto-ns.org/meta/dlite/0.1/FluidData", + "description": "A dataset describing a fluid.", + "dimensions": { + "ntimes": "The number of times the measurements are performed.", + "npositions": "The number of positions the measurements are performed.", + }, + "properties": { + "LJPotential": { + "type": "string", + "description": "Reference to Lennart-Jones potential." + }, + "TemperatureField": { + "type": "float64", + "shape": ["ntimes", "npositions"], + "unit": "°C", + "description": "Array of measured temperatures." 
+ } + } +} diff --git a/bindings/python/tests/test_dataset1_save.py b/bindings/python/tests/test_dataset1_save.py new file mode 100644 index 000000000..6862cd698 --- /dev/null +++ b/bindings/python/tests/test_dataset1_save.py @@ -0,0 +1,109 @@ +from pathlib import Path + +try: + from tripper import DCTERMS, MAP, OWL, RDF, RDFS, XSD, Triplestore + from tripper.utils import en +except ModuleNotFoundError: + import sys + sys.exit(44) + +import dlite +from dlite.dataset import add_dataset, add_data +from dlite.dataset import EMMO, EMMO_VERSIONIRI +from dlite.testutils import raises + + +thisdir = Path(__file__).absolute().parent +outdir = thisdir / "output" +indir = thisdir / "input" +entitydir = thisdir / "entities" +dlite.storage_path.append(entitydir / "*.json") +dlite.storage_path.append(indir / "*.json") + + +# Test help functions +# =================== +from dlite.dataset import MissingUnitError, get_unit_iri + +assert get_unit_iri("Kelvin") == "https://w3id.org/emmo#Kelvin" +assert get_unit_iri("K") == "https://w3id.org/emmo#Kelvin" +assert get_unit_iri("°C") == "https://w3id.org/emmo#DegreeCelsius" +assert get_unit_iri("m/s") == "https://w3id.org/emmo#MetrePerSecond" + +with raises(MissingUnitError): + get_unit_iri("Atom") + +with raises(MissingUnitError): + # Because prefixed units are not in EMMO by default + # They can be including by importing https://w3id.org/emmo/1.0.0-rc1/disciplines/units/prefixedunits + get_unit_iri("cm") + + +# To be fixed in issue https://github.com/SINTEF/dlite/issues/878 +#from dlite.dataset import TS_EMMO +#TS_EMMO.parse("https://w3id.org/emmo/1.0.0-rc1/disciplines/units/prefixedunits", format="turtle") +#assert get_unit_iri("mm") == "https://w3id.org/emmo#MilliMetre" + + + +# Test serialising Metadata as an EMMO dataset +# ============================================ +Fluid = dlite.get_instance("http://onto-ns.org/meta/dlite/0.1/FluidData") + +assert Fluid.get_hash() == ( + '4739a3820ced457d07447c8916112021a0fbda9cbc97758e40b67369e34c00b4' +) + +ts = Triplestore(backend="rdflib") +EX = ts.bind("ex", "https://w3id.org/emmo/application/ex/0.2/") +FLUID = ts.bind("fluid", "http://onto-ns.org/meta/dlite/0.1/FluidData#") + +mappings = [ + (FLUID, EMMO.isDescriptionFor, EMMO.Fluid), + (FLUID.LJPotential, MAP.mapsTo, EMMO.String), + (FLUID.LJPotential, EMMO.isDescriptionFor, EMMO.MolecularEntity), + (FLUID.TemperatureField, MAP.mapsTo, EMMO.ThermodynamicTemperature), + (FLUID.ntimes, MAP.mapsTo, EMMO.Time), + (FLUID.npositions, MAP.mapsTo, EMMO.Position), +] +#add_dataset(ts, chem.meta, base_iri=base_iri) +add_dataset(ts, Fluid, iri=EX.FluidData, mappings=mappings) + + +# Test serialising data instances to KB +# ===================================== + +# Create instances +fluid1 = Fluid(dimensions={"ntimes":2, "npositions": 3}, id="fluid1") +fluid1.LJPotential = "WaterPot" +fluid1.TemperatureField = [[20., 24., 28.], [22, 26, 29]] + +uuid2 = dlite.get_uuid("fluid2") # just to ensure persistent uuid... 
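+# Passing a UUID as `id` gives an instance without an explicit URI
+# (fluid2.uri will be None).  Deriving the UUID from the string "fluid2"
+# makes it reproducible, so that test_dataset2_load.py can address the
+# instance again.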
+fluid2 = Fluid(dimensions={"ntimes":2, "npositions": 4}, id=uuid2) +fluid2.LJPotential = "AcetonePot" +fluid2.TemperatureField = [[20., 24., 28., 32.], [22, 26, 30, 34]] + +assert fluid1.get_hash() == ( + "412b7387f8c13c9d1aaa65ca21d59957be5635b41c7c3851b268de508817f7f8" +) +assert fluid2.get_hash() == ( + "c4289ff03f880526fc0f87038302673e44101c2b648be2c57a4db84fe6779f67" +) + +add_data(ts, fluid1) +add_data(ts, fluid2) + + + +# Add ontology and save to file +# ============================= + +# Make our ex: namespace an EMMO application ontology in the triplestore +iri = str(EX).rstrip("/#") +ts.add_triples([ + (iri, RDF.type, OWL.Ontology), + (iri, DCTERMS.title, en("Test application ontology with a dataset.")), + (iri, OWL.imports, EMMO_VERSIONIRI), +]) + +ts.serialize(outdir / "dataset.ttl") diff --git a/bindings/python/tests/test_dataset2_load.py b/bindings/python/tests/test_dataset2_load.py new file mode 100644 index 000000000..7ff879944 --- /dev/null +++ b/bindings/python/tests/test_dataset2_load.py @@ -0,0 +1,74 @@ +from pathlib import Path + +try: + from tripper import MAP, Triplestore +except ModuleNotFoundError: + import sys + sys.exit(44) + +import dlite +from dlite.dataset import EMMO, get_dataset, get_data +from dlite.testutils import raises + + +thisdir = Path(__file__).absolute().parent +outdir = thisdir / "output" +indir = thisdir / "input" +entitydir = thisdir / "entities" + + +# Test load Metadata from triplestore +# =================================== + +ts = Triplestore(backend="rdflib") +ts.parse(outdir / "dataset.ttl") +EX = ts.namespaces["ex"] +FLUID = ts.bind("fluid", "http://onto-ns.org/meta/dlite/0.1/FluidData#") + +Fluid, mappings = get_dataset(ts, iri=EX.FluidData) + +# Check that the loaded datamodel looks as expected +assert Fluid.uri == str(FLUID).rstrip("#") +assert Fluid.dimnames() == ["ntimes", "npositions"] +assert len(Fluid.props) == 2 +assert Fluid.props["TemperatureField"].unit == "°C" + +# Check that we get the exact same hash as in test_dataset1_save.py +assert Fluid.get_hash() == ( + '4739a3820ced457d07447c8916112021a0fbda9cbc97758e40b67369e34c00b4' +) + +# Check that we get the exact same mappings as provided +assert set(mappings) == { + (Fluid.uri, EMMO.isDescriptionFor, EMMO.Fluid), + (FLUID.LJPotential, MAP.mapsTo, EMMO.String), + (FLUID.LJPotential, EMMO.isDescriptionFor, EMMO.MolecularEntity), + (FLUID.TemperatureField, MAP.mapsTo, EMMO.ThermodynamicTemperature), + (FLUID.ntimes, MAP.mapsTo, EMMO.Time), + (FLUID.npositions, MAP.mapsTo, EMMO.Position), +} + + +# Test load data instances from triplestore +# ========================================= + +uuid2 = dlite.get_uuid("fluid2") # persistent uuid... 
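+# add_data() stored each instance under "<dataset IRI>/<uri or uuid>", so
+# fluid1 is addressed by its URI while fluid2 is addressed by the UUID
+# derived from "fluid2".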
+fluid1, mappings1 = get_data(ts, iri=f"{EX.FluidData}/fluid1") +fluid2, mappings2 = get_data(ts, iri=f"{EX.FluidData}/{uuid2}") + +print("---------") +print(fluid1) + +assert fluid1.meta == Fluid +assert fluid1.uri == "fluid1" +assert fluid2.meta == Fluid +assert fluid2.uri == None + +# Check that the instances have the exact same hash values as +# when they were created +assert fluid1.get_hash() == ( + "412b7387f8c13c9d1aaa65ca21d59957be5635b41c7c3851b268de508817f7f8" +) +assert fluid2.get_hash() == ( + "c4289ff03f880526fc0f87038302673e44101c2b648be2c57a4db84fe6779f67" +) diff --git a/doc/_static/dataset-v2.svg b/doc/_static/dataset-v2.svg new file mode 100644 index 000000000..96ff13340 --- /dev/null +++ b/doc/_static/dataset-v2.svg @@ -0,0 +1,4 @@ + + + +Description of the semanticmeaning of array dimensionsDescription of the semantic...emmo:DataSetemmo:DataSetemmo:Materialemmo:Materialemmo:Fluidemmo:FluidisDescriptionFor someisDescriptionFor some_:MyFluidDataSet_:MyFluidDataSet_:TemperatureField_:TemperatureField_:LJPotential_:LJPotentialhasDatum exactly 1hasDatum exactly 1hasDatum exactly 1hasDatum exactly 1emmo:Datumemmo:Datumemmo:MolecularEntityemmo:MolecularEntityisConventionalFor someisConventionalFor someLJDataURILJDataURILJPotentialDataLJPotentialDataemmo:StringDataemmo:StringDataisDescriptionFor someisDescriptionFor someemmo:DataSetemmo:DataSet......prefLabelprefLabelelucidationelucidationLJPotentialLJPotentialemmo:Temperatureemmo:Temperatureemmo:Arrayemmo:Arrayemmo:Kelvinemmo:KelvinThe temperature of a fluid measured at a set of times and positions.The temperature of a...prefLabelprefLabelelucidationelucidationTemperatureFieldTemperatureFieldhasMeasurementUnit exactly 1hasMeasurementUnit exactly 1......hasDimension valuehasDimension valuehasNexthasNext_:dimension1_:dimension1_:dimension2_:dimension2elucidationelucidation......elucidationelucidationemmo:Dimensionemmo:Dimensionemmo:Positionemmo:Positionemmo:Timeemmo:TimeFluidDataFluidDataprefLabelprefLabelelucidationelucidation............prefLabelprefLabel......prefLabelprefLabelLegendLegendGenerated individualGenerated individualGenerated annotationGenerated annotationGenerated class (blank node)Generated class (bla...Existing classExisting classemmo:Signemmo:Signemmo:Symbolicemmo:Symbolicemmo:DoubleDataemmo:DoubleDatahasScalarData somehasScalarData someViewer does not support full SVG 1.1 \ No newline at end of file diff --git a/doc/_static/dataset.svg b/doc/_static/dataset.svg new file mode 100644 index 000000000..52017f4ef --- /dev/null +++ b/doc/_static/dataset.svg @@ -0,0 +1,4 @@ + + + +DataSetDataSetLJPotentialDataLJPotentialDataMolecularEntityMolecularEntityLJDataUriLJDataUriStringString......TemperatureTemperatureArrayArrayPropertyPropertyKelvinKelvin......TemperatureFieldShapeTemperatureFieldShapeemmo:Shapeemmo:ShapeisConventionalFor someisConventionalFor someDatumDatumprefLabelprefLabelLJPotentialLJPotentialprefLabelprefLabelTemperatureFieldTemperatureFieldhasMeasurementUnit exactly 1hasMeasurementUnit exactly 1elucidationelucidationhasShape exactly 1hasShape exactly 1elucidationelucidationisDescriptionFor someisDescriptionFor someFluidDataSetFluidDataSetemmo:Fluidemmo:Fluidemmo:Materialemmo:MaterialhasDatum exactly 1hasDatum exactly 1hasDatum exactly 1hasDatum exactly 1emmo:DataSetemmo:DataSetisDescriptionFor someisDescriptionFor someemmo:Descriptionemmo:Description"descr1..."@en"descr1..."@en......elucidationelucidationemmo:Sequenceemmo:Sequenceemmo:Dimensionemmo:DimensionhasBeginTile valuehasBeginTile 
valuehasNexthasNext_:dimension1_:dimension1_:dimension2_:dimension2elucidationelucidation"descr1..."@en"descr1..."@enemmo:Variableemmo:Variableemmo:Symbolemmo:Symbol"nx"^^xsd:string"nx"^^xsd:stringhasSymbolValuehasSymbolValue"ny"^^xsd:string"ny"^^xsd:stringhasSymbolValuehasSymbolValue"LJPotential"@en"LJPotential"@en"TemperatureField"@en"TemperatureField"@enelucidationelucidationViewer does not support full SVG 1.1 \ No newline at end of file diff --git a/examples/TEM_data/requirements.txt b/examples/TEM_data/requirements.txt index 2694c6710..caaef90ea 100644 --- a/examples/TEM_data/requirements.txt +++ b/examples/TEM_data/requirements.txt @@ -1,5 +1,6 @@ dlite-python>=0.4.1,<1 -tripper>=0.2.7,<1 +#tripper>=0.2.16,<1 +tripper @ git+https://github.com/EMMC-ASBL/tripper.git@fix-for-rdflib-literals rdflib>=6,<8 #oteapi-dlite>=0.1.5,<1 -e git+https://github.com/EMMC-ASBL/oteapi-dlite.git@fa1b820383eb54a1c37f32f7b8ac9406b556dace#egg=oteapi_dlite diff --git a/examples/dataset/README.md b/examples/dataset/README.md new file mode 100644 index 000000000..29611ee82 --- /dev/null +++ b/examples/dataset/README.md @@ -0,0 +1,49 @@ +Representing DLite datamodels as EMMO Datasets +============================================== +The intention with this example is to show how to use the +`dlite.dataset` module to serialise and deserialise DLite datamodels +and instances to and from an EMMO-based RDF representation. + +![EMMO-based representation of a datamodel.](https://raw.githubusercontent.com/SINTEF/dlite/652-serialise-data-models-to-tbox/doc/_static/dataset-v2.svg) + +The figure above shown how the following simple [`FluidData`] +datamodel is represented with EMMO. + +```yaml +uri: http://onto-ns.org/meta/dlite/0.1/FluidData +meta: http://onto-ns.com/meta/0.3/EntitySchema +description: A dataset describing a fluid. +dimensions: + ntimes: The number of times the measurements are performed. + npositions: The number of positions the measurements are performed. +properties: + LJPotential: + type: string + description: Reference to Lennart-Jones potential. + TemperatureField: + type: float64 + shape: [ntimes, npositions] + unit: "°C" + description: Array of measured temperatures. +``` + +The datamodel is semantically enhanced using the following mappings + ```python + mappings = [ + (FLUID, EMMO.isDescriptionFor, EMMO.Fluid), + (FLUID.LJPotential, MAP.mapsTo, EMMO.String), + (FLUID.LJPotential, EMMO.isDescriptionFor, EMMO.MolecularEntity), + (FLUID.TemperatureField, MAP.mapsTo, EMMO.ThermodynamicTemperature), + (FLUID.ntimes, MAP.mapsTo, EMMO.Time), + (FLUID.npositions, MAP.mapsTo, EMMO.Position), + ] + ``` + +Some comments: +- Note the use of `emmo:isDescriptionFor` relations in the mappings. They are stored as-is in the triplestore. + +- The `map:mapsTo` are translated to `rdfs:subClassOf` when serialised to the triplestore. 
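+
+As an illustration, with the default choice of IRI the datamodel itself is
+serialised roughly as the following Turtle (a sketch; the human-readable EMMO
+labels are shown instead of the numeric `emmo:EMMO_...` IRIs):
+
+```turtle
+<http://onto-ns.org/meta/dlite/0.1/FluidData> a owl:Class ;
+    rdfs:subClassOf emmo:DataSet ,
+        [ a owl:Restriction ;
+          owl:onProperty emmo:isDescriptionFor ;
+          owl:someValuesFrom emmo:Fluid ] ;
+    skos:prefLabel "FluidData"@en ;
+    oteio:hasURI "http://onto-ns.org/meta/dlite/0.1/FluidData"^^xsd:anyURI ;
+    emmo:elucidation "A dataset describing a fluid."@en .
+```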
+ + + +[`FluidData`]: https://github.com/SINTEF/dlite/blob/652-serialise-data-models-to-tbox/examples/dataset/datamodels/FluidData.json diff --git a/examples/dataset/data/.gitignore b/examples/dataset/data/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/examples/dataset/datamodels/FluidData.json b/examples/dataset/datamodels/FluidData.json new file mode 100644 index 000000000..fcbb29841 --- /dev/null +++ b/examples/dataset/datamodels/FluidData.json @@ -0,0 +1,20 @@ +{ + "uri": "http://onto-ns.org/meta/dlite/0.1/FluidData", + "description": "A dataset describing a fluid.", + "dimensions": { + "ntimes": "The number of times the measurements are performed.", + "npositions": "The number of positions the measurements are performed.", + }, + "properties": { + "LJPotential": { + "type": "string", + "description": "Reference to Lennart-Jones potential." + }, + "TemperatureField": { + "type": "float64", + "shape": ["ntimes", "npositions"], + "unit": "°C", + "description": "Array of measured temperatures." + } + } +} diff --git a/requirements_full.txt b/requirements_full.txt index 98cdb8504..c8a4e2fed 100644 --- a/requirements_full.txt +++ b/requirements_full.txt @@ -8,7 +8,9 @@ tables>=3.8,<5.0 rdflib>=4.2.1,<8 pint>=0.15,<1 pymongo>=4.4.0,<5 -tripper>=0.2.16,<0.3.1 + +#tripper>=0.2.16,<0.3.1 +tripper @ git+https://github.com/EMMC-ASBL/tripper.git@master requests>=2.10,<3 jinja2>=3.0,<4 diff --git a/requirements_mappings.txt b/requirements_mappings.txt index 8bae142f6..10c485a2b 100644 --- a/requirements_mappings.txt +++ b/requirements_mappings.txt @@ -1,5 +1,6 @@ # Requirements for property mappings -tripper>=0.2.16,<0.3.1 +#tripper>=0.2.16,<0.3.1 +tripper @ git+https://github.com/EMMC-ASBL/tripper.git@master pint>=0.15,<1
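
Taken together, the new `dlite.dataset` API can be exercised end-to-end roughly as follows. This is a sketch distilled from test_dataset1_save.py and test_dataset2_load.py; the storage path is an assumption and, as in the tests, loading would normally happen in a separate process:

```python
from tripper import Triplestore
import dlite
from dlite.dataset import add_dataset, add_data, get_dataset, get_data

# Saving (cf. test_dataset1_save.py)
dlite.storage_path.append("bindings/python/tests/entities/*.json")  # assumed path
Fluid = dlite.get_instance("http://onto-ns.org/meta/dlite/0.1/FluidData")

ts = Triplestore(backend="rdflib")
EX = ts.bind("ex", "https://w3id.org/emmo/application/ex/0.2/")
add_dataset(ts, Fluid, iri=EX.FluidData)   # datamodel -> owl:Class

fluid = Fluid(dimensions={"ntimes": 2, "npositions": 3}, id="fluid1")
fluid.LJPotential = "WaterPot"
fluid.TemperatureField = [[20., 24., 28.], [22., 26., 29.]]
add_data(ts, fluid)                        # instance -> individual + JSON value
ts.serialize("dataset.ttl")

# Loading it back (cf. test_dataset2_load.py, normally in another process)
ts2 = Triplestore(backend="rdflib")
ts2.parse("dataset.ttl")
Fluid2, mappings = get_dataset(ts2, iri=EX.FluidData)
inst, _ = get_data(ts2, iri=f"{EX.FluidData}/fluid1")
```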