From 71c0099c807fe9fac04a3ef82c26b3e4a9ff10f0 Mon Sep 17 00:00:00 2001 From: David Linke Date: Tue, 8 Oct 2024 08:15:04 +0200 Subject: [PATCH] WIP (sync between work places) --- poetry.lock | 16 ++++----- pyproject.toml | 13 ++++--- .../importers/shacl_import_engine.py | 18 ++++++++-- tests/__init__.py | 3 -- tests/resources/shacl_simple.ttl | 34 +++++++++++++++++++ tests/test_importers/test_shacl_importer.py | 15 +++----- 6 files changed, 70 insertions(+), 29 deletions(-) create mode 100644 tests/resources/shacl_simple.ttl diff --git a/poetry.lock b/poetry.lock index dd714bc..3666b03 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "airium" @@ -3292,9 +3292,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3792,8 +3792,8 @@ files = [ annotated-types = ">=0.4.0" pydantic-core = "2.20.1" typing-extensions = [ - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, {version = ">=4.6.1", markers = "python_version < \"3.13\""}, + {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, ] [package.extras] @@ -4116,7 +4116,6 @@ description = "A pure Python implementation of the trie data structure." optional = false python-versions = "*" files = [ - {file = "PyTrie-0.4.0-py3-none-any.whl", hash = "sha256:f687c224ee8c66cda8e8628a903011b692635ffbb08d4b39c5f92b18eb78c950"}, {file = "PyTrie-0.4.0.tar.gz", hash = "sha256:8f4488f402d3465993fb6b6efa09866849ed8cda7903b50647b7d0342b805379"}, ] @@ -5095,7 +5094,7 @@ sphinx = ">=4.0" name = "sphinx-pdj-theme" version = "0.4.0" description = "A cool theme for sphinx documentation" -optional = false +optional = true python-versions = "*" files = [ {file = "sphinx-pdj-theme-0.4.0.tar.gz", hash = "sha256:4b86bfd8b8e20344db56aba13473f634286149fa0203d18e0437157f48c7e0fa"}, @@ -5167,7 +5166,7 @@ test = ["flake8", "mypy", "pytest"] name = "sphinxcontrib-mermaid" version = "0.9.2" description = "Mermaid diagrams in yours Sphinx powered docs" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "sphinxcontrib-mermaid-0.9.2.tar.gz", hash = "sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af"}, @@ -5959,10 +5958,9 @@ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linke test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [extras] -docs = [] -mariadb = [] +docs = ["Sphinx", "sphinx-pdj-theme", "sphinxcontrib-mermaid"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "036cba73b6fd660157c70cb76be27a501017e8904b35c8d2ccb00d412bbba870" +content-hash = "9c29a704add4aaf15c228f9d6a81164390f060582bee85a89d266e2232c4b0ed" diff --git a/pyproject.toml b/pyproject.toml index 684e019..1f31453 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,16 +54,19 @@ linkml-runtime = "^1.7.2" duckdb = "^0.10.1" numpy = "<2.0" +Sphinx = { version = ">=4.4.0", optional = true } +sphinx-pdj-theme = { version = ">=0.2.1", optional = true } +sphinx-click = ">=3.1.0" +sphinxcontrib-mermaid = { version = ">=0.9.2", optional = true } + [tool.poetry.dev-dependencies] pytest = ">=7.1.1" -Sphinx = ">=4.4.0" -sphinx-pdj-theme = ">=0.2.1" -sphinx-click = ">=3.1.0" -sphinxcontrib-mermaid = ">=0.9.2" myst-parser = "*" jupyter = ">=1.0.0" lxml = ">=4.9.1" +#mariadb = { version = "^1.3", optional = true } + [tool.poetry.group.llm.dependencies] llm = ">=0.12" @@ -82,7 +85,7 @@ extract-schema = "schema_automator.utils.schema_extractor:cli" [tool.poetry.extras] docs = ["Sphinx", "sphinx-pdj-theme", "sphinxcontrib-mermaid"] -mariadb = ["mariadb"] +#mariadb = ["mariadb"] [tool.codespell] # Ref: https://github.com/codespell-project/codespell#using-a-config-file diff --git a/schema_automator/importers/shacl_import_engine.py b/schema_automator/importers/shacl_import_engine.py index 39a1b98..352c9fa 100644 --- a/schema_automator/importers/shacl_import_engine.py +++ b/schema_automator/importers/shacl_import_engine.py @@ -1,13 +1,25 @@ +from collections import defaultdict import logging +from dataclasses import dataclass +from typing import Dict, List, Any + +from rdflib import Graph, RDF, OWL, URIRef, RDFS, SKOS, SDO, Namespace + +from funowl import Literal + from linkml.utils.schema_builder import SchemaBuilder from linkml_runtime import SchemaView +from linkml_runtime.utils.formatutils import underscore +from linkml_runtime.utils.introspection import package_schemaview from linkml_runtime.linkml_model import ( SchemaDefinition, SlotDefinition, ClassDefinition, ) +from schema_automator.importers.import_engine import ImportEngine +logger = logging.getLogger(__name__) HTTP_SDO = Namespace("http://schema.org/") @@ -80,7 +92,7 @@ def convert( **kwargs, ) -> SchemaDefinition: """ - Converts an OWL schema-style ontology + Converts an shacl shapes file :param file: :param name: @@ -110,6 +122,7 @@ def convert( if default_prefix not in schema.prefixes: sb.add_prefix(default_prefix, model_uri, replace_if_present=True) schema.id = schema.prefixes[default_prefix].prefix_reference + cls_slots = defaultdict(list) props = [] for rdfs_property_metaclass in self._rdfs_metamodel_iri( @@ -137,6 +150,7 @@ def convert( slot = SlotDefinition(sn, **init_dict) slot.slot_uri = str(p.n3(g.namespace_manager)) sb.add_slot(slot) + rdfs_classes = [] for rdfs_class_metaclass in self._rdfs_metamodel_iri(ClassDefinition.__name__): for s in g.subjects(RDF.type, rdfs_class_metaclass): @@ -201,7 +215,7 @@ def _element_from_iri(self, iri: URIRef) -> str: r = self.reverse_metamodel_mappings.get(iri, []) if len(r) > 0: if len(r) > 1: - logging.debug(f"Multiple mappings for {iri}: {r}") + logger.debug(f"Multiple mappings for {iri}: {r}") return r[0] def _object_to_value(self, obj: Any, metaslot: SlotDefinition = None) -> Any: diff --git a/tests/__init__.py b/tests/__init__.py index b092ca8..ad4619d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,9 +1,6 @@ import os -import pprint ROOT = os.path.abspath(os.path.dirname(__file__)) INPUT_DIR = os.path.join(ROOT, 'resources') OUTPUT_DIR = os.path.join(ROOT, 'outputs') MODEL_DIR = os.path.join(ROOT, 'test_models') - - diff --git a/tests/resources/shacl_simple.ttl b/tests/resources/shacl_simple.ttl new file mode 100644 index 0000000..9c3b08b --- /dev/null +++ b/tests/resources/shacl_simple.ttl @@ -0,0 +1,34 @@ +# example from http://book.validatingrdf.com/bookHtml011.html#ch050SHACLExample + +@prefix schema: . +@prefix sh: . +@prefix xsd: . +@prefix ex: . + +ex:UserShape a sh:NodeShape; + sh:targetClass ex:User ; + sh:property [ # Blank node 1 + sh:path schema:name ; + sh:minCount 1; + sh:maxCount 1; + sh:datatype xsd:string ; + ] ; + sh:property [ # Blank node 2 + sh:path schema:gender ; + sh:minCount 1; + sh:maxCount 1; + sh:or ( + [ sh:in (schema:Male schema:Female) ] + [ sh:datatype xsd:string] + ) + ] ; + sh:property [ # Blank node 3 + sh:path schema:birthDate ; + sh:maxCount 1; + sh:datatype xsd:date ; + ] ; + sh:property [ # Blank node 4 + sh:path schema:knows ; + sh:nodeKind sh:IRI ; + sh:class ex:User ; + ] . diff --git a/tests/test_importers/test_shacl_importer.py b/tests/test_importers/test_shacl_importer.py index 6970514..556c58f 100644 --- a/tests/test_importers/test_shacl_importer.py +++ b/tests/test_importers/test_shacl_importer.py @@ -11,18 +11,17 @@ # TODO - Write tests (this is a copy of test_rdfs_importer) -REPRO = os.path.join(INPUT_DIR, 'reproschema.ttl') -OUTSCHEMA = os.path.join(OUTPUT_DIR, 'reproschema-from-ttl.yaml') - +REPRO = os.path.join(INPUT_DIR, 'shacl_simple.ttl') +OUTSCHEMA = os.path.join(OUTPUT_DIR, 'user_from_shacl_simple2.yaml') def test_from_shacl(): """Test Shacl conversion.""" - oie = ShaclImportEngine() + sie = ShaclImportEngine() - return - schema = oie.convert(REPRO, default_prefix='reproschema', identifier='id') + schema = sie.convert(REPRO, default_prefix='usr', identifier='id') write_schema(schema, OUTSCHEMA) + return # roundtrip s = YAMLGenerator(OUTSCHEMA).serialize() print(s[0:100]) @@ -35,7 +34,3 @@ def test_from_shacl(): assert len(slots) == 1 slot = slots[0] assert slot.name == "id" - - - -