Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for saving/loading in various RDF formats #825

Merged
merged 5 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ myst-parser>=1.0.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
wordcloud==1.9.3
rdflib>=6
2 changes: 2 additions & 0 deletions hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class HedExceptions:

SCHEMA_DUPLICATE_NAMES = "SCHEMA_DUPLICATE_NAMES"

CANNOT_PARSE_RDF = "CANNOT_PARSE_RDF"


class HedFileError(Exception):
"""Exception raised when a file cannot be parsed due to being malformed, file IO, etc."""
Expand Down
84 changes: 71 additions & 13 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import json
import os

from hed.schema.hed_schema_constants import HedKey, HedSectionKey
from hed.schema import hed_schema_constants as constants
from hed.schema.schema_io import schema_util
from hed.schema.schema_io.schema2xml import Schema2XML
from hed.schema.schema_io.schema2wiki import Schema2Wiki
from hed.schema.schema_io.schema2owl import Schema2Owl
from hed.schema.schema_io.owl_constants import ext_to_format
from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection
from hed.errors import ErrorHandler
from hed.errors.error_types import ValidationErrors
Expand Down Expand Up @@ -208,6 +211,11 @@ def valid_prefixes(self):
# ===============================================
# Creation and saving functions
# ===============================================

# todo: we may want to collapse these 6 functions into one like this
# def serialize(self, filename=None, save_merged=False, file_format=whatever is default):
# pass

def get_as_mediawiki_string(self, save_merged=False):
""" Return the schema to a mediawiki string.

Expand All @@ -222,6 +230,26 @@ def get_as_mediawiki_string(self, save_merged=False):
output_strings = Schema2Wiki.process_schema(self, save_merged)
return '\n'.join(output_strings)

def get_as_owl_string(self, save_merged=False, file_format="owl"):
    """ Return the schema serialized as an RDF/OWL string.

    Parameters:
        save_merged (bool): If True, a "withStandard" schema is written out as a merged schema.
                            For any other schema this setting has no effect.
        file_format (str): Name of the RDF serialization to produce.
            "owl" is treated as an alias for "xml". "turtle", "xml" and "json-ld" are fully supported;
            other formats rdflib accepts should work, but aren't as fully supported.

    Returns:
        str: The schema as a string in the requested RDF format.

    :raises rdflib.plugin.PluginException:
        - Invalid format of file_format. Make sure you use a supported RDF format.
    """
    # "owl" is only a convenience alias here; the serializer is asked for "xml".
    rdf_format = "xml" if file_format == "owl" else file_format
    graph = Schema2Owl.process_schema(self, save_merged)
    return graph.serialize(format=rdf_format)

def get_as_xml_string(self, save_merged=True):
""" Return the schema to an XML string.

Expand All @@ -234,39 +262,69 @@ def get_as_xml_string(self, save_merged=True):

"""
xml_tree = Schema2XML.process_schema(self, save_merged)
return schema_util._xml_element_2_str(xml_tree)
return schema_util.xml_element_2_str(xml_tree)

def save_as_mediawiki(self, filename=None, save_merged=False):
def save_as_mediawiki(self, filename, save_merged=False):
""" Save as mediawiki to a file.

filename: str
If present, move the resulting file to this location.
save location
save_merged: bool
If true, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

Returns:
str: The newly created schema filename.
:raises OSError:
- File cannot be saved for some reason
"""
output_strings = Schema2Wiki.process_schema(self, save_merged)
local_wiki_file = schema_util.write_strings_to_file(output_strings, ".mediawiki")
return schema_util.move_file(local_wiki_file, filename)
with open(filename, mode='w', encoding='utf-8') as opened_file:
for string in output_strings:
opened_file.write(string)
opened_file.write('\n')

def save_as_xml(self, filename=None, save_merged=True):
def save_as_owl(self, filename, save_merged=False, file_format=None):
    """ Save the schema to a file in an RDF/OWL format.

    Parameters:
        filename (str): Location to save the file to.
        save_merged (bool): If True, a "withStandard" schema is written out as a merged schema.
                            For any other schema this setting has no effect.
        file_format (str or None): Name of the RDF serialization to produce.
            When None, the format is looked up in ext_to_format using the lowercased filename
            extension (expected to cover .ttl -> turtle, .owl -> xml, .json-ld -> json-ld),
            so it is required for files with any other extension.
            "owl" is treated as an alias for "xml".

    :raises OSError:
        - File cannot be saved for some reason

    :raises rdflib.plugin.PluginException:
        - Invalid format of file_format. Make sure you use a supported RDF format.
    """
    extension = os.path.splitext(filename.lower())[1]
    # An explicitly passed file_format always wins over the extension lookup.
    if file_format is None and extension in ext_to_format:
        file_format = ext_to_format[extension]
    rdf_format = "xml" if file_format == "owl" else file_format
    graph = Schema2Owl.process_schema(self, save_merged)
    graph.serialize(filename, format=rdf_format)

def save_as_xml(self, filename, save_merged=True):
""" Save as XML to a file.

filename: str
If present, move the resulting file to this location.
save location
save_merged: bool
If true, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.

Returns:
str: The name of the newly created schema file.
:raises OSError:
- File cannot be saved for some reason
"""
xml_tree = Schema2XML.process_schema(self, save_merged)
local_xml_file = schema_util.write_xml_tree_2_xml_file(xml_tree, ".xml")
return schema_util.move_file(local_xml_file, filename)
with open(filename, mode='w', encoding='utf-8') as opened_file:
xml_string = schema_util.xml_element_2_str(xml_tree)
opened_file.write(xml_string)

def set_schema_prefix(self, schema_namespace):
""" Set library namespace associated for this schema.
Expand Down
21 changes: 12 additions & 9 deletions hed/schema/hed_schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,7 @@ class HedKey:
Rooted = "rooted"
DeprecatedFrom = "deprecatedFrom"
ConversionFactor = "conversionFactor"

# All known properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
IsInheritedProperty = 'isInheritedProperty'
Reserved = "reserved"

SIUnit = 'SIUnit'
UnitSymbol = 'unitSymbol'
Expand All @@ -68,6 +60,17 @@ class HedKey:
# Node attributes
InLibrary = "inLibrary"

# All known properties
BoolProperty = 'boolProperty'
UnitClassProperty = 'unitClassProperty'
UnitProperty = 'unitProperty'
UnitModifierProperty = 'unitModifierProperty'
ValueClassProperty = 'valueClassProperty'
ElementProperty = 'elementProperty'
NodeProperty = 'nodeProperty'
IsInheritedProperty = 'isInheritedProperty'



VERSION_ATTRIBUTE = 'version'
LIBRARY_ATTRIBUTE = 'library'
Expand Down
10 changes: 9 additions & 1 deletion hed/schema/hed_schema_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def __eq__(self, other):
# We only want to compare known attributes
self_attr = self.get_known_attributes()
other_attr = other.get_known_attributes()
if self_attr != other_attr:
# Attribute value order is no longer guaranteed (owl formatting has no order), so also try an order-insensitive comparison
if self_attr != other_attr and not self._compare_attributes_no_order(self_attr, other_attr):
return False
if self.description != other.description:
return False
Expand All @@ -135,6 +136,13 @@ def get_known_attributes(self):
return {key: value for key, value in self.attributes.items()
if not self._unknown_attributes or key not in self._unknown_attributes}

@staticmethod
def _compare_attributes_no_order(left, right):
    """ Compare two attribute dictionaries, ignoring the order of comma separated values.

    Parameters:
        left (dict): Attribute names mapped to their values.
        right (dict): Attribute names mapped to their values.

    Returns:
        bool: True if both dictionaries have the same names and equivalent values.
              String values are split on "," and compared as sets; all other values are compared as-is.
    """
    def _as_unordered(attributes):
        # Turn each string value into the set of its comma separated parts; leave other values untouched.
        unordered = {}
        for attr_name, attr_value in attributes.items():
            if isinstance(attr_value, str):
                unordered[attr_name] = set(attr_value.split(","))
            else:
                unordered[attr_name] = attr_value
        return unordered

    return _as_unordered(left) == _as_unordered(right)


class UnitClassEntry(HedSchemaEntry):
""" A single unit class entry in the HedSchema. """
Expand Down
22 changes: 18 additions & 4 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import functools
from hed.schema.schema_io.xml2schema import SchemaLoaderXML
from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki
from hed.schema.schema_io.owl2schema import SchemaLoaderOWL
from hed.schema import hed_cache

from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema.schema_io import schema_util
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.schema_validation_util import validate_version_string
from collections import defaultdict
from hed.schema.schema_io.owl_constants import ext_to_format


MAX_MEMORY_CACHE = 40
Expand All @@ -20,8 +22,10 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
""" Create a schema from the given string.

Parameters:
schema_string (str): An XML or mediawiki file as a single long string.
schema_string (str): An XML, mediawiki, or OWL file as a single long string.
schema_format (str): The schema format of the source schema string.
Allowed normal values: .mediawiki, .xml
Allowed owl values: xml, owl, pretty-xml, turtle (or any other value rdflib supports)
schema_namespace (str, None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
Expand All @@ -45,6 +49,8 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema)
elif schema_format.endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema)
elif schema_format:
hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)

Expand All @@ -54,14 +60,18 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
return hed_schema


def load_schema(hed_path=None, schema_namespace=None, schema=None):
def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None):
""" Load a schema from the given file or URL path.

Parameters:
hed_path (str or None): A filepath or url to open a schema from.
hed_path (str): A filepath or url to open a schema from.
schema_namespace (str or None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format(str or None): Required for owl formatted files other than the following:
.ttl: turtle
.owl: xml
.json-ld: json-ld

Returns:
HedSchema: The loaded schema.
Expand All @@ -76,11 +86,15 @@ def load_schema(hed_path=None, schema_namespace=None, schema=None):
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file path passed to HedSchema.load_file",
filename=hed_path)

ext = os.path.splitext(hed_path.lower())[1]
is_url = hed_cache._check_if_url(hed_path)

if is_url:
file_as_string = schema_util.url_to_string(hed_path)
hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
elif ext in ext_to_format:
hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext])
elif file_format:
hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format)
elif hed_path.lower().endswith(".xml"):
hed_schema = SchemaLoaderXML.load(hed_path, schema=schema)
elif hed_path.lower().endswith(".mediawiki"):
Expand Down
1 change: 1 addition & 0 deletions hed/schema/hed_schema_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def _finalize_section(self, hed_schema):

class HedSchemaUnitClassSection(HedSchemaSection):
def _check_if_duplicate(self, name_key, new_entry):
"""Allow adding units to existing unit classes, using a placeholder one with no attributes."""
if name_key in self and len(new_entry.attributes) == 1\
and HedKey.InLibrary in new_entry.attributes:
return self.all_names[name_key]
Expand Down
10 changes: 7 additions & 3 deletions hed/schema/schema_io/base2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@ class SchemaLoader(ABC):

SchemaLoaderXML(filename) will load just the header_attributes
"""
def __init__(self, filename, schema_as_string=None, schema=None):
def __init__(self, filename, schema_as_string=None, schema=None, file_format=None):
"""Loads the given schema from one of the two parameters.

Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format (str or None): The format of this file, if needed (currently only used for owl).
"""
if schema_as_string and filename:
raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.",
filename)
self.file_format = file_format
self.filename = filename
self.schema_as_string = schema_as_string
self.appending_to_schema = False
Expand Down Expand Up @@ -68,19 +70,21 @@ def schema(self):
return self._schema

@classmethod
def load(cls, filename=None, schema_as_string=None, schema=None):
def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None):
""" Loads and returns the schema, including partnered schema if applicable.

Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format(str or None): If this is an owl file being loaded, this is the format.
Allowed values include: turtle, json-ld, and owl(xml)

Returns:
schema(HedSchema): The new schema
"""
loader = cls(filename, schema_as_string, schema)
loader = cls(filename, schema_as_string, schema, file_format)
return loader._load()

def _load(self):
Expand Down
Loading
Loading