Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor cleanup/documentation cleanup of schema and models #887

Merged
merged 4 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""
The actual formatted error messages for each type.
"""Format templates for HED schema error messages.

Add new errors here, or any other file imported after error_reporter.py.
"""
Expand Down
26 changes: 8 additions & 18 deletions hed/errors/error_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from functools import wraps
import xml.etree.ElementTree as ET
import copy

from hed.errors.error_types import ErrorContext, ErrorSeverity
from hed.errors.known_error_codes import known_error_codes

Expand Down Expand Up @@ -175,6 +175,7 @@ def wrapper(tag, *args, severity=default_severity, **kwargs):


class ErrorHandler:
"""Class to hold error context and provide general error functions."""
def __init__(self, check_for_warnings=True):
# The current ordered list of contexts.
self.error_context = []
Expand Down Expand Up @@ -217,9 +218,6 @@ def reset_error_context(self):
"""
self.error_context = []

def get_error_context_copy(self):
return copy.copy(self.error_context)

def format_error_with_context(self, *args, **kwargs):
error_object = ErrorHandler.format_error(*args, **kwargs)
if self is not None:
Expand Down Expand Up @@ -253,9 +251,9 @@ def format_error(error_type, *args, actual_error=None, **kwargs):
if not error_func:
error_object = ErrorHandler.val_error_unknown(*args, **kwargs)
error_object['code'] = error_type
return [error_object]
else:
error_object = error_func(*args, **kwargs)

error_object = error_func(*args, **kwargs)
if actual_error:
error_object['code'] = actual_error

Expand Down Expand Up @@ -294,19 +292,11 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non
- This can't filter out warnings like the other ones.

"""
error_func = error_functions.get(error_type)
if not error_func:
error_object = ErrorHandler.val_error_unknown(*args, **kwargs)
error_object['code'] = error_type
else:
error_object = error_func(*args, **kwargs)
error_list = ErrorHandler.format_error(error_type, *args, actual_error=actual_error, **kwargs)

if actual_error:
error_object['code'] = actual_error

ErrorHandler._add_context_to_errors(error_object, error_context)
ErrorHandler._update_error_with_char_pos(error_object)
return [error_object]
ErrorHandler._add_context_to_errors(error_list[0], error_context)
ErrorHandler._update_error_with_char_pos(error_list[0])
return error_list

@staticmethod
def _add_context_to_errors(error_object, error_context_to_add):
Expand Down
1 change: 1 addition & 0 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


class ErrorSeverity:
"""Severity codes for errors"""
ERROR = 1
WARNING = 10

Expand Down
2 changes: 1 addition & 1 deletion hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Format templates for HED error messages. """
""" Format templates for HED schema error messages. """
from hed.errors.error_types import SchemaErrors, SchemaWarnings, ErrorSeverity, SchemaAttributeErrors
from hed.errors.error_reporter import hed_error

Expand Down
5 changes: 0 additions & 5 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ class BaseInput:

TEXT_EXTENSION = ['.tsv', '.txt']
EXCEL_EXTENSION = ['.xlsx']
FILE_EXTENSION = [*TEXT_EXTENSION, *EXCEL_EXTENSION]
STRING_INPUT = 'string'
FILE_INPUT = 'file'
TAB_DELIMITER = '\t'
COMMA_DELIMITER = ','

def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, name=None,
allow_blank_names=True):
Expand Down
4 changes: 2 additions & 2 deletions hed/models/definition_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _strip_value_placeholder(self, def_tag_name):

def _validate_name_and_context(self, def_tag_name, error_handler):
if error_handler:
context = error_handler.get_error_context_copy()
context = error_handler.error_context
else:
context = []
new_def_issues = []
Expand Down Expand Up @@ -298,7 +298,7 @@ def get_as_strings(def_dict):
def_dict(DefinitionDict or dict): A dict of definitions

Returns:
dict(str: str): definition name and contents
dict(str): definition name and contents
"""
if isinstance(def_dict, DefinitionDict):
def_dict = def_dict.defs
Expand Down
2 changes: 2 additions & 0 deletions hed/models/definition_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ def __init__(self, name, contents, takes_value, source_context):
if contents:
contents = contents.copy()
contents.sort()
if contents:
contents = contents.copy()
self.contents = contents
self.takes_value = takes_value
self.source_context = source_context
Expand Down
5 changes: 5 additions & 0 deletions hed/models/hed_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,11 @@ def _get_tag_units_portion(extension_text, tag_unit_classes):
return None, None, None

def is_placeholder(self):
"""Return whether this tag has a placeholder in it.

Returns:
has_placeholder(bool): True if it has a placeholder
"""
if "#" in self.org_tag or "#" in self._extension_value:
return True
return False
Expand Down
2 changes: 1 addition & 1 deletion hed/models/query_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Classes representing HED search results. """
""" Classes representing HED search results and tokens. """


class SearchResult:
Expand Down
2 changes: 1 addition & 1 deletion hed/models/string_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def split_base_tags(hed_string, base_tags, remove_group=False):


def split_def_tags(hed_string, def_names, remove_group=False):
""" Split a HedString object into two separate HedString objects based on the presence of wildcard tags.
""" Split a HedString object into two separate HedString objects based on the presence of def tags.

This does NOT handle def-expand tags currently.

Expand Down
94 changes: 47 additions & 47 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from hed.schema.schema_io import schema_util
from hed.schema.schema_io.schema2xml import Schema2XML
from hed.schema.schema_io.schema2wiki import Schema2Wiki
from hed.schema.schema_io.schema2owl import Schema2Owl
from hed.schema.schema_io.owl_constants import ext_to_format
# from hed.schema.schema_io.schema2owl import Schema2Owl
# from hed.schema.schema_io.owl_constants import ext_to_format
from hed.schema.hed_schema_section import (HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection,
HedSchemaUnitSection)
from hed.errors import ErrorHandler
Expand Down Expand Up @@ -246,25 +246,25 @@ def get_as_mediawiki_string(self, save_merged=False):
output_strings = Schema2Wiki.process_schema(self, save_merged)
return '\n'.join(output_strings)

def get_as_owl_string(self, save_merged=False, file_format="owl"):
""" Return the schema to a mediawiki string.

Parameters:
save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
file_format(str or None): Override format from filename extension.
Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld").
Other values should work, but aren't as fully supported.
Returns:
str: The schema as a string in mediawiki format.

:raises rdflib.plugin.PluginException:
- Invalid format of file_format. Make sure you use a supported RDF format.
"""
if file_format == "owl":
file_format = "xml"
rdf_data = Schema2Owl.process_schema(self, save_merged)
return rdf_data.serialize(format=file_format)
# def get_as_owl_string(self, save_merged=False, file_format="owl"):
# """ Return the schema as an OWL (RDF) string.
#
# Parameters:
# save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema.
# If it is not a "withStandard" schema, this setting has no effect.
# file_format(str or None): Override format from filename extension.
# Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld").
# Other values should work, but aren't as fully supported.
# Returns:
# str: The schema as a string in mediawiki format.
#
# :raises rdflib.plugin.PluginException:
# - Invalid format of file_format. Make sure you use a supported RDF format.
# """
# if file_format == "owl":
# file_format = "xml"
# rdf_data = Schema2Owl.process_schema(self, save_merged)
# return rdf_data.serialize(format=file_format)

def get_as_xml_string(self, save_merged=True):
""" Return the schema to an XML string.
Expand Down Expand Up @@ -298,32 +298,32 @@ def save_as_mediawiki(self, filename, save_merged=False):
opened_file.write(string)
opened_file.write('\n')

def save_as_owl(self, filename, save_merged=False, file_format=None):
""" Save as json to a file.

filename: str
Save the file here
save_merged: bool
If True, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
file_format(str or None): Required for owl formatted files other than the following:
.ttl: turtle
.owl: xml
.json-ld: json-ld

:raises OSError:
- File cannot be saved for some reason

:raises rdflib.plugin.PluginException:
- Invalid format of file_format. Make sure you use a supported RDF format.
"""
ext = os.path.splitext(filename.lower())[1]
if ext in ext_to_format and file_format is None:
file_format = ext_to_format[ext]
if file_format == "owl":
file_format = "xml"
rdf_data = Schema2Owl.process_schema(self, save_merged)
rdf_data.serialize(filename, format=file_format)
# def save_as_owl(self, filename, save_merged=False, file_format=None):
# """ Save as OWL (RDF) to a file.
#
# filename: str
# Save the file here
# save_merged: bool
# If True, this will save the schema as a merged schema if it is a "withStandard" schema.
# If it is not a "withStandard" schema, this setting has no effect.
# file_format(str or None): Required for owl formatted files other than the following:
# .ttl: turtle
# .owl: xml
# .json-ld: json-ld
#
# :raises OSError:
# - File cannot be saved for some reason
#
# :raises rdflib.plugin.PluginException:
# - Invalid format of file_format. Make sure you use a supported RDF format.
# """
# ext = os.path.splitext(filename.lower())[1]
# if ext in ext_to_format and file_format is None:
# file_format = ext_to_format[ext]
# if file_format == "owl":
# file_format = "xml"
# rdf_data = Schema2Owl.process_schema(self, save_merged)
# rdf_data.serialize(filename, format=file_format)

def save_as_xml(self, filename, save_merged=True):
""" Save as XML to a file.
Expand Down
2 changes: 1 addition & 1 deletion hed/schema/hed_schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class HedSectionKey(Enum):
""" Kegs designating specific sections in a HedSchema object.
""" Keys designating specific sections in a HedSchema object.
"""
# overarching category listing all tags
Tags = 'tags'
Expand Down
4 changes: 0 additions & 4 deletions hed/schema/hed_schema_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,6 @@ def __hash__(self):
def __str__(self):
return self.name

def get_known_attributes(self):
return {key: value for key, value in self.attributes.items()
if not self._unknown_attributes or key not in self._unknown_attributes}

@staticmethod
def _compare_attributes_no_order(left, right):
if left != right:
Expand Down
25 changes: 10 additions & 15 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

from hed.schema.schema_io.xml2schema import SchemaLoaderXML
from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki
from hed.schema.schema_io.owl2schema import SchemaLoaderOWL
# from hed.schema.schema_io.owl2schema import SchemaLoaderOWL
from hed.schema import hed_cache

from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema.schema_io import schema_util
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.schema_validation_util import validate_version_string
from collections import defaultdict
from hed.schema.schema_io.owl_constants import ext_to_format
# from hed.schema.schema_io.owl_constants import ext_to_format
from urllib.error import URLError

MAX_MEMORY_CACHE = 40
Expand All @@ -26,7 +26,6 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
schema_string (str): An XML, mediawiki, or OWL file as a single long string
schema_format (str): The schema format of the source schema string.
Allowed normal values: .mediawiki, .xml
Allowed owl values: xml, owl, pretty-xml, turtle (or any other value rdflib supports)
schema_namespace (str, None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
Expand Down Expand Up @@ -54,9 +53,9 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema, name=name)
elif schema_format.endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name)
elif schema_format:
hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format,
name=name)
# elif schema_format:
# hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format,
# name=name)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name)

Expand All @@ -65,18 +64,14 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche
return hed_schema


def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, name=None):
def load_schema(hed_path, schema_namespace=None, schema=None, name=None):
""" Load a schema from the given file or URL path.

Parameters:
hed_path (str): A filepath or url to open a schema from.
schema_namespace (str or None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same value.
file_format(str or None): Required for owl formatted files other than the following:
.ttl: turtle
.owl: xml
.json-ld: json-ld
name(str or None): User supplied identifier for this schema

Returns:
Expand All @@ -100,10 +95,10 @@ def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None,
except URLError as e:
raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e
hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name)
elif ext in ext_to_format:
hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name)
elif file_format:
hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name)
# elif ext in ext_to_format:
# hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name)
# elif file_format:
# hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name)
elif hed_path.lower().endswith(".xml"):
hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name)
elif hed_path.lower().endswith(".mediawiki"):
Expand Down
4 changes: 3 additions & 1 deletion hed/schema/hed_schema_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def _finalize_section(self, hed_schema):


class HedSchemaUnitSection(HedSchemaSection):
"""The schema section containing units."""
def _check_if_duplicate(self, name_key, new_entry):
"""We need to mark duplicate units (units with unitSymbol are case sensitive, while others are not)."""
if not new_entry.has_attribute(HedKey.UnitSymbol):
Expand All @@ -157,6 +158,7 @@ def _check_if_duplicate(self, name_key, new_entry):


class HedSchemaUnitClassSection(HedSchemaSection):
"""The schema section containing unit classes."""
def _check_if_duplicate(self, name_key, new_entry):
"""Allow adding units to existing unit classes, using a placeholder one with no attributes."""
if name_key in self and len(new_entry.attributes) == 1 \
Expand All @@ -166,7 +168,7 @@ def _check_if_duplicate(self, name_key, new_entry):


class HedSchemaTagSection(HedSchemaSection):
""" A section of the schema. """
"""The schema section containing all tags."""

def __init__(self, *args, case_sensitive=False, **kwargs):
super().__init__(*args, **kwargs, case_sensitive=case_sensitive)
Expand Down
Loading
Loading