Skip to content

Commit

Permalink
Merge pull request #670 from IanCa/develop
Browse files Browse the repository at this point in the history
Rewrite column mapper, add more unit tests for it
  • Loading branch information
VisLab committed May 30, 2023
2 parents 813d3de + ef53a87 commit 4f85345
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 266 deletions.
36 changes: 25 additions & 11 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ def val_error_no_value(tag):


@hed_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN, default_severity=ErrorSeverity.WARNING)
def val_error_missing_column(column_name):
return f"Required column '{column_name}' not specified or found in file."
def val_error_missing_column(column_name, column_type):
return f"Required {column_type} column '{column_name}' not specified or found in file."


@hed_error(ValidationErrors.HED_UNKNOWN_COLUMN, default_severity=ErrorSeverity.WARNING)
Expand All @@ -117,19 +117,33 @@ def val_error_extra_column(column_name):
"or identified in sidecars."


@hed_error(ValidationErrors.HED_BLANK_COLUMN, default_severity=ErrorSeverity.WARNING)
def val_error_hed_blank_column(column_number):
    """Build the message for a column that has no name.

    Parameters:
        column_number: Identifier of the unnamed column; interpolated as-is.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    message = f"Column number {column_number} has no column name"
    return message
@hed_error(ValidationErrors.SIDECAR_AND_OTHER_COLUMNS)
def val_error_sidecar_with_column(column_names):
    """Build the message for combining a sidecar with tag or prefix columns.

    Parameters:
        column_names: The offending columns; interpolated into the text as-is.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    message = "You cannot use a sidecar and tag or prefix columns at the same time. "
    message += f"Found {column_names}."
    return message


@hed_error(ValidationErrors.DUPLICATE_COLUMN_IN_LIST)
def val_error_duplicate_clumn(column_number, column_name, list_name):
    """Build the message for a column that appears twice in one list.

    Parameters:
        column_number: Index/number of the repeated column.
        column_name: Name of the repeated column; when falsy, the message
            refers to the column by number only.
        list_name: Label of the list in which the repeat was found.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    # NOTE(review): the function name is misspelled ("clumn") and the same name
    # is reused by the DUPLICATE_COLUMN_BETWEEN_SOURCES handler in this file,
    # so the later definition rebinds the module-level name. Left unchanged to
    # keep the interface stable - confirm nothing refers to it by name.
    if not column_name:
        return (
            f"Found column number {column_number} twice in {list_name}. "
            f"This isn't a major concern, but does indicate a mistake."
        )
    return f"Found column '{column_name}' at index {column_number} twice in {list_name}."

@hed_error(ValidationErrors.HED_DUPLICATE_COLUMN, default_severity=ErrorSeverity.WARNING)
def val_error_hed_duplicate_column(column_name):
    """Build the message for several columns sharing one name.

    Parameters:
        column_name: The repeated column name; interpolated as-is.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    return (
        f"Multiple columns have name {column_name}. "
        "This is not a fatal error, but discouraged."
    )

@hed_error(ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES)
def val_error_duplicate_clumn(column_number, column_name, list_names):
    """Build the message for a column that appears in more than one input.

    Parameters:
        column_number: Index/number of the repeated column.
        column_name: Name of the repeated column; when falsy, the message
            refers to the column by number only.
        list_names: The inputs in which the column was found; interpolated
            into the text as-is.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    # NOTE(review): the function name is misspelled ("clumn") and is shared
    # with the DUPLICATE_COLUMN_IN_LIST handler in this file; whichever
    # definition comes later owns the module-level name. Left unchanged to
    # keep the interface stable - confirm nothing refers to it by name.
    if column_name:
        subject = f"column '{column_name}' at index {column_number}"
    else:
        subject = f"column number {column_number}"
    return f"Found {subject} in the following inputs: {list_names}. Each entry must be unique."

@hed_error(ValidationErrors.DUPLICATE_NAME_NUMBER_COLUMN, default_severity=ErrorSeverity.WARNING)
def val_error_hed_duplicate_column_number(column_name, column_number):
    """Build the message for a column added both by name and by number.

    Parameters:
        column_name: Name under which the column was added.
        column_number: Number under which the same column was added again.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    description = f"Column '{column_name}' added as a named column, "
    description += f"then also as numbered column {column_number}"
    return description

@hed_error(ValidationErrors.HED_BLANK_COLUMN, default_severity=ErrorSeverity.WARNING)
def val_error_hed_blank_column(column_number):
    """Build the message for a column that has no name.

    Parameters:
        column_number: Identifier of the unnamed column; interpolated as-is.

    Returns:
        The message text produced by this function body (the hed_error
        decorator, defined elsewhere, may post-process it).
    """
    message = f"Column number {column_number} has no column name"
    return message


@hed_tag_error(ValidationErrors.HED_LIBRARY_UNMATCHED, actual_code=ValidationErrors.TAG_PREFIX_INVALID)
Expand Down
6 changes: 4 additions & 2 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ class ValidationErrors:

HED_MISSING_REQUIRED_COLUMN = "HED_MISSING_REQUIRED_COLUMN"
HED_UNKNOWN_COLUMN = "HED_UNKNOWN_COLUMN"
HED_DUPLICATE_COLUMN = "HED_DUPLICATE_COLUMN"
DUPLICATE_NAME_NUMBER_COLUMN = "DUPLICATE_NAME_NUMBER_COLUMN"
SIDECAR_AND_OTHER_COLUMNS = "SIDECAR_AND_OTHER_COLUMNS"

DUPLICATE_COLUMN_IN_LIST = "DUPLICATE_COLUMN_IN_LIST"
DUPLICATE_COLUMN_BETWEEN_SOURCES = "DUPLICATE_COLUMN_BETWEEN_SOURCES"
HED_BLANK_COLUMN = "HED_BLANK_COLUMN"

# Below here shows what the given error maps to
Expand Down
2 changes: 1 addition & 1 deletion hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file)

# todo: Can we get rid of this behavior now that we're using pandas?
column_issues = ColumnMapper.validate_column_map(self.columns, allow_blank_names=allow_blank_names)
column_issues = ColumnMapper.check_for_blank_names(self.columns, allow_blank_names=allow_blank_names)
if column_issues:
raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.",
self.name, issues=column_issues)
Expand Down
Loading

0 comments on commit 4f85345

Please sign in to comment.