From 777ca680bcfcac2a0ca80ed7cc1280a4001738e9 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 24 Oct 2023 19:24:26 -0500 Subject: [PATCH] Improve coverage/fix minor bugs in sidecar validator --- hed/schema/schema_attribute_validators.py | 2 +- hed/validator/sidecar_validator.py | 68 +++++++++++++---------- tests/validator/test_sidecar_validator.py | 36 ++++++++++++ 3 files changed, 75 insertions(+), 31 deletions(-) diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index 0ccb9c33..f53157bb 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -209,7 +209,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name): library = tag_entry.attributes.get(attribute_name, "") if hed_schema.library != library: - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, tag_entry.name, library) return issues diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 2a6f2209..1db14a23 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -180,20 +180,30 @@ def _find_non_matching_braces(hed_string): @staticmethod def _check_for_key(key, data): + # Probably can be cleaned up more -> Return True if any data or subdata is key if isinstance(data, dict): - if key in data: - return bool(data[key]) - else: - for sub_data in data.values(): - result = SidecarValidator._check_for_key(key, sub_data) - if result is not None: - return result + return SidecarValidator._check_dict(key, data) elif isinstance(data, list): - for sub_data in data: - result = SidecarValidator._check_for_key(key, sub_data) - if result is not None: - return result - return None + return SidecarValidator._check_list(key, data) + return False + + @staticmethod + def _check_dict(key, data_dict): + if key in data_dict: + return True + for sub_data in data_dict.values(): + if SidecarValidator._check_for_key(key, sub_data): + return True + return False + + @staticmethod + def _check_list(key, data_list): + for sub_data in data_list: + if sub_data == key: + return True + if SidecarValidator._check_for_key(key, sub_data): + return True + return False def _validate_column_structure(self, column_name, dict_for_entry, error_handler): """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. @@ -219,27 +229,25 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler) if found_hed: val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) elif column_type == ColumnType.Categorical: - raw_hed_dict = dict_for_entry["HED"] - if not raw_hed_dict: - val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) - if not isinstance(raw_hed_dict, dict): - val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(raw_hed_dict), - expected_type="dict") - for key_name, hed_string in raw_hed_dict.items(): - error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - if not isinstance(hed_string, str): - val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(hed_string), - expected_type="str") - if not hed_string: - val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) - if key_name in self.reserved_category_values: - val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) - error_handler.pop_error_context() + val_issues += self._validate_categorical_column(column_name, dict_for_entry, error_handler) return val_issues + def _validate_categorical_column(self, column_name, dict_for_entry, error_handler): + """Validates a categorical column in a json sidecar.""" + val_issues = [] + raw_hed_dict = dict_for_entry["HED"] + if not raw_hed_dict: + val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) + for key_name, hed_string in raw_hed_dict.items(): + error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) + if not hed_string: + val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) + if key_name in self.reserved_category_values: + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) + error_handler.pop_error_context() + return val_issues + def _validate_pound_sign_count(self, hed_string, column_type): """ Check if a given hed string in the column has the correct number of pound signs. diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py index 84ae8a2f..2c13897d 100644 --- a/tests/validator/test_sidecar_validator.py +++ b/tests/validator/test_sidecar_validator.py @@ -64,3 +64,39 @@ def test_malformed_braces(self): self.assertEqual(len(issues), error_count) + + def test_bad_structure_na(self): + sidecar_with_na_json = ''' +{ + "column3": { + "HED": { + "cat1": "Event", + "n/a": "Description/invalid category name" + } + } +} +''' + sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 1) + + def test_bad_structure_HED_in_ignored(self): + sidecar_with_na_json = ''' + { + "column3": { + "other": { + "HED": "Event", + "n/a": "Description/invalid category name" + } + }, + "HED": { + + }, + "OtherBad": { + "subbad": ["thing1", "HED", "Other"] + } + } + ''' + sidecar = Sidecar(io.StringIO(sidecar_with_na_json)) + issues = sidecar.validate(self.hed_schema) + self.assertEqual(len(issues), 3) \ No newline at end of file