From 696e7fe109241e7802da3785a341abe17e8b6290 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 18 Aug 2022 13:06:07 -0400 Subject: [PATCH 001/124] Define ValidationResult class --- dandi/validate.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 57d8dd285..41ceb0cca 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum from pathlib import Path from typing import Iterator, List, Optional, Tuple, Union @@ -5,7 +9,34 @@ from .files import find_dandi_files -# TODO: provide our own "errors" records, which would also include warnings etc + +@dataclass +class ValidationResult: + origin: ValidationOrigin + severity: Severity + id: str + scope: Scope + path: Path + message: str + dataset_path: Optional[Path] + asset_paths: Optional[list[str]] = None + + +@dataclass +class ValidationOrigin: + name: str + version: str + + +class Severity(Enum): + ERROR = "ERROR" + WARNING = "WARNING" + HINT = "HINT" + + +class Scope(Enum): + FILE = "file" + DANDISET = "dandiset" def validate_bids( From 037dc5eaebf8c2158338f9d96307c7c1eb96f0d9 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 26 Aug 2022 16:52:19 -0400 Subject: [PATCH 002/124] RF: Initial steps to start RFing validate-bids to use ValidationResult records --- dandi/bids_utils.py | 46 ++++++--------------------------------- dandi/cli/cmd_validate.py | 14 +++++++----- dandi/files/bids.py | 1 + dandi/validate.py | 43 ++++++++++++++++++++++++++++++++++-- setup.cfg | 2 +- 5 files changed, 59 insertions(+), 47 deletions(-) diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py index 489599df0..cf09f6ebf 100644 --- a/dandi/bids_utils.py +++ b/dandi/bids_utils.py @@ -1,46 +1,14 @@ from .utils import pluralize +from .validate import ValidationResult -def is_valid( - validation_result: dict, - 
allow_invalid_filenames: bool = False, - allow_missing_files: bool = False, -) -> bool: - """Determine whether a dataset validation result marks it as valid. - - Parameters - ---------- - validation_result: dict - Dictionary as returned by `dandi.support.bids.validator.validate_bids()`. - allow_missing_files: bool, optional - Whether to consider the dataset invalid if any mandatory files are not present. - allow_invalid_filenames: bool, optional - Whether to consider the dataset invalid if any filenames inside are invalid. - - Returns - ------- - bool: whether the dataset validation result marks it as valid. - - """ - - if allow_invalid_filenames and allow_missing_files: - return True - missing_files = [ - i["regex"] for i in validation_result["schema_tracking"] if i["mandatory"] - ] - invalid_filenames = validation_result["path_tracking"] - - if missing_files and not allow_missing_files: - return False - if invalid_filenames and not allow_invalid_filenames: - return False - else: - return True - - -def report_errors( - validation_result: dict, +def print_validation_results( + validation_result: list[ValidationResult], + # TODO: options for control + # - either report warnings, hints, ... 
+ # - either report groupped by severity, record.id ) -> None: + raise NotImplementedError("TODO: RF to use ValidationResult records") import click missing_files = [ diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 52325a5ec..13776edb4 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -36,19 +36,23 @@ def validate_bids( dandi validate-bids /my/path """ - from ..bids_utils import is_valid, report_errors + from ..bids_utils import print_validation_results + from ..validate import Severity from ..validate import validate_bids as validate_bids_ - validator_result = validate_bids_( + validator_result = validate_bids_( # Controller *paths, report=report, report_path=report_path, schema_version=schema, ) - valid = is_valid(validator_result) - report_errors(validator_result) - if not valid: + if validator_result: + print_validation_results(validator_result) # View + + validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] + + if validation_errors: raise SystemExit(1) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 9207c98f3..e6996345a 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -66,6 +66,7 @@ def _validate(self) -> None: bids_paths = [str(self.filepath)] + [ str(asset.filepath) for asset in self.dataset_files ] + # TODO: use RFed data structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] if len(results["path_listing"]) == len(results["path_tracking"]): diff --git a/dandi/validate.py b/dandi/validate.py index 41ceb0cca..c6cb97928 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -44,7 +44,7 @@ def validate_bids( schema_version: Optional[str] = None, report: bool = False, report_path: str = "", -) -> dict: +) -> list[ValidationResult]: """Validate BIDS paths. Parameters @@ -67,6 +67,7 @@ def validate_bids( patterns. 
""" + import bidsschematools from bidsschematools.validator import validate_bids as validate_bids_ if report and not report_path: @@ -80,7 +81,45 @@ def validate_bids( schema_version=schema_version, report_path=report_path, ) - return dict(validation_result) + our_validation_result = [] + origin = ValidationOrigin( + name="bidsschematools", + version=bidsschematools.__version__, + ) + for path in validation_result["path_tracking"]: + our_validation_result.append( + ValidationResult( + origin=origin, + severity=Severity.ERROR, + id="BIDS.WRONG_PATH_TODO", + scope=Scope.FILE, + path=path, + message="TODO", + # TODO - discover dandiset or actually BIDS dataset + # might want separate the two + # dandiset_path="TODO", # might contain multiple datasets + # dataset_path="TODO", # BIDS dataset in this case + # asset_paths: Optional[list[str]] = None + ) + ) + + for pattern in validation_result["schema_tracking"]: + if pattern["mandatory"]: # TODO: future proof if gets renamed to required + our_validation_result.append( + ValidationResult( + origin=origin, + severity=Severity.ERROR, + id="BIDS.MANDATORY_FILE_MISSING", # we decided to generalize, and not have + scope=Scope.FILE, + message="TODO", + # TODO - discover dandiset or actually BIDS dataset + # might want separate the two + # dandiset_path="TODO", # might contain multiple datasets + # dataset_path="TODO", # BIDS dataset in this case + # asset_paths: Optional[list[str]] = None + ) + ) + return our_validation_result def validate( diff --git a/setup.cfg b/setup.cfg index 0de477174..270525efb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ project_urls = python_requires = >=3.7 install_requires = appdirs - bidsschematools + bidsschematools ~= 0.4.0 click click-didyoumean dandischema ~= 0.7.0 From 3bc01ba63b02889b6558489a1baa06736d880bf1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 17:45:39 -0400 Subject: [PATCH 003/124] Improved class usage and added test suite --- 
dandi/tests/fixtures.py | 3 +++ dandi/tests/test_validate.py | 28 ++++++++++++++++++++++++++++ dandi/validate.py | 34 ++++++++++++++++++++++++++++------ 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 142b6fb91..eab945083 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -223,6 +223,9 @@ def fixture() -> Iterator[str]: nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") bids_examples = get_gitrepo_fixture("https://github.com/dandi/bids-examples") +bids_error_examples = get_gitrepo_fixture( + "https://github.com/bids-standard/bids-error-examples" +) LOCAL_DOCKER_DIR = Path(__file__).with_name("data") / "dandiarchive-docker" LOCAL_DOCKER_ENV = LOCAL_DOCKER_DIR.name diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 6d854aec2..a639696b6 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -2,6 +2,7 @@ import os import appdirs +import pytest def test_validate_bids(bids_examples, tmp_path): @@ -33,3 +34,30 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. assert len(glob(report_path)) == 1 + + +@pytest.mark.parametrize( + "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] +) +def test_validate_bids_errors(bids_error_examples, dataset): + # This only checks that the error we found is correct, not that we found all errors. + # ideally make a list and erode etc. 
+ import json + + from ..validate import validate_bids + + selected_dataset = os.path.join(bids_error_examples, dataset) + validation_result = validate_bids(selected_dataset, report=True) + with open(os.path.join(selected_dataset, ".ERRORS.json")) as f: + expected_errors = json.load(f) + for i in validation_result: + error_id = i.id + if i.path: + error_path = i.path + relative_error_path = os.path.relpath(error_path, i.dataset_path) + assert ( + relative_error_path + in expected_errors[error_id.lstrip("BIDS.")]["scope"] + ) + else: + assert i.id.lstrip("BIDS.") in expected_errors.keys() diff --git a/dandi/validate.py b/dandi/validate.py index c6cb97928..68e7e2dcd 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -16,9 +16,9 @@ class ValidationResult: severity: Severity id: str scope: Scope - path: Path message: str dataset_path: Optional[Path] + path: Optional[Path] = None asset_paths: Optional[list[str]] = None @@ -87,14 +87,22 @@ def validate_bids( version=bidsschematools.__version__, ) for path in validation_result["path_tracking"]: + # Hard-coding exclusion here pending feature + release in: + # https://github.com/bids-standard/bids-specification/issues/1272 + if path.endswith((".ERRORS", ".ERRORS.json")): + continue + dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( + dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, - id="BIDS.WRONG_PATH_TODO", + # For schema-integrated error code discussion, see: + # https://github.com/bids-standard/bids-specification/issues/1262 + id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, path=path, - message="TODO", + message=f"The `{path}` file does not match any pattern known to BIDS.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets @@ -104,14 +112,21 @@ def validate_bids( ) for pattern in validation_result["schema_tracking"]: - if pattern["mandatory"]: # TODO: 
future proof if gets renamed to required + # Future proofing for standard-compliant name. + if ("mandatory" in pattern and pattern["mandatory"]) or ( + "required" in pattern and pattern["required"] + ): + dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( + dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, - id="BIDS.MANDATORY_FILE_MISSING", # we decided to generalize, and not have + # For schema-integrated error code discussion, see: + # https://github.com/bids-standard/bids-specification/issues/1262 + id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, - message="TODO", + message=f'Missing file matching the following pattern: `{pattern["regex"]}`', # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets @@ -122,6 +137,13 @@ def validate_bids( return our_validation_result +def _get_dataset_path(file_path, datasets): + # This logic might break with nested datasets. 
+ for dataset in datasets: + if dataset in file_path: + return dataset + + def validate( *paths: str, schema_version: Optional[str] = None, From 43ab1f511074866878249f8e52717f0697cfe5a6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:22:08 -0400 Subject: [PATCH 004/124] Debugging --- dandi/files/bids.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index e6996345a..f399a8f38 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -66,11 +66,13 @@ def _validate(self) -> None: bids_paths = [str(self.filepath)] + [ str(asset.filepath) for asset in self.dataset_files ] - # TODO: use RFed data structures, avoid duplicating logic + # TODO gh-943: use RFed data structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] - if len(results["path_listing"]) == len(results["path_tracking"]): - self._dataset_errors.append("No valid BIDS files were found") + # TODO gh-943: Should we add valid files to Validation result? + # This specific check seems difficult to implement. + # if len(results["path_listing"]) == len(results["path_tracking"]): + # self._dataset_errors.append("No valid BIDS files were found") for entry in results["schema_tracking"]: if entry["mandatory"]: self._dataset_errors.append( From 1f88e4eeb9e4f4a12b268f30cee8ede03c99610a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:25:07 -0400 Subject: [PATCH 005/124] Debugging out of place cli invocation error --- dandi/tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index a639696b6..18653df12 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -17,7 +17,7 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. 
pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + log_dir = appdirs.user_log_dir("dandi") report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") assert len(glob(report_expression)) == 1 From 8e8e4b722084ff39a9b0b794f27b509e92a34fd2 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:43:17 -0400 Subject: [PATCH 006/124] Starting to deprecate `print_validation_results()` --- dandi/cli/cmd_validate.py | 10 +++++++--- dandi/cli/tests/test_cmd_validate.py | 5 +++-- dandi/validate.py | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 13776edb4..5a952f289 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -36,7 +36,7 @@ def validate_bids( dandi validate-bids /my/path """ - from ..bids_utils import print_validation_results + # from ..bids_utils import print_validation_results from ..validate import Severity from ..validate import validate_bids as validate_bids_ @@ -47,8 +47,12 @@ def validate_bids( schema_version=schema, ) - if validator_result: - print_validation_results(validator_result) # View + for i in validator_result: + click.secho( + i.message, + bold=True, + fg="red", + ) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 228508e08..3ad3a7882 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -7,9 +7,10 @@ def test_validate_bids_error(bids_examples): from ..cmd_validate import validate_bids expected_error = ( - "Summary: 1 filename pattern required by BIDS could not be found " - "and 1 filename did not match any pattern known to BIDS.\n" + "File does not match any pattern known to BIDS.\n" + "BIDS-required file is not present.\n" ) + broken_dataset = os.path.join(bids_examples, "invalid_pet001") r = 
CliRunner().invoke(validate_bids, [broken_dataset]) # Does it break? diff --git a/dandi/validate.py b/dandi/validate.py index 68e7e2dcd..65e507867 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -102,7 +102,7 @@ def validate_bids( id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, path=path, - message=f"The `{path}` file does not match any pattern known to BIDS.", + message="File does not match any pattern known to BIDS.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets @@ -126,7 +126,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, - message=f'Missing file matching the following pattern: `{pattern["regex"]}`', + message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets From 85612152b57de23b1d09cdc0afb0d466c8542de4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:47:32 -0400 Subject: [PATCH 007/124] comment for review --- dandi/validate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dandi/validate.py b/dandi/validate.py index 65e507867..8b6456357 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -18,6 +18,8 @@ class ValidationResult: scope: Scope message: str dataset_path: Optional[Path] + # TODO gh-943: should this be relative to `dataset_path`? 
+ # would make writing tests with tmp paths a lot easier :3 path: Optional[Path] = None asset_paths: Optional[list[str]] = None From f082382b6fcf8d8dc225f330b054f28f3603e899 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 21:08:32 -0400 Subject: [PATCH 008/124] Incremental deprecation of direct upstream output format --- dandi/files/bids.py | 33 +++++++++++++++++---------------- dandi/validate.py | 2 ++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index f399a8f38..687741826 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -69,25 +69,26 @@ def _validate(self) -> None: # TODO gh-943: use RFed data structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] - # TODO gh-943: Should we add valid files to Validation result? - # This specific check seems difficult to implement. - # if len(results["path_listing"]) == len(results["path_tracking"]): - # self._dataset_errors.append("No valid BIDS files were found") - for entry in results["schema_tracking"]: - if entry["mandatory"]: + self._asset_errors = defaultdict(list) + for i in results: + if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER": + bids_path = Path(i.path).relative_to(self.bids_root).as_posix() + self._dataset_errors.append( + f"The `{bids_path}` file was not matched by any regex schema entry." + ) + self._asset_errors[bids_path].append( + "File not matched by any regex schema entry" + ) + elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER": self._dataset_errors.append( - f"The `{entry['regex']}` regex pattern file" + f"The `{i.regex}` regex pattern file" " required by BIDS was not found." ) - self._asset_errors = defaultdict(list) - for path in results["path_tracking"]: - bids_path = Path(path).relative_to(self.bids_root).as_posix() - self._dataset_errors.append( - f"The `{bids_path}` file was not matched by any regex schema entry." 
- ) - self._asset_errors[bids_path].append( - "File not matched by any regex schema entry" - ) + + # TODO gh-943: Should we add valid files to Validation result? + # The following checks seem difficult to implement. + if len(results["path_listing"]) == len(results["path_tracking"]): + self._dataset_errors.append("No valid BIDS files were found") self._asset_metadata = defaultdict(dict) for meta in results["match_listing"]: bids_path = ( diff --git a/dandi/validate.py b/dandi/validate.py index 8b6456357..87fe680ec 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -21,6 +21,7 @@ class ValidationResult: # TODO gh-943: should this be relative to `dataset_path`? # would make writing tests with tmp paths a lot easier :3 path: Optional[Path] = None + regex: Optional[str] = None asset_paths: Optional[list[str]] = None @@ -128,6 +129,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, + regex=pattern["regex"], message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two From dd77af20745ac133bc7a5044ffeeca402b3787d5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Sep 2022 19:13:03 -0400 Subject: [PATCH 009/124] Review comments --- dandi/files/bids.py | 4 +++- dandi/validate.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 687741826..25168c55e 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -70,6 +70,7 @@ def _validate(self) -> None: results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] self._asset_errors = defaultdict(list) + # TODO gh-943: rename regex maybe and make sure you use messages from class. 
for i in results: if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER": bids_path = Path(i.path).relative_to(self.bids_root).as_posix() @@ -81,12 +82,13 @@ def _validate(self) -> None: ) elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER": self._dataset_errors.append( - f"The `{i.regex}` regex pattern file" + f"The `{i.path_regex}` regex pattern file" " required by BIDS was not found." ) # TODO gh-943: Should we add valid files to Validation result? # The following checks seem difficult to implement. + # Reimplement this at the object level, Validator Result does not have to be one per file. if len(results["path_listing"]) == len(results["path_tracking"]): self._dataset_errors.append("No valid BIDS files were found") self._asset_metadata = defaultdict(dict) diff --git a/dandi/validate.py b/dandi/validate.py index 87fe680ec..7286aa1ab 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -18,10 +18,13 @@ class ValidationResult: scope: Scope message: str dataset_path: Optional[Path] + # TODO gh-943: add dandiset_path as attribute (optional). + dataset_path: Optional[Path] # TODO gh-943: should this be relative to `dataset_path`? # would make writing tests with tmp paths a lot easier :3 + # keep it absolute!!! 
path: Optional[Path] = None - regex: Optional[str] = None + path_regex: Optional[str] = None asset_paths: Optional[list[str]] = None @@ -129,7 +132,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, - regex=pattern["regex"], + path_regex=pattern["regex"], message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two From aefb337f15fd6a52799d6b7896fa7b957b3b7e36 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 6 Sep 2022 13:35:12 -0400 Subject: [PATCH 010/124] Fixed test log path --- dandi/tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 18653df12..302c23ec8 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -17,7 +17,7 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. 
pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi") + log_dir = appdirs.user_log_dir("dandi-cli") report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") assert len(glob(report_expression)) == 1 From 1c8ddd3ff7acda416c616f15ceaaf33cd991c18d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 10:39:27 -0400 Subject: [PATCH 011/124] BIDS root detection --- dandi/validate.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 7286aa1ab..28dd9e6cf 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -97,7 +97,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1272 if path.endswith((".ERRORS", ".ERRORS.json")): continue - dataset_path = _get_dataset_path(path, paths) + dataset_path = _get_set_path(path, "dataset_description.json") our_validation_result.append( ValidationResult( dataset_path=dataset_path, @@ -122,7 +122,9 @@ def validate_bids( if ("mandatory" in pattern and pattern["mandatory"]) or ( "required" in pattern and pattern["required"] ): - dataset_path = _get_dataset_path(path, paths) + # We don't have a path for this so we'll need some external logic to make sure + # that the dataset path is populated. + # dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( dataset_path=dataset_path, @@ -144,11 +146,27 @@ def validate_bids( return our_validation_result -def _get_dataset_path(file_path, datasets): - # This logic might break with nested datasets. - for dataset in datasets: - if dataset in file_path: - return dataset +def _get_set_path(in_path, marker): + """Get the path to the root of a file set (e.g. BIDS dataset or DANDIset). + + Parameters + ---------- + in_path : str, optional + File for which to determine set path. + marker : str, optional + Filename marker which identifies the set root. 
+ """ + import os + + candidate = os.path.join(in_path, marker) + # Windows support... otherwise we could do `if in_path == "/"`. + if in_path == "/" or not any(i in in_path for i in ["/", "\\"]): + return None + if os.path.isfile(candidate): + return candidate + else: + level_up = os.path.dirname(in_path.rstrip("/\\")) + return _get_set_path(level_up, marker) def validate( From cb6d4944a1775067a11fac5c6b7456fb6ac5eccd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 12:04:25 -0400 Subject: [PATCH 012/124] Assigning dandiset and (BIDS) dataset path --- dandi/validate.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 28dd9e6cf..1c358efd5 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -17,12 +17,12 @@ class ValidationResult: id: str scope: Scope message: str - dataset_path: Optional[Path] # TODO gh-943: add dandiset_path as attribute (optional). - dataset_path: Optional[Path] # TODO gh-943: should this be relative to `dataset_path`? # would make writing tests with tmp paths a lot easier :3 # keep it absolute!!! 
+ dandiset_path: Optional[Path] = None + dataset_path: Optional[Path] = None path: Optional[Path] = None path_regex: Optional[str] = None asset_paths: Optional[list[str]] = None @@ -98,9 +98,11 @@ def validate_bids( if path.endswith((".ERRORS", ".ERRORS.json")): continue dataset_path = _get_set_path(path, "dataset_description.json") + dandiset_path = _get_set_path(path, "dandiset.yaml") our_validation_result.append( ValidationResult( dataset_path=dataset_path, + dandiset_path=dandiset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -127,7 +129,7 @@ def validate_bids( # dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( - dataset_path=dataset_path, + #dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -163,7 +165,7 @@ def _get_set_path(in_path, marker): if in_path == "/" or not any(i in in_path for i in ["/", "\\"]): return None if os.path.isfile(candidate): - return candidate + return os.path.dirname(candidate) else: level_up = os.path.dirname(in_path.rstrip("/\\")) return _get_set_path(level_up, marker) From e2b21ed37b805d7596368c2e2181707d762608da Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 12:05:48 -0400 Subject: [PATCH 013/124] removed comment lines --- dandi/validate.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 1c358efd5..7df73cb3e 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -101,8 +101,6 @@ def validate_bids( dandiset_path = _get_set_path(path, "dandiset.yaml") our_validation_result.append( ValidationResult( - dataset_path=dataset_path, - dandiset_path=dandiset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -113,9 +111,9 @@ def validate_bids( message="File does not match any pattern known to BIDS.", # TODO - discover dandiset 
or actually BIDS dataset # might want separate the two - # dandiset_path="TODO", # might contain multiple datasets - # dataset_path="TODO", # BIDS dataset in this case # asset_paths: Optional[list[str]] = None + dataset_path=dataset_path, + dandiset_path=dandiset_path, ) ) @@ -129,7 +127,6 @@ def validate_bids( # dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( - #dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -140,9 +137,9 @@ def validate_bids( message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two - # dandiset_path="TODO", # might contain multiple datasets - # dataset_path="TODO", # BIDS dataset in this case # asset_paths: Optional[list[str]] = None + #dataset_path=dataset_path, + #dandiset_path=dandiset_path, ) ) return our_validation_result From 8af720a42f08e8d6c855cfcbce9a506fc563e327 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 16:26:38 -0400 Subject: [PATCH 014/124] Integrated metadata recording --- dandi/files/bids.py | 51 +++++++++---------------------- dandi/tests/test_validate.py | 2 ++ dandi/validate.py | 59 ++++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 59 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 25168c55e..20cc5ffd9 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -15,10 +15,12 @@ from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest -BIDS_TO_DANDI = { - "subject": "subject_id", - "session": "session_id", -} +BIDS_ASSET_ERRORS = [ + "BIDS.NON_BIDS_PATH_PLACEHOLDER", + ] +BIDS_DATASET_ERRORS = [ + "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", + ] @dataclass @@ -66,43 +68,18 @@ def _validate(self) -> None: bids_paths = [str(self.filepath)] + [ str(asset.filepath) for asset in self.dataset_files ] - # TODO gh-943: use RFed data 
structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] self._asset_errors = defaultdict(list) - # TODO gh-943: rename regex maybe and make sure you use messages from class. - for i in results: - if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER": - bids_path = Path(i.path).relative_to(self.bids_root).as_posix() - self._dataset_errors.append( - f"The `{bids_path}` file was not matched by any regex schema entry." - ) - self._asset_errors[bids_path].append( - "File not matched by any regex schema entry" - ) - elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER": - self._dataset_errors.append( - f"The `{i.path_regex}` regex pattern file" - " required by BIDS was not found." - ) - - # TODO gh-943: Should we add valid files to Validation result? - # The following checks seem difficult to implement. - # Reimplement this at the object level, Validator Result does not have to be one per file. - if len(results["path_listing"]) == len(results["path_tracking"]): - self._dataset_errors.append("No valid BIDS files were found") self._asset_metadata = defaultdict(dict) - for meta in results["match_listing"]: - bids_path = ( - Path(meta.pop("path")).relative_to(self.bids_root).as_posix() - ) - meta = { - BIDS_TO_DANDI[k]: v - for k, v in meta.items() - if k in BIDS_TO_DANDI - } - # meta["bids_schema_version"] = results["bids_schema_version"] - self._asset_metadata[bids_path] = prepare_metadata(meta) + for i in results: + if i.id in BIDS_ASSET_ERRORS: + self._asset_errors[i.path].append(i.message) + elif i.id in BIDS_DATASET_ERRORS: + self._dataset_errors.append(i.message) + elif i.id == "BIDS.MATCH": + bids_path = Path(i.path.relative_to(self.bids_root).as_posix()) + self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 302c23ec8..f0cdc1a80 100644 --- 
a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -51,6 +51,8 @@ def test_validate_bids_errors(bids_error_examples, dataset): with open(os.path.join(selected_dataset, ".ERRORS.json")) as f: expected_errors = json.load(f) for i in validation_result: + if i.id == "BIDS.MATCH": + continue error_id = i.id if i.path: error_path = i.path diff --git a/dandi/validate.py b/dandi/validate.py index 7df73cb3e..7dbc1d0a4 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -10,22 +10,25 @@ from .files import find_dandi_files +BIDS_TO_DANDI = { + "subject": "subject_id", + "session": "session_id", +} + + @dataclass class ValidationResult: - origin: ValidationOrigin - severity: Severity id: str + origin: ValidationOrigin scope: Scope - message: str - # TODO gh-943: add dandiset_path as attribute (optional). - # TODO gh-943: should this be relative to `dataset_path`? - # would make writing tests with tmp paths a lot easier :3 - # keep it absolute!!! + asset_paths: Optional[list[str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None + message: Optional[str] = "" + metadata: Optional[dict] = None path: Optional[Path] = None - path_regex: Optional[str] = None - asset_paths: Optional[list[str]] = None + path_regex: Optional[str] = "" + severity: Optional[Severity] = None @dataclass @@ -43,6 +46,7 @@ class Severity(Enum): class Scope(Enum): FILE = "file" DANDISET = "dandiset" + DATASET = "dataset" def validate_bids( @@ -71,6 +75,11 @@ def validate_bids( dict Dictionary reporting required patterns not found and existing filenames not matching any patterns. 
+ + Notes + ----- + * Eventually this should be migrated to BIDS schema specified errors, see discussion here: + https://github.com/bids-standard/bids-specification/issues/1262 """ import bidsschematools @@ -92,6 +101,7 @@ def validate_bids( name="bidsschematools", version=bidsschematools.__version__, ) + for path in validation_result["path_tracking"]: # Hard-coding exclusion here pending feature + release in: # https://github.com/bids-standard/bids-specification/issues/1272 @@ -103,15 +113,10 @@ def validate_bids( ValidationResult( origin=origin, severity=Severity.ERROR, - # For schema-integrated error code discussion, see: - # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, path=path, message="File does not match any pattern known to BIDS.", - # TODO - discover dandiset or actually BIDS dataset - # might want separate the two - # asset_paths: Optional[list[str]] = None dataset_path=dataset_path, dandiset_path=dandiset_path, ) @@ -129,19 +134,29 @@ def validate_bids( ValidationResult( origin=origin, severity=Severity.ERROR, - # For schema-integrated error code discussion, see: - # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", - scope=Scope.FILE, + scope=Scope.DATASET, path_regex=pattern["regex"], message="BIDS-required file is not present.", - # TODO - discover dandiset or actually BIDS dataset - # might want separate the two - # asset_paths: Optional[list[str]] = None - #dataset_path=dataset_path, - #dandiset_path=dandiset_path, ) ) + for meta in validation_result["match_listing"]: + file_path = meta.pop("path") + meta = { + BIDS_TO_DANDI[k]: v + for k, v in meta.items() + if k in BIDS_TO_DANDI + } + our_validation_result.append( + ValidationResult( + origin=origin, + id="BIDS.MATCH", + scope=Scope.FILE, + path=file_path, + metadata=meta, + ) + ) + return our_validation_result From ea4e7907e222b2c937a0f567af781587a267e677 Mon Sep 
17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 17:13:11 -0400 Subject: [PATCH 015/124] Processing input path type --- dandi/files/bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 20cc5ffd9..6ffc45bab 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -78,7 +78,7 @@ def _validate(self) -> None: elif i.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": - bids_path = Path(i.path.relative_to(self.bids_root).as_posix()) + bids_path = Path(i.path).relative_to(self.bids_root).as_posix() self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: From db175a2f1471358a29cce54ad8723e96683b1c8e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 17:14:57 -0400 Subject: [PATCH 016/124] Indentation fix for lint check --- dandi/files/bids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 6ffc45bab..d7b9aabaa 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -17,10 +17,10 @@ BIDS_ASSET_ERRORS = [ "BIDS.NON_BIDS_PATH_PLACEHOLDER", - ] +] BIDS_DATASET_ERRORS = [ "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", - ] +] @dataclass From a90c99a7df68b9eb34e73f98d75483b645e2a12e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 13:16:57 -0400 Subject: [PATCH 017/124] Printing output based on severity --- dandi/cli/cmd_validate.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 5a952f289..92c4b3d3b 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -48,11 +48,18 @@ def validate_bids( ) for i in validator_result: - click.secho( - i.message, - bold=True, - fg="red", - ) + if i.severity == Severity.ERROR: + click.secho( + i.message, + bold=True, + fg="red", + ) + 
if i.severity == Severity.WARNING: + click.secho( + i.message, + bold=True, + fg="yellow", + ) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] From 3304b45a8249fe464cee0373d32a1e0158f00a2c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 14:17:23 -0400 Subject: [PATCH 018/124] =?UTF-8?q?Message=20attribute=20is=20empty=20stri?= =?UTF-8?q?ng=20when=20nonexistent=20=E2=80=94=20not=20none?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dandi/validate.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 7dbc1d0a4..9a20ef763 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -9,7 +9,6 @@ from .files import find_dandi_files - BIDS_TO_DANDI = { "subject": "subject_id", "session": "session_id", @@ -24,7 +23,7 @@ class ValidationResult: asset_paths: Optional[list[str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None - message: Optional[str] = "" + message: str = "" metadata: Optional[dict] = None path: Optional[Path] = None path_regex: Optional[str] = "" @@ -142,11 +141,7 @@ def validate_bids( ) for meta in validation_result["match_listing"]: file_path = meta.pop("path") - meta = { - BIDS_TO_DANDI[k]: v - for k, v in meta.items() - if k in BIDS_TO_DANDI - } + meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} our_validation_result.append( ValidationResult( origin=origin, From 2a9377e3ce8d74e94d28264c06a150401ddc61f3 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 14:32:07 -0400 Subject: [PATCH 019/124] Force posix path for windows tests --- dandi/tests/test_validate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index f0cdc1a80..992022013 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -43,6 +43,7 @@ def 
test_validate_bids_errors(bids_error_examples, dataset): # This only checks that the error we found is correct, not that we found all errors. # ideally make a list and erode etc. import json + import pathlib from ..validate import validate_bids @@ -57,6 +58,7 @@ def test_validate_bids_errors(bids_error_examples, dataset): if i.path: error_path = i.path relative_error_path = os.path.relpath(error_path, i.dataset_path) + relative_error_path = pathlib.Path(relative_error_path).as_posix() assert ( relative_error_path in expected_errors[error_id.lstrip("BIDS.")]["scope"] From 6a0875bea99e98fda435a8075c1789d5869ed00d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 15:37:51 -0400 Subject: [PATCH 020/124] Debugging windows tests --- dandi/tests/test_validate.py | 3 ++- dandi/validate.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 992022013..e445ea350 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -17,7 +17,8 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. 
pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli") + log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + # appdirs.user_log_dir("dandi-cli") report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") assert len(glob(report_expression)) == 1 diff --git a/dandi/validate.py b/dandi/validate.py index 9a20ef763..dd3aa214f 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -90,6 +90,7 @@ def validate_bids( report_path = report_path.format( log_dir=log_dir, ) + validation_result = validate_bids_( paths, schema_version=schema_version, From 4f248b966abb2b3d72f5ab352b2d7195cb3fa7e5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 16:53:08 -0400 Subject: [PATCH 021/124] Typing fixes --- dandi/files/bids.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d7b9aabaa..36d1b834f 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -74,12 +74,14 @@ def _validate(self) -> None: self._asset_metadata = defaultdict(dict) for i in results: if i.id in BIDS_ASSET_ERRORS: - self._asset_errors[i.path].append(i.message) + self._asset_errors[str(i.path)].append(i.message) elif i.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": bids_path = Path(i.path).relative_to(self.bids_root).as_posix() - self._asset_metadata[bids_path] = prepare_metadata(i.metadata) + self._asset_metadata[bids_path] = prepare_metadata( + dict(i.metadata) + ) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" From a564d0d78287559fd42d5707b189040ab15ff58a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 17:00:53 -0400 Subject: [PATCH 022/124] Trying more typing fixes --- dandi/files/bids.py | 4 +--- dandi/validate.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 36d1b834f..0be69df59 100644 --- a/dandi/files/bids.py +++ 
b/dandi/files/bids.py @@ -79,9 +79,7 @@ def _validate(self) -> None: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": bids_path = Path(i.path).relative_to(self.bids_root).as_posix() - self._asset_metadata[bids_path] = prepare_metadata( - dict(i.metadata) - ) + self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" diff --git a/dandi/validate.py b/dandi/validate.py index dd3aa214f..d716d8e45 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -115,7 +115,7 @@ def validate_bids( severity=Severity.ERROR, id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, - path=path, + path=Path(path), message="File does not match any pattern known to BIDS.", dataset_path=dataset_path, dandiset_path=dandiset_path, From cc57bfc7c5d56ce9ed74ad68a225e18931bac76b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 9 Sep 2022 15:49:20 -0400 Subject: [PATCH 023/124] Debugging typing --- dandi/files/bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 0be69df59..9fce160bc 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -78,7 +78,7 @@ def _validate(self) -> None: elif i.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": - bids_path = Path(i.path).relative_to(self.bids_root).as_posix() + bids_path = i.path.relative_to(self.bids_root).as_posix() self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: From 15e1cfba2e7a2c8bb91c69d733a1a4a973c5dc7c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 9 Sep 2022 05:45:57 -0400 Subject: [PATCH 024/124] Removed unneeded module --- dandi/bids_utils.py | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 dandi/bids_utils.py diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py deleted file 
mode 100644 index cf09f6ebf..000000000 --- a/dandi/bids_utils.py +++ /dev/null @@ -1,45 +0,0 @@ -from .utils import pluralize -from .validate import ValidationResult - - -def print_validation_results( - validation_result: list[ValidationResult], - # TODO: options for control - # - either report warnings, hints, ... - # - either report groupped by severity, record.id -) -> None: - raise NotImplementedError("TODO: RF to use ValidationResult records") - import click - - missing_files = [ - pattern["regex"] - for pattern in validation_result["schema_tracking"] - if pattern["mandatory"] - ] - error_list = [] - if missing_files: - error_substring = ( - f"{pluralize(len(missing_files), 'filename pattern')} required " - "by BIDS could not be found" - ) - error_list.append(error_substring) - if validation_result["path_tracking"]: - error_substring = ( - f"{pluralize(len(validation_result['path_tracking']), 'filename')} " - "did not match any pattern known to BIDS" - ) - error_list.append(error_substring) - if error_list: - error_string = " and ".join(error_list) - error_string = f"Summary: {error_string}." - click.secho( - error_string, - bold=True, - fg="red", - ) - else: - click.secho( - "All filenames are BIDS-valid and no mandatory files are missing.", - bold=True, - fg="green", - ) From 89a070a7e375b263bb1c8b079d4d4cae59789074 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 9 Sep 2022 16:01:09 -0400 Subject: [PATCH 025/124] Use assert to ensure that Optional becomes non-optional for type checking also used more descriptive "result" variable instead of "i". 
We contemplated some other name (like vrs - for "validation results") but it was not liked --- dandi/files/bids.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 9fce160bc..1a174720c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -72,14 +72,19 @@ def _validate(self) -> None: self._dataset_errors: list[str] = [] self._asset_errors = defaultdict(list) self._asset_metadata = defaultdict(dict) - for i in results: - if i.id in BIDS_ASSET_ERRORS: - self._asset_errors[str(i.path)].append(i.message) - elif i.id in BIDS_DATASET_ERRORS: - self._dataset_errors.append(i.message) - elif i.id == "BIDS.MATCH": - bids_path = i.path.relative_to(self.bids_root).as_posix() - self._asset_metadata[bids_path] = prepare_metadata(i.metadata) + for result in results: + if result.id in BIDS_ASSET_ERRORS: + assert result.path + self._asset_errors[str(result.path)].append(result.message) + elif result.id in BIDS_DATASET_ERRORS: + self._dataset_errors.append(result.message) + elif result.id == "BIDS.MATCH": + assert result.path + bids_path = result.path.relative_to(self.bids_root).as_posix() + assert result.metadata + self._asset_metadata[bids_path] = prepare_metadata( + result.metadata + ) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" From fe238b66428026c3671a30f1664f0bc4b5b00a05 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 09:31:31 -0400 Subject: [PATCH 026/124] Further corrected type --- dandi/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index d716d8e45..f045a1a3f 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -148,7 +148,7 @@ def validate_bids( origin=origin, id="BIDS.MATCH", scope=Scope.FILE, - path=file_path, + path=Path(file_path), metadata=meta, ) ) From 25194eef54630efac6619f7d8c88eb56fe9a87ab Mon Sep 17 00:00:00 2001 From: Horea Christian 
Date: Mon, 12 Sep 2022 09:56:19 -0400 Subject: [PATCH 027/124] Filtered download for valid testdata --- dandi/tests/fixtures.py | 49 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index eab945083..872533715 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -39,6 +39,18 @@ lgr = get_logger() +BIDS_TESTDATA_SELECTION = [ + "asl003", + "eeg_cbm", + "hcp_example_bids", + "micr_SEMzarr", + "micr_SPIM", + "pet003", + "qmri_tb1tfl", + "qmri_vfa", +] + + def copytree(src, dst, symlinks=False, ignore=None): """Function mimicking `shutil.copytree()` behaviour but supporting existing target directories. @@ -221,8 +233,43 @@ def fixture() -> Iterator[str]: return fixture +def get_filtered_gitrepo_fixture( + url: str, + whitelist: Iterator[str], +) -> Callable[[], Iterator[str]]: + @pytest.fixture(scope="session") + def fixture(): + with tempfile.TemporaryDirectory() as path: + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + path, + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. 
+ _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) + _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + yield path + + return fixture + + nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") -bids_examples = get_gitrepo_fixture("https://github.com/dandi/bids-examples") +bids_examples = get_filtered_gitrepo_fixture( + "https://github.com/dandi/bids-examples", + BIDS_TESTDATA_SELECTION, +) bids_error_examples = get_gitrepo_fixture( "https://github.com/bids-standard/bids-error-examples" ) From e630fb86498fed5e44884a68574e0b306ba0e8c8 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 09:59:31 -0400 Subject: [PATCH 028/124] Type correction --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 872533715..bbb3fa279 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -235,7 +235,7 @@ def fixture() -> Iterator[str]: def get_filtered_gitrepo_fixture( url: str, - whitelist: Iterator[str], + whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") def fixture(): From 72f47a0ba0266e32a6a19c3f6d57d9428ae8324b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 10:14:14 -0400 Subject: [PATCH 029/124] Update error tests --- dandi/cli/tests/test_cmd_validate.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 3ad3a7882..fd448f6c2 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -3,15 +3,12 @@ from click.testing import CliRunner -def test_validate_bids_error(bids_examples): +def test_validate_bids_error(bids_error_examples): from ..cmd_validate import validate_bids - expected_error = ( - "File does not match any pattern known to BIDS.\n" - "BIDS-required file is not present.\n" - ) + 
expected_error = "File does not match any pattern known to BIDS.\n" - broken_dataset = os.path.join(bids_examples, "invalid_pet001") + broken_dataset = os.path.join(bids_error_examples, "invalid_pet001") r = CliRunner().invoke(validate_bids, [broken_dataset]) # Does it break? assert r.exit_code == 1 From b51e92e5b689fd26bbef12c1e75ceb1010770def Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 10:27:26 -0400 Subject: [PATCH 030/124] fixing windows fixture removal error --- dandi/tests/fixtures.py | 50 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index bbb3fa279..f298f9d6c 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -239,28 +239,34 @@ def get_filtered_gitrepo_fixture( ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") def fixture(): - with tempfile.TemporaryDirectory() as path: - lgr.debug("Cloning %r into %r", url, path) - runout = run( - [ - "git", - "clone", - "--depth=1", - "--filter=blob:none", - "--sparse", - url, - path, - ], - capture_output=True, - ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") - # cwd specification is VERY important, not only to achieve the correct - # effects, but also to avoid dropping files from your repository if you - # were to run `git sparse-checkout` inside the software repo. 
- _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) - _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) - yield path + try: + with tempfile.TemporaryDirectory() as path: + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + path, + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. + _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) + _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + yield path + finally: + try: + shutil.rmtree(path) + except BaseException as exc: + lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) return fixture From 94a17931934f645aca2ebfe5a3d85e73c2fe7b02 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 12:36:17 -0400 Subject: [PATCH 031/124] Debug assertion error fix --- dandi/files/bids.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1a174720c..cb1ff8ce3 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -81,7 +81,13 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - assert result.metadata + try: + assert result.metadata + except AssertionError: + print(result) + print(result) + print(result.path) + print(result.metadata) self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) From ec59dad6e9096210da6b145696868d0abd0aa3b8 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 13:22:49 -0400 Subject: [PATCH 032/124] Using error fixture for error tests --- dandi/tests/fixtures.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index f298f9d6c..691cbce3b 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -506,7 +506,7 @@ def bids_dandiset_invalid( new_dandiset: SampleDandiset, bids_examples: str ) -> SampleDandiset: copytree( - os.path.join(bids_examples, "invalid_pet001"), + os.path.join(bids_error_examples, "invalid_pet001"), str(new_dandiset.dspath) + "/", ) return new_dandiset From 43e17e95b857c51165ff47297cd1ae7df006e3ac Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 13 Sep 2022 00:22:48 -0400 Subject: [PATCH 033/124] Using correct fixture --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 691cbce3b..437d103a4 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -503,7 +503,7 @@ def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDan @pytest.fixture() def bids_dandiset_invalid( - new_dandiset: SampleDandiset, bids_examples: str + new_dandiset: SampleDandiset, bids_error_examples: str ) -> SampleDandiset: copytree( os.path.join(bids_error_examples, "invalid_pet001"), From 4f2e4aef061e3aad1ea1d11b8bc6c1909e5d4f88 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 13 Sep 2022 01:38:17 -0400 Subject: [PATCH 034/124] Disabled debugging code --- dandi/files/bids.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index cb1ff8ce3..1a174720c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -81,13 +81,7 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - try: - assert result.metadata - except AssertionError: - print(result) - print(result) - print(result.path) - print(result.metadata) + assert result.metadata 
self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) From bf5c0f169834f3f0115d1530e2c177e485743b8d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 05:12:29 -0400 Subject: [PATCH 035/124] Debug assertion error --- dandi/files/bids.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1a174720c..66cfa5c43 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -10,11 +10,14 @@ from dandischema.models import BareAsset +from . import get_logger from .bases import GenericAsset, LocalFileAsset, NWBAsset from .zarr import ZarrAsset from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest +lgr = get_logger() + BIDS_ASSET_ERRORS = [ "BIDS.NON_BIDS_PATH_PLACEHOLDER", ] @@ -81,7 +84,11 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - assert result.metadata + try: + assert result.metadata + except AssertionError: + lgr.warning("The path is %s", result.path) + lgr.warning("The metadata is %s", result.metadata) self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) From 697b24d263dc915b782feda9ad77f47322613686 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 05:34:57 -0400 Subject: [PATCH 036/124] Ensuring that metadata is non-empty --- dandi/files/bids.py | 9 +-------- dandi/validate.py | 5 +++++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 66cfa5c43..1a174720c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -10,14 +10,11 @@ from dandischema.models import BareAsset -from . 
import get_logger from .bases import GenericAsset, LocalFileAsset, NWBAsset from .zarr import ZarrAsset from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest -lgr = get_logger() - BIDS_ASSET_ERRORS = [ "BIDS.NON_BIDS_PATH_PLACEHOLDER", ] @@ -84,11 +81,7 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - try: - assert result.metadata - except AssertionError: - lgr.warning("The path is %s", result.path) - lgr.warning("The metadata is %s", result.metadata) + assert result.metadata self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) diff --git a/dandi/validate.py b/dandi/validate.py index f045a1a3f..043dddeae 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -143,6 +143,11 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} + # Top level files do not have any other metadata other than path. 
+ if not meta: + meta = { + "subject": None, + } our_validation_result.append( ValidationResult( origin=origin, From bb52d56f329265549c4e844a04f450ff95adb0b0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 18:11:40 -0400 Subject: [PATCH 037/124] Making example invalid dataset truly invalid --- dandi/tests/fixtures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 437d103a4..b0a4c0b8d 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -505,10 +505,12 @@ def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDan def bids_dandiset_invalid( new_dandiset: SampleDandiset, bids_error_examples: str ) -> SampleDandiset: + dataset_path = new_dandiset.dspath copytree( os.path.join(bids_error_examples, "invalid_pet001"), - str(new_dandiset.dspath) + "/", + str(dataset_path) + "/", ) + os.remove(os.path.join(dataset_path, "README")) return new_dandiset From ee9492ae6e2d3ba03d4d141e697b436895f890f6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 19:47:50 -0400 Subject: [PATCH 038/124] Windows debugging --- dandi/tests/fixtures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index b0a4c0b8d..522400328 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -228,7 +228,10 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("1Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("2Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("3Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("4Failed to remove %s - using Windows?: %s", path, exc) return fixture From c828628a68a823c7d3058fcc357d925057a9281b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 
21:55:45 -0400 Subject: [PATCH 039/124] Further winerror 5 debugging --- dandi/tests/fixtures.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 522400328..ea133af5a 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -228,10 +228,10 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("1Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("2Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("3Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("4Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("1 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("2 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("3 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("4 Failed to remove %s - using Windows?: %s", path, exc) return fixture @@ -241,7 +241,9 @@ def get_filtered_gitrepo_fixture( whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") - def fixture(): + def fixture() -> Iterator[str]: + skipif.no_network() + skipif.no_git() try: with tempfile.TemporaryDirectory() as path: lgr.debug("Cloning %r into %r", url, path) @@ -268,8 +270,9 @@ def fixture(): finally: try: shutil.rmtree(path) - except BaseException as exc: - lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) + except PermissionError as exc: + lgr.warning("AA Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("BB Failed to remove %s - using Windows?: %s", path, exc) return fixture From 2f1c6a7c60ffc87ed50dd908c6d860185b595dfb Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 15 Sep 2022 04:39:35 -0400 Subject: [PATCH 040/124] Using base exception instead of specific one --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index ea133af5a..45735a863 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -270,7 +270,7 @@ def fixture() -> Iterator[str]: finally: try: shutil.rmtree(path) - except PermissionError as exc: + except BaseException as exc: lgr.warning("AA Failed to remove %s - using Windows?: %s", path, exc) lgr.warning("BB Failed to remove %s - using Windows?: %s", path, exc) From 155e557e218d9fde3006ac65f4d6ef0edeec3d8a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 15 Sep 2022 16:18:09 -0400 Subject: [PATCH 041/124] Not using auto-cleaning temp dir via while --- dandi/tests/fixtures.py | 46 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 45735a863..c4e497e7b 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -244,29 +244,31 @@ def get_filtered_gitrepo_fixture( def fixture() -> Iterator[str]: skipif.no_network() skipif.no_git() + path = tempfile.mktemp() # not using pytest's tmpdir fixture to not + # collide in different scopes etc. But we + # would need to remove it ourselves try: - with tempfile.TemporaryDirectory() as path: - lgr.debug("Cloning %r into %r", url, path) - runout = run( - [ - "git", - "clone", - "--depth=1", - "--filter=blob:none", - "--sparse", - url, - path, - ], - capture_output=True, - ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") - # cwd specification is VERY important, not only to achieve the correct - # effects, but also to avoid dropping files from your repository if you - # were to run `git sparse-checkout` inside the software repo. 
- _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) - _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) - yield path + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + path, + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. + _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) + _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + yield path finally: try: shutil.rmtree(path) From f30ad9935aceda9a38a30416ab5d719c2fb43768 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:20:55 -0400 Subject: [PATCH 042/124] Removed debugging code --- dandi/tests/fixtures.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index c4e497e7b..84b0bce8b 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -228,10 +228,7 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("1 Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("2 Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("3 Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("4 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) return fixture @@ -273,8 +270,7 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("AA Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("BB Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("Failed to remove %s - using 
Windows?: %s", path, exc) return fixture From 676cbc3845443519a2f92d5ab57230566de9a9e0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:21:48 -0400 Subject: [PATCH 043/124] More verbose error printing --- dandi/cli/cmd_validate.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 92c4b3d3b..4f3c6b772 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -48,15 +48,22 @@ def validate_bids( ) for i in validator_result: + if i.path: + scope = i.path + elif i.path_regex: + scope = i.path_regex + else: + scope = i.dataset_path + echo_string = f"[{i.id}] {scope}: {i.message}" if i.severity == Severity.ERROR: click.secho( - i.message, + echo_string, bold=True, fg="red", ) if i.severity == Severity.WARNING: click.secho( - i.message, + echo_string, bold=True, fg="orange", ) From 9a8861bc5d60a2d2ad73cf331a0dd54afe541a00 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:32:57 -0400 Subject: [PATCH 044/124] Using entire valid dataset whitelist for testing --- dandi/tests/fixtures.py | 2 +- dandi/tests/test_validate.py | 58 +++++++++++++++++++----------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 84b0bce8b..96e1bd597 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -277,7 +277,7 @@ def fixture() -> Iterator[str]: nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") bids_examples = get_filtered_gitrepo_fixture( - "https://github.com/dandi/bids-examples", + "https://github.com/bids-standard/bids-examples", BIDS_TESTDATA_SELECTION, ) bids_error_examples = get_gitrepo_fixture( diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index e445ea350..41aef93a4 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,40 +1,42 @@ -from glob import glob +# 
from glob import glob import os -import appdirs +# import appdirs import pytest +from .fixtures import BIDS_TESTDATA_SELECTION -def test_validate_bids(bids_examples, tmp_path): - from ..validate import validate_bids - # TEST DEFAULT REPORT PATH: - # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we - # implement light data cloning analogous to (`[1]` because `[0]` is .git): - # https://github.com/bids-standard/bids-specification/pull/1143 - selected_dataset = os.path.join(bids_examples, "asl003") - _ = validate_bids(selected_dataset, report=True) +@pytest.mark.parametrize("dataset", BIDS_TESTDATA_SELECTION) +def test_validate_bids(bids_examples, tmp_path, dataset): + from ..validate import validate_bids - # Check if a report is being produced. - pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli", "dandi") - # appdirs.user_log_dir("dandi-cli") - report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") - assert len(glob(report_expression)) == 1 + selected_dataset = os.path.join(bids_examples, dataset) + validation_result = validate_bids(selected_dataset, report=True) + for i in validation_result: + assert not hasattr(i, "severtiy") - # TEST CISTOM REPORT PATH: - # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we - # implement light data cloning analogous to (`[1]` because `[0]` is .git): - # https://github.com/bids-standard/bids-specification/pull/1143 - report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") - selected_dataset = os.path.join(bids_examples, "asl003") - _ = validate_bids( - selected_dataset, - report_path=report_path, - ) - # Check if a report is being produced. - assert len(glob(report_path)) == 1 +# # Check if a report is being produced. 
+# pid = os.getpid() +# log_dir = appdirs.user_log_dir("dandi-cli", "dandi") +# # appdirs.user_log_dir("dandi-cli") +# report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") +# assert len(glob(report_expression)) == 1 +# +# # TEST CUSTOM REPORT PATH: +# # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we +# # implement light data cloning analogous to (`[1]` because `[0]` is .git): +# # https://github.com/bids-standard/bids-specification/pull/1143 +# report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") +# selected_dataset = os.path.join(bids_examples, "asl003") +# _ = validate_bids( +# selected_dataset, +# report_path=report_path, +# ) +# +# # Check if a report is being produced. +# assert len(glob(report_path)) == 1 @pytest.mark.parametrize( From 74cc79737ec05f7fc698fd3537df49efc95d00f0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:36:16 -0400 Subject: [PATCH 045/124] Log creation test using whitelist instead of hard-coded dataset --- dandi/tests/test_validate.py | 42 +++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 41aef93a4..4d6280981 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,7 +1,7 @@ -# from glob import glob +from glob import glob import os -# import appdirs +import appdirs import pytest from .fixtures import BIDS_TESTDATA_SELECTION @@ -17,26 +17,24 @@ def test_validate_bids(bids_examples, tmp_path, dataset): assert not hasattr(i, "severtiy") -# # Check if a report is being produced. 
-# pid = os.getpid() -# log_dir = appdirs.user_log_dir("dandi-cli", "dandi") -# # appdirs.user_log_dir("dandi-cli") -# report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") -# assert len(glob(report_expression)) == 1 -# -# # TEST CUSTOM REPORT PATH: -# # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we -# # implement light data cloning analogous to (`[1]` because `[0]` is .git): -# # https://github.com/bids-standard/bids-specification/pull/1143 -# report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") -# selected_dataset = os.path.join(bids_examples, "asl003") -# _ = validate_bids( -# selected_dataset, -# report_path=report_path, -# ) -# -# # Check if a report is being produced. -# assert len(glob(report_path)) == 1 +def test_report_path(bids_examples, tmp_path): + from ..validate import validate_bids + + pid = os.getpid() + log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + # appdirs.user_log_dir("dandi-cli") + report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") + assert len(glob(report_expression)) == 1 + + report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") + selected_dataset = os.path.join(bids_examples, BIDS_TESTDATA_SELECTION[0]) + _ = validate_bids( + selected_dataset, + report_path=report_path, + ) + + # Check if a report is being produced. 
+ assert len(glob(report_path)) == 1 @pytest.mark.parametrize( From db6a81fb22a154680d37d61a3d0c0565976fd17c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:40:58 -0400 Subject: [PATCH 046/124] Fixed cmd_validate tests --- dandi/cli/tests/test_cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index fd448f6c2..173edba93 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -14,4 +14,4 @@ def test_validate_bids_error(bids_error_examples): assert r.exit_code == 1 # Does it report the issue correctly? - assert r.output == expected_error + assert expected_error in r.output From 715f70e604abf3d7e830dffc9e2348c497cb6ddc Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 15:56:08 -0400 Subject: [PATCH 047/124] Fixed report path test --- dandi/tests/test_validate.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 4d6280981..84ed73e11 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,7 +1,7 @@ from glob import glob import os -import appdirs +# import appdirs import pytest from .fixtures import BIDS_TESTDATA_SELECTION @@ -20,11 +20,10 @@ def test_validate_bids(bids_examples, tmp_path, dataset): def test_report_path(bids_examples, tmp_path): from ..validate import validate_bids - pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli", "dandi") - # appdirs.user_log_dir("dandi-cli") - report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") - assert len(glob(report_expression)) == 1 + # pid = os.getpid() + # log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + # report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") + # assert len(glob(report_expression)) == 1 report_path = os.path.join(tmp_path, 
"inplace_bids-validator-report.log") selected_dataset = os.path.join(bids_examples, BIDS_TESTDATA_SELECTION[0]) From fb5847c5d5146cebd17c0b230751aa9237639861 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 17:11:49 -0400 Subject: [PATCH 048/124] Testing the command line for all error cases --- dandi/cli/tests/test_cmd_validate.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 173edba93..eda2376fc 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -1,17 +1,26 @@ import os from click.testing import CliRunner +import pytest -def test_validate_bids_error(bids_error_examples): +@pytest.mark.parametrize( + "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] +) +def test_validate_bids_error(bids_error_examples, dataset): + import json + from ..cmd_validate import validate_bids - expected_error = "File does not match any pattern known to BIDS.\n" + # expected_error = "File does not match any pattern known to BIDS.\n" - broken_dataset = os.path.join(bids_error_examples, "invalid_pet001") + broken_dataset = os.path.join(bids_error_examples, dataset) + with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: + expected_errors = json.load(f) r = CliRunner().invoke(validate_bids, [broken_dataset]) # Does it break? assert r.exit_code == 1 - # Does it report the issue correctly? - assert expected_error in r.output + # Does it detect all errors? 
+ for key in expected_errors: + assert key in r.output From e0027c3088cf95e08445ab5a97a81e45f59c3f24 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Sat, 17 Sep 2022 01:10:20 -0400 Subject: [PATCH 049/124] Removed deprecated comments --- dandi/cli/cmd_validate.py | 1 - dandi/cli/tests/test_cmd_validate.py | 2 -- dandi/tests/test_validate.py | 5 ----- 3 files changed, 8 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 4f3c6b772..089644c70 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -36,7 +36,6 @@ def validate_bids( dandi validate-bids /my/path """ - # from ..bids_utils import print_validation_results from ..validate import Severity from ..validate import validate_bids as validate_bids_ diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index eda2376fc..095d21907 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -12,8 +12,6 @@ def test_validate_bids_error(bids_error_examples, dataset): from ..cmd_validate import validate_bids - # expected_error = "File does not match any pattern known to BIDS.\n" - broken_dataset = os.path.join(bids_error_examples, dataset) with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: expected_errors = json.load(f) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 84ed73e11..111196975 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -20,11 +20,6 @@ def test_validate_bids(bids_examples, tmp_path, dataset): def test_report_path(bids_examples, tmp_path): from ..validate import validate_bids - # pid = os.getpid() - # log_dir = appdirs.user_log_dir("dandi-cli", "dandi") - # report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") - # assert len(glob(report_expression)) == 1 - report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") selected_dataset = os.path.join(bids_examples, 
BIDS_TESTDATA_SELECTION[0]) _ = validate_bids( From 453cd65c31e6c7bad63fd5b3623d39e7a9a5ec19 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Sat, 17 Sep 2022 01:13:06 -0400 Subject: [PATCH 050/124] Populating dandi and bids dataset paths --- dandi/validate.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 043dddeae..3d9841dfb 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -143,7 +143,10 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} - # Top level files do not have any other metadata other than path. + dataset_path = _get_set_path(file_path, "dataset_description.json") + dandiset_path = _get_set_path(file_path, "dandiset.yaml") + # Top level files do not have any other metadata other than path, + # which we pop and put in the object... if not meta: meta = { "subject": None, @@ -155,6 +158,8 @@ def validate_bids( scope=Scope.FILE, path=Path(file_path), metadata=meta, + dataset_path=dataset_path, + dandiset_path=dandiset_path, ) ) From 786729acc5b1288b7625dac108a125a89c675637 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 19 Sep 2022 14:27:28 -0400 Subject: [PATCH 051/124] Simple fixes suggested by jwodder --- dandi/files/bids.py | 8 ++------ dandi/tests/fixtures.py | 4 ++-- dandi/tests/test_validate.py | 10 +++------- dandi/validate.py | 8 +++----- 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1a174720c..3827ea9cc 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -15,12 +15,8 @@ from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest -BIDS_ASSET_ERRORS = [ - "BIDS.NON_BIDS_PATH_PLACEHOLDER", -] -BIDS_DATASET_ERRORS = [ - "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", -] +BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) 
+BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) @dataclass diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 96e1bd597..ba2037e88 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -263,8 +263,8 @@ def fixture() -> Iterator[str]: # cwd specification is VERY important, not only to achieve the correct # effects, but also to avoid dropping files from your repository if you # were to run `git sparse-checkout` inside the software repo. - _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) - _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) + run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) yield path finally: try: diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 111196975..d2fc6d8d4 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,15 +1,16 @@ from glob import glob +import json import os +import pathlib -# import appdirs import pytest from .fixtures import BIDS_TESTDATA_SELECTION +from ..validate import validate_bids @pytest.mark.parametrize("dataset", BIDS_TESTDATA_SELECTION) def test_validate_bids(bids_examples, tmp_path, dataset): - from ..validate import validate_bids selected_dataset = os.path.join(bids_examples, dataset) validation_result = validate_bids(selected_dataset, report=True) @@ -18,7 +19,6 @@ def test_validate_bids(bids_examples, tmp_path, dataset): def test_report_path(bids_examples, tmp_path): - from ..validate import validate_bids report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") selected_dataset = os.path.join(bids_examples, BIDS_TESTDATA_SELECTION[0]) @@ -37,10 +37,6 @@ def test_report_path(bids_examples, tmp_path): def test_validate_bids_errors(bids_error_examples, dataset): # This only checks that the error we found is correct, not that we found all errors. 
# ideally make a list and erode etc. - import json - import pathlib - - from ..validate import validate_bids selected_dataset = os.path.join(bids_error_examples, dataset) validation_result = validate_bids(selected_dataset, report=True) diff --git a/dandi/validate.py b/dandi/validate.py index 3d9841dfb..65b0ca272 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -23,10 +23,10 @@ class ValidationResult: asset_paths: Optional[list[str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None - message: str = "" + message: Optional[str] = None metadata: Optional[dict] = None path: Optional[Path] = None - path_regex: Optional[str] = "" + path_regex: Optional[str] = None severity: Optional[Severity] = None @@ -124,9 +124,7 @@ def validate_bids( for pattern in validation_result["schema_tracking"]: # Future proofing for standard-compliant name. - if ("mandatory" in pattern and pattern["mandatory"]) or ( - "required" in pattern and pattern["required"] - ): + if pattern.get("mandatory") or pattern.get("required"): # We don't have a path for this so we'll need some external logic to make sure # that the dataset path is populated. 
# dataset_path = _get_dataset_path(path, paths) From d0070bd6b3b54b63390129475a0a206b7221e466 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 20 Sep 2022 08:42:06 -0400 Subject: [PATCH 052/124] Fixed typing --- dandi/files/bids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 3827ea9cc..39105ddf3 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -71,8 +71,10 @@ def _validate(self) -> None: for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path + assert result.message self._asset_errors[str(result.path)].append(result.message) elif result.id in BIDS_DATASET_ERRORS: + assert result.message self._dataset_errors.append(result.message) elif result.id == "BIDS.MATCH": assert result.path From b205aafd0820c48202020274d11370372626175b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 21 Sep 2022 11:41:42 -0400 Subject: [PATCH 053/124] Correcting type --- dandi/tests/fixtures.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index ba2037e88..ec803e44a 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -234,14 +234,15 @@ def fixture() -> Iterator[str]: def get_filtered_gitrepo_fixture( + tmp_path, url: str, whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") - def fixture() -> Iterator[str]: + def fixture() -> Iterator[Path]: skipif.no_network() skipif.no_git() - path = tempfile.mktemp() # not using pytest's tmpdir fixture to not + path = tmp_path # not using pytest's tmpdir fixture to not # collide in different scopes etc. 
But we # would need to remove it ourselves try: @@ -269,7 +270,7 @@ def fixture() -> Iterator[str]: finally: try: shutil.rmtree(path) - except BaseException as exc: + except Exception as exc: lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) return fixture @@ -277,8 +278,8 @@ def fixture() -> Iterator[str]: nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") bids_examples = get_filtered_gitrepo_fixture( - "https://github.com/bids-standard/bids-examples", - BIDS_TESTDATA_SELECTION, + url="https://github.com/bids-standard/bids-examples", + whitelist=BIDS_TESTDATA_SELECTION, ) bids_error_examples = get_gitrepo_fixture( "https://github.com/bids-standard/bids-error-examples" From d04f4bda403dbc0fb9774c7a721536c5f63a9380 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 20 Sep 2022 14:16:34 -0400 Subject: [PATCH 054/124] Using pytest temp path generation --- dandi/tests/fixtures.py | 56 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index ec803e44a..42715df7d 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -239,39 +239,33 @@ def get_filtered_gitrepo_fixture( whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") - def fixture() -> Iterator[Path]: + def fixture( + tmp_path_factory: pytest.TempPathFactory, + ) -> Iterator[str]: skipif.no_network() skipif.no_git() - path = tmp_path # not using pytest's tmpdir fixture to not - # collide in different scopes etc. 
But we - # would need to remove it ourselves - try: - lgr.debug("Cloning %r into %r", url, path) - runout = run( - [ - "git", - "clone", - "--depth=1", - "--filter=blob:none", - "--sparse", - url, - path, - ], - capture_output=True, - ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") - # cwd specification is VERY important, not only to achieve the correct - # effects, but also to avoid dropping files from your repository if you - # were to run `git sparse-checkout` inside the software repo. - run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) - run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) - yield path - finally: - try: - shutil.rmtree(path) - except Exception as exc: - lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) + path = tmp_path_factory.mktemp("gitrepo") + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + str(path), + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. 
+ run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) + run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) + yield path return fixture From 7d21f4ed8990f045813a02f9eeab0e86c5ac1da0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 21 Sep 2022 11:51:03 -0400 Subject: [PATCH 055/124] Corrected function parameters --- dandi/tests/fixtures.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 42715df7d..bc000ba64 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -234,10 +234,9 @@ def fixture() -> Iterator[str]: def get_filtered_gitrepo_fixture( - tmp_path, url: str, whitelist: List[str], -) -> Callable[[], Iterator[str]]: +) -> Callable[[pytest.TempPathFactory], Iterator[str]]: @pytest.fixture(scope="session") def fixture( tmp_path_factory: pytest.TempPathFactory, From 4ba4b7b0905379dc61935155735a710ee40c1dae Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 16:57:09 -0400 Subject: [PATCH 056/124] More parsimonious error reporting --- dandi/cli/cmd_validate.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 089644c70..b0b24f401 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -54,17 +54,16 @@ def validate_bids( else: scope = i.dataset_path echo_string = f"[{i.id}] {scope}: {i.message}" + i_fg = "" if i.severity == Severity.ERROR: + i_fg = "red" + elif i.severity == Severity.WARNING: + i_fg = "orange" + if i_fg: click.secho( echo_string, bold=True, - fg="red", - ) - if i.severity == Severity.WARNING: - click.secho( - echo_string, - bold=True, - fg="orange", + fg=i_fg, ) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] From 23e54be748da0fbf4135d8b63e868b90cf66e99e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 16:58:52 -0400 Subject: [PATCH 
057/124] Global import --- dandi/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 65b0ca272..6655b8e07 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -6,6 +6,7 @@ from typing import Iterator, List, Optional, Tuple, Union import appdirs +import os from .files import find_dandi_files @@ -174,7 +175,6 @@ def _get_set_path(in_path, marker): marker : str, optional Filename marker which identifies the set root. """ - import os candidate = os.path.join(in_path, marker) # Windows support... otherwise we could do `if in_path == "/"`. From 28e7374c1a083b73e49751ffd28fe1d3bbd8fbfd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 17:04:48 -0400 Subject: [PATCH 058/124] Using built-in find_parent_directory_containing function --- dandi/validate.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 6655b8e07..0df8f0533 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -9,6 +9,7 @@ import os from .files import find_dandi_files +from .utils import find_parent_directory_containing BIDS_TO_DANDI = { "subject": "subject_id", @@ -108,8 +109,8 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1272 if path.endswith((".ERRORS", ".ERRORS.json")): continue - dataset_path = _get_set_path(path, "dataset_description.json") - dandiset_path = _get_set_path(path, "dandiset.yaml") + dataset_path = find_parent_directory_containing("dataset_description.json", path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", path) our_validation_result.append( ValidationResult( origin=origin, @@ -128,7 +129,7 @@ def validate_bids( if pattern.get("mandatory") or pattern.get("required"): # We don't have a path for this so we'll need some external logic to make sure # that the dataset path is populated. 
- # dataset_path = _get_dataset_path(path, paths) + # dataset_path = find_parent_directory_containing(paths, path) our_validation_result.append( ValidationResult( origin=origin, @@ -142,8 +143,8 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} - dataset_path = _get_set_path(file_path, "dataset_description.json") - dandiset_path = _get_set_path(file_path, "dandiset.yaml") + dataset_path = find_parent_directory_containing("dataset_description.json", file_path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, # which we pop and put in the object... if not meta: @@ -165,28 +166,6 @@ def validate_bids( return our_validation_result -def _get_set_path(in_path, marker): - """Get the path to the root of a file set (e.g. BIDS dataset or DANDIset). - - Parameters - ---------- - in_path : str, optional - File for which to determine set path. - marker : str, optional - Filename marker which identifies the set root. - """ - - candidate = os.path.join(in_path, marker) - # Windows support... otherwise we could do `if in_path == "/"`. 
- if in_path == "/" or not any(i in in_path for i in ["/", "\\"]): - return None - if os.path.isfile(candidate): - return os.path.dirname(candidate) - else: - level_up = os.path.dirname(in_path.rstrip("/\\")) - return _get_set_path(level_up, marker) - - def validate( *paths: str, schema_version: Optional[str] = None, From 3702eacb74ef222d926a8ef029f9d411e017a0d0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 17:07:19 -0400 Subject: [PATCH 059/124] Dropped unused import --- dandi/validate.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 0df8f0533..d4f369206 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -6,7 +6,6 @@ from typing import Iterator, List, Optional, Tuple, Union import appdirs -import os from .files import find_dandi_files from .utils import find_parent_directory_containing @@ -147,10 +146,6 @@ def validate_bids( dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, # which we pop and put in the object... - if not meta: - meta = { - "subject": None, - } our_validation_result.append( ValidationResult( origin=origin, From 2d9723ccc0f0ba5df789b14c8d5a75822615458a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 17:37:12 -0400 Subject: [PATCH 060/124] Re-added placeholder metadata to satisfy assert --- dandi/validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dandi/validate.py b/dandi/validate.py index d4f369206..819e9d02a 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -146,6 +146,10 @@ def validate_bids( dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, # which we pop and put in the object... 
+ if not meta: + meta = { + "subject": None, + } our_validation_result.append( ValidationResult( origin=origin, From 61e5d3a22199e46ff059e9973b2743684dc7b4c5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 18:20:05 -0400 Subject: [PATCH 061/124] No longer recomputing dandi/data-set path for files in same directory --- dandi/validate.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 819e9d02a..4beffdeb0 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -6,6 +6,7 @@ from typing import Iterator, List, Optional, Tuple, Union import appdirs +import os from .files import find_dandi_files from .utils import find_parent_directory_containing @@ -103,13 +104,17 @@ def validate_bids( version=bidsschematools.__version__, ) + # Storing variable to not re-compute set paths for each individual file. + parent_path = False for path in validation_result["path_tracking"]: # Hard-coding exclusion here pending feature + release in: # https://github.com/bids-standard/bids-specification/issues/1272 if path.endswith((".ERRORS", ".ERRORS.json")): continue - dataset_path = find_parent_directory_containing("dataset_description.json", path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", path) + if parent_path != os.path.dirname(path): + parent_path = os.path.dirname(path) + dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) our_validation_result.append( ValidationResult( origin=origin, @@ -139,9 +144,16 @@ def validate_bids( message="BIDS-required file is not present.", ) ) + + # Storing variable to not re-compute set paths for each individual file. 
+ parent_path = False for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} + if parent_path != os.path.dirname(file_path): + parent_path = os.path.dirname(file_path) + dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) dataset_path = find_parent_directory_containing("dataset_description.json", file_path) dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, From 17113543e3d21d9d95f93dcf6c07373a06aef9f0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 19:43:47 -0400 Subject: [PATCH 062/124] Trying to make sure typechecking understands logic --- dandi/validate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 4beffdeb0..e060b5cc8 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -105,7 +105,7 @@ def validate_bids( ) # Storing variable to not re-compute set paths for each individual file. - parent_path = False + parent_path = None for path in validation_result["path_tracking"]: # Hard-coding exclusion here pending feature + release in: # https://github.com/bids-standard/bids-specification/issues/1272 @@ -146,7 +146,7 @@ def validate_bids( ) # Storing variable to not re-compute set paths for each individual file. 
- parent_path = False + parent_path = None for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} From 122581ad47e3fbf22747b7773ea172a940484464 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 14:50:38 -0400 Subject: [PATCH 063/124] Removed redundant code and black fixes --- dandi/validate.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index e060b5cc8..6d61a61aa 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -2,11 +2,11 @@ from dataclasses import dataclass from enum import Enum +import os from pathlib import Path from typing import Iterator, List, Optional, Tuple, Union import appdirs -import os from .files import find_dandi_files from .utils import find_parent_directory_containing @@ -113,8 +113,12 @@ def validate_bids( continue if parent_path != os.path.dirname(path): parent_path = os.path.dirname(path) - dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) + dataset_path = find_parent_directory_containing( + "dataset_description.json", parent_path + ) + dandiset_path = find_parent_directory_containing( + "dandiset.yaml", parent_path + ) our_validation_result.append( ValidationResult( origin=origin, @@ -152,10 +156,12 @@ def validate_bids( meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} if parent_path != os.path.dirname(file_path): parent_path = os.path.dirname(file_path) - dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) - dataset_path = find_parent_directory_containing("dataset_description.json", file_path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) + 
dataset_path = find_parent_directory_containing( + "dataset_description.json", parent_path + ) + dandiset_path = find_parent_directory_containing( + "dandiset.yaml", parent_path + ) # Top level files do not have any other metadata other than path, # which we pop and put in the object... if not meta: From e25375bce4ca29c91880372ac42d1dfa091a0395 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 14:53:27 -0400 Subject: [PATCH 064/124] No placeholder metadata --- dandi/files/bids.py | 2 +- dandi/validate.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 39105ddf3..d7ca279b1 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -79,7 +79,7 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - assert result.metadata + assert result.metadata is not None self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) diff --git a/dandi/validate.py b/dandi/validate.py index 6d61a61aa..dc8bb5471 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -162,12 +162,6 @@ def validate_bids( dandiset_path = find_parent_directory_containing( "dandiset.yaml", parent_path ) - # Top level files do not have any other metadata other than path, - # which we pop and put in the object... - if not meta: - meta = { - "subject": None, - } our_validation_result.append( ValidationResult( origin=origin, From e292718c86ae0cf56ccccc5ebf15c98ab4475e4e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 21:49:44 -0400 Subject: [PATCH 065/124] Unified error reporting across validation types also removed part of error reporting which was reporting valid files, let me know if you need it back. 
--- dandi/cli/cmd_validate.py | 43 +++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index b0b24f401..4700fc33b 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -3,7 +3,9 @@ import click -from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions +from .base import devel_debug_option, devel_option, map_to_click_exceptions +from ..utils import pluralize +from ..validate import Severity @click.command() @@ -36,7 +38,6 @@ def validate_bids( dandi validate-bids /my/path """ - from ..validate import Severity from ..validate import validate_bids as validate_bids_ validator_result = validate_bids_( # Controller @@ -47,24 +48,15 @@ def validate_bids( ) for i in validator_result: + if not i.severity: + continue if i.path: scope = i.path elif i.path_regex: scope = i.path_regex else: scope = i.dataset_path - echo_string = f"[{i.id}] {scope}: {i.message}" - i_fg = "" - if i.severity == Severity.ERROR: - i_fg = "red" - elif i.severity == Severity.WARNING: - i_fg = "orange" - if i_fg: - click.secho( - echo_string, - bold=True, - fg=i_fg, - ) + display_errors(scope, [i.id], [i.severity], [i.message]) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] @@ -155,10 +147,21 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): ) -def display_errors(path, errors): - if not errors: - lgr.info("%s: ok", path) +def display_errors(scope, errors, severities=[], messages=[]): + if Severity.ERROR in severities: + fg = "red" + elif Severity.WARNING in severities: + fg = "orange" else: - lgr.error("%s: %d error(s)", path, len(errors)) - for error in errors: - lgr.error(" %s", error) + fg = "blue" + summary = f"{scope}: {pluralize(len(errors), 'issue')} detected." 
+ click.secho(summary, bold=True, fg="red") + for error, severity, message in zip(errors, severities, messages): + if severity == Severity.ERROR: + fg = "red" + elif severity == Severity.WARNING: + fg = "green" + else: + fg = "blue" + error_message = f" [{error}] {message}" + click.secho(error_message, fg=fg) From c4a36ca2db773cd0451ff944661b4623b43b351f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 22:28:00 -0400 Subject: [PATCH 066/124] Improve style --- dandi/cli/cmd_validate.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 4700fc33b..a90a5fc55 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -108,7 +108,10 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): ): nfiles += 1 if view == "one-at-a-time": - display_errors(path, errors) + if errors: + display_errors( + path, ["NWBError"] * len(errors), [""] * len(errors), errors + ) all_files_errors[path] = errors if view == "groupped": @@ -155,7 +158,7 @@ def display_errors(scope, errors, severities=[], messages=[]): else: fg = "blue" summary = f"{scope}: {pluralize(len(errors), 'issue')} detected." 
- click.secho(summary, bold=True, fg="red") + click.secho(summary, fg=fg) for error, severity, message in zip(errors, severities, messages): if severity == Severity.ERROR: fg = "red" From cf34de38d36ab190a15c56be5019296fe771d339 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 22:52:09 -0400 Subject: [PATCH 067/124] Import dandi.validate.Severity in local scope to prevent heavy dep load --- dandi/cli/cmd_validate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index a90a5fc55..e4fa2a255 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -5,7 +5,6 @@ from .base import devel_debug_option, devel_option, map_to_click_exceptions from ..utils import pluralize -from ..validate import Severity @click.command() @@ -38,6 +37,7 @@ def validate_bids( dandi validate-bids /my/path """ + from ..validate import Severity from ..validate import validate_bids as validate_bids_ validator_result = validate_bids_( # Controller @@ -151,6 +151,8 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): def display_errors(scope, errors, severities=[], messages=[]): + from ..validate import Severity + if Severity.ERROR in severities: fg = "red" elif Severity.WARNING in severities: From e3636704cf24bb1973b5cec7f5d3c2b896c06d36 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 29 Sep 2022 05:51:25 -0400 Subject: [PATCH 068/124] Unified error reporting logic and draft grouping implementation. 
--- dandi/cli/cmd_validate.py | 183 +++++++++++++++++++++++++++----------- 1 file changed, 133 insertions(+), 50 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index e4fa2a255..ea5d56836 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -21,6 +21,11 @@ is_flag=True, help="Whether to write a report under a unique path in the DANDI log directory.", ) +@click.option( + "--grouping", + "-g", + help="Write report under path, this option implies `--report/-r`.", +) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @map_to_click_exceptions def validate_bids( @@ -28,9 +33,16 @@ def validate_bids( schema, report, report_path, + grouping=None, ): """Validate BIDS paths. + Parameters + ---------- + + grouping : str, optional + A string which is either "", "error", or "path" — "error" implemented. + Notes ----- Used from bash, eg: @@ -47,18 +59,40 @@ def validate_bids( schema_version=schema, ) + issues = [] for i in validator_result: if not i.severity: continue - if i.path: - scope = i.path - elif i.path_regex: - scope = i.path_regex else: - scope = i.dataset_path - display_errors(scope, [i.id], [i.severity], [i.message]) + issues.append(i) + + purviews = [ + list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues + ] + if not grouping: + display_errors( + purviews, + [i.id for i in issues], + [i.severity for i in issues], + [i.message for i in issues], + ) + elif grouping == "path": + for purview in purviews: + applies_to = [ + i for i in issues if purview in [i.path, i.path_regex, i.dataset_path] + ] + display_errors( + [purview], + [i.id for i in applies_to], + [i.severity for i in applies_to], + [i.message for i in applies_to], + ) + else: + raise NotImplementedError( + "The `grouping` parameter values currently supported are " "path or None." 
+ ) - validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] + validation_errors = [i for i in issues if i.severity == Severity.ERROR] if validation_errors: raise SystemExit(1) @@ -75,10 +109,18 @@ def validate_bids( @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions -def validate(paths, schema=None, devel_debug=False, allow_any_path=False): +def validate( + paths, schema=None, devel_debug=False, allow_any_path=False, grouping=None +): """Validate files for NWB and DANDI compliance. Exits with non-0 exit code if any file is not compliant. + + Parameters + ---------- + + grouping : str, optional + A string which is either "", "error", or "path" — "error" not yet implemented. """ from ..pynwb_utils import ignore_benign_pynwb_warnings from ..validate import validate as validate_ @@ -96,42 +138,35 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): # at this point, so we ignore it although ideally there should be a formal # way to get relevant warnings (not errors) from PyNWB ignore_benign_pynwb_warnings() - view = "one-at-a-time" # TODO: rename, add groupped - all_files_errors = {} + all_files_error_messages = {} nfiles = 0 - for path, errors in validate_( + for path, messages in validate_( *paths, schema_version=schema, devel_debug=devel_debug, allow_any_path=allow_any_path, ): nfiles += 1 - if view == "one-at-a-time": - if errors: - display_errors( - path, ["NWBError"] * len(errors), [""] * len(errors), errors - ) - all_files_errors[path] = errors - - if view == "groupped": - # TODO: Most likely we want to summarize errors across files since they - # are likely to be similar - # TODO: add our own criteria for validation (i.e. 
having needed metadata) - - # # can't be done since fails to compare different types of errors - # all_errors = sum(errors.values(), []) - # all_error_types = [] - # errors_unique = sorted(set(all_errors)) - # from collections import Counter - # # Let's make it - # print( - # "{} unique errors in {} files".format( - # len(errors_unique), len(errors)) - # ) - raise NotImplementedError("TODO") - - files_with_errors = [f for f, errors in all_files_errors.items() if errors] + if not grouping: + error_paths = [path] * len(messages) + errors = ["NWBError"] * len(messages) + severities = [""] * len(messages) + display_errors(error_paths, errors, severities, messages) + elif grouping == "path": + error_paths = [path] + errors = ["NWBError"] * len(messages) + severities = [""] * len(messages) + display_errors(error_paths, errors, severities, messages) + else: + raise NotImplementedError( + "The `grouping` parameter values currently supported are " + "path or None." + ) + + all_files_error_messages[path] = messages + + files_with_errors = [f for f, errors in all_files_error_messages.items() if errors] if files_with_errors: click.secho( @@ -150,23 +185,71 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): ) -def display_errors(scope, errors, severities=[], messages=[]): +def _get_severity_color(severities): from ..validate import Severity if Severity.ERROR in severities: - fg = "red" + return "red" elif Severity.WARNING in severities: - fg = "orange" + return "green" else: - fg = "blue" - summary = f"{scope}: {pluralize(len(errors), 'issue')} detected." 
- click.secho(summary, fg=fg) - for error, severity, message in zip(errors, severities, messages): - if severity == Severity.ERROR: - fg = "red" - elif severity == Severity.WARNING: - fg = "green" - else: - fg = "blue" - error_message = f" [{error}] {message}" + return "blue" + + +def display_errors(purviews, errors, severities, messages): + """ + Unified error display for validation CLI, which auto-resolves grouping logic based on + the length of input lists. + + Parameters + ---------- + purviews: list of str + errors: list of str + severities: list of dandi.validate.Severity + messages: list of str + + + Notes + ----- + * There is a bit of roundabout and currently untestable logic to deal with potential cases + where the same error has multiple error message, could be removed in the future and removed + by assert if this won't ever be the case. + """ + + if all([len(i) == 1 for i in [purviews, errors, severities, messages]]): + fg = _get_severity_color(severities) + error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}" click.secho(error_message, fg=fg) + elif len(purviews) == 1: + group_message = f"{purviews[0]}: {pluralize(len(errors), 'issue')} detected." + fg = _get_severity_color(severities) + click.secho(group_message, fg=fg) + for error, severity, message in zip(errors, severities, messages): + error_message = f" [{error}] {message}" + fg = _get_severity_color([severity]) + click.secho(error_message, fg=fg) + elif len(errors) == 1: + fg = _get_severity_color(severities) + group_message = ( + f"{errors[0]}: detected in {pluralize(len(purviews), 'purviews')}" + ) + if len(set(messages)) == 1: + error_message += f" — {messages[0]}." + click.secho(group_message, fg=fg) + for purview, severity in zip(purviews, severities): + error_message = f" {purview}" + fg = _get_severity_color([severity]) + click.secho(error_message, fg=fg) + else: + error_message += "." 
+ for purview, severity, message in zip(purviews, severities, messages): + error_message = f" {purview} — {message}" + fg = _get_severity_color([severity]) + click.secho(error_message, fg=fg) + else: + for purview, error, severity, message in zip( + purviews, errors, severities, messages + ): + fg = _get_severity_color([severity]) + error_message = f"[{error}] {purview} — {message}" + click.secho(error_message, fg=fg) From 6a56acdd34aeff6b51b32ef2e4ba01e4fc105f8a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 29 Sep 2022 13:33:04 -0400 Subject: [PATCH 069/124] Corrected warning color --- dandi/cli/cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index ea5d56836..d105d2a0f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -191,7 +191,7 @@ def _get_severity_color(severities): if Severity.ERROR in severities: return "red" elif Severity.WARNING in severities: - return "green" + return "yellow" else: return "blue" From b4ad1bcfe62fc3ff5c47797ba7f3ef5ad3f3cab8 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 29 Sep 2022 15:21:21 -0400 Subject: [PATCH 070/124] =?UTF-8?q?BROKEN=20=E2=80=94=20reusage=20of=20gen?= =?UTF-8?q?eric=20validator=20output=20for=20NWB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dandi/files/bases.py | 75 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index d43bf41a9..33dbfa1b7 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -19,6 +19,7 @@ from dandischema.models import get_schema_version from etelemetry import get_project from nwbinspector import Importance, inspect_nwb, load_config +import nwbinspector.__version__ as nwbinspector_version from nwbinspector.utils import get_package_version from packaging.version import Version from pydantic import 
ValidationError @@ -31,6 +32,7 @@ from dandi.pynwb_utils import validate as pynwb_validate from dandi.support.digests import get_dandietag, get_digest from dandi.utils import yaml_load +from dandi.validate import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -38,6 +40,15 @@ _required_dandiset_metadata_fields = ["identifier", "name", "description"] +NWB_IMPORTANCE_TO_DANDI_SEVERITY = { + "ERROR": Severity.ERROR, + "PYNWB_VALIDATION": Severity.ERROR, + "CRITICAL": Severity.ERROR, # when using --config dandi + "BEST_PRACTICE_VIOLATION": Severity.WARNING, + "BEST_PRACTICE_SUGGESTION": Severity.HINT, +} + + @dataclass # type: ignore[misc] # class DandiFile(ABC): """Abstract base class for local files & directories of interest to DANDI""" @@ -463,6 +474,8 @@ def get_validation_errors( devel_debug: bool = False, ) -> list[str]: errors: list[str] = pynwb_validate(self.filepath, devel_debug=devel_debug) + # Q gh-943 do we want to manage this thing as an error? + # It should be a separate thing possibly via logging IMHO. if schema_version is not None: errors.extend( super().get_validation_errors( @@ -473,15 +486,23 @@ def get_validation_errors( # make sure that we have some basic metadata fields we require try: # Ensure latest version of NWB Inspector is installed and used client-side + # Q gh-943 + # Are you sure we want to do this? Maybe some version-selection + # logic analogous to BIDS would be better, so people can prospectively specify + # what they used.... even if for the time being we're just chasing upstream. try: max_version = Version( get_project(repo="NeurodataWithoutBorders/nwbinspector")[ "version" ] ) + # Any reason we're not using `nwbinspector.__version__` ? current_version = get_package_version(name="nwbinspector") if current_version < max_version: + # Q gh-943 do we want to manage this thing as an error? + # It should be a separate thing possibly via logging IMHO. 
+ # commenting out temporarily for tests errors.append( f"NWB Inspector version {current_version} is installed - please " f"use the latest release of the NWB Inspector ({max_version}) " @@ -496,20 +517,50 @@ def get_validation_errors( type(e).__name__, str(e), ) + origin = ValidationOrigin( + name="nwbinspector", + version=nwbinspector_version, + ) - # Run NWB Inspector with 'dandi' config - # CRITICAL errors and above are equivalent to validation errors - errors.extend( - [ - error.message - for error in inspect_nwb( - nwbfile_path=self.filepath, - skip_validate=True, - config=load_config(filepath_or_keyword="dandi"), - importance_threshold=Importance.CRITICAL, + scope = ( + Scope.FILE + ) # There's technically one folder-level thing but it gets registered at file level + for error in inspect_nwb( + nwbfile_path=self.filepath, + skip_validate=True, + config=load_config(filepath_or_keyword="dandi"), + importance_threshold=Importance.CRITICAL, + ): + id = error.check_function_name + asset_paths = [error.file_path] + # Assuming multiple sessions per multiple subjects, otherwise nesting level + # might differ + dandiset_path = error.file_path.parent.parent + dataset_path = error.dandiset_path + metadata = None + # assuming this is HDF5 Group information within the file + path = error.location + # unfortunate name collision; the InspectorMessage.severity is used only for + # minor ordering of final report - will have to remove the + # `importance_treshold` currently used to filter DANDI-level CRITICAL + # evaluations as errors vs. 
validation results + severity = NWB_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] + message = error.message + + errors.append( + ValidationResult( + asset_paths=asset_paths, + origin=origin, + severity=severity, + id=id, + scope=scope, + metadata=metadata, + path=path, + message=message, + dataset_path=dataset_path, + dandiset_path=dandiset_path, ) - ] - ) + ) except Exception as e: if devel_debug: raise From de27873cfba0bf5fd1c1a85527cfc570292d5f78 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 16:41:53 -0400 Subject: [PATCH 071/124] RF+BF: address circular import, unify more of validations into ValidationResults TODO: pretty much go through all other instances were "str" is returned for an error. Most of them could be caught up via `tox -e typing` or just inspecting other code paths. We also raised following concerns/topics for possible future development - do not return error structs for exception handling where we catch errors from opening files etc. But may be there was a reason why we decided to work with it this way instead of just handling exceptions. - there is .location (within .nwb / hdf5 file) to point more specifically within nwb file internal structure. We need to provide location for that in our data structure. But as we have asset_paths, we might want to do that as a dict. 
--- dandi/cli/cmd_validate.py | 6 +- dandi/files/bases.py | 122 ++++++++++++++++++++++---------------- dandi/pynwb_utils.py | 6 +- dandi/validate.py | 36 +---------- dandi/validate_types.py | 46 ++++++++++++++ 5 files changed, 126 insertions(+), 90 deletions(-) create mode 100644 dandi/validate_types.py diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index d105d2a0f..d1b3351ea 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -49,8 +49,8 @@ def validate_bids( dandi validate-bids /my/path """ - from ..validate import Severity from ..validate import validate_bids as validate_bids_ + from ..validate_types import Severity validator_result = validate_bids_( # Controller *paths, @@ -186,7 +186,7 @@ def validate( def _get_severity_color(severities): - from ..validate import Severity + from ..validate_types import Severity if Severity.ERROR in severities: return "red" @@ -205,7 +205,7 @@ def display_errors(purviews, errors, severities, messages): ---------- purviews: list of str errors: list of str - severities: list of dandi.validate.Severity + severities: list of dandi.validate_types.Severity messages: list of str diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 33dbfa1b7..12db75990 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -13,18 +13,19 @@ from typing import Any, BinaryIO, Generic, Optional from xml.etree.ElementTree import fromstring +import dandischema from dandischema.digests.dandietag import DandiETag from dandischema.models import BareAsset, CommonModel from dandischema.models import Dandiset as DandisetMeta from dandischema.models import get_schema_version from etelemetry import get_project from nwbinspector import Importance, inspect_nwb, load_config -import nwbinspector.__version__ as nwbinspector_version from nwbinspector.utils import get_package_version from packaging.version import Version from pydantic import ValidationError import requests +import dandi from dandi import 
get_logger from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient from dandi.metadata import get_default_metadata, nwb2asset @@ -32,7 +33,7 @@ from dandi.pynwb_utils import validate as pynwb_validate from dandi.support.digests import get_dandietag, get_digest from dandi.utils import yaml_load -from dandi.validate import Scope, Severity, ValidationOrigin, ValidationResult +from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -40,7 +41,7 @@ _required_dandiset_metadata_fields = ["identifier", "name", "description"] -NWB_IMPORTANCE_TO_DANDI_SEVERITY = { +NWBI_IMPORTANCE_TO_DANDI_SEVERITY = { "ERROR": Severity.ERROR, "PYNWB_VALIDATION": Severity.ERROR, "CRITICAL": Severity.ERROR, # when using --config dandi @@ -81,7 +82,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: """ Attempt to validate the file and return a list of errors encountered """ @@ -106,7 +107,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: with open(self.filepath) as f: meta = yaml_load(f, typ="safe") if schema_version is None: @@ -130,7 +131,7 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [str(e)] + return [str(e)] # TODO: proper record, scope dandiset except Exception as e: if devel_debug: raise @@ -140,7 +141,9 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [f"Failed to initialize Dandiset meta: {e}"] + return [ + f"Failed to initialize Dandiset meta: {e}" + ] # TODO: proper record return [] @@ -177,7 +180,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: if schema_version is not None: current_version = get_schema_version() if schema_version != current_version: @@ -196,7 
+199,23 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [str(e)] + # TODO: how do we get **all** errors from validation - there must be a way + return [ + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id="dandischema.TODO", + scope=Scope.FILE, + # metadata=metadata, + path=self.filepath, # note that it is not relative .path + message=str(e), + # TODO? dataset_path=dataset_path, + # TODO? dandiset_path=dandiset_path, + ) + ] except Exception as e: if devel_debug: raise @@ -206,7 +225,22 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [f"Failed to read metadata: {e}"] + return [ + ValidationResult( + origin=ValidationOrigin( + name="dandi", + version=dandi.__version__, + ), + severity=Severity.ERROR, + id="dandi.SOFTWARE_ERROR", + scope=Scope.FILE, + # metadata=metadata, + path=self.filepath, # note that it is not relative .path + message=f"Failed to read metadata: {e}", + # TODO? dataset_path=dataset_path, + # TODO? dandiset_path=dandiset_path, + ) + ] return [] else: # TODO: Do something else? @@ -472,8 +506,10 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: - errors: list[str] = pynwb_validate(self.filepath, devel_debug=devel_debug) + ) -> list[ValidationResult]: + errors: list[ValidationResult] = pynwb_validate( + self.filepath, devel_debug=devel_debug + ) # Q gh-943 do we want to manage this thing as an error? # It should be a separate thing possibly via logging IMHO. if schema_version is not None: @@ -485,31 +521,25 @@ def get_validation_errors( else: # make sure that we have some basic metadata fields we require try: + current_version = get_package_version(name="nwbinspector") + # Ensure latest version of NWB Inspector is installed and used client-side - # Q gh-943 - # Are you sure we want to do this? 
Maybe some version-selection - # logic analogous to BIDS would be better, so people can prospectively specify - # what they used.... even if for the time being we're just chasing upstream. try: max_version = Version( get_project(repo="NeurodataWithoutBorders/nwbinspector")[ "version" ] ) - # Any reason we're not using `nwbinspector.__version__` ? - current_version = get_package_version(name="nwbinspector") if current_version < max_version: - # Q gh-943 do we want to manage this thing as an error? - # It should be a separate thing possibly via logging IMHO. - # commenting out temporarily for tests - errors.append( - f"NWB Inspector version {current_version} is installed - please " - f"use the latest release of the NWB Inspector ({max_version}) " + lgr.warning( + "NWB Inspector version %s is installed - please " + "use the latest release of the NWB Inspector (%s) " "when performing `dandi validate`. To update, run " - "`pip install -U nwbinspector` if you installed it with `pip`." + "`pip install -U nwbinspector` if you installed it with `pip`.", + current_version, + max_version, ) - return errors except Exception as e: # In case of no internet connection or other error lgr.warning( @@ -517,48 +547,36 @@ def get_validation_errors( type(e).__name__, str(e), ) + origin = ValidationOrigin( name="nwbinspector", - version=nwbinspector_version, + version=str(current_version), ) - scope = ( - Scope.FILE - ) # There's technically one folder-level thing but it gets registered at file level for error in inspect_nwb( nwbfile_path=self.filepath, skip_validate=True, config=load_config(filepath_or_keyword="dandi"), importance_threshold=Importance.CRITICAL, ): - id = error.check_function_name - asset_paths = [error.file_path] - # Assuming multiple sessions per multiple subjects, otherwise nesting level - # might differ - dandiset_path = error.file_path.parent.parent - dataset_path = error.dandiset_path - metadata = None - # assuming this is HDF5 Group information within the file - 
path = error.location # unfortunate name collision; the InspectorMessage.severity is used only for # minor ordering of final report - will have to remove the - # `importance_treshold` currently used to filter DANDI-level CRITICAL + # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. validation results - severity = NWB_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] - message = error.message - + severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] errors.append( ValidationResult( - asset_paths=asset_paths, origin=origin, severity=severity, - id=id, - scope=scope, - metadata=metadata, - path=path, - message=message, - dataset_path=dataset_path, - dandiset_path=dandiset_path, + id=f"NWBI.{error.check_function_name}", + scope=Scope.FILE, + path=Path(error.file_path), + # TODO inner_path=error.location, + message=error.message, + # Assuming multiple sessions per multiple subjects, + # otherwise nesting level might differ + dataset_path=Path(error.file_path).parent.parent, # TODO + dandiset_path=Path(error.file_path).parent, # TODO ) ) except Exception as e: @@ -570,6 +588,8 @@ def get_validation_errors( e, extra={"validating": True}, ) + # TODO: might reraise instead of making it into an error + # TODO: provide proper ValidationResult struct errors.append(f"Failed to inspect NWBFile: {e}") return errors diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 8c2f0a1c8..ab0759d51 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -23,6 +23,7 @@ metadata_nwb_subject_fields, ) from .utils import get_module_version, is_url +from .validate_types import ValidationResult lgr = get_logger() @@ -319,7 +320,9 @@ def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None: @validate_cache.memoize_path -def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]: +def validate( + path: Union[str, Path], devel_debug: bool = False +) -> List[ValidationResult]: """Run 
validation on a file and return errors In case of an exception being thrown, an error message added to the @@ -334,6 +337,7 @@ def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]: try: with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader: errors = pynwb.validate(reader) + # TODO: return ValidationResult structs lgr.warning( "pynwb validation errors for %s: %s", path, diff --git a/dandi/validate.py b/dandi/validate.py index dc8bb5471..914e993c6 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,7 +1,5 @@ from __future__ import annotations -from dataclasses import dataclass -from enum import Enum import os from pathlib import Path from typing import Iterator, List, Optional, Tuple, Union @@ -10,6 +8,7 @@ from .files import find_dandi_files from .utils import find_parent_directory_containing +from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult BIDS_TO_DANDI = { "subject": "subject_id", @@ -17,39 +16,6 @@ } -@dataclass -class ValidationResult: - id: str - origin: ValidationOrigin - scope: Scope - asset_paths: Optional[list[str]] = None - dandiset_path: Optional[Path] = None - dataset_path: Optional[Path] = None - message: Optional[str] = None - metadata: Optional[dict] = None - path: Optional[Path] = None - path_regex: Optional[str] = None - severity: Optional[Severity] = None - - -@dataclass -class ValidationOrigin: - name: str - version: str - - -class Severity(Enum): - ERROR = "ERROR" - WARNING = "WARNING" - HINT = "HINT" - - -class Scope(Enum): - FILE = "file" - DANDISET = "dandiset" - DATASET = "dataset" - - def validate_bids( *paths: Union[str, Path], schema_version: Optional[str] = None, diff --git a/dandi/validate_types.py b/dandi/validate_types.py new file mode 100644 index 000000000..0f168ebc3 --- /dev/null +++ b/dandi/validate_types.py @@ -0,0 +1,46 @@ +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Optional + + +@dataclass +class 
ValidationOrigin: + name: str + version: str + + +class Severity(Enum): + ERROR = "ERROR" + WARNING = "WARNING" + HINT = "HINT" + + +class Scope(Enum): + FILE = "file" + DANDISET = "dandiset" + DATASET = "dataset" + + +@dataclass +class ValidationResult: + id: str + origin: ValidationOrigin + scope: Scope + # asset_paths, if not populated, assumes [.path], but could be smth like + # {"path": "task-broken_bold.json", + # "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json", + # "sub-02/func/sub-02_task-broken_bold.json"]} + asset_paths: Optional[list[str]] = None + dandiset_path: Optional[Path] = None + dataset_path: Optional[Path] = None + # TODO: locations analogous to nwbinspector.InspectorMessage.location + # but due to multiple possible asset_paths, we might want to have it + # as a dict to point to location in some or each affected assets + message: Optional[str] = None + metadata: Optional[dict] = None + # ??? should it become a list e.g. for errors which rely on + # multiple files, like mismatch between .nii.gz header and .json sidecar + path: Optional[Path] = None + path_regex: Optional[str] = None + severity: Optional[Severity] = None From 6b7e680f5d3aebc2acc131f2f93f89056d76a047 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 16:50:13 -0400 Subject: [PATCH 072/124] ENH: add within_asset_paths to point within files/assets --- dandi/files/bases.py | 7 ++++++- dandi/validate_types.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 12db75990..c1088607d 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -564,6 +564,11 @@ def get_validation_errors( # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. 
validation results severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] + kw = {} + if error.location: + kw["within_asset_paths"] = { + error.file_path: error.location, + } errors.append( ValidationResult( origin=origin, @@ -571,12 +576,12 @@ def get_validation_errors( id=f"NWBI.{error.check_function_name}", scope=Scope.FILE, path=Path(error.file_path), - # TODO inner_path=error.location, message=error.message, # Assuming multiple sessions per multiple subjects, # otherwise nesting level might differ dataset_path=Path(error.file_path).parent.parent, # TODO dandiset_path=Path(error.file_path).parent, # TODO + **kw, ) ) except Exception as e: diff --git a/dandi/validate_types.py b/dandi/validate_types.py index 0f168ebc3..24d775204 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -32,6 +32,9 @@ class ValidationResult: # "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json", # "sub-02/func/sub-02_task-broken_bold.json"]} asset_paths: Optional[list[str]] = None + # e.g. 
path within hdf5 file hierarchy + # As a dict we will map asset_paths into location within them + within_asset_paths: Optional[dict[str, str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None # TODO: locations analogous to nwbinspector.InspectorMessage.location From ccf9491c633037268d4ff0859a040adcd32afb20 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 16:58:02 -0400 Subject: [PATCH 073/124] BF: map importance to our severity based on enum"s name --- dandi/files/bases.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index c1088607d..8397e6f0b 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -41,7 +41,7 @@ _required_dandiset_metadata_fields = ["identifier", "name", "description"] -NWBI_IMPORTANCE_TO_DANDI_SEVERITY = { +NWBI_IMPORTANCE_TO_DANDI_SEVERITY: dict[Importance.name, Severity] = { "ERROR": Severity.ERROR, "PYNWB_VALIDATION": Severity.ERROR, "CRITICAL": Severity.ERROR, # when using --config dandi @@ -563,7 +563,7 @@ def get_validation_errors( # minor ordering of final report - will have to remove the # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. 
validation results - severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] + severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance.name] kw = {} if error.location: kw["within_asset_paths"] = { From b3106f49eaa5b9749656c4ad30a3917f100fca50 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 17:10:30 -0400 Subject: [PATCH 074/124] ENH+RF: make sure we issue a warning about nwbinspector only once --- dandi/files/bases.py | 62 ++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 8397e6f0b..99cfabdc0 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -521,36 +521,9 @@ def get_validation_errors( else: # make sure that we have some basic metadata fields we require try: - current_version = get_package_version(name="nwbinspector") - - # Ensure latest version of NWB Inspector is installed and used client-side - try: - max_version = Version( - get_project(repo="NeurodataWithoutBorders/nwbinspector")[ - "version" - ] - ) - - if current_version < max_version: - lgr.warning( - "NWB Inspector version %s is installed - please " - "use the latest release of the NWB Inspector (%s) " - "when performing `dandi validate`. 
To update, run " - "`pip install -U nwbinspector` if you installed it with `pip`.", - current_version, - max_version, - ) - - except Exception as e: # In case of no internet connection or other error - lgr.warning( - "Failed to retrieve NWB Inspector version due to %s: %s", - type(e).__name__, - str(e), - ) - origin = ValidationOrigin( name="nwbinspector", - version=str(current_version), + version=str(_get_nwb_inspector_version()), ) for error in inspect_nwb( @@ -711,3 +684,36 @@ def _check_required_fields(d: dict, required: list[str]) -> list[str]: if v in ("REQUIRED", "PLACEHOLDER"): errors += [f"Required field {f!r} has value {v!r}"] return errors + + +_current_nwbinspector_version = None + + +def _get_nwb_inspector_version(): + global _current_nwbinspector_version + if _current_nwbinspector_version is not None: + return _current_nwbinspector_version + _current_nwbinspector_version = get_package_version(name="nwbinspector") + # Ensure latest version of NWB Inspector is installed and used client-side + try: + max_version = Version( + get_project(repo="NeurodataWithoutBorders/nwbinspector")["version"] + ) + + if _current_nwbinspector_version < max_version: + lgr.warning( + "NWB Inspector version %s is installed - please " + "use the latest release of the NWB Inspector (%s) " + "when performing `dandi validate`. To update, run " + "`pip install -U nwbinspector` if you installed it with `pip`.", + _current_nwbinspector_version, + max_version, + ) + + except Exception as e: # In case of no internet connection or other error + lgr.warning( + "Failed to retrieve NWB Inspector version due to %s: %s", + type(e).__name__, + str(e), + ) + return _current_nwbinspector_version From 49c9a0a58c4b4da00fc5edea700f745fd3185cc1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 02:39:43 -0400 Subject: [PATCH 075/124] Integrating pydantic error into custom object. 
--- dandi/files/bases.py | 62 ++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 99cfabdc0..fccfcfc5b 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -125,25 +125,11 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - lgr.warning( - "Validation error for %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) - return [str(e)] # TODO: proper record, scope dandiset + return _pydantic_errors_to_validation_results(e, self.filepath) except Exception as e: if devel_debug: raise - lgr.warning( - "Unexpected validation error for %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) - return [ - f"Failed to initialize Dandiset meta: {e}" - ] # TODO: proper record + return _pydantic_errors_to_validation_results(e, self.filepath) return [] @@ -560,14 +546,8 @@ def get_validation_errors( except Exception as e: if devel_debug: raise - lgr.warning( - "Failed to inspect NWBFile in %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) # TODO: might reraise instead of making it into an error - # TODO: provide proper ValidationResult struct + return _pydantic_errors_to_validation_results(e, self.filepath) errors.append(f"Failed to inspect NWBFile: {e}") return errors @@ -717,3 +697,39 @@ def _get_nwb_inspector_version(): str(e), ) return _current_nwbinspector_version + + +def _pydantic_errors_to_validation_results( + errors: list[dict], + file_path: str, +) -> list[ValidationResult]: + """Convert list of dict from pydantic into our custom object.""" + out = [] + for e in errors: + id = dict( + id=":".join( + filter( + bool, + ( + "dandischema", + e.get("type", "UNKNOWN"), + "+".join(e.get("loc", [])), + ), + ) + ) + ) + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id=id, + scope=Scope.DANDISET, + 
path=Path(file_path), + message=e.get("message", None), + # TODO? dataset_path=dataset_path, + # TODO? dandiset_path=dandiset_path, + ) + out.append(e) + return e From 9861be1d50ac19f4d9644b20bb6fbead83ccc546 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 02:47:03 -0400 Subject: [PATCH 076/124] Corrected function return --- dandi/files/bases.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index fccfcfc5b..cd033d20e 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -718,18 +718,19 @@ def _pydantic_errors_to_validation_results( ) ) ) - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), - severity=Severity.ERROR, - id=id, - scope=Scope.DANDISET, - path=Path(file_path), - message=e.get("message", None), - # TODO? dataset_path=dataset_path, - # TODO? dandiset_path=dandiset_path, + out.append( + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id=id, + scope=Scope.DANDISET, + path=Path(file_path), + message=e.get("message", None), + # TODO? dataset_path=dataset_path, + # TODO? 
dandiset_path=dandiset_path, + ) ) - out.append(e) - return e + return out From f387998a31fa37008db7d5ddff27faf1a8f03940 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 03:06:17 -0400 Subject: [PATCH 077/124] Parsing further errors into the custom dandi validation object --- dandi/files/bases.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index cd033d20e..060e3cc66 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -113,7 +113,9 @@ def get_validation_errors( if schema_version is None: schema_version = meta.get("schemaVersion") if schema_version is None: - return _check_required_fields(meta, _required_dandiset_metadata_fields) + return _check_required_fields( + meta, _required_dandiset_metadata_fields, self.filepath + ) else: current_version = get_schema_version() if schema_version != current_version: @@ -655,14 +657,42 @@ def _upload_blob_part( } -def _check_required_fields(d: dict, required: list[str]) -> list[str]: +def _check_required_fields( + d: dict, required: list[str], file_path: str +) -> list[ValidationResult]: errors: list[str] = [] for f in required: v = d.get(f, None) if not v or (isinstance(v, str) and not v.strip()): - errors += [f"Required field {f!r} has no value"] + message = f"Required field {f!r} has no value" + errors.append( + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id="dandischema.requred_field", + scope=Scope.FILE, + path=Path(file_path), + message=message, + ) + ) if v in ("REQUIRED", "PLACEHOLDER"): - errors += [f"Required field {f!r} has value {v!r}"] + message = f"Required field {f!r} has value {v!r}" + errors.append( + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.WARNING, + id="dandischema.placeholder_value", + 
scope=Scope.FILE, + path=Path(file_path), + message=message, + ) + ) return errors From 04ee73c1ac89fe3a61e7c72912ce3f5e3a0e51e2 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 13:55:45 -0400 Subject: [PATCH 078/124] Trying to fix types --- dandi/files/bases.py | 6 +++--- dandi/validate.py | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 060e3cc66..95af1544c 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -114,7 +114,7 @@ def get_validation_errors( schema_version = meta.get("schemaVersion") if schema_version is None: return _check_required_fields( - meta, _required_dandiset_metadata_fields, self.filepath + meta, _required_dandiset_metadata_fields, str(self.filepath) ) else: current_version = get_schema_version() @@ -131,7 +131,7 @@ def get_validation_errors( except Exception as e: if devel_debug: raise - return _pydantic_errors_to_validation_results(e, self.filepath) + return _pydantic_errors_to_validation_results(e, str(self.filepath)) return [] @@ -696,7 +696,7 @@ def _check_required_fields( return errors -_current_nwbinspector_version = None +_current_nwbinspector_version: str = None def _get_nwb_inspector_version(): diff --git a/dandi/validate.py b/dandi/validate.py index 914e993c6..4430d4dc2 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -148,7 +148,7 @@ def validate( schema_version: Optional[str] = None, devel_debug: bool = False, allow_any_path: bool = False, -) -> Iterator[Tuple[str, List[str]]]: +) -> Iterator[ValidationResult]: """Validate content Parameters @@ -162,9 +162,6 @@ def validate( errors for a path """ for df in find_dandi_files(*paths, dandiset_path=None, allow_all=allow_any_path): - yield ( - str(df.filepath), - df.get_validation_errors( + yield from df.get_validation_errors( schema_version=schema_version, devel_debug=devel_debug - ), - ) + ) From d161b463ec493655684b26474d273e0e98b7d1b8 Mon Sep 17 00:00:00 2001 From: 
Horea Christian Date: Fri, 30 Sep 2022 16:34:10 -0400 Subject: [PATCH 079/124] Type fixing --- dandi/files/bids.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d7ca279b1..f2f5dcd7c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -5,7 +5,7 @@ from datetime import datetime from pathlib import Path from threading import Lock -from typing import Any, Optional +from typing import Any, List, Optional import weakref from dandischema.models import BareAsset @@ -14,6 +14,7 @@ from .zarr import ZarrAsset from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest +from ..validate_types import ValidationResult BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) @@ -65,17 +66,15 @@ def _validate(self) -> None: str(asset.filepath) for asset in self.dataset_files ] results = validate_bids(*bids_paths) - self._dataset_errors: list[str] = [] - self._asset_errors = defaultdict(list) + self._dataset_errors: list[ValidationResult] = [] + self._asset_errors: dict[str, list[ValidationResult]] = defaultdict(list) self._asset_metadata = defaultdict(dict) for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path - assert result.message - self._asset_errors[str(result.path)].append(result.message) + self._asset_errors[str(result.path)].append(result) elif result.id in BIDS_DATASET_ERRORS: - assert result.message - self._dataset_errors.append(result.message) + self._dataset_errors.append(result) elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() @@ -104,10 +103,10 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: self._validate() assert self._dataset_errors is not None - return list(self._dataset_errors) 
+ return self._dataset_errors + self._asset_errors.values() # get_metadata(): inherit use of default metadata from LocalFileAsset @@ -154,7 +153,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: return self.bids_dataset_description.get_asset_errors(self) def get_metadata( @@ -208,7 +207,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: return ZarrBIDSAsset.get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) @@ -227,7 +226,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> List[ValidationResult]: return GenericAsset.get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) From 482246a533a084b17081f228135885c929399ff0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 17:02:39 -0400 Subject: [PATCH 080/124] New object structure for zarr validation --- dandi/files/zarr.py | 58 ++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 9204eb5a8..0b04a9d83 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -11,6 +11,7 @@ from time import sleep from typing import Any, Optional, cast +from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult from dandischema.digests.zarr import get_checksum from dandischema.models import BareAsset, DigestType import requests @@ -184,36 +185,59 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: try: data = zarr.open(self.filepath) except Exception as e: if devel_debug: raise - lgr.warning( - "Error opening %s: %s: %s", - 
self.filepath, - type(e).__name__, - e, - extra={"validating": True}, - ) - return [str(e)] + return [ + ValidationResult( + origin=ValidationOrigin( + name="zarr", + version= zarr.version.version, + ), + severity=Severity.ERROR, + id="zarr.cannot_open", + scope=Scope.FILE, + path=self.filepath, + message="Error opening file.", + ) + ] if isinstance(data, zarr.Group) and not data: - msg = "Zarr group is empty" - if devel_debug: - raise ValueError(msg) - lgr.warning("%s: %s", self.filepath, msg, extra={"validating": True}) - return [msg] + return [ + ValidationResult( + origin=ValidationOrigin( + name="zarr", + version= zarr.version.version, + ), + severity=Severity.ERROR, + id="zarr.empty_group", + scope=Scope.FILE, + path=self.filepath, + message="Zarr group is empty.", + ) + ] try: next(self.filepath.glob(f"*{os.sep}" + os.sep.join(["*"] * MAX_ZARR_DEPTH))) except StopIteration: pass else: - msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep" if devel_debug: raise ValueError(msg) - lgr.warning("%s: %s", self.filepath, msg, extra={"validating": True}) - return [msg] + return [ + ValidationResult( + origin=ValidationOrigin( + name="zarr", + version= zarr.version.version, + ), + severity=Severity.ERROR, + id="zarr.tree_depth_exceeded", + scope=Scope.FILE, + path=self.filepath, + message=f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep", + ) + ] # TODO: Should this be appended to the above errors? 
return super().get_validation_errors( schema_version=schema_version, devel_debug=devel_debug From 1691547f900352a112789b5b6cd2bc7455d3b8b5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 11:52:43 -0400 Subject: [PATCH 081/124] Further type fixes --- dandi/files/bases.py | 19 +++++++++++++------ dandi/files/bids.py | 14 ++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 95af1544c..270f602c5 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -181,12 +181,19 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - lgr.warning( - "Validation error for %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) + return [ + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id="dandischema.validationerror", + scope=Scope.FILE, + path=self.filepath, # note that it is not relative .path + message=str(e), + ) + ] # TODO: how do we get **all** errors from validation - there must be a way return [ ValidationResult( diff --git a/dandi/files/bids.py b/dandi/files/bids.py index f2f5dcd7c..814df1309 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -38,7 +38,7 @@ class BIDSDatasetDescriptionAsset(LocalFileAsset): #: A list of validation error messages for individual assets in the #: dataset, keyed by `bids_path` properties; populated by `_validate()` - _asset_errors: Optional[dict[str, list[str]]] = None + _asset_errors: Optional[dict[str, list[ValidationResult]]] = None #: Asset metadata (in the form of a `dict` of BareAsset fields) for #: individual assets in the dataset, keyed by `bids_path` properties; @@ -67,7 +67,9 @@ def _validate(self) -> None: ] results = validate_bids(*bids_paths) self._dataset_errors: list[ValidationResult] = [] - self._asset_errors: dict[str, list[ValidationResult]] = defaultdict(list) + self._asset_errors: 
dict[str, list[ValidationResult]] = defaultdict( + list + ) self._asset_metadata = defaultdict(dict) for result in results: if result.id in BIDS_ASSET_ERRORS: @@ -83,12 +85,12 @@ def _validate(self) -> None: result.metadata ) - def get_asset_errors(self, asset: BIDSAsset) -> list[str]: + def get_asset_errors(self, asset: BIDSAsset) -> list[ValidationResult]: """:meta private:""" self._validate() - errors: list[str] = [] + errors: list[ValidationResult] = [] if self._dataset_errors: - errors.append("BIDS dataset is invalid") + errors.extend(self._dataset_errors) assert self._asset_errors is not None errors.extend(self._asset_errors[asset.bids_path]) return errors @@ -179,7 +181,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: return NWBAsset.get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) From 889d2e78cbf11bd5d22b0a94ea5848268e5481ae Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 13:53:24 -0400 Subject: [PATCH 082/124] Attempting to fix typing errors --- dandi/files/bids.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 814df1309..d53e13d8f 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -34,7 +34,7 @@ class BIDSDatasetDescriptionAsset(LocalFileAsset): #: A list of validation error messages pertaining to the dataset as a #: whole, populated by `_validate()` - _dataset_errors: Optional[list[str]] = None + _dataset_errors: Optional[list[ValidationResult]] = None #: A list of validation error messages for individual assets in the #: dataset, keyed by `bids_path` properties; populated by `_validate()` @@ -108,7 +108,10 @@ def get_validation_errors( ) -> list[ValidationResult]: self._validate() assert self._dataset_errors is not None - return self._dataset_errors + self._asset_errors.values() + if 
self._asset_errors is not None: + return self._dataset_errors + [*self._asset_errors.values()] + else: + return self._dataset_errors # get_metadata(): inherit use of default metadata from LocalFileAsset From dd03eb583e70224e299603c4f60cb29f05b71875 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 14:35:55 -0400 Subject: [PATCH 083/124] Fixed more typing errors --- dandi/files/bases.py | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 270f602c5..d2bf67bd8 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -181,19 +181,6 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - return [ - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), - severity=Severity.ERROR, - id="dandischema.validationerror", - scope=Scope.FILE, - path=self.filepath, # note that it is not relative .path - message=str(e), - ) - ] # TODO: how do we get **all** errors from validation - there must be a way return [ ValidationResult( @@ -557,7 +544,6 @@ def get_validation_errors( raise # TODO: might reraise instead of making it into an error return _pydantic_errors_to_validation_results(e, self.filepath) - errors.append(f"Failed to inspect NWBFile: {e}") return errors @@ -667,7 +653,7 @@ def _upload_blob_part( def _check_required_fields( d: dict, required: list[str], file_path: str ) -> list[ValidationResult]: - errors: list[str] = [] + errors: list[ValidationResult] = [] for f in required: v = d.get(f, None) if not v or (isinstance(v, str) and not v.strip()): @@ -703,7 +689,7 @@ def _check_required_fields( return errors -_current_nwbinspector_version: str = None +_current_nwbinspector_version: str = "" def _get_nwb_inspector_version(): @@ -743,16 +729,14 @@ def _pydantic_errors_to_validation_results( """Convert list of dict from pydantic into our custom object.""" out = [] for e in 
errors: - id = dict( - id=":".join( - filter( - bool, - ( - "dandischema", - e.get("type", "UNKNOWN"), - "+".join(e.get("loc", [])), - ), - ) + id = ":".join( + filter( + bool, + ( + "dandischema", + e.get("type", "UNKNOWN"), + "+".join(e.get("loc", [])), + ), ) ) out.append( From b2f4c2e7c43bf568565ea699815a82b0ab04abed Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 14:43:34 -0400 Subject: [PATCH 084/124] Fixed list unpacking issue discussed in meeting --- dandi/files/bids.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d53e13d8f..7806f80a2 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -109,7 +109,9 @@ def get_validation_errors( self._validate() assert self._dataset_errors is not None if self._asset_errors is not None: - return self._dataset_errors + [*self._asset_errors.values()] + return self._dataset_errors + [ + i for j in self._asset_errors.values() for i in j + ] else: return self._dataset_errors From 0d77a0d0e39148a117b6adf16fba372691c7f770 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 14:54:11 -0400 Subject: [PATCH 085/124] Further typing fixes --- dandi/files/bases.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index d2bf67bd8..176937934 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -127,7 +127,7 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - return _pydantic_errors_to_validation_results(e, self.filepath) + return _pydantic_errors_to_validation_results(e, str(self.filepath)) except Exception as e: if devel_debug: raise @@ -543,7 +543,7 @@ def get_validation_errors( if devel_debug: raise # TODO: might reraise instead of making it into an error - return _pydantic_errors_to_validation_results(e, self.filepath) + return _pydantic_errors_to_validation_results(e, str(self.filepath)) return errors @@ -723,7 
+723,7 @@ def _get_nwb_inspector_version(): def _pydantic_errors_to_validation_results( - errors: list[dict], + errors: Any[list[dict], Exception], file_path: str, ) -> list[ValidationResult]: """Convert list of dict from pydantic into our custom object.""" From e263bf483e1e4892bec208a48ac1fcb1ea1c2f6a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 15:00:07 -0400 Subject: [PATCH 086/124] Defined missing and removed unused variable --- dandi/files/zarr.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 0b04a9d83..088bdab6e 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -11,7 +11,6 @@ from time import sleep from typing import Any, Optional, cast -from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult from dandischema.digests.zarr import get_checksum from dandischema.models import BareAsset, DigestType import requests @@ -37,6 +36,7 @@ from dandi.utils import chunked, pluralize from .bases import LocalDirectoryAsset +from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -188,56 +188,57 @@ def get_validation_errors( ) -> list[ValidationResult]: try: data = zarr.open(self.filepath) - except Exception as e: + except Exception: if devel_debug: raise return [ ValidationResult( origin=ValidationOrigin( name="zarr", - version= zarr.version.version, + version=zarr.version.version, ), severity=Severity.ERROR, id="zarr.cannot_open", scope=Scope.FILE, path=self.filepath, message="Error opening file.", - ) - ] + ) + ] if isinstance(data, zarr.Group) and not data: return [ ValidationResult( origin=ValidationOrigin( name="zarr", - version= zarr.version.version, + version=zarr.version.version, ), severity=Severity.ERROR, id="zarr.empty_group", scope=Scope.FILE, path=self.filepath, message="Zarr group is empty.", - ) - ] + ) + ] try: next(self.filepath.glob(f"*{os.sep}" + 
os.sep.join(["*"] * MAX_ZARR_DEPTH))) except StopIteration: pass else: + msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep" if devel_debug: raise ValueError(msg) return [ ValidationResult( origin=ValidationOrigin( name="zarr", - version= zarr.version.version, + version=zarr.version.version, ), severity=Severity.ERROR, id="zarr.tree_depth_exceeded", scope=Scope.FILE, path=self.filepath, - message=f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep", - ) - ] + message=msg, + ) + ] # TODO: Should this be appended to the above errors? return super().get_validation_errors( schema_version=schema_version, devel_debug=devel_debug From 8b58dce30e5b27b226299c54f95b4d1af817cd0c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 16:19:32 -0400 Subject: [PATCH 087/124] Trying to standardize error output --- dandi/pynwb_utils.py | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index ab0759d51..ebf78e6ec 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -23,7 +23,7 @@ metadata_nwb_subject_fields, ) from .utils import get_module_version, is_url -from .validate_types import ValidationResult +from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -333,22 +333,39 @@ def validate( path: str or Path """ path = str(path) # Might come in as pathlib's PATH - errors: List[str] + errors: List[ValidationResult] = [] try: with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader: - errors = pynwb.validate(reader) + error_outputs = pynwb.validate(reader) # TODO: return ValidationResult structs - lgr.warning( - "pynwb validation errors for %s: %s", - path, - errors, - extra={"validating": True}, - ) + for error_output in error_outputs: + errors.append = ValidationResult( + origin=ValidationOrigin( + name="pynwb", + version=pynwb.__version__, + ), + 
severity=Severity.WARNING, + id=f"pywnb.{error_output}", + scope=Scope.FILE, + path=Path(path), + message="Failed to validate.", + ) except Exception as exc: if devel_debug: raise - lgr.warning("Failed to validate %s: %s", path, exc, extra={"validating": True}) - errors = [f"Failed to validate {path}: {exc}"] + errors.extend = [ + ValidationResult( + origin=ValidationOrigin( + name="pynwb", + version=pynwb.__version__, + ), + severity=Severity.ERROR, + id=f"pywnb.{error_output}", + scope=Scope.FILE, + path=Path(path), + message=f"{exc}", + ) + ] # To overcome # https://github.com/NeurodataWithoutBorders/pynwb/issues/1090 @@ -366,6 +383,7 @@ def validate( if version is not None: # Explicitly sanitize so we collect warnings. # TODO: later cast into proper ERRORs + # Do we really need to do these acrobatics? string comparison works fine... version = _sanitize_nwb_version(version, log=errors.append) try: v = semantic_version.Version(version) @@ -373,7 +391,8 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [e for e in errors if not re_ok_prior_210.search(str(e))] + errors = [e for e in errors if not re_ok_prior_210.search(e.message)] + # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( "Filtered out %d validation errors on %s", From f2f577d40e5ae4dd34c511ebc1ae141461619125 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 16:42:58 -0400 Subject: [PATCH 088/124] Fixed syntax --- dandi/pynwb_utils.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index ebf78e6ec..3398d4363 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -339,33 +339,37 @@ def validate( error_outputs = pynwb.validate(reader) # TODO: return ValidationResult structs for error_output in error_outputs: - errors.append = ValidationResult( + 
errors.append( + ValidationResult( + origin=ValidationOrigin( + name="pynwb", + version=pynwb.__version__, + ), + severity=Severity.WARNING, + id=f"pywnb.{error_output}", + scope=Scope.FILE, + path=Path(path), + message="Failed to validate.", + ) + ) + except Exception as exc: + if devel_debug: + raise + errors.extend( + [ + ValidationResult( origin=ValidationOrigin( name="pynwb", version=pynwb.__version__, ), - severity=Severity.WARNING, + severity=Severity.ERROR, id=f"pywnb.{error_output}", scope=Scope.FILE, path=Path(path), - message="Failed to validate.", + message=f"{exc}", ) - except Exception as exc: - if devel_debug: - raise - errors.extend = [ - ValidationResult( - origin=ValidationOrigin( - name="pynwb", - version=pynwb.__version__, - ), - severity=Severity.ERROR, - id=f"pywnb.{error_output}", - scope=Scope.FILE, - path=Path(path), - message=f"{exc}", - ) - ] + ] + ) # To overcome # https://github.com/NeurodataWithoutBorders/pynwb/issues/1090 From 5c1890c61701bf785cb8b46103d80e7062d65a8b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 09:52:35 -0400 Subject: [PATCH 089/124] Expanded list comprehension to assert attribute presence --- dandi/pynwb_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 3398d4363..dd5532559 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -395,7 +395,15 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [e for e in errors if not re_ok_prior_210.search(e.message)] + errors = [] + for e in errors: + try: + assert e.message + if not re_ok_prior_210.search(e.message): + errors.append(e) + except AssertionError: + errors.append(e) + # errors = [e for e in errors if not re_ok_prior_210.search(e.message)] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From e30809d619c1765602c7323661e3aad8ae7b71ae 
Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:00:08 -0400 Subject: [PATCH 090/124] No more typing errors --- dandi/files/bases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 176937934..0c513b7bd 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -519,7 +519,7 @@ def get_validation_errors( # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. validation results severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance.name] - kw = {} + kw: Any = {} if error.location: kw["within_asset_paths"] = { error.file_path: error.location, From 87048c73b219105a45b703ad569e6f4acb533bb9 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:03:08 -0400 Subject: [PATCH 091/124] Removed unused imports --- dandi/validate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 4430d4dc2..85b155a1c 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -2,7 +2,7 @@ import os from pathlib import Path -from typing import Iterator, List, Optional, Tuple, Union +from typing import Iterator, Optional, Union import appdirs @@ -163,5 +163,5 @@ def validate( """ for df in find_dandi_files(*paths, dandiset_path=None, allow_all=allow_any_path): yield from df.get_validation_errors( - schema_version=schema_version, devel_debug=devel_debug - ) + schema_version=schema_version, devel_debug=devel_debug + ) From d4b35b4b3050b62110a4a294889e72df929a2309 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:08:08 -0400 Subject: [PATCH 092/124] Fixed weird typing issues only appearing on GH --- dandi/validate_types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/validate_types.py b/dandi/validate_types.py index 24d775204..dfd920e11 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -1,7 +1,7 @@ 
from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Optional +from typing import Dict, List, Optional @dataclass @@ -31,10 +31,10 @@ class ValidationResult: # {"path": "task-broken_bold.json", # "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json", # "sub-02/func/sub-02_task-broken_bold.json"]} - asset_paths: Optional[list[str]] = None + asset_paths: Optional[List[str]] = None # e.g. path within hdf5 file hierarchy # As a dict we will map asset_paths into location within them - within_asset_paths: Optional[dict[str, str]] = None + within_asset_paths: Optional[Dict[str, str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None # TODO: locations analogous to nwbinspector.InspectorMessage.location From acd78b774a236673814cadd73594f5ede1bb8ee4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:15:49 -0400 Subject: [PATCH 093/124] Placeholder for undefined variable string --- dandi/pynwb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index dd5532559..dddedef3e 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -363,7 +363,7 @@ def validate( version=pynwb.__version__, ), severity=Severity.ERROR, - id=f"pywnb.{error_output}", + id="pywnb.GENERIC", scope=Scope.FILE, path=Path(path), message=f"{exc}", From cc2ad2610ec43ad4b3092ce31c2a0d7249ae2da6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 11:33:01 -0400 Subject: [PATCH 094/124] Fixed assertion in test to match object output --- dandi/tests/test_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 2a333aca8..86a40c44b 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -232,7 +232,7 @@ def test_validate_simple1(simple1_nwb): def test_validate_simple1_no_subject(simple1_nwb): errors = 
dandi_file(simple1_nwb).get_validation_errors() - assert errors == ["Subject is missing."] + assert [e.message for e in errors] == ["Subject is missing."] def test_validate_simple2(simple2_nwb): @@ -251,7 +251,7 @@ def test_validate_simple2_new(simple2_nwb): def test_validate_simple3_no_subject_id(simple3_nwb): errors = dandi_file(simple3_nwb).get_validation_errors() - assert errors == ["subject_id is missing."] + assert [e.message for e in errors] == ["subject_id is missing."] def test_validate_bogus(tmp_path): From b5e16b19204edc16c3f3246210929819a3c75571 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 13:58:44 -0400 Subject: [PATCH 095/124] Integrate OSError into object-based validation reporting --- dandi/files/bases.py | 31 +++++++++++++++++++------------ dandi/tests/test_files.py | 2 +- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 0c513b7bd..5c5371627 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -543,7 +543,7 @@ def get_validation_errors( if devel_debug: raise # TODO: might reraise instead of making it into an error - return _pydantic_errors_to_validation_results(e, str(self.filepath)) + return _pydantic_errors_to_validation_results([e], str(self.filepath)) return errors @@ -729,16 +729,23 @@ def _pydantic_errors_to_validation_results( """Convert list of dict from pydantic into our custom object.""" out = [] for e in errors: - id = ":".join( - filter( - bool, - ( - "dandischema", - e.get("type", "UNKNOWN"), - "+".join(e.get("loc", [])), - ), + if isinstance(e, OSError): + id = "OSError" + message = e.__str__() + scope = Scope.FILE + else: + id = ":".join( + filter( + bool, + ( + "dandischema", + e.get("type", "UNKNOWN"), + "+".join(e.get("loc", [])), + ), + ) ) - ) + message = e.get("message", None) + scope = Scope.DANDISET out.append( ValidationResult( origin=ValidationOrigin( @@ -747,9 +754,9 @@ def _pydantic_errors_to_validation_results( ), 
severity=Severity.ERROR, id=id, - scope=Scope.DANDISET, + scope=scope, path=Path(file_path), - message=e.get("message", None), + message=message, # TODO? dataset_path=dataset_path, # TODO? dandiset_path=dandiset_path, ) diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 86a40c44b..3d9859f83 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -263,7 +263,7 @@ def test_validate_bogus(tmp_path): errors = dandi_file(path).get_validation_errors() # ATM we would get 2 errors -- since could not be open in two places, # but that would be too rigid to test. Let's just see that we have expected errors - assert any(e.startswith("Failed to inspect NWBFile") for e in errors) + assert any(e.message.startswith("Unable to open file") for e in errors) def test_upload_zarr(new_dandiset, tmp_path): From 9f49cc9882596c850d13b103221bf33e869e5cd6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 15:00:39 -0400 Subject: [PATCH 096/124] Standardized dandi import --- dandi/files/bases.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 5c5371627..4a87fde11 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -26,7 +26,6 @@ import requests import dandi -from dandi import get_logger from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient from dandi.metadata import get_default_metadata, nwb2asset from dandi.misctypes import DUMMY_DIGEST, Digest, P @@ -35,7 +34,7 @@ from dandi.utils import yaml_load from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult -lgr = get_logger() +lgr = dandi.get_logger() # TODO -- should come from schema. 
This is just a simplistic example for now _required_dandiset_metadata_fields = ["identifier", "name", "description"] From 382c3aef1885ebd5ffe27159e1db97e0d129cf9e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 16:25:10 -0400 Subject: [PATCH 097/124] General-purpose exception catching --- dandi/files/bases.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 4a87fde11..95c3637ea 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -728,9 +728,12 @@ def _pydantic_errors_to_validation_results( """Convert list of dict from pydantic into our custom object.""" out = [] for e in errors: - if isinstance(e, OSError): - id = "OSError" - message = e.__str__() + if isinstance(e, Exception): + if hasattr(e, "message"): + message = e.message + else: + message = str(e) + id = "exception" scope = Scope.FILE else: id = ":".join( From 8d7148370991bce10885b6d278d421da229a5a9e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 16:27:09 -0400 Subject: [PATCH 098/124] More standardized error IDs --- dandi/files/bases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 95c3637ea..b469df812 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -736,7 +736,7 @@ def _pydantic_errors_to_validation_results( id = "exception" scope = Scope.FILE else: - id = ":".join( + id = ".".join( filter( bool, ( From 2d4e284247b0a17650f2f1805eada7fd8e53f17a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 14:09:00 -0400 Subject: [PATCH 099/124] Typing fix for exception message --- dandi/files/bases.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b469df812..1bfbcfe62 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -730,6 +730,7 @@ def _pydantic_errors_to_validation_results( for e in errors: if isinstance(e, Exception): 
if hasattr(e, "message"): + assert e.message message = e.message else: message = str(e) From 2c8d034c628167283ae7ca2eb70150a6253402c4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 14:54:27 -0400 Subject: [PATCH 100/124] Description/testing of command line validation `--grouping` option. --- dandi/cli/cmd_validate.py | 2 +- dandi/cli/tests/test_cmd_validate.py | 21 +++++++++++++++++++++ dandi/files/bases.py | 1 - 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index d1b3351ea..37dd54c99 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -24,7 +24,7 @@ @click.option( "--grouping", "-g", - help="Write report under path, this option implies `--report/-r`.", + help="How to group error/warning reporting.", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @map_to_click_exceptions diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 095d21907..fed388e2d 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -22,3 +22,24 @@ def test_validate_bids_error(bids_error_examples, dataset): # Does it detect all errors? for key in expected_errors: assert key in r.output + + +def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): + """ + This is currently a placeholder test, and should be updated once we have paths with + multiple errors. + """ + import json + + from ..cmd_validate import validate_bids + + broken_dataset = os.path.join(bids_error_examples, dataset) + with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: + expected_errors = json.load(f) + r = CliRunner().invoke(validate_bids, ["--grouping=path", broken_dataset]) + # Does it break? + assert r.exit_code == 1 + + # Does it detect all errors? 
+ for key in expected_errors: + assert key in r.output diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 1bfbcfe62..b469df812 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -730,7 +730,6 @@ def _pydantic_errors_to_validation_results( for e in errors: if isinstance(e, Exception): if hasattr(e, "message"): - assert e.message message = e.message else: message = str(e) From 80db60e052bce7ab3b78b5d6c9ccad78b4417428 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 5 Oct 2022 14:46:33 -0400 Subject: [PATCH 101/124] Workaround e.message typechecking dillema --- dandi/files/bases.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b469df812..b5c5e627a 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -729,10 +729,7 @@ def _pydantic_errors_to_validation_results( out = [] for e in errors: if isinstance(e, Exception): - if hasattr(e, "message"): - message = e.message - else: - message = str(e) + message = getattr(e, "message", str(e)) id = "exception" scope = Scope.FILE else: From af48872e1babb9d2cf84eff35756f125ee416072 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 16:19:15 -0400 Subject: [PATCH 102/124] Removed unneeded imports, added tests for broken nwb dataset --- dandi/cli/tests/test_cmd_validate.py | 51 +++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index fed388e2d..72b6e9633 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -1,3 +1,4 @@ +import json import os from click.testing import CliRunner @@ -8,7 +9,6 @@ "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] ) def test_validate_bids_error(bids_error_examples, dataset): - import json from ..cmd_validate import validate_bids @@ -24,22 +24,55 @@ def test_validate_bids_error(bids_error_examples, 
dataset): assert key in r.output -def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): +def test_validate_bids_error_grouping_nwb( + bids_error_examples, dataset="invalid_asl003" +): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. """ - import json from ..cmd_validate import validate_bids - broken_dataset = os.path.join(bids_error_examples, dataset) - with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: - expected_errors = json.load(f) - r = CliRunner().invoke(validate_bids, ["--grouping=path", broken_dataset]) + dataset = os.path.join(bids_error_examples, dataset) + r = CliRunner().invoke(validate_bids, ["--grouping=path", dataset]) # Does it break? assert r.exit_code == 1 # Does it detect all errors? - for key in expected_errors: - assert key in r.output + assert dataset in r.output + + +def test_validate_bids_error_grouping_bids(simple3_nwb): + """ + This is currently a placeholder test, and should be updated once we have paths with + multiple errors. + """ + + from ..cmd_validate import validate_bids + + r = CliRunner().invoke(validate_bids, ["--grouping=path", simple3_nwb]) + # Does it break? + assert r.exit_code == 1 + + # Does it detect all errors? + assert simple3_nwb in r.output + + +def test_validate_bids_error_grouping_notification( + bids_error_examples, dataset="invalid_asl003" +): + """Test user notification for unimplemented parameter value.""" + + from ..cmd_validate import validate_bids + + broken_dataset = os.path.join(bids_error_examples, dataset) + r = CliRunner().invoke(validate_bids, ["--grouping=error", broken_dataset]) + # Does it break? + assert r.exit_code == 1 + + # Does it notify the user correctly? 
+ notification_substring = ( + "`grouping` parameter values currently supported are path or None" + ) + assert notification_substring in r.output From 15ce3c7bbe4d1e664d217522e2abb621087febb7 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 16:21:38 -0400 Subject: [PATCH 103/124] Removed deprecated comment --- dandi/files/bases.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b5c5e627a..7732732ec 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -491,8 +491,6 @@ def get_validation_errors( errors: list[ValidationResult] = pynwb_validate( self.filepath, devel_debug=devel_debug ) - # Q gh-943 do we want to manage this thing as an error? - # It should be a separate thing possibly via logging IMHO. if schema_version is not None: errors.extend( super().get_validation_errors( From 8c430f64f7e59d1e5e05e4d17371a5148bc9b4b4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 16:51:27 -0400 Subject: [PATCH 104/124] Added click decorator for grouping parameter in nwb validator --- dandi/cli/cmd_validate.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 37dd54c99..fc400733a 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -106,6 +106,11 @@ def validate_bids( default=False, is_flag=True, ) +@click.option( + "--grouping", + "-g", + help="How to group error/warning reporting.", +) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions From 6348f644831239a7429e8e30b249477d30f538f0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 17:33:06 -0400 Subject: [PATCH 105/124] Standardized CLI NWB validator wrt BIDS validator draft and added tests Still using 2 separate functions, merging of validation functions best for subsequent PR --- dandi/cli/cmd_validate.py | 70 ++++++++++++++-------------- 
dandi/cli/tests/test_cmd_validate.py | 17 ++++--- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index fc400733a..342d9eadb 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -129,6 +129,7 @@ def validate( """ from ..pynwb_utils import ignore_benign_pynwb_warnings from ..validate import validate as validate_ + from ..validate_types import Severity # Don't log validation warnings, as this command reports them to the user # anyway: @@ -144,51 +145,50 @@ def validate( # way to get relevant warnings (not errors) from PyNWB ignore_benign_pynwb_warnings() - all_files_error_messages = {} - nfiles = 0 - for path, messages in validate_( + validator_result = validate_( *paths, schema_version=schema, devel_debug=devel_debug, allow_any_path=allow_any_path, - ): - nfiles += 1 - if not grouping: - error_paths = [path] * len(messages) - errors = ["NWBError"] * len(messages) - severities = [""] * len(messages) - display_errors(error_paths, errors, severities, messages) - elif grouping == "path": - error_paths = [path] - errors = ["NWBError"] * len(messages) - severities = [""] * len(messages) - display_errors(error_paths, errors, severities, messages) + ) + issues = [] + for i in validator_result: + if not i.severity: + continue else: - raise NotImplementedError( - "The `grouping` parameter values currently supported are " - "path or None." 
- ) - - all_files_error_messages[path] = messages - - files_with_errors = [f for f, errors in all_files_error_messages.items() if errors] + issues.append(i) - if files_with_errors: - click.secho( - "Summary: Validation errors in {} out of {} files".format( - len(files_with_errors), nfiles - ), - bold=True, - fg="red", + purviews = [ + list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues + ] + if not grouping: + display_errors( + purviews, + [i.id for i in issues], + [i.severity for i in issues], + [i.message for i in issues], ) - raise SystemExit(1) + elif grouping == "path": + for purview in purviews: + applies_to = [ + i for i in issues if purview in [i.path, i.path_regex, i.dataset_path] + ] + display_errors( + [purview], + [i.id for i in applies_to], + [i.severity for i in applies_to], + [i.message for i in applies_to], + ) else: - click.secho( - "Summary: No validation errors among {} file(s)".format(nfiles), - bold=True, - fg="green", + raise NotImplementedError( + "The `grouping` parameter values currently supported are " "path or None." ) + validation_errors = [i for i in issues if i.severity == Severity.ERROR] + + if validation_errors: + raise SystemExit(1) + def _get_severity_color(severities): from ..validate_types import Severity diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 72b6e9633..d6c9b91ca 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -24,9 +24,7 @@ def test_validate_bids_error(bids_error_examples, dataset): assert key in r.output -def test_validate_bids_error_grouping_nwb( - bids_error_examples, dataset="invalid_asl003" -): +def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. 
@@ -43,20 +41,21 @@ def test_validate_bids_error_grouping_nwb( assert dataset in r.output -def test_validate_bids_error_grouping_bids(simple3_nwb): +def test_validate_nwb_grouping_severity(simple3_nwb): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. """ - from ..cmd_validate import validate_bids + from ..cmd_validate import validate - r = CliRunner().invoke(validate_bids, ["--grouping=path", simple3_nwb]) - # Does it break? - assert r.exit_code == 1 + r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) + # Does it pass? + assert r.exit_code == 0 - # Does it detect all errors? + # Does it give required warnings for required path? assert simple3_nwb in r.output + assert "NWBI.check_subject_id_exists" in r.output def test_validate_bids_error_grouping_notification( From ed5f3534846bf87e0ee6ee9952c1adcd98ea4119 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 17:48:08 -0400 Subject: [PATCH 106/124] Reporting lack of errors explicitly in CLI validation --- dandi/cli/cmd_validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 342d9eadb..99047245f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -96,6 +96,8 @@ def validate_bids( if validation_errors: raise SystemExit(1) + else: + click.secho("No errors found.", fg="green") @click.command() @@ -188,6 +190,8 @@ def validate( if validation_errors: raise SystemExit(1) + else: + click.secho("No errors found.", fg="green") def _get_severity_color(severities): From 65ae2c98fe99b68091d9c550b6894dd3fcadca64 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 10 Oct 2022 13:59:35 -0400 Subject: [PATCH 107/124] Simplified git command execution --- dandi/tests/fixtures.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index bc000ba64..448b152ba 100644 --- 
a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -245,7 +245,7 @@ def fixture( skipif.no_git() path = tmp_path_factory.mktemp("gitrepo") lgr.debug("Cloning %r into %r", url, path) - runout = run( + run( [ "git", "clone", @@ -255,10 +255,8 @@ def fixture( url, str(path), ], - capture_output=True, + check=True, ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") # cwd specification is VERY important, not only to achieve the correct # effects, but also to avoid dropping files from your repository if you # were to run `git sparse-checkout` inside the software repo. From b4d5dcdbeeb290d4a27bfc55c4ec9e88246da8b0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 11:54:14 -0400 Subject: [PATCH 108/124] Using click option specification --- dandi/cli/cmd_validate.py | 6 ++++-- dandi/cli/tests/test_cmd_validate.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 99047245f..df8ada8c0 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -25,6 +25,8 @@ "--grouping", "-g", help="How to group error/warning reporting.", + type=click.Choice(['none', 'path'], case_sensitive=False), + default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @map_to_click_exceptions @@ -33,7 +35,7 @@ def validate_bids( schema, report, report_path, - grouping=None, + grouping="none", ): """Validate BIDS paths. 
@@ -69,7 +71,7 @@ def validate_bids( purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues ] - if not grouping: + if grouping == "none": display_errors( purviews, [i.id for i in issues], diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index d6c9b91ca..b96c86559 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -68,10 +68,10 @@ def test_validate_bids_error_grouping_notification( broken_dataset = os.path.join(bids_error_examples, dataset) r = CliRunner().invoke(validate_bids, ["--grouping=error", broken_dataset]) # Does it break? - assert r.exit_code == 1 + assert r.exit_code == 2 # Does it notify the user correctly? notification_substring = ( - "`grouping` parameter values currently supported are path or None" + "Invalid value for '--grouping'" ) assert notification_substring in r.output From 624e3b0ab8417eb01d3031d30dc43584a95aba26 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 11:56:48 -0400 Subject: [PATCH 109/124] List comprehension for more compact code. 
--- dandi/cli/cmd_validate.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index df8ada8c0..ff901b318 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -61,12 +61,7 @@ def validate_bids( schema_version=schema, ) - issues = [] - for i in validator_result: - if not i.severity: - continue - else: - issues.append(i) + issues = [i for i in validator_result if i.severity] purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues @@ -155,12 +150,7 @@ def validate( devel_debug=devel_debug, allow_any_path=allow_any_path, ) - issues = [] - for i in validator_result: - if not i.severity: - continue - else: - issues.append(i) + issues = [i for i in validator_result if i.severity] purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues From 41df99d9d862543a894eafff6fee1a0e0ca52dcd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 11:58:03 -0400 Subject: [PATCH 110/124] Removed superfluous list bracketing --- dandi/cli/cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index ff901b318..230f8201c 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -217,7 +217,7 @@ def display_errors(purviews, errors, severities, messages): by assert if this won't ever be the case. 
""" - if all([len(i) == 1 for i in [purviews, errors, severities, messages]]): + if all(len(i) == 1 for i in [purviews, errors, severities, messages]): fg = _get_severity_color(severities) error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}" click.secho(error_message, fg=fg) From 09e526b5a23206d6f061b0de13ddf0f2f895502d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 12:06:00 -0400 Subject: [PATCH 111/124] clarified comment, removed commented code --- dandi/pynwb_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index dddedef3e..c2fbf0771 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -387,7 +387,7 @@ def validate( if version is not None: # Explicitly sanitize so we collect warnings. # TODO: later cast into proper ERRORs - # Do we really need to do these acrobatics? string comparison works fine... + # Do we really need this custom internal function? string comparison works fine.. 
version = _sanitize_nwb_version(version, log=errors.append) try: v = semantic_version.Version(version) @@ -403,7 +403,6 @@ def validate( errors.append(e) except AssertionError: errors.append(e) - # errors = [e for e in errors if not re_ok_prior_210.search(e.message)] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From e38577b0844774d7b69c1b0e16d6480f5f001908 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 13:27:37 -0400 Subject: [PATCH 112/124] Extracted validation variant common code into internal function and renamed tests for more consistency --- dandi/cli/cmd_validate.py | 45 +++++----------------------- dandi/cli/tests/test_cmd_validate.py | 5 ++-- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 230f8201c..11fe3ce21 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -52,7 +52,6 @@ def validate_bids( """ from ..validate import validate_bids as validate_bids_ - from ..validate_types import Severity validator_result = validate_bids_( # Controller *paths, @@ -61,40 +60,7 @@ def validate_bids( schema_version=schema, ) - issues = [i for i in validator_result if i.severity] - - purviews = [ - list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues - ] - if grouping == "none": - display_errors( - purviews, - [i.id for i in issues], - [i.severity for i in issues], - [i.message for i in issues], - ) - elif grouping == "path": - for purview in purviews: - applies_to = [ - i for i in issues if purview in [i.path, i.path_regex, i.dataset_path] - ] - display_errors( - [purview], - [i.id for i in applies_to], - [i.severity for i in applies_to], - [i.message for i in applies_to], - ) - else: - raise NotImplementedError( - "The `grouping` parameter values currently supported are " "path or None." 
- ) - - validation_errors = [i for i in issues if i.severity == Severity.ERROR] - - if validation_errors: - raise SystemExit(1) - else: - click.secho("No errors found.", fg="green") + _process_issues(validator_result, grouping) @click.command() @@ -128,7 +94,6 @@ def validate( """ from ..pynwb_utils import ignore_benign_pynwb_warnings from ..validate import validate as validate_ - from ..validate_types import Severity # Don't log validation warnings, as this command reports them to the user # anyway: @@ -150,12 +115,18 @@ def validate( devel_debug=devel_debug, allow_any_path=allow_any_path, ) + + _process_issues(validator_result, grouping) + + +def _process_issues(validator_result, grouping): + from ..validate_types import Severity issues = [i for i in validator_result if i.severity] purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues ] - if not grouping: + if grouping == "none": display_errors( purviews, [i.id for i in issues], diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index b96c86559..d832cadab 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -24,7 +24,7 @@ def test_validate_bids_error(bids_error_examples, dataset): assert key in r.output -def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): +def test_validate_bids_grouping_error(bids_error_examples, dataset="invalid_asl003"): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. @@ -41,7 +41,7 @@ def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl0 assert dataset in r.output -def test_validate_nwb_grouping_severity(simple3_nwb): +def test_validate_nwb_path_grouping(simple3_nwb): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. 
@@ -51,6 +51,7 @@ def test_validate_nwb_grouping_severity(simple3_nwb): r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) # Does it pass? + print(r.output) assert r.exit_code == 0 # Does it give required warnings for required path? From 456fafd1444d5c0bb48090df8c4ff537443299a2 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 13:30:35 -0400 Subject: [PATCH 113/124] Importing outside of test functions --- dandi/cli/tests/test_cmd_validate.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index d832cadab..37be9ac25 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -4,14 +4,13 @@ from click.testing import CliRunner import pytest +from ..cmd_validate import validate, validate_bids @pytest.mark.parametrize( "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] ) def test_validate_bids_error(bids_error_examples, dataset): - from ..cmd_validate import validate_bids - broken_dataset = os.path.join(bids_error_examples, dataset) with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: expected_errors = json.load(f) @@ -30,8 +29,6 @@ def test_validate_bids_grouping_error(bids_error_examples, dataset="invalid_asl0 multiple errors. """ - from ..cmd_validate import validate_bids - dataset = os.path.join(bids_error_examples, dataset) r = CliRunner().invoke(validate_bids, ["--grouping=path", dataset]) # Does it break? @@ -47,8 +44,6 @@ def test_validate_nwb_path_grouping(simple3_nwb): multiple errors. """ - from ..cmd_validate import validate - r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) # Does it pass? 
print(r.output) @@ -64,8 +59,6 @@ def test_validate_bids_error_grouping_notification( ): """Test user notification for unimplemented parameter value.""" - from ..cmd_validate import validate_bids - broken_dataset = os.path.join(bids_error_examples, dataset) r = CliRunner().invoke(validate_bids, ["--grouping=error", broken_dataset]) # Does it break? From 857d7b8f9026f24894d35c8b874fb2f28c688819 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 13:44:23 -0400 Subject: [PATCH 114/124] Updated grouping for vanilla validator --- dandi/cli/cmd_validate.py | 4 +++- dandi/pynwb_utils.py | 9 +-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 11fe3ce21..f60ac3d2f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -75,12 +75,14 @@ def validate_bids( "--grouping", "-g", help="How to group error/warning reporting.", + type=click.Choice(['none', 'path'], case_sensitive=False), + default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions def validate( - paths, schema=None, devel_debug=False, allow_any_path=False, grouping=None + paths, schema=None, devel_debug=False, allow_any_path=False, grouping="none", ): """Validate files for NWB and DANDI compliance. 
diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index c2fbf0771..5ed0e90fd 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -395,14 +395,7 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [] - for e in errors: - try: - assert e.message - if not re_ok_prior_210.search(e.message): - errors.append(e) - except AssertionError: - errors.append(e) + errors = [e for e in errors if not re_ok_prior_210.search(cast(str, getattr(e, "message", "")))] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From 56973d3df7d330c1ac5be7b47a4a8d5fcaff9d3f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:18:17 -0400 Subject: [PATCH 115/124] Linting fixes --- dandi/cli/tests/test_cmd_validate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 37be9ac25..ac9a2fe60 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -6,6 +6,7 @@ from ..cmd_validate import validate, validate_bids + @pytest.mark.parametrize( "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] ) @@ -65,7 +66,5 @@ def test_validate_bids_error_grouping_notification( assert r.exit_code == 2 # Does it notify the user correctly? 
- notification_substring = ( - "Invalid value for '--grouping'" - ) + notification_substring = "Invalid value for '--grouping'" assert notification_substring in r.output From 4eb0c703d93c77fb481087b14fe784dd00440094 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:36:01 -0400 Subject: [PATCH 116/124] Importing cast function from typing --- dandi/pynwb_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 5ed0e90fd..f4d809664 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -3,7 +3,7 @@ import os.path as op from pathlib import Path import re -from typing import Any, Dict, List, Tuple, TypeVar, Union +from typing import Any, Dict, List, Tuple, TypeVar, Union, cast import warnings import dandischema @@ -395,7 +395,11 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [e for e in errors if not re_ok_prior_210.search(cast(str, getattr(e, "message", "")))] + errors = [ + e + for e in errors + if not re_ok_prior_210.search(cast(str, getattr(e, "message", ""))) + ] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From 404968b8a4fda0c9d182f9f5c1df750a6d1b8a23 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:53:38 -0400 Subject: [PATCH 117/124] Type annotations --- dandi/cli/cmd_validate.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index f60ac3d2f..0c264b9af 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -25,7 +25,7 @@ "--grouping", "-g", help="How to group error/warning reporting.", - type=click.Choice(['none', 'path'], case_sensitive=False), + type=click.Choice(["none", "path"], case_sensitive=False), default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, 
dir_okay=True)) @@ -75,14 +75,18 @@ def validate_bids( "--grouping", "-g", help="How to group error/warning reporting.", - type=click.Choice(['none', 'path'], case_sensitive=False), + type=click.Choice(["none", "path"], case_sensitive=False), default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions def validate( - paths, schema=None, devel_debug=False, allow_any_path=False, grouping="none", + paths, + schema=None, + devel_debug=False, + allow_any_path=False, + grouping="none", ): """Validate files for NWB and DANDI compliance. @@ -123,6 +127,7 @@ def validate( def _process_issues(validator_result, grouping): from ..validate_types import Severity + issues = [i for i in validator_result if i.severity] purviews = [ @@ -170,7 +175,12 @@ def _get_severity_color(severities): return "blue" -def display_errors(purviews, errors, severities, messages): +def display_errors( + purviews: list[str], + errors: list[str], + severities: list[str], + messages: list[str], +) -> None: """ Unified error display for validation CLI, which auto-resolves grouping logic based on the length of input lists. 
From 6ac5b999016611c42a4c489a8c72f2163fcdd91c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:59:08 -0400 Subject: [PATCH 118/124] Fixed typing --- dandi/cli/cmd_validate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 0c264b9af..cb209b2cc 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -1,5 +1,6 @@ import logging import os +from typing import List import click @@ -176,10 +177,10 @@ def _get_severity_color(severities): def display_errors( - purviews: list[str], - errors: list[str], - severities: list[str], - messages: list[str], + purviews: List[str], + errors: List[str], + severities: List[str], + messages: List[str], ) -> None: """ Unified error display for validation CLI, which auto-resolves grouping logic based on From abc8e8ded048ad0d72c9c134486b1a718dbd79a1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 14:38:20 -0400 Subject: [PATCH 119/124] Removed superfluous print call --- dandi/cli/tests/test_cmd_validate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index ac9a2fe60..4298a8bc8 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -47,7 +47,6 @@ def test_validate_nwb_path_grouping(simple3_nwb): r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) # Does it pass? - print(r.output) assert r.exit_code == 0 # Does it give required warnings for required path? 
From f80b5b498592c1852597d2c63985217ab0290d21 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 15:41:04 -0400 Subject: [PATCH 120/124] Fixes proposed by Yaro --- dandi/cli/cmd_validate.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index cb209b2cc..08fe14638 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -1,11 +1,12 @@ import logging import os -from typing import List +from typing import List, cast import click from .base import devel_debug_option, devel_option, map_to_click_exceptions from ..utils import pluralize +from ..validate_types import Severity @click.command() @@ -40,12 +41,6 @@ def validate_bids( ): """Validate BIDS paths. - Parameters - ---------- - - grouping : str, optional - A string which is either "", "error", or "path" — "error" implemented. - Notes ----- Used from bash, eg: @@ -127,7 +122,6 @@ def validate( def _process_issues(validator_result, grouping): - from ..validate_types import Severity issues = [i for i in validator_result if i.severity] @@ -166,7 +160,6 @@ def _process_issues(validator_result, grouping): def _get_severity_color(severities): - from ..validate_types import Severity if Severity.ERROR in severities: return "red" @@ -179,20 +172,13 @@ def _get_severity_color(severities): def display_errors( purviews: List[str], errors: List[str], - severities: List[str], + severities: List[Severity], messages: List[str], ) -> None: """ Unified error display for validation CLI, which auto-resolves grouping logic based on the length of input lists. - Parameters - ---------- - purviews: list of str - errors: list of str - severities: list of dandi.validate_types.Severity - messages: list of str - Notes ----- @@ -201,7 +187,7 @@ def display_errors( by assert if this won't ever be the case. 
""" - if all(len(i) == 1 for i in [purviews, errors, severities, messages]): + if all(len(cast(list, i)) == 1 for i in [purviews, errors, severities, messages]): fg = _get_severity_color(severities) error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}" click.secho(error_message, fg=fg) From 011b45db992930d9cd13581c2c276ae83660188f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 17:33:49 -0400 Subject: [PATCH 121/124] Not duplicating code for error subclass --- dandi/files/bases.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 7732732ec..94138c887 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -123,10 +123,6 @@ def get_validation_errors( ) try: DandisetMeta(**meta) - except ValidationError as e: - if devel_debug: - raise - return _pydantic_errors_to_validation_results(e, str(self.filepath)) except Exception as e: if devel_debug: raise From 76ddf068d6c3b3aa01ddfd83969a099d712c7177 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 19:59:38 -0400 Subject: [PATCH 122/124] Orthography --- dandi/cli/cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 08fe14638..bd050e469 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -11,7 +11,7 @@ @click.command() @click.option( - "--schema", help="Validate against new BIDS schema version", metavar="VERSION" + "--schema", help="Validate against new BIDS schema version.", metavar="VERSION" ) @click.option( "--report-path", From 2c7dd6abafb1c5404e2cfc8a2edeb1da3fa1684f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 27 Oct 2022 02:13:51 -0400 Subject: [PATCH 123/124] We need newer bidsschematools --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 270525efb..7b00e6d8b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ project_urls 
= python_requires = >=3.7 install_requires = appdirs - bidsschematools ~= 0.4.0 + bidsschematools >= 0.5.0 click click-didyoumean dandischema ~= 0.7.0 From 620510922ee6762aaad3d3a38981d6d59f0a9680 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 27 Oct 2022 16:33:42 -0400 Subject: [PATCH 124/124] Type fix --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 448b152ba..ef92d5266 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -262,7 +262,7 @@ def fixture( # were to run `git sparse-checkout` inside the software repo. run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) - yield path + yield str(path) return fixture