From 696e7fe109241e7802da3785a341abe17e8b6290 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 18 Aug 2022 13:06:07 -0400 Subject: [PATCH 001/124] Define ValidationResult class --- dandi/validate.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 57d8dd285..41ceb0cca 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum from pathlib import Path from typing import Iterator, List, Optional, Tuple, Union @@ -5,7 +9,34 @@ from .files import find_dandi_files -# TODO: provide our own "errors" records, which would also include warnings etc + +@dataclass +class ValidationResult: + origin: ValidationOrigin + severity: Severity + id: str + scope: Scope + path: Path + message: str + dataset_path: Optional[Path] + asset_paths: Optional[list[str]] = None + + +@dataclass +class ValidationOrigin: + name: str + version: str + + +class Severity(Enum): + ERROR = "ERROR" + WARNING = "WARNING" + HINT = "HINT" + + +class Scope(Enum): + FILE = "file" + DANDISET = "dandiset" def validate_bids( From 037dc5eaebf8c2158338f9d96307c7c1eb96f0d9 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 26 Aug 2022 16:52:19 -0400 Subject: [PATCH 002/124] RF: Initial steps to start RFing validate-bids to use ValidationResult records --- dandi/bids_utils.py | 46 ++++++--------------------------------- dandi/cli/cmd_validate.py | 14 +++++++----- dandi/files/bids.py | 1 + dandi/validate.py | 43 ++++++++++++++++++++++++++++++++++-- setup.cfg | 2 +- 5 files changed, 59 insertions(+), 47 deletions(-) diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py index 489599df0..cf09f6ebf 100644 --- a/dandi/bids_utils.py +++ b/dandi/bids_utils.py @@ -1,46 +1,14 @@ from .utils import pluralize +from .validate import ValidationResult -def is_valid( - validation_result: dict, - 
allow_invalid_filenames: bool = False, - allow_missing_files: bool = False, -) -> bool: - """Determine whether a dataset validation result marks it as valid. - - Parameters - ---------- - validation_result: dict - Dictionary as returned by `dandi.support.bids.validator.validate_bids()`. - allow_missing_files: bool, optional - Whether to consider the dataset invalid if any mandatory files are not present. - allow_invalid_filenames: bool, optional - Whether to consider the dataset invalid if any filenames inside are invalid. - - Returns - ------- - bool: whether the dataset validation result marks it as valid. - - """ - - if allow_invalid_filenames and allow_missing_files: - return True - missing_files = [ - i["regex"] for i in validation_result["schema_tracking"] if i["mandatory"] - ] - invalid_filenames = validation_result["path_tracking"] - - if missing_files and not allow_missing_files: - return False - if invalid_filenames and not allow_invalid_filenames: - return False - else: - return True - - -def report_errors( - validation_result: dict, +def print_validation_results( + validation_result: list[ValidationResult], + # TODO: options for control + # - either report warnings, hints, ... 
+ # - either report groupped by severity, record.id ) -> None: + raise NotImplementedError("TODO: RF to use ValidationResult records") import click missing_files = [ diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 52325a5ec..13776edb4 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -36,19 +36,23 @@ def validate_bids( dandi validate-bids /my/path """ - from ..bids_utils import is_valid, report_errors + from ..bids_utils import print_validation_results + from ..validate import Severity from ..validate import validate_bids as validate_bids_ - validator_result = validate_bids_( + validator_result = validate_bids_( # Controller *paths, report=report, report_path=report_path, schema_version=schema, ) - valid = is_valid(validator_result) - report_errors(validator_result) - if not valid: + if validator_result: + print_validation_results(validator_result) # View + + validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] + + if validation_errors: raise SystemExit(1) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 9207c98f3..e6996345a 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -66,6 +66,7 @@ def _validate(self) -> None: bids_paths = [str(self.filepath)] + [ str(asset.filepath) for asset in self.dataset_files ] + # TODO: use RFed data structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] if len(results["path_listing"]) == len(results["path_tracking"]): diff --git a/dandi/validate.py b/dandi/validate.py index 41ceb0cca..c6cb97928 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -44,7 +44,7 @@ def validate_bids( schema_version: Optional[str] = None, report: bool = False, report_path: str = "", -) -> dict: +) -> list[ValidationResult]: """Validate BIDS paths. Parameters @@ -67,6 +67,7 @@ def validate_bids( patterns. 
""" + import bidsschematools from bidsschematools.validator import validate_bids as validate_bids_ if report and not report_path: @@ -80,7 +81,45 @@ def validate_bids( schema_version=schema_version, report_path=report_path, ) - return dict(validation_result) + our_validation_result = [] + origin = ValidationOrigin( + name="bidsschematools", + version=bidsschematools.__version__, + ) + for path in validation_result["path_tracking"]: + our_validation_result.append( + ValidationResult( + origin=origin, + severity=Severity.ERROR, + id="BIDS.WRONG_PATH_TODO", + scope=Scope.FILE, + path=path, + message="TODO", + # TODO - discover dandiset or actually BIDS dataset + # might want separate the two + # dandiset_path="TODO", # might contain multiple datasets + # dataset_path="TODO", # BIDS dataset in this case + # asset_paths: Optional[list[str]] = None + ) + ) + + for pattern in validation_result["schema_tracking"]: + if pattern["mandatory"]: # TODO: future proof if gets renamed to required + our_validation_result.append( + ValidationResult( + origin=origin, + severity=Severity.ERROR, + id="BIDS.MANDATORY_FILE_MISSING", # we decided to generalize, and not have + scope=Scope.FILE, + message="TODO", + # TODO - discover dandiset or actually BIDS dataset + # might want separate the two + # dandiset_path="TODO", # might contain multiple datasets + # dataset_path="TODO", # BIDS dataset in this case + # asset_paths: Optional[list[str]] = None + ) + ) + return our_validation_result def validate( diff --git a/setup.cfg b/setup.cfg index 0de477174..270525efb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ project_urls = python_requires = >=3.7 install_requires = appdirs - bidsschematools + bidsschematools ~= 0.4.0 click click-didyoumean dandischema ~= 0.7.0 From 3bc01ba63b02889b6558489a1baa06736d880bf1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 17:45:39 -0400 Subject: [PATCH 003/124] Improved class usage and added test suite --- 
dandi/tests/fixtures.py | 3 +++ dandi/tests/test_validate.py | 28 ++++++++++++++++++++++++++++ dandi/validate.py | 34 ++++++++++++++++++++++++++++------ 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 142b6fb91..eab945083 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -223,6 +223,9 @@ def fixture() -> Iterator[str]: nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") bids_examples = get_gitrepo_fixture("https://github.com/dandi/bids-examples") +bids_error_examples = get_gitrepo_fixture( + "https://github.com/bids-standard/bids-error-examples" +) LOCAL_DOCKER_DIR = Path(__file__).with_name("data") / "dandiarchive-docker" LOCAL_DOCKER_ENV = LOCAL_DOCKER_DIR.name diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 6d854aec2..a639696b6 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -2,6 +2,7 @@ import os import appdirs +import pytest def test_validate_bids(bids_examples, tmp_path): @@ -33,3 +34,30 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. assert len(glob(report_path)) == 1 + + +@pytest.mark.parametrize( + "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] +) +def test_validate_bids_errors(bids_error_examples, dataset): + # This only checks that the error we found is correct, not that we found all errors. + # ideally make a list and erode etc. 
+ import json + + from ..validate import validate_bids + + selected_dataset = os.path.join(bids_error_examples, dataset) + validation_result = validate_bids(selected_dataset, report=True) + with open(os.path.join(selected_dataset, ".ERRORS.json")) as f: + expected_errors = json.load(f) + for i in validation_result: + error_id = i.id + if i.path: + error_path = i.path + relative_error_path = os.path.relpath(error_path, i.dataset_path) + assert ( + relative_error_path + in expected_errors[error_id.lstrip("BIDS.")]["scope"] + ) + else: + assert i.id.lstrip("BIDS.") in expected_errors.keys() diff --git a/dandi/validate.py b/dandi/validate.py index c6cb97928..68e7e2dcd 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -16,9 +16,9 @@ class ValidationResult: severity: Severity id: str scope: Scope - path: Path message: str dataset_path: Optional[Path] + path: Optional[Path] = None asset_paths: Optional[list[str]] = None @@ -87,14 +87,22 @@ def validate_bids( version=bidsschematools.__version__, ) for path in validation_result["path_tracking"]: + # Hard-coding exclusion here pending feature + release in: + # https://github.com/bids-standard/bids-specification/issues/1272 + if path.endswith((".ERRORS", ".ERRORS.json")): + continue + dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( + dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, - id="BIDS.WRONG_PATH_TODO", + # For schema-integrated error code discussion, see: + # https://github.com/bids-standard/bids-specification/issues/1262 + id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, path=path, - message="TODO", + message=f"The `{path}` file does not match any pattern known to BIDS.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets @@ -104,14 +112,21 @@ def validate_bids( ) for pattern in validation_result["schema_tracking"]: - if pattern["mandatory"]: # TODO: 
future proof if gets renamed to required + # Future proofing for standard-compliant name. + if ("mandatory" in pattern and pattern["mandatory"]) or ( + "required" in pattern and pattern["required"] + ): + dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( + dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, - id="BIDS.MANDATORY_FILE_MISSING", # we decided to generalize, and not have + # For schema-integrated error code discussion, see: + # https://github.com/bids-standard/bids-specification/issues/1262 + id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, - message="TODO", + message=f'Missing file matching the following pattern: `{pattern["regex"]}`', # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets @@ -122,6 +137,13 @@ def validate_bids( return our_validation_result +def _get_dataset_path(file_path, datasets): + # This logic might break with nested datasets. 
+ for dataset in datasets: + if dataset in file_path: + return dataset + + def validate( *paths: str, schema_version: Optional[str] = None, From 43ab1f511074866878249f8e52717f0697cfe5a6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:22:08 -0400 Subject: [PATCH 004/124] Debugging --- dandi/files/bids.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index e6996345a..f399a8f38 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -66,11 +66,13 @@ def _validate(self) -> None: bids_paths = [str(self.filepath)] + [ str(asset.filepath) for asset in self.dataset_files ] - # TODO: use RFed data structures, avoid duplicating logic + # TODO gh-943: use RFed data structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] - if len(results["path_listing"]) == len(results["path_tracking"]): - self._dataset_errors.append("No valid BIDS files were found") + # TODO gh-943: Should we add valid files to Validation result? + # This specific check seems difficult to implement. + # if len(results["path_listing"]) == len(results["path_tracking"]): + # self._dataset_errors.append("No valid BIDS files were found") for entry in results["schema_tracking"]: if entry["mandatory"]: self._dataset_errors.append( From 1f88e4eeb9e4f4a12b268f30cee8ede03c99610a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:25:07 -0400 Subject: [PATCH 005/124] Debugging out of place cli invocation error --- dandi/tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index a639696b6..18653df12 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -17,7 +17,7 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. 
pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + log_dir = appdirs.user_log_dir("dandi") report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") assert len(glob(report_expression)) == 1 From 8e8e4b722084ff39a9b0b794f27b509e92a34fd2 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:43:17 -0400 Subject: [PATCH 006/124] Starting to deprecate `print_validation_results()` --- dandi/cli/cmd_validate.py | 10 +++++++--- dandi/cli/tests/test_cmd_validate.py | 5 +++-- dandi/validate.py | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 13776edb4..5a952f289 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -36,7 +36,7 @@ def validate_bids( dandi validate-bids /my/path """ - from ..bids_utils import print_validation_results + # from ..bids_utils import print_validation_results from ..validate import Severity from ..validate import validate_bids as validate_bids_ @@ -47,8 +47,12 @@ def validate_bids( schema_version=schema, ) - if validator_result: - print_validation_results(validator_result) # View + for i in validator_result: + click.secho( + i.message, + bold=True, + fg="red", + ) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 228508e08..3ad3a7882 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -7,9 +7,10 @@ def test_validate_bids_error(bids_examples): from ..cmd_validate import validate_bids expected_error = ( - "Summary: 1 filename pattern required by BIDS could not be found " - "and 1 filename did not match any pattern known to BIDS.\n" + "File does not match any pattern known to BIDS.\n" + "BIDS-required file is not present.\n" ) + broken_dataset = os.path.join(bids_examples, "invalid_pet001") r = 
CliRunner().invoke(validate_bids, [broken_dataset]) # Does it break? diff --git a/dandi/validate.py b/dandi/validate.py index 68e7e2dcd..65e507867 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -102,7 +102,7 @@ def validate_bids( id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, path=path, - message=f"The `{path}` file does not match any pattern known to BIDS.", + message="File does not match any pattern known to BIDS.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets @@ -126,7 +126,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, - message=f'Missing file matching the following pattern: `{pattern["regex"]}`', + message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two # dandiset_path="TODO", # might contain multiple datasets From 85612152b57de23b1d09cdc0afb0d466c8542de4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 20:47:32 -0400 Subject: [PATCH 007/124] comment for review --- dandi/validate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dandi/validate.py b/dandi/validate.py index 65e507867..8b6456357 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -18,6 +18,8 @@ class ValidationResult: scope: Scope message: str dataset_path: Optional[Path] + # TODO gh-943: should this be relative to `dataset_path`? 
+ # would make writing tests with tmp paths a lot easier :3 path: Optional[Path] = None asset_paths: Optional[list[str]] = None From f082382b6fcf8d8dc225f330b054f28f3603e899 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 1 Sep 2022 21:08:32 -0400 Subject: [PATCH 008/124] Incremental deprecation of direct upstream output format --- dandi/files/bids.py | 33 +++++++++++++++++---------------- dandi/validate.py | 2 ++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index f399a8f38..687741826 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -69,25 +69,26 @@ def _validate(self) -> None: # TODO gh-943: use RFed data structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] - # TODO gh-943: Should we add valid files to Validation result? - # This specific check seems difficult to implement. - # if len(results["path_listing"]) == len(results["path_tracking"]): - # self._dataset_errors.append("No valid BIDS files were found") - for entry in results["schema_tracking"]: - if entry["mandatory"]: + self._asset_errors = defaultdict(list) + for i in results: + if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER": + bids_path = Path(i.path).relative_to(self.bids_root).as_posix() + self._dataset_errors.append( + f"The `{bids_path}` file was not matched by any regex schema entry." + ) + self._asset_errors[bids_path].append( + "File not matched by any regex schema entry" + ) + elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER": self._dataset_errors.append( - f"The `{entry['regex']}` regex pattern file" + f"The `{i.regex}` regex pattern file" " required by BIDS was not found." ) - self._asset_errors = defaultdict(list) - for path in results["path_tracking"]: - bids_path = Path(path).relative_to(self.bids_root).as_posix() - self._dataset_errors.append( - f"The `{bids_path}` file was not matched by any regex schema entry." 
- ) - self._asset_errors[bids_path].append( - "File not matched by any regex schema entry" - ) + + # TODO gh-943: Should we add valid files to Validation result? + # The following checks seem difficult to implement. + if len(results["path_listing"]) == len(results["path_tracking"]): + self._dataset_errors.append("No valid BIDS files were found") self._asset_metadata = defaultdict(dict) for meta in results["match_listing"]: bids_path = ( diff --git a/dandi/validate.py b/dandi/validate.py index 8b6456357..87fe680ec 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -21,6 +21,7 @@ class ValidationResult: # TODO gh-943: should this be relative to `dataset_path`? # would make writing tests with tmp paths a lot easier :3 path: Optional[Path] = None + regex: Optional[str] = None asset_paths: Optional[list[str]] = None @@ -128,6 +129,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, + regex=pattern["regex"], message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two From dd77af20745ac133bc7a5044ffeeca402b3787d5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 2 Sep 2022 19:13:03 -0400 Subject: [PATCH 009/124] Review comments --- dandi/files/bids.py | 4 +++- dandi/validate.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 687741826..25168c55e 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -70,6 +70,7 @@ def _validate(self) -> None: results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] self._asset_errors = defaultdict(list) + # TODO gh-943: rename regex maybe and make sure you use messages from class. 
for i in results: if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER": bids_path = Path(i.path).relative_to(self.bids_root).as_posix() @@ -81,12 +82,13 @@ def _validate(self) -> None: ) elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER": self._dataset_errors.append( - f"The `{i.regex}` regex pattern file" + f"The `{i.path_regex}` regex pattern file" " required by BIDS was not found." ) # TODO gh-943: Should we add valid files to Validation result? # The following checks seem difficult to implement. + # Reimplement this at the object level, Validator Result does not have to be one per file. if len(results["path_listing"]) == len(results["path_tracking"]): self._dataset_errors.append("No valid BIDS files were found") self._asset_metadata = defaultdict(dict) diff --git a/dandi/validate.py b/dandi/validate.py index 87fe680ec..7286aa1ab 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -18,10 +18,13 @@ class ValidationResult: scope: Scope message: str dataset_path: Optional[Path] + # TODO gh-943: add dandiset_path as attribute (optional). + dataset_path: Optional[Path] # TODO gh-943: should this be relative to `dataset_path`? # would make writing tests with tmp paths a lot easier :3 + # keep it absolute!!! 
path: Optional[Path] = None - regex: Optional[str] = None + path_regex: Optional[str] = None asset_paths: Optional[list[str]] = None @@ -129,7 +132,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", scope=Scope.FILE, - regex=pattern["regex"], + path_regex=pattern["regex"], message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two From aefb337f15fd6a52799d6b7896fa7b957b3b7e36 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 6 Sep 2022 13:35:12 -0400 Subject: [PATCH 010/124] Fixed test log path --- dandi/tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 18653df12..302c23ec8 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -17,7 +17,7 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. 
pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi") + log_dir = appdirs.user_log_dir("dandi-cli") report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") assert len(glob(report_expression)) == 1 From 1c8ddd3ff7acda416c616f15ceaaf33cd991c18d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 10:39:27 -0400 Subject: [PATCH 011/124] BIDS root detection --- dandi/validate.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 7286aa1ab..28dd9e6cf 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -97,7 +97,7 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1272 if path.endswith((".ERRORS", ".ERRORS.json")): continue - dataset_path = _get_dataset_path(path, paths) + dataset_path = _get_set_path(path, "dataset_description.json") our_validation_result.append( ValidationResult( dataset_path=dataset_path, @@ -122,7 +122,9 @@ def validate_bids( if ("mandatory" in pattern and pattern["mandatory"]) or ( "required" in pattern and pattern["required"] ): - dataset_path = _get_dataset_path(path, paths) + # We don't have a path for this so we'll need some external logic to make sure + # that the dataset path is populated. + # dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( dataset_path=dataset_path, @@ -144,11 +146,27 @@ def validate_bids( return our_validation_result -def _get_dataset_path(file_path, datasets): - # This logic might break with nested datasets. - for dataset in datasets: - if dataset in file_path: - return dataset +def _get_set_path(in_path, marker): + """Get the path to the root of a file set (e.g. BIDS dataset or DANDIset). + + Parameters + ---------- + in_path : str, optional + File for which to determine set path. + marker : str, optional + Filename marker which identifies the set root. 
+ """ + import os + + candidate = os.path.join(in_path, marker) + # Windows support... otherwise we could do `if in_path == "/"`. + if in_path == "/" or not any(i in in_path for i in ["/", "\\"]): + return None + if os.path.isfile(candidate): + return candidate + else: + level_up = os.path.dirname(in_path.rstrip("/\\")) + return _get_set_path(level_up, marker) def validate( From cb6d4944a1775067a11fac5c6b7456fb6ac5eccd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 12:04:25 -0400 Subject: [PATCH 012/124] Assigning dandiset and (BIDS) dataset path --- dandi/validate.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 28dd9e6cf..1c358efd5 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -17,12 +17,12 @@ class ValidationResult: id: str scope: Scope message: str - dataset_path: Optional[Path] # TODO gh-943: add dandiset_path as attribute (optional). - dataset_path: Optional[Path] # TODO gh-943: should this be relative to `dataset_path`? # would make writing tests with tmp paths a lot easier :3 # keep it absolute!!! 
+ dandiset_path: Optional[Path] = None + dataset_path: Optional[Path] = None path: Optional[Path] = None path_regex: Optional[str] = None asset_paths: Optional[list[str]] = None @@ -98,9 +98,11 @@ def validate_bids( if path.endswith((".ERRORS", ".ERRORS.json")): continue dataset_path = _get_set_path(path, "dataset_description.json") + dandiset_path = _get_set_path(path, "dandiset.yaml") our_validation_result.append( ValidationResult( dataset_path=dataset_path, + dandiset_path=dandiset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -127,7 +129,7 @@ def validate_bids( # dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( - dataset_path=dataset_path, + #dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -163,7 +165,7 @@ def _get_set_path(in_path, marker): if in_path == "/" or not any(i in in_path for i in ["/", "\\"]): return None if os.path.isfile(candidate): - return candidate + return os.path.dirname(candidate) else: level_up = os.path.dirname(in_path.rstrip("/\\")) return _get_set_path(level_up, marker) From e2b21ed37b805d7596368c2e2181707d762608da Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 12:05:48 -0400 Subject: [PATCH 013/124] removed comment lines --- dandi/validate.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 1c358efd5..7df73cb3e 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -101,8 +101,6 @@ def validate_bids( dandiset_path = _get_set_path(path, "dandiset.yaml") our_validation_result.append( ValidationResult( - dataset_path=dataset_path, - dandiset_path=dandiset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -113,9 +111,9 @@ def validate_bids( message="File does not match any pattern known to BIDS.", # TODO - discover dandiset 
or actually BIDS dataset # might want separate the two - # dandiset_path="TODO", # might contain multiple datasets - # dataset_path="TODO", # BIDS dataset in this case # asset_paths: Optional[list[str]] = None + dataset_path=dataset_path, + dandiset_path=dandiset_path, ) ) @@ -129,7 +127,6 @@ def validate_bids( # dataset_path = _get_dataset_path(path, paths) our_validation_result.append( ValidationResult( - #dataset_path=dataset_path, origin=origin, severity=Severity.ERROR, # For schema-integrated error code discussion, see: @@ -140,9 +137,9 @@ def validate_bids( message="BIDS-required file is not present.", # TODO - discover dandiset or actually BIDS dataset # might want separate the two - # dandiset_path="TODO", # might contain multiple datasets - # dataset_path="TODO", # BIDS dataset in this case # asset_paths: Optional[list[str]] = None + #dataset_path=dataset_path, + #dandiset_path=dandiset_path, ) ) return our_validation_result From 8af720a42f08e8d6c855cfcbce9a506fc563e327 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 16:26:38 -0400 Subject: [PATCH 014/124] Integrated metadata recording --- dandi/files/bids.py | 51 +++++++++---------------------- dandi/tests/test_validate.py | 2 ++ dandi/validate.py | 59 ++++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 59 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 25168c55e..20cc5ffd9 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -15,10 +15,12 @@ from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest -BIDS_TO_DANDI = { - "subject": "subject_id", - "session": "session_id", -} +BIDS_ASSET_ERRORS = [ + "BIDS.NON_BIDS_PATH_PLACEHOLDER", + ] +BIDS_DATASET_ERRORS = [ + "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", + ] @dataclass @@ -66,43 +68,18 @@ def _validate(self) -> None: bids_paths = [str(self.filepath)] + [ str(asset.filepath) for asset in self.dataset_files ] - # TODO gh-943: use RFed data 
structures, avoid duplicating logic results = validate_bids(*bids_paths) self._dataset_errors: list[str] = [] self._asset_errors = defaultdict(list) - # TODO gh-943: rename regex maybe and make sure you use messages from class. - for i in results: - if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER": - bids_path = Path(i.path).relative_to(self.bids_root).as_posix() - self._dataset_errors.append( - f"The `{bids_path}` file was not matched by any regex schema entry." - ) - self._asset_errors[bids_path].append( - "File not matched by any regex schema entry" - ) - elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER": - self._dataset_errors.append( - f"The `{i.path_regex}` regex pattern file" - " required by BIDS was not found." - ) - - # TODO gh-943: Should we add valid files to Validation result? - # The following checks seem difficult to implement. - # Reimplement this at the object level, Validator Result does not have to be one per file. - if len(results["path_listing"]) == len(results["path_tracking"]): - self._dataset_errors.append("No valid BIDS files were found") self._asset_metadata = defaultdict(dict) - for meta in results["match_listing"]: - bids_path = ( - Path(meta.pop("path")).relative_to(self.bids_root).as_posix() - ) - meta = { - BIDS_TO_DANDI[k]: v - for k, v in meta.items() - if k in BIDS_TO_DANDI - } - # meta["bids_schema_version"] = results["bids_schema_version"] - self._asset_metadata[bids_path] = prepare_metadata(meta) + for i in results: + if i.id in BIDS_ASSET_ERRORS: + self._asset_errors[i.path].append(i.message) + elif i.id in BIDS_DATASET_ERRORS: + self._dataset_errors.append(i.message) + elif i.id == "BIDS.MATCH": + bids_path = Path(i.path.relative_to(self.bids_root).as_posix()) + self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 302c23ec8..f0cdc1a80 100644 --- 
a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -51,6 +51,8 @@ def test_validate_bids_errors(bids_error_examples, dataset): with open(os.path.join(selected_dataset, ".ERRORS.json")) as f: expected_errors = json.load(f) for i in validation_result: + if i.id == "BIDS.MATCH": + continue error_id = i.id if i.path: error_path = i.path diff --git a/dandi/validate.py b/dandi/validate.py index 7df73cb3e..7dbc1d0a4 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -10,22 +10,25 @@ from .files import find_dandi_files +BIDS_TO_DANDI = { + "subject": "subject_id", + "session": "session_id", +} + + @dataclass class ValidationResult: - origin: ValidationOrigin - severity: Severity id: str + origin: ValidationOrigin scope: Scope - message: str - # TODO gh-943: add dandiset_path as attribute (optional). - # TODO gh-943: should this be relative to `dataset_path`? - # would make writing tests with tmp paths a lot easier :3 - # keep it absolute!!! + asset_paths: Optional[list[str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None + message: Optional[str] = "" + metadata: Optional[dict] = None path: Optional[Path] = None - path_regex: Optional[str] = None - asset_paths: Optional[list[str]] = None + path_regex: Optional[str] = "" + severity: Optional[Severity] = None @dataclass @@ -43,6 +46,7 @@ class Severity(Enum): class Scope(Enum): FILE = "file" DANDISET = "dandiset" + DATASET = "dataset" def validate_bids( @@ -71,6 +75,11 @@ def validate_bids( dict Dictionary reporting required patterns not found and existing filenames not matching any patterns. 
+ + Notes + ----- + * Eventually this should be migrated to BIDS schema specified errors, see discussion here: + https://github.com/bids-standard/bids-specification/issues/1262 """ import bidsschematools @@ -92,6 +101,7 @@ def validate_bids( name="bidsschematools", version=bidsschematools.__version__, ) + for path in validation_result["path_tracking"]: # Hard-coding exclusion here pending feature + release in: # https://github.com/bids-standard/bids-specification/issues/1272 @@ -103,15 +113,10 @@ def validate_bids( ValidationResult( origin=origin, severity=Severity.ERROR, - # For schema-integrated error code discussion, see: - # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, path=path, message="File does not match any pattern known to BIDS.", - # TODO - discover dandiset or actually BIDS dataset - # might want separate the two - # asset_paths: Optional[list[str]] = None dataset_path=dataset_path, dandiset_path=dandiset_path, ) @@ -129,19 +134,29 @@ def validate_bids( ValidationResult( origin=origin, severity=Severity.ERROR, - # For schema-integrated error code discussion, see: - # https://github.com/bids-standard/bids-specification/issues/1262 id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", - scope=Scope.FILE, + scope=Scope.DATASET, path_regex=pattern["regex"], message="BIDS-required file is not present.", - # TODO - discover dandiset or actually BIDS dataset - # might want separate the two - # asset_paths: Optional[list[str]] = None - #dataset_path=dataset_path, - #dandiset_path=dandiset_path, ) ) + for meta in validation_result["match_listing"]: + file_path = meta.pop("path") + meta = { + BIDS_TO_DANDI[k]: v + for k, v in meta.items() + if k in BIDS_TO_DANDI + } + our_validation_result.append( + ValidationResult( + origin=origin, + id="BIDS.MATCH", + scope=Scope.FILE, + path=file_path, + metadata=meta, + ) + ) + return our_validation_result From ea4e7907e222b2c937a0f567af781587a267e677 Mon Sep 
17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 17:13:11 -0400 Subject: [PATCH 015/124] Processing input path type --- dandi/files/bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 20cc5ffd9..6ffc45bab 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -78,7 +78,7 @@ def _validate(self) -> None: elif i.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": - bids_path = Path(i.path.relative_to(self.bids_root).as_posix()) + bids_path = Path(i.path).relative_to(self.bids_root).as_posix() self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: From db175a2f1471358a29cce54ad8723e96683b1c8e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 7 Sep 2022 17:14:57 -0400 Subject: [PATCH 016/124] Indentation fix for lint check --- dandi/files/bids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 6ffc45bab..d7b9aabaa 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -17,10 +17,10 @@ BIDS_ASSET_ERRORS = [ "BIDS.NON_BIDS_PATH_PLACEHOLDER", - ] +] BIDS_DATASET_ERRORS = [ "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", - ] +] @dataclass From a90c99a7df68b9eb34e73f98d75483b645e2a12e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 13:16:57 -0400 Subject: [PATCH 017/124] Printing output based on severity --- dandi/cli/cmd_validate.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 5a952f289..92c4b3d3b 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -48,11 +48,18 @@ def validate_bids( ) for i in validator_result: - click.secho( - i.message, - bold=True, - fg="red", - ) + if i.severity == Severity.ERROR: + click.secho( + i.message, + bold=True, + fg="red", + ) + 
if i.severity == Severity.WARNING: + click.secho( + i.message, + bold=True, + fg="yellow", + ) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] From 3304b45a8249fe464cee0373d32a1e0158f00a2c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 14:17:23 -0400 Subject: [PATCH 018/124] =?UTF-8?q?Message=20attribute=20is=20empty=20stri?= =?UTF-8?q?ng=20when=20nonexistent=20=E2=80=94=20not=20none?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dandi/validate.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 7dbc1d0a4..9a20ef763 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -9,7 +9,6 @@ from .files import find_dandi_files - BIDS_TO_DANDI = { "subject": "subject_id", "session": "session_id", @@ -24,7 +23,7 @@ class ValidationResult: asset_paths: Optional[list[str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None - message: Optional[str] = "" + message: str = "" metadata: Optional[dict] = None path: Optional[Path] = None path_regex: Optional[str] = "" @@ -142,11 +141,7 @@ def validate_bids( ) for meta in validation_result["match_listing"]: file_path = meta.pop("path") - meta = { - BIDS_TO_DANDI[k]: v - for k, v in meta.items() - if k in BIDS_TO_DANDI - } + meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} our_validation_result.append( ValidationResult( origin=origin, From 2a9377e3ce8d74e94d28264c06a150401ddc61f3 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 14:32:07 -0400 Subject: [PATCH 019/124] Force posix path for windows tests --- dandi/tests/test_validate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index f0cdc1a80..992022013 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -43,6 +43,7 @@ def 
test_validate_bids_errors(bids_error_examples, dataset): # This only checks that the error we found is correct, not that we found all errors. # ideally make a list and erode etc. import json + import pathlib from ..validate import validate_bids @@ -57,6 +58,7 @@ def test_validate_bids_errors(bids_error_examples, dataset): if i.path: error_path = i.path relative_error_path = os.path.relpath(error_path, i.dataset_path) + relative_error_path = pathlib.Path(relative_error_path).as_posix() assert ( relative_error_path in expected_errors[error_id.lstrip("BIDS.")]["scope"] From 6a0875bea99e98fda435a8075c1789d5869ed00d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 15:37:51 -0400 Subject: [PATCH 020/124] Debugging windows tests --- dandi/tests/test_validate.py | 3 ++- dandi/validate.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 992022013..e445ea350 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -17,7 +17,8 @@ def test_validate_bids(bids_examples, tmp_path): # Check if a report is being produced. 
pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli") + log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + # appdirs.user_log_dir("dandi-cli") report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") assert len(glob(report_expression)) == 1 diff --git a/dandi/validate.py b/dandi/validate.py index 9a20ef763..dd3aa214f 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -90,6 +90,7 @@ def validate_bids( report_path = report_path.format( log_dir=log_dir, ) + validation_result = validate_bids_( paths, schema_version=schema_version, From 4f248b966abb2b3d72f5ab352b2d7195cb3fa7e5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 16:53:08 -0400 Subject: [PATCH 021/124] Typing fixes --- dandi/files/bids.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d7b9aabaa..36d1b834f 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -74,12 +74,14 @@ def _validate(self) -> None: self._asset_metadata = defaultdict(dict) for i in results: if i.id in BIDS_ASSET_ERRORS: - self._asset_errors[i.path].append(i.message) + self._asset_errors[str(i.path)].append(i.message) elif i.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": bids_path = Path(i.path).relative_to(self.bids_root).as_posix() - self._asset_metadata[bids_path] = prepare_metadata(i.metadata) + self._asset_metadata[bids_path] = prepare_metadata( + dict(i.metadata) + ) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" From a564d0d78287559fd42d5707b189040ab15ff58a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 8 Sep 2022 17:00:53 -0400 Subject: [PATCH 022/124] Trying more typing fixes --- dandi/files/bids.py | 4 +--- dandi/validate.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 36d1b834f..0be69df59 100644 --- a/dandi/files/bids.py +++ 
b/dandi/files/bids.py @@ -79,9 +79,7 @@ def _validate(self) -> None: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": bids_path = Path(i.path).relative_to(self.bids_root).as_posix() - self._asset_metadata[bids_path] = prepare_metadata( - dict(i.metadata) - ) + self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" diff --git a/dandi/validate.py b/dandi/validate.py index dd3aa214f..d716d8e45 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -115,7 +115,7 @@ def validate_bids( severity=Severity.ERROR, id="BIDS.NON_BIDS_PATH_PLACEHOLDER", scope=Scope.FILE, - path=path, + path=Path(path), message="File does not match any pattern known to BIDS.", dataset_path=dataset_path, dandiset_path=dandiset_path, From cc57bfc7c5d56ce9ed74ad68a225e18931bac76b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 9 Sep 2022 15:49:20 -0400 Subject: [PATCH 023/124] Debugging typing --- dandi/files/bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 0be69df59..9fce160bc 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -78,7 +78,7 @@ def _validate(self) -> None: elif i.id in BIDS_DATASET_ERRORS: self._dataset_errors.append(i.message) elif i.id == "BIDS.MATCH": - bids_path = Path(i.path).relative_to(self.bids_root).as_posix() + bids_path = i.path.relative_to(self.bids_root).as_posix() self._asset_metadata[bids_path] = prepare_metadata(i.metadata) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: From 15e1cfba2e7a2c8bb91c69d733a1a4a973c5dc7c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 9 Sep 2022 05:45:57 -0400 Subject: [PATCH 024/124] Removed unneeded module --- dandi/bids_utils.py | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 dandi/bids_utils.py diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py deleted file 
mode 100644 index cf09f6ebf..000000000 --- a/dandi/bids_utils.py +++ /dev/null @@ -1,45 +0,0 @@ -from .utils import pluralize -from .validate import ValidationResult - - -def print_validation_results( - validation_result: list[ValidationResult], - # TODO: options for control - # - either report warnings, hints, ... - # - either report groupped by severity, record.id -) -> None: - raise NotImplementedError("TODO: RF to use ValidationResult records") - import click - - missing_files = [ - pattern["regex"] - for pattern in validation_result["schema_tracking"] - if pattern["mandatory"] - ] - error_list = [] - if missing_files: - error_substring = ( - f"{pluralize(len(missing_files), 'filename pattern')} required " - "by BIDS could not be found" - ) - error_list.append(error_substring) - if validation_result["path_tracking"]: - error_substring = ( - f"{pluralize(len(validation_result['path_tracking']), 'filename')} " - "did not match any pattern known to BIDS" - ) - error_list.append(error_substring) - if error_list: - error_string = " and ".join(error_list) - error_string = f"Summary: {error_string}." - click.secho( - error_string, - bold=True, - fg="red", - ) - else: - click.secho( - "All filenames are BIDS-valid and no mandatory files are missing.", - bold=True, - fg="green", - ) From 89a070a7e375b263bb1c8b079d4d4cae59789074 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 9 Sep 2022 16:01:09 -0400 Subject: [PATCH 025/124] Use assert to ensure that Optional becomes non-optional for type checking also used more descriptive "result" variable instead of "i". 
We contemplated some other name (like vrs - for "validation results") but it was not liked --- dandi/files/bids.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 9fce160bc..1a174720c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -72,14 +72,19 @@ def _validate(self) -> None: self._dataset_errors: list[str] = [] self._asset_errors = defaultdict(list) self._asset_metadata = defaultdict(dict) - for i in results: - if i.id in BIDS_ASSET_ERRORS: - self._asset_errors[str(i.path)].append(i.message) - elif i.id in BIDS_DATASET_ERRORS: - self._dataset_errors.append(i.message) - elif i.id == "BIDS.MATCH": - bids_path = i.path.relative_to(self.bids_root).as_posix() - self._asset_metadata[bids_path] = prepare_metadata(i.metadata) + for result in results: + if result.id in BIDS_ASSET_ERRORS: + assert result.path + self._asset_errors[str(result.path)].append(result.message) + elif result.id in BIDS_DATASET_ERRORS: + self._dataset_errors.append(result.message) + elif result.id == "BIDS.MATCH": + assert result.path + bids_path = result.path.relative_to(self.bids_root).as_posix() + assert result.metadata + self._asset_metadata[bids_path] = prepare_metadata( + result.metadata + ) def get_asset_errors(self, asset: BIDSAsset) -> list[str]: """:meta private:""" From fe238b66428026c3671a30f1664f0bc4b5b00a05 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 09:31:31 -0400 Subject: [PATCH 026/124] Further corrected type --- dandi/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index d716d8e45..f045a1a3f 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -148,7 +148,7 @@ def validate_bids( origin=origin, id="BIDS.MATCH", scope=Scope.FILE, - path=file_path, + path=Path(file_path), metadata=meta, ) ) From 25194eef54630efac6619f7d8c88eb56fe9a87ab Mon Sep 17 00:00:00 2001 From: Horea Christian 
Date: Mon, 12 Sep 2022 09:56:19 -0400 Subject: [PATCH 027/124] Filtered download for valid testdata --- dandi/tests/fixtures.py | 49 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index eab945083..872533715 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -39,6 +39,18 @@ lgr = get_logger() +BIDS_TESTDATA_SELECTION = [ + "asl003", + "eeg_cbm", + "hcp_example_bids", + "micr_SEMzarr", + "micr_SPIM", + "pet003", + "qmri_tb1tfl", + "qmri_vfa", +] + + def copytree(src, dst, symlinks=False, ignore=None): """Function mimicking `shutil.copytree()` behaviour but supporting existing target directories. @@ -221,8 +233,43 @@ def fixture() -> Iterator[str]: return fixture +def get_filtered_gitrepo_fixture( + url: str, + whitelist: Iterator[str], +) -> Callable[[], Iterator[str]]: + @pytest.fixture(scope="session") + def fixture(): + with tempfile.TemporaryDirectory() as path: + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + path, + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. 
+ _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) + _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + yield path + + return fixture + + nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") -bids_examples = get_gitrepo_fixture("https://github.com/dandi/bids-examples") +bids_examples = get_filtered_gitrepo_fixture( + "https://github.com/dandi/bids-examples", + BIDS_TESTDATA_SELECTION, +) bids_error_examples = get_gitrepo_fixture( "https://github.com/bids-standard/bids-error-examples" ) From e630fb86498fed5e44884a68574e0b306ba0e8c8 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 09:59:31 -0400 Subject: [PATCH 028/124] Type correction --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 872533715..bbb3fa279 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -235,7 +235,7 @@ def fixture() -> Iterator[str]: def get_filtered_gitrepo_fixture( url: str, - whitelist: Iterator[str], + whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") def fixture(): From 72f47a0ba0266e32a6a19c3f6d57d9428ae8324b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 10:14:14 -0400 Subject: [PATCH 029/124] Update error tests --- dandi/cli/tests/test_cmd_validate.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 3ad3a7882..fd448f6c2 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -3,15 +3,12 @@ from click.testing import CliRunner -def test_validate_bids_error(bids_examples): +def test_validate_bids_error(bids_error_examples): from ..cmd_validate import validate_bids - expected_error = ( - "File does not match any pattern known to BIDS.\n" - "BIDS-required file is not present.\n" - ) + 
expected_error = "File does not match any pattern known to BIDS.\n" - broken_dataset = os.path.join(bids_examples, "invalid_pet001") + broken_dataset = os.path.join(bids_error_examples, "invalid_pet001") r = CliRunner().invoke(validate_bids, [broken_dataset]) # Does it break? assert r.exit_code == 1 From b51e92e5b689fd26bbef12c1e75ceb1010770def Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 10:27:26 -0400 Subject: [PATCH 030/124] fixing windows fixture removal error --- dandi/tests/fixtures.py | 50 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index bbb3fa279..f298f9d6c 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -239,28 +239,34 @@ def get_filtered_gitrepo_fixture( ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") def fixture(): - with tempfile.TemporaryDirectory() as path: - lgr.debug("Cloning %r into %r", url, path) - runout = run( - [ - "git", - "clone", - "--depth=1", - "--filter=blob:none", - "--sparse", - url, - path, - ], - capture_output=True, - ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") - # cwd specification is VERY important, not only to achieve the correct - # effects, but also to avoid dropping files from your repository if you - # were to run `git sparse-checkout` inside the software repo. 
- _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) - _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) - yield path + try: + with tempfile.TemporaryDirectory() as path: + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + path, + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. + _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) + _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + yield path + finally: + try: + shutil.rmtree(path) + except BaseException as exc: + lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) return fixture From 94a17931934f645aca2ebfe5a3d85e73c2fe7b02 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 12:36:17 -0400 Subject: [PATCH 031/124] Debug assertion error fix --- dandi/files/bids.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1a174720c..cb1ff8ce3 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -81,7 +81,13 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - assert result.metadata + try: + assert result.metadata + except AssertionError: + print(result) + print(result) + print(result.path) + print(result.metadata) self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) From ec59dad6e9096210da6b145696868d0abd0aa3b8 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 12 Sep 2022 13:22:49 -0400 Subject: [PATCH 032/124] Using error fixture for error tests --- dandi/tests/fixtures.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index f298f9d6c..691cbce3b 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -506,7 +506,7 @@ def bids_dandiset_invalid( new_dandiset: SampleDandiset, bids_examples: str ) -> SampleDandiset: copytree( - os.path.join(bids_examples, "invalid_pet001"), + os.path.join(bids_error_examples, "invalid_pet001"), str(new_dandiset.dspath) + "/", ) return new_dandiset From 43e17e95b857c51165ff47297cd1ae7df006e3ac Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 13 Sep 2022 00:22:48 -0400 Subject: [PATCH 033/124] Using correct fixture --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 691cbce3b..437d103a4 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -503,7 +503,7 @@ def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDan @pytest.fixture() def bids_dandiset_invalid( - new_dandiset: SampleDandiset, bids_examples: str + new_dandiset: SampleDandiset, bids_error_examples: str ) -> SampleDandiset: copytree( os.path.join(bids_error_examples, "invalid_pet001"), From 4f2e4aef061e3aad1ea1d11b8bc6c1909e5d4f88 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 13 Sep 2022 01:38:17 -0400 Subject: [PATCH 034/124] Disabled debugging code --- dandi/files/bids.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index cb1ff8ce3..1a174720c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -81,13 +81,7 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - try: - assert result.metadata - except AssertionError: - print(result) - print(result) - print(result.path) - print(result.metadata) + assert result.metadata 
self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) From bf5c0f169834f3f0115d1530e2c177e485743b8d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 05:12:29 -0400 Subject: [PATCH 035/124] Debug assertion error --- dandi/files/bids.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1a174720c..66cfa5c43 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -10,11 +10,14 @@ from dandischema.models import BareAsset +from . import get_logger from .bases import GenericAsset, LocalFileAsset, NWBAsset from .zarr import ZarrAsset from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest +lgr = get_logger() + BIDS_ASSET_ERRORS = [ "BIDS.NON_BIDS_PATH_PLACEHOLDER", ] @@ -81,7 +84,11 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - assert result.metadata + try: + assert result.metadata + except AssertionError: + lgr.warning("The path is %s", result.path) + lgr.warning("The metadata is %s", result.metadata) self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) From 697b24d263dc915b782feda9ad77f47322613686 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 05:34:57 -0400 Subject: [PATCH 036/124] Ensuring that metadata is non-empty --- dandi/files/bids.py | 9 +-------- dandi/validate.py | 5 +++++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 66cfa5c43..1a174720c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -10,14 +10,11 @@ from dandischema.models import BareAsset -from . 
import get_logger from .bases import GenericAsset, LocalFileAsset, NWBAsset from .zarr import ZarrAsset from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest -lgr = get_logger() - BIDS_ASSET_ERRORS = [ "BIDS.NON_BIDS_PATH_PLACEHOLDER", ] @@ -84,11 +81,7 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - try: - assert result.metadata - except AssertionError: - lgr.warning("The path is %s", result.path) - lgr.warning("The metadata is %s", result.metadata) + assert result.metadata self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) diff --git a/dandi/validate.py b/dandi/validate.py index f045a1a3f..043dddeae 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -143,6 +143,11 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} + # Top level files do not have any other metadata other than path. 
+ if not meta: + meta = { + "subject": None, + } our_validation_result.append( ValidationResult( origin=origin, From bb52d56f329265549c4e844a04f450ff95adb0b0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 18:11:40 -0400 Subject: [PATCH 037/124] Making example invalid dataset truly invalid --- dandi/tests/fixtures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 437d103a4..b0a4c0b8d 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -505,10 +505,12 @@ def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDan def bids_dandiset_invalid( new_dandiset: SampleDandiset, bids_error_examples: str ) -> SampleDandiset: + dataset_path = new_dandiset.dspath copytree( os.path.join(bids_error_examples, "invalid_pet001"), - str(new_dandiset.dspath) + "/", + str(dataset_path) + "/", ) + os.remove(os.path.join(dataset_path, "README")) return new_dandiset From ee9492ae6e2d3ba03d4d141e697b436895f890f6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 19:47:50 -0400 Subject: [PATCH 038/124] Windows debugging --- dandi/tests/fixtures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index b0a4c0b8d..522400328 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -228,7 +228,10 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("1Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("2Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("3Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("4Failed to remove %s - using Windows?: %s", path, exc) return fixture From c828628a68a823c7d3058fcc357d925057a9281b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 14 Sep 2022 
21:55:45 -0400 Subject: [PATCH 039/124] Further winerror 5 debugging --- dandi/tests/fixtures.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 522400328..ea133af5a 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -228,10 +228,10 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("1Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("2Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("3Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("4Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("1 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("2 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("3 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("4 Failed to remove %s - using Windows?: %s", path, exc) return fixture @@ -241,7 +241,9 @@ def get_filtered_gitrepo_fixture( whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") - def fixture(): + def fixture() -> Iterator[str]: + skipif.no_network() + skipif.no_git() try: with tempfile.TemporaryDirectory() as path: lgr.debug("Cloning %r into %r", url, path) @@ -268,8 +270,9 @@ def fixture(): finally: try: shutil.rmtree(path) - except BaseException as exc: - lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) + except PermissionError as exc: + lgr.warning("AA Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("BB Failed to remove %s - using Windows?: %s", path, exc) return fixture From 2f1c6a7c60ffc87ed50dd908c6d860185b595dfb Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 15 Sep 2022 04:39:35 -0400 Subject: [PATCH 040/124] Using base exception instead of specific one --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index ea133af5a..45735a863 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -270,7 +270,7 @@ def fixture() -> Iterator[str]: finally: try: shutil.rmtree(path) - except PermissionError as exc: + except BaseException as exc: lgr.warning("AA Failed to remove %s - using Windows?: %s", path, exc) lgr.warning("BB Failed to remove %s - using Windows?: %s", path, exc) From 155e557e218d9fde3006ac65f4d6ef0edeec3d8a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 15 Sep 2022 16:18:09 -0400 Subject: [PATCH 041/124] Not using auto-cleaning temp dir via while --- dandi/tests/fixtures.py | 46 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 45735a863..c4e497e7b 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -244,29 +244,31 @@ def get_filtered_gitrepo_fixture( def fixture() -> Iterator[str]: skipif.no_network() skipif.no_git() + path = tempfile.mktemp() # not using pytest's tmpdir fixture to not + # collide in different scopes etc. But we + # would need to remove it ourselves try: - with tempfile.TemporaryDirectory() as path: - lgr.debug("Cloning %r into %r", url, path) - runout = run( - [ - "git", - "clone", - "--depth=1", - "--filter=blob:none", - "--sparse", - url, - path, - ], - capture_output=True, - ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") - # cwd specification is VERY important, not only to achieve the correct - # effects, but also to avoid dropping files from your repository if you - # were to run `git sparse-checkout` inside the software repo. 
- _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) - _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) - yield path + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + path, + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. + _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) + _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + yield path finally: try: shutil.rmtree(path) From f30ad9935aceda9a38a30416ab5d719c2fb43768 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:20:55 -0400 Subject: [PATCH 042/124] Removed debugging code --- dandi/tests/fixtures.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index c4e497e7b..84b0bce8b 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -228,10 +228,7 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("1 Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("2 Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("3 Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("4 Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) return fixture @@ -273,8 +270,7 @@ def fixture() -> Iterator[str]: try: shutil.rmtree(path) except BaseException as exc: - lgr.warning("AA Failed to remove %s - using Windows?: %s", path, exc) - lgr.warning("BB Failed to remove %s - using Windows?: %s", path, exc) + lgr.warning("Failed to remove %s - using 
Windows?: %s", path, exc) return fixture From 676cbc3845443519a2f92d5ab57230566de9a9e0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:21:48 -0400 Subject: [PATCH 043/124] More verbose error printing --- dandi/cli/cmd_validate.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 92c4b3d3b..4f3c6b772 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -48,15 +48,22 @@ def validate_bids( ) for i in validator_result: + if i.path: + scope = i.path + elif i.path_regex: + scope = i.path_regex + else: + scope = i.dataset_path + echo_string = f"[{i.id}] {scope}: {i.message}" if i.severity == Severity.ERROR: click.secho( - i.message, + echo_string, bold=True, fg="red", ) if i.severity == Severity.WARNING: click.secho( - i.message, + echo_string, bold=True, fg="orange", ) From 9a8861bc5d60a2d2ad73cf331a0dd54afe541a00 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:32:57 -0400 Subject: [PATCH 044/124] Using entire valid dataset whitelist for testing --- dandi/tests/fixtures.py | 2 +- dandi/tests/test_validate.py | 58 +++++++++++++++++++----------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 84b0bce8b..96e1bd597 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -277,7 +277,7 @@ def fixture() -> Iterator[str]: nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") bids_examples = get_filtered_gitrepo_fixture( - "https://github.com/dandi/bids-examples", + "https://github.com/bids-standard/bids-examples", BIDS_TESTDATA_SELECTION, ) bids_error_examples = get_gitrepo_fixture( diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index e445ea350..41aef93a4 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,40 +1,42 @@ -from glob import glob +# 
from glob import glob import os -import appdirs +# import appdirs import pytest +from .fixtures import BIDS_TESTDATA_SELECTION -def test_validate_bids(bids_examples, tmp_path): - from ..validate import validate_bids - # TEST DEFAULT REPORT PATH: - # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we - # implement light data cloning analogous to (`[1]` because `[0]` is .git): - # https://github.com/bids-standard/bids-specification/pull/1143 - selected_dataset = os.path.join(bids_examples, "asl003") - _ = validate_bids(selected_dataset, report=True) +@pytest.mark.parametrize("dataset", BIDS_TESTDATA_SELECTION) +def test_validate_bids(bids_examples, tmp_path, dataset): + from ..validate import validate_bids - # Check if a report is being produced. - pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli", "dandi") - # appdirs.user_log_dir("dandi-cli") - report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") - assert len(glob(report_expression)) == 1 + selected_dataset = os.path.join(bids_examples, dataset) + validation_result = validate_bids(selected_dataset, report=True) + for i in validation_result: + assert not hasattr(i, "severtiy") - # TEST CISTOM REPORT PATH: - # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we - # implement light data cloning analogous to (`[1]` because `[0]` is .git): - # https://github.com/bids-standard/bids-specification/pull/1143 - report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") - selected_dataset = os.path.join(bids_examples, "asl003") - _ = validate_bids( - selected_dataset, - report_path=report_path, - ) - # Check if a report is being produced. - assert len(glob(report_path)) == 1 +# # Check if a report is being produced. 
+# pid = os.getpid() +# log_dir = appdirs.user_log_dir("dandi-cli", "dandi") +# # appdirs.user_log_dir("dandi-cli") +# report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") +# assert len(glob(report_expression)) == 1 +# +# # TEST CUSTOM REPORT PATH: +# # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we +# # implement light data cloning analogous to (`[1]` because `[0]` is .git): +# # https://github.com/bids-standard/bids-specification/pull/1143 +# report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") +# selected_dataset = os.path.join(bids_examples, "asl003") +# _ = validate_bids( +# selected_dataset, +# report_path=report_path, +# ) +# +# # Check if a report is being produced. +# assert len(glob(report_path)) == 1 @pytest.mark.parametrize( From 74cc79737ec05f7fc698fd3537df49efc95d00f0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:36:16 -0400 Subject: [PATCH 045/124] Log creation test using whitelist instead of hard-coded dataset --- dandi/tests/test_validate.py | 42 +++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 41aef93a4..4d6280981 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,7 +1,7 @@ -# from glob import glob +from glob import glob import os -# import appdirs +import appdirs import pytest from .fixtures import BIDS_TESTDATA_SELECTION @@ -17,26 +17,24 @@ def test_validate_bids(bids_examples, tmp_path, dataset): assert not hasattr(i, "severtiy") -# # Check if a report is being produced. 
-# pid = os.getpid() -# log_dir = appdirs.user_log_dir("dandi-cli", "dandi") -# # appdirs.user_log_dir("dandi-cli") -# report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") -# assert len(glob(report_expression)) == 1 -# -# # TEST CUSTOM REPORT PATH: -# # Replace explicit dataset with `os.listdir(bids_examples)[1]` whenever we -# # implement light data cloning analogous to (`[1]` because `[0]` is .git): -# # https://github.com/bids-standard/bids-specification/pull/1143 -# report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") -# selected_dataset = os.path.join(bids_examples, "asl003") -# _ = validate_bids( -# selected_dataset, -# report_path=report_path, -# ) -# -# # Check if a report is being produced. -# assert len(glob(report_path)) == 1 +def test_report_path(bids_examples, tmp_path): + from ..validate import validate_bids + + pid = os.getpid() + log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + # appdirs.user_log_dir("dandi-cli") + report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") + assert len(glob(report_expression)) == 1 + + report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") + selected_dataset = os.path.join(bids_examples, BIDS_TESTDATA_SELECTION[0]) + _ = validate_bids( + selected_dataset, + report_path=report_path, + ) + + # Check if a report is being produced. 
+ assert len(glob(report_path)) == 1 @pytest.mark.parametrize( From db6a81fb22a154680d37d61a3d0c0565976fd17c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 00:40:58 -0400 Subject: [PATCH 046/124] Fixed cmd_validate tests --- dandi/cli/tests/test_cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index fd448f6c2..173edba93 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -14,4 +14,4 @@ def test_validate_bids_error(bids_error_examples): assert r.exit_code == 1 # Does it report the issue correctly? - assert r.output == expected_error + assert expected_error in r.output From 715f70e604abf3d7e830dffc9e2348c497cb6ddc Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 15:56:08 -0400 Subject: [PATCH 047/124] Fixed report path test --- dandi/tests/test_validate.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 4d6280981..84ed73e11 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,7 +1,7 @@ from glob import glob import os -import appdirs +# import appdirs import pytest from .fixtures import BIDS_TESTDATA_SELECTION @@ -20,11 +20,10 @@ def test_validate_bids(bids_examples, tmp_path, dataset): def test_report_path(bids_examples, tmp_path): from ..validate import validate_bids - pid = os.getpid() - log_dir = appdirs.user_log_dir("dandi-cli", "dandi") - # appdirs.user_log_dir("dandi-cli") - report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") - assert len(glob(report_expression)) == 1 + # pid = os.getpid() + # log_dir = appdirs.user_log_dir("dandi-cli", "dandi") + # report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") + # assert len(glob(report_expression)) == 1 report_path = os.path.join(tmp_path, 
"inplace_bids-validator-report.log") selected_dataset = os.path.join(bids_examples, BIDS_TESTDATA_SELECTION[0]) From fb5847c5d5146cebd17c0b230751aa9237639861 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 16 Sep 2022 17:11:49 -0400 Subject: [PATCH 048/124] Testing the command line for all error cases --- dandi/cli/tests/test_cmd_validate.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 173edba93..eda2376fc 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -1,17 +1,26 @@ import os from click.testing import CliRunner +import pytest -def test_validate_bids_error(bids_error_examples): +@pytest.mark.parametrize( + "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] +) +def test_validate_bids_error(bids_error_examples, dataset): + import json + from ..cmd_validate import validate_bids - expected_error = "File does not match any pattern known to BIDS.\n" + # expected_error = "File does not match any pattern known to BIDS.\n" - broken_dataset = os.path.join(bids_error_examples, "invalid_pet001") + broken_dataset = os.path.join(bids_error_examples, dataset) + with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: + expected_errors = json.load(f) r = CliRunner().invoke(validate_bids, [broken_dataset]) # Does it break? assert r.exit_code == 1 - # Does it report the issue correctly? - assert expected_error in r.output + # Does it detect all errors? 
+ for key in expected_errors: + assert key in r.output From e0027c3088cf95e08445ab5a97a81e45f59c3f24 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Sat, 17 Sep 2022 01:10:20 -0400 Subject: [PATCH 049/124] Removed deprecated comments --- dandi/cli/cmd_validate.py | 1 - dandi/cli/tests/test_cmd_validate.py | 2 -- dandi/tests/test_validate.py | 5 ----- 3 files changed, 8 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 4f3c6b772..089644c70 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -36,7 +36,6 @@ def validate_bids( dandi validate-bids /my/path """ - # from ..bids_utils import print_validation_results from ..validate import Severity from ..validate import validate_bids as validate_bids_ diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index eda2376fc..095d21907 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -12,8 +12,6 @@ def test_validate_bids_error(bids_error_examples, dataset): from ..cmd_validate import validate_bids - # expected_error = "File does not match any pattern known to BIDS.\n" - broken_dataset = os.path.join(bids_error_examples, dataset) with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: expected_errors = json.load(f) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 84ed73e11..111196975 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -20,11 +20,6 @@ def test_validate_bids(bids_examples, tmp_path, dataset): def test_report_path(bids_examples, tmp_path): from ..validate import validate_bids - # pid = os.getpid() - # log_dir = appdirs.user_log_dir("dandi-cli", "dandi") - # report_expression = os.path.join(log_dir, f"bids-validator-report_*-{pid}.log") - # assert len(glob(report_expression)) == 1 - report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") selected_dataset = os.path.join(bids_examples, 
BIDS_TESTDATA_SELECTION[0]) _ = validate_bids( From 453cd65c31e6c7bad63fd5b3623d39e7a9a5ec19 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Sat, 17 Sep 2022 01:13:06 -0400 Subject: [PATCH 050/124] Populating dandi and bids dataset paths --- dandi/validate.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 043dddeae..3d9841dfb 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -143,7 +143,10 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} - # Top level files do not have any other metadata other than path. + dataset_path = _get_set_path(file_path, "dataset_description.json") + dandiset_path = _get_set_path(file_path, "dandiset.yaml") + # Top level files do not have any other metadata other than path, + # which we pop and put in the object... if not meta: meta = { "subject": None, @@ -155,6 +158,8 @@ def validate_bids( scope=Scope.FILE, path=Path(file_path), metadata=meta, + dataset_path=dataset_path, + dandiset_path=dandiset_path, ) ) From 786729acc5b1288b7625dac108a125a89c675637 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 19 Sep 2022 14:27:28 -0400 Subject: [PATCH 051/124] Simple fixes suggested by jwodder --- dandi/files/bids.py | 8 ++------ dandi/tests/fixtures.py | 4 ++-- dandi/tests/test_validate.py | 10 +++------- dandi/validate.py | 8 +++----- 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1a174720c..3827ea9cc 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -15,12 +15,8 @@ from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest -BIDS_ASSET_ERRORS = [ - "BIDS.NON_BIDS_PATH_PLACEHOLDER", -] -BIDS_DATASET_ERRORS = [ - "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER", -] +BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) 
+BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) @dataclass diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 96e1bd597..ba2037e88 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -263,8 +263,8 @@ def fixture() -> Iterator[str]: # cwd specification is VERY important, not only to achieve the correct # effects, but also to avoid dropping files from your repository if you # were to run `git sparse-checkout` inside the software repo. - _ = run(["git", "sparse-checkout", "init", "--cone"], cwd=path) - _ = run(["git", "sparse-checkout", "set"] + whitelist, cwd=path) + run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) + run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) yield path finally: try: diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 111196975..d2fc6d8d4 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,15 +1,16 @@ from glob import glob +import json import os +import pathlib -# import appdirs import pytest from .fixtures import BIDS_TESTDATA_SELECTION +from ..validate import validate_bids @pytest.mark.parametrize("dataset", BIDS_TESTDATA_SELECTION) def test_validate_bids(bids_examples, tmp_path, dataset): - from ..validate import validate_bids selected_dataset = os.path.join(bids_examples, dataset) validation_result = validate_bids(selected_dataset, report=True) @@ -18,7 +19,6 @@ def test_validate_bids(bids_examples, tmp_path, dataset): def test_report_path(bids_examples, tmp_path): - from ..validate import validate_bids report_path = os.path.join(tmp_path, "inplace_bids-validator-report.log") selected_dataset = os.path.join(bids_examples, BIDS_TESTDATA_SELECTION[0]) @@ -37,10 +37,6 @@ def test_report_path(bids_examples, tmp_path): def test_validate_bids_errors(bids_error_examples, dataset): # This only checks that the error we found is correct, not that we found all errors. 
# ideally make a list and erode etc. - import json - import pathlib - - from ..validate import validate_bids selected_dataset = os.path.join(bids_error_examples, dataset) validation_result = validate_bids(selected_dataset, report=True) diff --git a/dandi/validate.py b/dandi/validate.py index 3d9841dfb..65b0ca272 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -23,10 +23,10 @@ class ValidationResult: asset_paths: Optional[list[str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None - message: str = "" + message: Optional[str] = None metadata: Optional[dict] = None path: Optional[Path] = None - path_regex: Optional[str] = "" + path_regex: Optional[str] = None severity: Optional[Severity] = None @@ -124,9 +124,7 @@ def validate_bids( for pattern in validation_result["schema_tracking"]: # Future proofing for standard-compliant name. - if ("mandatory" in pattern and pattern["mandatory"]) or ( - "required" in pattern and pattern["required"] - ): + if pattern.get("mandatory") or pattern.get("required"): # We don't have a path for this so we'll need some external logic to make sure # that the dataset path is populated. 
# dataset_path = _get_dataset_path(path, paths) From d0070bd6b3b54b63390129475a0a206b7221e466 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 20 Sep 2022 08:42:06 -0400 Subject: [PATCH 052/124] Fixed typing --- dandi/files/bids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 3827ea9cc..39105ddf3 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -71,8 +71,10 @@ def _validate(self) -> None: for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path + assert result.message self._asset_errors[str(result.path)].append(result.message) elif result.id in BIDS_DATASET_ERRORS: + assert result.message self._dataset_errors.append(result.message) elif result.id == "BIDS.MATCH": assert result.path From b205aafd0820c48202020274d11370372626175b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 21 Sep 2022 11:41:42 -0400 Subject: [PATCH 053/124] Correcting type --- dandi/tests/fixtures.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index ba2037e88..ec803e44a 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -234,14 +234,15 @@ def fixture() -> Iterator[str]: def get_filtered_gitrepo_fixture( + tmp_path, url: str, whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") - def fixture() -> Iterator[str]: + def fixture() -> Iterator[Path]: skipif.no_network() skipif.no_git() - path = tempfile.mktemp() # not using pytest's tmpdir fixture to not + path = tmp_path # not using pytest's tmpdir fixture to not # collide in different scopes etc. 
But we # would need to remove it ourselves try: @@ -269,7 +270,7 @@ def fixture() -> Iterator[str]: finally: try: shutil.rmtree(path) - except BaseException as exc: + except Exception as exc: lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) return fixture @@ -277,8 +278,8 @@ def fixture() -> Iterator[str]: nwb_test_data = get_gitrepo_fixture("http://github.com/dandi-datasets/nwb_test_data") bids_examples = get_filtered_gitrepo_fixture( - "https://github.com/bids-standard/bids-examples", - BIDS_TESTDATA_SELECTION, + url="https://github.com/bids-standard/bids-examples", + whitelist=BIDS_TESTDATA_SELECTION, ) bids_error_examples = get_gitrepo_fixture( "https://github.com/bids-standard/bids-error-examples" From d04f4bda403dbc0fb9774c7a721536c5f63a9380 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 20 Sep 2022 14:16:34 -0400 Subject: [PATCH 054/124] Using pytest temp path generation --- dandi/tests/fixtures.py | 56 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index ec803e44a..42715df7d 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -239,39 +239,33 @@ def get_filtered_gitrepo_fixture( whitelist: List[str], ) -> Callable[[], Iterator[str]]: @pytest.fixture(scope="session") - def fixture() -> Iterator[Path]: + def fixture( + tmp_path_factory: pytest.TempPathFactory, + ) -> Iterator[str]: skipif.no_network() skipif.no_git() - path = tmp_path # not using pytest's tmpdir fixture to not - # collide in different scopes etc. 
But we - # would need to remove it ourselves - try: - lgr.debug("Cloning %r into %r", url, path) - runout = run( - [ - "git", - "clone", - "--depth=1", - "--filter=blob:none", - "--sparse", - url, - path, - ], - capture_output=True, - ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") - # cwd specification is VERY important, not only to achieve the correct - # effects, but also to avoid dropping files from your repository if you - # were to run `git sparse-checkout` inside the software repo. - run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) - run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) - yield path - finally: - try: - shutil.rmtree(path) - except Exception as exc: - lgr.warning("Failed to remove %s - using Windows?: %s", path, exc) + path = tmp_path_factory.mktemp("gitrepo") + lgr.debug("Cloning %r into %r", url, path) + runout = run( + [ + "git", + "clone", + "--depth=1", + "--filter=blob:none", + "--sparse", + url, + str(path), + ], + capture_output=True, + ) + if runout.returncode: + raise RuntimeError(f"Failed to clone {url} into {path}") + # cwd specification is VERY important, not only to achieve the correct + # effects, but also to avoid dropping files from your repository if you + # were to run `git sparse-checkout` inside the software repo. 
+ run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) + run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) + yield path return fixture From 7d21f4ed8990f045813a02f9eeab0e86c5ac1da0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 21 Sep 2022 11:51:03 -0400 Subject: [PATCH 055/124] Corrected function parameters --- dandi/tests/fixtures.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 42715df7d..bc000ba64 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -234,10 +234,9 @@ def fixture() -> Iterator[str]: def get_filtered_gitrepo_fixture( - tmp_path, url: str, whitelist: List[str], -) -> Callable[[], Iterator[str]]: +) -> Callable[[pytest.TempPathFactory], Iterator[str]]: @pytest.fixture(scope="session") def fixture( tmp_path_factory: pytest.TempPathFactory, From 4ba4b7b0905379dc61935155735a710ee40c1dae Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 16:57:09 -0400 Subject: [PATCH 056/124] More parsimonious error reporting --- dandi/cli/cmd_validate.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 089644c70..b0b24f401 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -54,17 +54,16 @@ def validate_bids( else: scope = i.dataset_path echo_string = f"[{i.id}] {scope}: {i.message}" + i_fg = "" if i.severity == Severity.ERROR: + i_fg = "red" + elif i.severity == Severity.WARNING: + i_fg = "orange" + if i_fg: click.secho( echo_string, bold=True, - fg="red", - ) - if i.severity == Severity.WARNING: - click.secho( - echo_string, - bold=True, - fg="orange", + fg=i_fg, ) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] From 23e54be748da0fbf4135d8b63e868b90cf66e99e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 16:58:52 -0400 Subject: [PATCH 
057/124] Global import --- dandi/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 65b0ca272..6655b8e07 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -6,6 +6,7 @@ from typing import Iterator, List, Optional, Tuple, Union import appdirs +import os from .files import find_dandi_files @@ -174,7 +175,6 @@ def _get_set_path(in_path, marker): marker : str, optional Filename marker which identifies the set root. """ - import os candidate = os.path.join(in_path, marker) # Windows support... otherwise we could do `if in_path == "/"`. From 28e7374c1a083b73e49751ffd28fe1d3bbd8fbfd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 17:04:48 -0400 Subject: [PATCH 058/124] Using built-in find_parent_directory_containing function --- dandi/validate.py | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 6655b8e07..0df8f0533 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -9,6 +9,7 @@ import os from .files import find_dandi_files +from .utils import find_parent_directory_containing BIDS_TO_DANDI = { "subject": "subject_id", @@ -108,8 +109,8 @@ def validate_bids( # https://github.com/bids-standard/bids-specification/issues/1272 if path.endswith((".ERRORS", ".ERRORS.json")): continue - dataset_path = _get_set_path(path, "dataset_description.json") - dandiset_path = _get_set_path(path, "dandiset.yaml") + dataset_path = find_parent_directory_containing("dataset_description.json", path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", path) our_validation_result.append( ValidationResult( origin=origin, @@ -128,7 +129,7 @@ def validate_bids( if pattern.get("mandatory") or pattern.get("required"): # We don't have a path for this so we'll need some external logic to make sure # that the dataset path is populated. 
- # dataset_path = _get_dataset_path(path, paths) + # dataset_path = find_parent_directory_containing(paths, path) our_validation_result.append( ValidationResult( origin=origin, @@ -142,8 +143,8 @@ def validate_bids( for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} - dataset_path = _get_set_path(file_path, "dataset_description.json") - dandiset_path = _get_set_path(file_path, "dandiset.yaml") + dataset_path = find_parent_directory_containing("dataset_description.json", file_path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, # which we pop and put in the object... if not meta: @@ -165,28 +166,6 @@ def validate_bids( return our_validation_result -def _get_set_path(in_path, marker): - """Get the path to the root of a file set (e.g. BIDS dataset or DANDIset). - - Parameters - ---------- - in_path : str, optional - File for which to determine set path. - marker : str, optional - Filename marker which identifies the set root. - """ - - candidate = os.path.join(in_path, marker) - # Windows support... otherwise we could do `if in_path == "/"`. 
- if in_path == "/" or not any(i in in_path for i in ["/", "\\"]): - return None - if os.path.isfile(candidate): - return os.path.dirname(candidate) - else: - level_up = os.path.dirname(in_path.rstrip("/\\")) - return _get_set_path(level_up, marker) - - def validate( *paths: str, schema_version: Optional[str] = None, From 3702eacb74ef222d926a8ef029f9d411e017a0d0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 17:07:19 -0400 Subject: [PATCH 059/124] Dropped unused import --- dandi/validate.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 0df8f0533..d4f369206 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -6,7 +6,6 @@ from typing import Iterator, List, Optional, Tuple, Union import appdirs -import os from .files import find_dandi_files from .utils import find_parent_directory_containing @@ -147,10 +146,6 @@ def validate_bids( dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, # which we pop and put in the object... - if not meta: - meta = { - "subject": None, - } our_validation_result.append( ValidationResult( origin=origin, From 2d9723ccc0f0ba5df789b14c8d5a75822615458a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 17:37:12 -0400 Subject: [PATCH 060/124] Re-added placeholder metadata to satisfy assert --- dandi/validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dandi/validate.py b/dandi/validate.py index d4f369206..819e9d02a 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -146,6 +146,10 @@ def validate_bids( dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, # which we pop and put in the object... 
+ if not meta: + meta = { + "subject": None, + } our_validation_result.append( ValidationResult( origin=origin, From 61e5d3a22199e46ff059e9973b2743684dc7b4c5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 18:20:05 -0400 Subject: [PATCH 061/124] No longer recomputing dandi/data-set path for files in same directory --- dandi/validate.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 819e9d02a..4beffdeb0 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -6,6 +6,7 @@ from typing import Iterator, List, Optional, Tuple, Union import appdirs +import os from .files import find_dandi_files from .utils import find_parent_directory_containing @@ -103,13 +104,17 @@ def validate_bids( version=bidsschematools.__version__, ) + # Storing variable to not re-compute set paths for each individual file. + parent_path = False for path in validation_result["path_tracking"]: # Hard-coding exclusion here pending feature + release in: # https://github.com/bids-standard/bids-specification/issues/1272 if path.endswith((".ERRORS", ".ERRORS.json")): continue - dataset_path = find_parent_directory_containing("dataset_description.json", path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", path) + if parent_path != os.path.dirname(path): + parent_path = os.path.dirname(path) + dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) our_validation_result.append( ValidationResult( origin=origin, @@ -139,9 +144,16 @@ def validate_bids( message="BIDS-required file is not present.", ) ) + + # Storing variable to not re-compute set paths for each individual file. 
+ parent_path = False for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} + if parent_path != os.path.dirname(file_path): + parent_path = os.path.dirname(file_path) + dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) + dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) dataset_path = find_parent_directory_containing("dataset_description.json", file_path) dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) # Top level files do not have any other metadata other than path, From 17113543e3d21d9d95f93dcf6c07373a06aef9f0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 26 Sep 2022 19:43:47 -0400 Subject: [PATCH 062/124] Trying to make sure typechecking understands logic --- dandi/validate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 4beffdeb0..e060b5cc8 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -105,7 +105,7 @@ def validate_bids( ) # Storing variable to not re-compute set paths for each individual file. - parent_path = False + parent_path = None for path in validation_result["path_tracking"]: # Hard-coding exclusion here pending feature + release in: # https://github.com/bids-standard/bids-specification/issues/1272 @@ -146,7 +146,7 @@ def validate_bids( ) # Storing variable to not re-compute set paths for each individual file. 
- parent_path = False + parent_path = None for meta in validation_result["match_listing"]: file_path = meta.pop("path") meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} From 122581ad47e3fbf22747b7773ea172a940484464 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 14:50:38 -0400 Subject: [PATCH 063/124] Removed redundant code and black fixes --- dandi/validate.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index e060b5cc8..6d61a61aa 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -2,11 +2,11 @@ from dataclasses import dataclass from enum import Enum +import os from pathlib import Path from typing import Iterator, List, Optional, Tuple, Union import appdirs -import os from .files import find_dandi_files from .utils import find_parent_directory_containing @@ -113,8 +113,12 @@ def validate_bids( continue if parent_path != os.path.dirname(path): parent_path = os.path.dirname(path) - dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) + dataset_path = find_parent_directory_containing( + "dataset_description.json", parent_path + ) + dandiset_path = find_parent_directory_containing( + "dandiset.yaml", parent_path + ) our_validation_result.append( ValidationResult( origin=origin, @@ -152,10 +156,12 @@ def validate_bids( meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI} if parent_path != os.path.dirname(file_path): parent_path = os.path.dirname(file_path) - dataset_path = find_parent_directory_containing("dataset_description.json", parent_path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", parent_path) - dataset_path = find_parent_directory_containing("dataset_description.json", file_path) - dandiset_path = find_parent_directory_containing("dandiset.yaml", file_path) + 
dataset_path = find_parent_directory_containing( + "dataset_description.json", parent_path + ) + dandiset_path = find_parent_directory_containing( + "dandiset.yaml", parent_path + ) # Top level files do not have any other metadata other than path, # which we pop and put in the object... if not meta: From e25375bce4ca29c91880372ac42d1dfa091a0395 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 14:53:27 -0400 Subject: [PATCH 064/124] No placeholder metadata --- dandi/files/bids.py | 2 +- dandi/validate.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 39105ddf3..d7ca279b1 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -79,7 +79,7 @@ def _validate(self) -> None: elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() - assert result.metadata + assert result.metadata is not None self._asset_metadata[bids_path] = prepare_metadata( result.metadata ) diff --git a/dandi/validate.py b/dandi/validate.py index 6d61a61aa..dc8bb5471 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -162,12 +162,6 @@ def validate_bids( dandiset_path = find_parent_directory_containing( "dandiset.yaml", parent_path ) - # Top level files do not have any other metadata other than path, - # which we pop and put in the object... - if not meta: - meta = { - "subject": None, - } our_validation_result.append( ValidationResult( origin=origin, From e292718c86ae0cf56ccccc5ebf15c98ab4475e4e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 21:49:44 -0400 Subject: [PATCH 065/124] Unified error reporting across validation types also removed part of error reporting which was reporting valid files, let me know if you need it back. 
--- dandi/cli/cmd_validate.py | 43 +++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index b0b24f401..4700fc33b 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -3,7 +3,9 @@ import click -from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions +from .base import devel_debug_option, devel_option, map_to_click_exceptions +from ..utils import pluralize +from ..validate import Severity @click.command() @@ -36,7 +38,6 @@ def validate_bids( dandi validate-bids /my/path """ - from ..validate import Severity from ..validate import validate_bids as validate_bids_ validator_result = validate_bids_( # Controller @@ -47,24 +48,15 @@ def validate_bids( ) for i in validator_result: + if not i.severity: + continue if i.path: scope = i.path elif i.path_regex: scope = i.path_regex else: scope = i.dataset_path - echo_string = f"[{i.id}] {scope}: {i.message}" - i_fg = "" - if i.severity == Severity.ERROR: - i_fg = "red" - elif i.severity == Severity.WARNING: - i_fg = "orange" - if i_fg: - click.secho( - echo_string, - bold=True, - fg=i_fg, - ) + display_errors(scope, [i.id], [i.severity], [i.message]) validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] @@ -155,10 +147,21 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): ) -def display_errors(path, errors): - if not errors: - lgr.info("%s: ok", path) +def display_errors(scope, errors, severities=[], messages=[]): + if Severity.ERROR in severities: + fg = "red" + elif Severity.WARNING in severities: + fg = "orange" else: - lgr.error("%s: %d error(s)", path, len(errors)) - for error in errors: - lgr.error(" %s", error) + fg = "blue" + summary = f"{scope}: {pluralize(len(errors), 'issue')} detected." 
+ click.secho(summary, bold=True, fg="red") + for error, severity, message in zip(errors, severities, messages): + if severity == Severity.ERROR: + fg = "red" + elif severity == Severity.WARNING: + fg = "green" + else: + fg = "blue" + error_message = f" [{error}] {message}" + click.secho(error_message, fg=fg) From c4a36ca2db773cd0451ff944661b4623b43b351f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 22:28:00 -0400 Subject: [PATCH 066/124] Improve style --- dandi/cli/cmd_validate.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 4700fc33b..a90a5fc55 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -108,7 +108,10 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): ): nfiles += 1 if view == "one-at-a-time": - display_errors(path, errors) + if errors: + display_errors( + path, ["NWBError"] * len(errors), [""] * len(errors), errors + ) all_files_errors[path] = errors if view == "groupped": @@ -155,7 +158,7 @@ def display_errors(scope, errors, severities=[], messages=[]): else: fg = "blue" summary = f"{scope}: {pluralize(len(errors), 'issue')} detected." 
- click.secho(summary, bold=True, fg="red") + click.secho(summary, fg=fg) for error, severity, message in zip(errors, severities, messages): if severity == Severity.ERROR: fg = "red" From cf34de38d36ab190a15c56be5019296fe771d339 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 27 Sep 2022 22:52:09 -0400 Subject: [PATCH 067/124] Import dandi.validate.Severity in local scope to prevent heavy dep load --- dandi/cli/cmd_validate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index a90a5fc55..e4fa2a255 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -5,7 +5,6 @@ from .base import devel_debug_option, devel_option, map_to_click_exceptions from ..utils import pluralize -from ..validate import Severity @click.command() @@ -38,6 +37,7 @@ def validate_bids( dandi validate-bids /my/path """ + from ..validate import Severity from ..validate import validate_bids as validate_bids_ validator_result = validate_bids_( # Controller @@ -151,6 +151,8 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): def display_errors(scope, errors, severities=[], messages=[]): + from ..validate import Severity + if Severity.ERROR in severities: fg = "red" elif Severity.WARNING in severities: From e3636704cf24bb1973b5cec7f5d3c2b896c06d36 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 29 Sep 2022 05:51:25 -0400 Subject: [PATCH 068/124] Unified error reporting logic and draft grouping implementation. 
--- dandi/cli/cmd_validate.py | 183 +++++++++++++++++++++++++++----------- 1 file changed, 133 insertions(+), 50 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index e4fa2a255..ea5d56836 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -21,6 +21,11 @@ is_flag=True, help="Whether to write a report under a unique path in the DANDI log directory.", ) +@click.option( + "--grouping", + "-g", + help="Write report under path, this option implies `--report/-r`.", +) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @map_to_click_exceptions def validate_bids( @@ -28,9 +33,16 @@ def validate_bids( schema, report, report_path, + grouping=None, ): """Validate BIDS paths. + Parameters + ---------- + + grouping : str, optional + A string which is either "", "error", or "path" — "error" implemented. + Notes ----- Used from bash, eg: @@ -47,18 +59,40 @@ def validate_bids( schema_version=schema, ) + issues = [] for i in validator_result: if not i.severity: continue - if i.path: - scope = i.path - elif i.path_regex: - scope = i.path_regex else: - scope = i.dataset_path - display_errors(scope, [i.id], [i.severity], [i.message]) + issues.append(i) + + purviews = [ + list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues + ] + if not grouping: + display_errors( + purviews, + [i.id for i in issues], + [i.severity for i in issues], + [i.message for i in issues], + ) + elif grouping == "path": + for purview in purviews: + applies_to = [ + i for i in issues if purview in [i.path, i.path_regex, i.dataset_path] + ] + display_errors( + [purview], + [i.id for i in applies_to], + [i.severity for i in applies_to], + [i.message for i in applies_to], + ) + else: + raise NotImplementedError( + "The `grouping` parameter values currently supported are " "path or None." 
+ ) - validation_errors = [e for e in validator_result if e.severity == Severity.ERROR] + validation_errors = [i for i in issues if i.severity == Severity.ERROR] if validation_errors: raise SystemExit(1) @@ -75,10 +109,18 @@ def validate_bids( @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions -def validate(paths, schema=None, devel_debug=False, allow_any_path=False): +def validate( + paths, schema=None, devel_debug=False, allow_any_path=False, grouping=None +): """Validate files for NWB and DANDI compliance. Exits with non-0 exit code if any file is not compliant. + + Parameters + ---------- + + grouping : str, optional + A string which is either "", "error", or "path" — "error" not yet implemented. """ from ..pynwb_utils import ignore_benign_pynwb_warnings from ..validate import validate as validate_ @@ -96,42 +138,35 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): # at this point, so we ignore it although ideally there should be a formal # way to get relevant warnings (not errors) from PyNWB ignore_benign_pynwb_warnings() - view = "one-at-a-time" # TODO: rename, add groupped - all_files_errors = {} + all_files_error_messages = {} nfiles = 0 - for path, errors in validate_( + for path, messages in validate_( *paths, schema_version=schema, devel_debug=devel_debug, allow_any_path=allow_any_path, ): nfiles += 1 - if view == "one-at-a-time": - if errors: - display_errors( - path, ["NWBError"] * len(errors), [""] * len(errors), errors - ) - all_files_errors[path] = errors - - if view == "groupped": - # TODO: Most likely we want to summarize errors across files since they - # are likely to be similar - # TODO: add our own criteria for validation (i.e. 
having needed metadata) - - # # can't be done since fails to compare different types of errors - # all_errors = sum(errors.values(), []) - # all_error_types = [] - # errors_unique = sorted(set(all_errors)) - # from collections import Counter - # # Let's make it - # print( - # "{} unique errors in {} files".format( - # len(errors_unique), len(errors)) - # ) - raise NotImplementedError("TODO") - - files_with_errors = [f for f, errors in all_files_errors.items() if errors] + if not grouping: + error_paths = [path] * len(messages) + errors = ["NWBError"] * len(messages) + severities = [""] * len(messages) + display_errors(error_paths, errors, severities, messages) + elif grouping == "path": + error_paths = [path] + errors = ["NWBError"] * len(messages) + severities = [""] * len(messages) + display_errors(error_paths, errors, severities, messages) + else: + raise NotImplementedError( + "The `grouping` parameter values currently supported are " + "path or None." + ) + + all_files_error_messages[path] = messages + + files_with_errors = [f for f, errors in all_files_error_messages.items() if errors] if files_with_errors: click.secho( @@ -150,23 +185,71 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False): ) -def display_errors(scope, errors, severities=[], messages=[]): +def _get_severity_color(severities): from ..validate import Severity if Severity.ERROR in severities: - fg = "red" + return "red" elif Severity.WARNING in severities: - fg = "orange" + return "green" else: - fg = "blue" - summary = f"{scope}: {pluralize(len(errors), 'issue')} detected." 
- click.secho(summary, fg=fg) - for error, severity, message in zip(errors, severities, messages): - if severity == Severity.ERROR: - fg = "red" - elif severity == Severity.WARNING: - fg = "green" - else: - fg = "blue" - error_message = f" [{error}] {message}" + return "blue" + + +def display_errors(purviews, errors, severities, messages): + """ + Unified error display for validation CLI, which auto-resolves grouping logic based on + the length of input lists. + + Parameters + ---------- + purviews: list of str + errors: list of str + severities: list of dandi.validate.Severity + messages: list of str + + + Notes + ----- + * There is a bit of roundabout and currently untestable logic to deal with potential cases + where the same error has multiple error message, could be removed in the future and removed + by assert if this won't ever be the case. + """ + + if all([len(i) == 1 for i in [purviews, errors, severities, messages]]): + fg = _get_severity_color(severities) + error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}" click.secho(error_message, fg=fg) + elif len(purviews) == 1: + group_message = f"{purviews[0]}: {pluralize(len(errors), 'issue')} detected." + fg = _get_severity_color(severities) + click.secho(group_message, fg=fg) + for error, severity, message in zip(errors, severities, messages): + error_message = f" [{error}] {message}" + fg = _get_severity_color([severity]) + click.secho(error_message, fg=fg) + elif len(errors) == 1: + fg = _get_severity_color(severities) + group_message = ( + f"{errors[0]}: detected in {pluralize(len(purviews), 'purviews')}" + ) + if len(set(messages)) == 1: + error_message += f" — {messages[0]}." + click.secho(group_message, fg=fg) + for purview, severity in zip(purviews, severities): + error_message = f" {purview}" + fg = _get_severity_color([severity]) + click.secho(error_message, fg=fg) + else: + error_message += "." 
+ for purview, severity, message in zip(purviews, severities, messages): + error_message = f" {purview} — {message}" + fg = _get_severity_color([severity]) + click.secho(error_message, fg=fg) + else: + for purview, error, severity, message in zip( + purviews, errors, severities, messages + ): + fg = _get_severity_color([severity]) + error_message = f"[{error}] {purview} — {message}" + click.secho(error_message, fg=fg) From 6a56acdd34aeff6b51b32ef2e4ba01e4fc105f8a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 29 Sep 2022 13:33:04 -0400 Subject: [PATCH 069/124] Corrected warning color --- dandi/cli/cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index ea5d56836..d105d2a0f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -191,7 +191,7 @@ def _get_severity_color(severities): if Severity.ERROR in severities: return "red" elif Severity.WARNING in severities: - return "green" + return "yellow" else: return "blue" From b4ad1bcfe62fc3ff5c47797ba7f3ef5ad3f3cab8 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 29 Sep 2022 15:21:21 -0400 Subject: [PATCH 070/124] =?UTF-8?q?BROKEN=20=E2=80=94=20reusage=20of=20gen?= =?UTF-8?q?eric=20validator=20output=20for=20NWB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dandi/files/bases.py | 75 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index d43bf41a9..33dbfa1b7 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -19,6 +19,7 @@ from dandischema.models import get_schema_version from etelemetry import get_project from nwbinspector import Importance, inspect_nwb, load_config +import nwbinspector.__version__ as nwbinspector_version from nwbinspector.utils import get_package_version from packaging.version import Version from pydantic import 
ValidationError @@ -31,6 +32,7 @@ from dandi.pynwb_utils import validate as pynwb_validate from dandi.support.digests import get_dandietag, get_digest from dandi.utils import yaml_load +from dandi.validate import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -38,6 +40,15 @@ _required_dandiset_metadata_fields = ["identifier", "name", "description"] +NWB_IMPORTANCE_TO_DANDI_SEVERITY = { + "ERROR": Severity.ERROR, + "PYNWB_VALIDATION": Severity.ERROR, + "CRITICAL": Severity.ERROR, # when using --config dandi + "BEST_PRACTICE_VIOLATION": Severity.WARNING, + "BEST_PRACTICE_SUGGESTION": Severity.HINT, +} + + @dataclass # type: ignore[misc] # class DandiFile(ABC): """Abstract base class for local files & directories of interest to DANDI""" @@ -463,6 +474,8 @@ def get_validation_errors( devel_debug: bool = False, ) -> list[str]: errors: list[str] = pynwb_validate(self.filepath, devel_debug=devel_debug) + # Q gh-943 do we want to manage this thing as an error? + # It should be a separate thing possibly via logging IMHO. if schema_version is not None: errors.extend( super().get_validation_errors( @@ -473,15 +486,23 @@ def get_validation_errors( # make sure that we have some basic metadata fields we require try: # Ensure latest version of NWB Inspector is installed and used client-side + # Q gh-943 + # Are you sure we want to do this? Maybe some version-selection + # logic analogous to BIDS would be better, so people can prospectively specify + # what they used.... even if for the time being we're just chasing upstream. try: max_version = Version( get_project(repo="NeurodataWithoutBorders/nwbinspector")[ "version" ] ) + # Any reason we're not using `nwbinspector.__version__` ? current_version = get_package_version(name="nwbinspector") if current_version < max_version: + # Q gh-943 do we want to manage this thing as an error? + # It should be a separate thing possibly via logging IMHO. 
+ # commenting out temporarily for tests errors.append( f"NWB Inspector version {current_version} is installed - please " f"use the latest release of the NWB Inspector ({max_version}) " @@ -496,20 +517,50 @@ def get_validation_errors( type(e).__name__, str(e), ) + origin = ValidationOrigin( + name="nwbinspector", + version=nwbinspector_version, + ) - # Run NWB Inspector with 'dandi' config - # CRITICAL errors and above are equivalent to validation errors - errors.extend( - [ - error.message - for error in inspect_nwb( - nwbfile_path=self.filepath, - skip_validate=True, - config=load_config(filepath_or_keyword="dandi"), - importance_threshold=Importance.CRITICAL, + scope = ( + Scope.FILE + ) # There's technically one folder-level thing but it gets registered at file level + for error in inspect_nwb( + nwbfile_path=self.filepath, + skip_validate=True, + config=load_config(filepath_or_keyword="dandi"), + importance_threshold=Importance.CRITICAL, + ): + id = error.check_function_name + asset_paths = [error.file_path] + # Assuming multiple sessions per multiple subjects, otherwise nesting level + # might differ + dandiset_path = error.file_path.parent.parent + dataset_path = error.dandiset_path + metadata = None + # assuming this is HDF5 Group information within the file + path = error.location + # unfortunate name collision; the InspectorMessage.severity is used only for + # minor ordering of final report - will have to remove the + # `importance_treshold` currently used to filter DANDI-level CRITICAL + # evaluations as errors vs. 
validation results + severity = NWB_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] + message = error.message + + errors.append( + ValidationResult( + asset_paths=asset_paths, + origin=origin, + severity=severity, + id=id, + scope=scope, + metadata=metadata, + path=path, + message=message, + dataset_path=dataset_path, + dandiset_path=dandiset_path, ) - ] - ) + ) except Exception as e: if devel_debug: raise From de27873cfba0bf5fd1c1a85527cfc570292d5f78 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 16:41:53 -0400 Subject: [PATCH 071/124] RF+BF: address circular import, unify more of validations into ValidationResults TODO: pretty much go through all other instances were "str" is returned for an error. Most of them could be caught up via `tox -e typing` or just inspecting other code paths. We also raised following concerns/topics for possible future development - do not return error structs for exception handling where we catch errors from opening files etc. But may be there was a reason why we decided to work with it this way instead of just handling exceptions. - there is .location (within .nwb / hdf5 file) to point more specifically within nwb file internal structure. We need to provide location for that in our data structure. But as we have asset_paths, we might want to do that as a dict. 
--- dandi/cli/cmd_validate.py | 6 +- dandi/files/bases.py | 122 ++++++++++++++++++++++---------------- dandi/pynwb_utils.py | 6 +- dandi/validate.py | 36 +---------- dandi/validate_types.py | 46 ++++++++++++++ 5 files changed, 126 insertions(+), 90 deletions(-) create mode 100644 dandi/validate_types.py diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index d105d2a0f..d1b3351ea 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -49,8 +49,8 @@ def validate_bids( dandi validate-bids /my/path """ - from ..validate import Severity from ..validate import validate_bids as validate_bids_ + from ..validate_types import Severity validator_result = validate_bids_( # Controller *paths, @@ -186,7 +186,7 @@ def validate( def _get_severity_color(severities): - from ..validate import Severity + from ..validate_types import Severity if Severity.ERROR in severities: return "red" @@ -205,7 +205,7 @@ def display_errors(purviews, errors, severities, messages): ---------- purviews: list of str errors: list of str - severities: list of dandi.validate.Severity + severities: list of dandi.validate_types.Severity messages: list of str diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 33dbfa1b7..12db75990 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -13,18 +13,19 @@ from typing import Any, BinaryIO, Generic, Optional from xml.etree.ElementTree import fromstring +import dandischema from dandischema.digests.dandietag import DandiETag from dandischema.models import BareAsset, CommonModel from dandischema.models import Dandiset as DandisetMeta from dandischema.models import get_schema_version from etelemetry import get_project from nwbinspector import Importance, inspect_nwb, load_config -import nwbinspector.__version__ as nwbinspector_version from nwbinspector.utils import get_package_version from packaging.version import Version from pydantic import ValidationError import requests +import dandi from dandi import 
get_logger from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient from dandi.metadata import get_default_metadata, nwb2asset @@ -32,7 +33,7 @@ from dandi.pynwb_utils import validate as pynwb_validate from dandi.support.digests import get_dandietag, get_digest from dandi.utils import yaml_load -from dandi.validate import Scope, Severity, ValidationOrigin, ValidationResult +from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -40,7 +41,7 @@ _required_dandiset_metadata_fields = ["identifier", "name", "description"] -NWB_IMPORTANCE_TO_DANDI_SEVERITY = { +NWBI_IMPORTANCE_TO_DANDI_SEVERITY = { "ERROR": Severity.ERROR, "PYNWB_VALIDATION": Severity.ERROR, "CRITICAL": Severity.ERROR, # when using --config dandi @@ -81,7 +82,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: """ Attempt to validate the file and return a list of errors encountered """ @@ -106,7 +107,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: with open(self.filepath) as f: meta = yaml_load(f, typ="safe") if schema_version is None: @@ -130,7 +131,7 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [str(e)] + return [str(e)] # TODO: proper record, scope dandiset except Exception as e: if devel_debug: raise @@ -140,7 +141,9 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [f"Failed to initialize Dandiset meta: {e}"] + return [ + f"Failed to initialize Dandiset meta: {e}" + ] # TODO: proper record return [] @@ -177,7 +180,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: if schema_version is not None: current_version = get_schema_version() if schema_version != current_version: @@ -196,7 
+199,23 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [str(e)] + # TODO: how do we get **all** errors from validation - there must be a way + return [ + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id="dandischema.TODO", + scope=Scope.FILE, + # metadata=metadata, + path=self.filepath, # note that it is not relative .path + message=str(e), + # TODO? dataset_path=dataset_path, + # TODO? dandiset_path=dandiset_path, + ) + ] except Exception as e: if devel_debug: raise @@ -206,7 +225,22 @@ def get_validation_errors( e, extra={"validating": True}, ) - return [f"Failed to read metadata: {e}"] + return [ + ValidationResult( + origin=ValidationOrigin( + name="dandi", + version=dandi.__version__, + ), + severity=Severity.ERROR, + id="dandi.SOFTWARE_ERROR", + scope=Scope.FILE, + # metadata=metadata, + path=self.filepath, # note that it is not relative .path + message=f"Failed to read metadata: {e}", + # TODO? dataset_path=dataset_path, + # TODO? dandiset_path=dandiset_path, + ) + ] return [] else: # TODO: Do something else? @@ -472,8 +506,10 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: - errors: list[str] = pynwb_validate(self.filepath, devel_debug=devel_debug) + ) -> list[ValidationResult]: + errors: list[ValidationResult] = pynwb_validate( + self.filepath, devel_debug=devel_debug + ) # Q gh-943 do we want to manage this thing as an error? # It should be a separate thing possibly via logging IMHO. if schema_version is not None: @@ -485,31 +521,25 @@ def get_validation_errors( else: # make sure that we have some basic metadata fields we require try: + current_version = get_package_version(name="nwbinspector") + # Ensure latest version of NWB Inspector is installed and used client-side - # Q gh-943 - # Are you sure we want to do this? 
Maybe some version-selection - # logic analogous to BIDS would be better, so people can prospectively specify - # what they used.... even if for the time being we're just chasing upstream. try: max_version = Version( get_project(repo="NeurodataWithoutBorders/nwbinspector")[ "version" ] ) - # Any reason we're not using `nwbinspector.__version__` ? - current_version = get_package_version(name="nwbinspector") if current_version < max_version: - # Q gh-943 do we want to manage this thing as an error? - # It should be a separate thing possibly via logging IMHO. - # commenting out temporarily for tests - errors.append( - f"NWB Inspector version {current_version} is installed - please " - f"use the latest release of the NWB Inspector ({max_version}) " + lgr.warning( + "NWB Inspector version %s is installed - please " + "use the latest release of the NWB Inspector (%s) " "when performing `dandi validate`. To update, run " - "`pip install -U nwbinspector` if you installed it with `pip`." + "`pip install -U nwbinspector` if you installed it with `pip`.", + current_version, + max_version, ) - return errors except Exception as e: # In case of no internet connection or other error lgr.warning( @@ -517,48 +547,36 @@ def get_validation_errors( type(e).__name__, str(e), ) + origin = ValidationOrigin( name="nwbinspector", - version=nwbinspector_version, + version=str(current_version), ) - scope = ( - Scope.FILE - ) # There's technically one folder-level thing but it gets registered at file level for error in inspect_nwb( nwbfile_path=self.filepath, skip_validate=True, config=load_config(filepath_or_keyword="dandi"), importance_threshold=Importance.CRITICAL, ): - id = error.check_function_name - asset_paths = [error.file_path] - # Assuming multiple sessions per multiple subjects, otherwise nesting level - # might differ - dandiset_path = error.file_path.parent.parent - dataset_path = error.dandiset_path - metadata = None - # assuming this is HDF5 Group information within the file - 
path = error.location # unfortunate name collision; the InspectorMessage.severity is used only for # minor ordering of final report - will have to remove the - # `importance_treshold` currently used to filter DANDI-level CRITICAL + # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. validation results - severity = NWB_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] - message = error.message - + severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] errors.append( ValidationResult( - asset_paths=asset_paths, origin=origin, severity=severity, - id=id, - scope=scope, - metadata=metadata, - path=path, - message=message, - dataset_path=dataset_path, - dandiset_path=dandiset_path, + id=f"NWBI.{error.check_function_name}", + scope=Scope.FILE, + path=Path(error.file_path), + # TODO inner_path=error.location, + message=error.message, + # Assuming multiple sessions per multiple subjects, + # otherwise nesting level might differ + dataset_path=Path(error.file_path).parent.parent, # TODO + dandiset_path=Path(error.file_path).parent, # TODO ) ) except Exception as e: @@ -570,6 +588,8 @@ def get_validation_errors( e, extra={"validating": True}, ) + # TODO: might reraise instead of making it into an error + # TODO: provide proper ValidationResult struct errors.append(f"Failed to inspect NWBFile: {e}") return errors diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 8c2f0a1c8..ab0759d51 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -23,6 +23,7 @@ metadata_nwb_subject_fields, ) from .utils import get_module_version, is_url +from .validate_types import ValidationResult lgr = get_logger() @@ -319,7 +320,9 @@ def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None: @validate_cache.memoize_path -def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]: +def validate( + path: Union[str, Path], devel_debug: bool = False +) -> List[ValidationResult]: """Run 
validation on a file and return errors In case of an exception being thrown, an error message added to the @@ -334,6 +337,7 @@ def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]: try: with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader: errors = pynwb.validate(reader) + # TODO: return ValidationResult structs lgr.warning( "pynwb validation errors for %s: %s", path, diff --git a/dandi/validate.py b/dandi/validate.py index dc8bb5471..914e993c6 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,7 +1,5 @@ from __future__ import annotations -from dataclasses import dataclass -from enum import Enum import os from pathlib import Path from typing import Iterator, List, Optional, Tuple, Union @@ -10,6 +8,7 @@ from .files import find_dandi_files from .utils import find_parent_directory_containing +from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult BIDS_TO_DANDI = { "subject": "subject_id", @@ -17,39 +16,6 @@ } -@dataclass -class ValidationResult: - id: str - origin: ValidationOrigin - scope: Scope - asset_paths: Optional[list[str]] = None - dandiset_path: Optional[Path] = None - dataset_path: Optional[Path] = None - message: Optional[str] = None - metadata: Optional[dict] = None - path: Optional[Path] = None - path_regex: Optional[str] = None - severity: Optional[Severity] = None - - -@dataclass -class ValidationOrigin: - name: str - version: str - - -class Severity(Enum): - ERROR = "ERROR" - WARNING = "WARNING" - HINT = "HINT" - - -class Scope(Enum): - FILE = "file" - DANDISET = "dandiset" - DATASET = "dataset" - - def validate_bids( *paths: Union[str, Path], schema_version: Optional[str] = None, diff --git a/dandi/validate_types.py b/dandi/validate_types.py new file mode 100644 index 000000000..0f168ebc3 --- /dev/null +++ b/dandi/validate_types.py @@ -0,0 +1,46 @@ +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Optional + + +@dataclass +class 
ValidationOrigin: + name: str + version: str + + +class Severity(Enum): + ERROR = "ERROR" + WARNING = "WARNING" + HINT = "HINT" + + +class Scope(Enum): + FILE = "file" + DANDISET = "dandiset" + DATASET = "dataset" + + +@dataclass +class ValidationResult: + id: str + origin: ValidationOrigin + scope: Scope + # asset_paths, if not populated, assumes [.path], but could be smth like + # {"path": "task-broken_bold.json", + # "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json", + # "sub-02/func/sub-02_task-broken_bold.json"]} + asset_paths: Optional[list[str]] = None + dandiset_path: Optional[Path] = None + dataset_path: Optional[Path] = None + # TODO: locations analogous to nwbinspector.InspectorMessage.location + # but due to multiple possible asset_paths, we might want to have it + # as a dict to point to location in some or each affected assets + message: Optional[str] = None + metadata: Optional[dict] = None + # ??? should it become a list e.g. for errors which rely on + # multiple files, like mismatch between .nii.gz header and .json sidecar + path: Optional[Path] = None + path_regex: Optional[str] = None + severity: Optional[Severity] = None From 6b7e680f5d3aebc2acc131f2f93f89056d76a047 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 16:50:13 -0400 Subject: [PATCH 072/124] ENH: add within_asset_paths to point within files/assets --- dandi/files/bases.py | 7 ++++++- dandi/validate_types.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 12db75990..c1088607d 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -564,6 +564,11 @@ def get_validation_errors( # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. 
validation results severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] + kw = {} + if error.location: + kw["within_asset_paths"] = { + error.file_path: error.location, + } errors.append( ValidationResult( origin=origin, @@ -571,12 +576,12 @@ def get_validation_errors( id=f"NWBI.{error.check_function_name}", scope=Scope.FILE, path=Path(error.file_path), - # TODO inner_path=error.location, message=error.message, # Assuming multiple sessions per multiple subjects, # otherwise nesting level might differ dataset_path=Path(error.file_path).parent.parent, # TODO dandiset_path=Path(error.file_path).parent, # TODO + **kw, ) ) except Exception as e: diff --git a/dandi/validate_types.py b/dandi/validate_types.py index 0f168ebc3..24d775204 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -32,6 +32,9 @@ class ValidationResult: # "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json", # "sub-02/func/sub-02_task-broken_bold.json"]} asset_paths: Optional[list[str]] = None + # e.g. 
path within hdf5 file hierarchy + # As a dict we will map asset_paths into location within them + within_asset_paths: Optional[dict[str, str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None # TODO: locations analogous to nwbinspector.InspectorMessage.location From ccf9491c633037268d4ff0859a040adcd32afb20 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 16:58:02 -0400 Subject: [PATCH 073/124] BF: map importance to our severity based on enum"s name --- dandi/files/bases.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index c1088607d..8397e6f0b 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -41,7 +41,7 @@ _required_dandiset_metadata_fields = ["identifier", "name", "description"] -NWBI_IMPORTANCE_TO_DANDI_SEVERITY = { +NWBI_IMPORTANCE_TO_DANDI_SEVERITY: dict[Importance.name, Severity] = { "ERROR": Severity.ERROR, "PYNWB_VALIDATION": Severity.ERROR, "CRITICAL": Severity.ERROR, # when using --config dandi @@ -563,7 +563,7 @@ def get_validation_errors( # minor ordering of final report - will have to remove the # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. 
validation results - severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance] + severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance.name] kw = {} if error.location: kw["within_asset_paths"] = { From b3106f49eaa5b9749656c4ad30a3917f100fca50 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 29 Sep 2022 17:10:30 -0400 Subject: [PATCH 074/124] ENH+RF: make sure we issue a warning about nwbinspector only once --- dandi/files/bases.py | 62 ++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 8397e6f0b..99cfabdc0 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -521,36 +521,9 @@ def get_validation_errors( else: # make sure that we have some basic metadata fields we require try: - current_version = get_package_version(name="nwbinspector") - - # Ensure latest version of NWB Inspector is installed and used client-side - try: - max_version = Version( - get_project(repo="NeurodataWithoutBorders/nwbinspector")[ - "version" - ] - ) - - if current_version < max_version: - lgr.warning( - "NWB Inspector version %s is installed - please " - "use the latest release of the NWB Inspector (%s) " - "when performing `dandi validate`. 
To update, run " - "`pip install -U nwbinspector` if you installed it with `pip`.", - current_version, - max_version, - ) - - except Exception as e: # In case of no internet connection or other error - lgr.warning( - "Failed to retrieve NWB Inspector version due to %s: %s", - type(e).__name__, - str(e), - ) - origin = ValidationOrigin( name="nwbinspector", - version=str(current_version), + version=str(_get_nwb_inspector_version()), ) for error in inspect_nwb( @@ -711,3 +684,36 @@ def _check_required_fields(d: dict, required: list[str]) -> list[str]: if v in ("REQUIRED", "PLACEHOLDER"): errors += [f"Required field {f!r} has value {v!r}"] return errors + + +_current_nwbinspector_version = None + + +def _get_nwb_inspector_version(): + global _current_nwbinspector_version + if _current_nwbinspector_version is not None: + return _current_nwbinspector_version + _current_nwbinspector_version = get_package_version(name="nwbinspector") + # Ensure latest version of NWB Inspector is installed and used client-side + try: + max_version = Version( + get_project(repo="NeurodataWithoutBorders/nwbinspector")["version"] + ) + + if _current_nwbinspector_version < max_version: + lgr.warning( + "NWB Inspector version %s is installed - please " + "use the latest release of the NWB Inspector (%s) " + "when performing `dandi validate`. To update, run " + "`pip install -U nwbinspector` if you installed it with `pip`.", + _current_nwbinspector_version, + max_version, + ) + + except Exception as e: # In case of no internet connection or other error + lgr.warning( + "Failed to retrieve NWB Inspector version due to %s: %s", + type(e).__name__, + str(e), + ) + return _current_nwbinspector_version From 49c9a0a58c4b4da00fc5edea700f745fd3185cc1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 02:39:43 -0400 Subject: [PATCH 075/124] Integrating pydantic error into custom object. 
--- dandi/files/bases.py | 62 ++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 99cfabdc0..fccfcfc5b 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -125,25 +125,11 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - lgr.warning( - "Validation error for %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) - return [str(e)] # TODO: proper record, scope dandiset + return _pydantic_errors_to_validation_results(e, self.filepath) except Exception as e: if devel_debug: raise - lgr.warning( - "Unexpected validation error for %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) - return [ - f"Failed to initialize Dandiset meta: {e}" - ] # TODO: proper record + return _pydantic_errors_to_validation_results(e, self.filepath) return [] @@ -560,14 +546,8 @@ def get_validation_errors( except Exception as e: if devel_debug: raise - lgr.warning( - "Failed to inspect NWBFile in %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) # TODO: might reraise instead of making it into an error - # TODO: provide proper ValidationResult struct + return _pydantic_errors_to_validation_results(e, self.filepath) errors.append(f"Failed to inspect NWBFile: {e}") return errors @@ -717,3 +697,39 @@ def _get_nwb_inspector_version(): str(e), ) return _current_nwbinspector_version + + +def _pydantic_errors_to_validation_results( + errors: list[dict], + file_path: str, +) -> list[ValidationResult]: + """Convert list of dict from pydantic into our custom object.""" + out = [] + for e in errors: + id = dict( + id=":".join( + filter( + bool, + ( + "dandischema", + e.get("type", "UNKNOWN"), + "+".join(e.get("loc", [])), + ), + ) + ) + ) + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id=id, + scope=Scope.DANDISET, + 
path=Path(file_path), + message=e.get("message", None), + # TODO? dataset_path=dataset_path, + # TODO? dandiset_path=dandiset_path, + ) + out.append(e) + return e From 9861be1d50ac19f4d9644b20bb6fbead83ccc546 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 02:47:03 -0400 Subject: [PATCH 076/124] Corrected function return --- dandi/files/bases.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index fccfcfc5b..cd033d20e 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -718,18 +718,19 @@ def _pydantic_errors_to_validation_results( ) ) ) - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), - severity=Severity.ERROR, - id=id, - scope=Scope.DANDISET, - path=Path(file_path), - message=e.get("message", None), - # TODO? dataset_path=dataset_path, - # TODO? dandiset_path=dandiset_path, + out.append( + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id=id, + scope=Scope.DANDISET, + path=Path(file_path), + message=e.get("message", None), + # TODO? dataset_path=dataset_path, + # TODO? 
dandiset_path=dandiset_path, + ) ) - out.append(e) - return e + return out From f387998a31fa37008db7d5ddff27faf1a8f03940 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 03:06:17 -0400 Subject: [PATCH 077/124] Parsing further errors into the custom dandi validation object --- dandi/files/bases.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index cd033d20e..060e3cc66 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -113,7 +113,9 @@ def get_validation_errors( if schema_version is None: schema_version = meta.get("schemaVersion") if schema_version is None: - return _check_required_fields(meta, _required_dandiset_metadata_fields) + return _check_required_fields( + meta, _required_dandiset_metadata_fields, self.filepath + ) else: current_version = get_schema_version() if schema_version != current_version: @@ -655,14 +657,42 @@ def _upload_blob_part( } -def _check_required_fields(d: dict, required: list[str]) -> list[str]: +def _check_required_fields( + d: dict, required: list[str], file_path: str +) -> list[ValidationResult]: errors: list[str] = [] for f in required: v = d.get(f, None) if not v or (isinstance(v, str) and not v.strip()): - errors += [f"Required field {f!r} has no value"] + message = f"Required field {f!r} has no value" + errors.append( + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id="dandischema.requred_field", + scope=Scope.FILE, + path=Path(file_path), + message=message, + ) + ) if v in ("REQUIRED", "PLACEHOLDER"): - errors += [f"Required field {f!r} has value {v!r}"] + message = f"Required field {f!r} has value {v!r}" + errors.append( + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.WARNING, + id="dandischema.placeholder_value", + 
scope=Scope.FILE, + path=Path(file_path), + message=message, + ) + ) return errors From 04ee73c1ac89fe3a61e7c72912ce3f5e3a0e51e2 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 13:55:45 -0400 Subject: [PATCH 078/124] Trying to fix types --- dandi/files/bases.py | 6 +++--- dandi/validate.py | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 060e3cc66..95af1544c 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -114,7 +114,7 @@ def get_validation_errors( schema_version = meta.get("schemaVersion") if schema_version is None: return _check_required_fields( - meta, _required_dandiset_metadata_fields, self.filepath + meta, _required_dandiset_metadata_fields, str(self.filepath) ) else: current_version = get_schema_version() @@ -131,7 +131,7 @@ def get_validation_errors( except Exception as e: if devel_debug: raise - return _pydantic_errors_to_validation_results(e, self.filepath) + return _pydantic_errors_to_validation_results(e, str(self.filepath)) return [] @@ -696,7 +696,7 @@ def _check_required_fields( return errors -_current_nwbinspector_version = None +_current_nwbinspector_version: str = None def _get_nwb_inspector_version(): diff --git a/dandi/validate.py b/dandi/validate.py index 914e993c6..4430d4dc2 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -148,7 +148,7 @@ def validate( schema_version: Optional[str] = None, devel_debug: bool = False, allow_any_path: bool = False, -) -> Iterator[Tuple[str, List[str]]]: +) -> Iterator[ValidationResult]: """Validate content Parameters @@ -162,9 +162,6 @@ def validate( errors for a path """ for df in find_dandi_files(*paths, dandiset_path=None, allow_all=allow_any_path): - yield ( - str(df.filepath), - df.get_validation_errors( + yield from df.get_validation_errors( schema_version=schema_version, devel_debug=devel_debug - ), - ) + ) From d161b463ec493655684b26474d273e0e98b7d1b8 Mon Sep 17 00:00:00 2001 From: 
Horea Christian Date: Fri, 30 Sep 2022 16:34:10 -0400 Subject: [PATCH 079/124] Type fixing --- dandi/files/bids.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d7ca279b1..f2f5dcd7c 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -5,7 +5,7 @@ from datetime import datetime from pathlib import Path from threading import Lock -from typing import Any, Optional +from typing import Any, List, Optional import weakref from dandischema.models import BareAsset @@ -14,6 +14,7 @@ from .zarr import ZarrAsset from ..metadata import add_common_metadata, prepare_metadata from ..misctypes import Digest +from ..validate_types import ValidationResult BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) @@ -65,17 +66,15 @@ def _validate(self) -> None: str(asset.filepath) for asset in self.dataset_files ] results = validate_bids(*bids_paths) - self._dataset_errors: list[str] = [] - self._asset_errors = defaultdict(list) + self._dataset_errors: list[ValidationResult] = [] + self._asset_errors: dict[str, list[ValidationResult]] = defaultdict(list) self._asset_metadata = defaultdict(dict) for result in results: if result.id in BIDS_ASSET_ERRORS: assert result.path - assert result.message - self._asset_errors[str(result.path)].append(result.message) + self._asset_errors[str(result.path)].append(result) elif result.id in BIDS_DATASET_ERRORS: - assert result.message - self._dataset_errors.append(result.message) + self._dataset_errors.append(result) elif result.id == "BIDS.MATCH": assert result.path bids_path = result.path.relative_to(self.bids_root).as_posix() @@ -104,10 +103,10 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: self._validate() assert self._dataset_errors is not None - return list(self._dataset_errors) 
+ return self._dataset_errors + self._asset_errors.values() # get_metadata(): inherit use of default metadata from LocalFileAsset @@ -154,7 +153,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: return self.bids_dataset_description.get_asset_errors(self) def get_metadata( @@ -208,7 +207,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: return ZarrBIDSAsset.get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) @@ -227,7 +226,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> List[ValidationResult]: return GenericAsset.get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) From 482246a533a084b17081f228135885c929399ff0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Fri, 30 Sep 2022 17:02:39 -0400 Subject: [PATCH 080/124] New object structure for zarr validation --- dandi/files/zarr.py | 58 ++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 9204eb5a8..0b04a9d83 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -11,6 +11,7 @@ from time import sleep from typing import Any, Optional, cast +from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult from dandischema.digests.zarr import get_checksum from dandischema.models import BareAsset, DigestType import requests @@ -184,36 +185,59 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: try: data = zarr.open(self.filepath) except Exception as e: if devel_debug: raise - lgr.warning( - "Error opening %s: %s: %s", - 
self.filepath, - type(e).__name__, - e, - extra={"validating": True}, - ) - return [str(e)] + return [ + ValidationResult( + origin=ValidationOrigin( + name="zarr", + version= zarr.version.version, + ), + severity=Severity.ERROR, + id="zarr.cannot_open", + scope=Scope.FILE, + path=self.filepath, + message="Error opening file.", + ) + ] if isinstance(data, zarr.Group) and not data: - msg = "Zarr group is empty" - if devel_debug: - raise ValueError(msg) - lgr.warning("%s: %s", self.filepath, msg, extra={"validating": True}) - return [msg] + return [ + ValidationResult( + origin=ValidationOrigin( + name="zarr", + version= zarr.version.version, + ), + severity=Severity.ERROR, + id="zarr.empty_group", + scope=Scope.FILE, + path=self.filepath, + message="Zarr group is empty.", + ) + ] try: next(self.filepath.glob(f"*{os.sep}" + os.sep.join(["*"] * MAX_ZARR_DEPTH))) except StopIteration: pass else: - msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep" if devel_debug: raise ValueError(msg) - lgr.warning("%s: %s", self.filepath, msg, extra={"validating": True}) - return [msg] + return [ + ValidationResult( + origin=ValidationOrigin( + name="zarr", + version= zarr.version.version, + ), + severity=Severity.ERROR, + id="zarr.tree_depth_exceeded", + scope=Scope.FILE, + path=self.filepath, + message=f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep", + ) + ] # TODO: Should this be appended to the above errors? 
return super().get_validation_errors( schema_version=schema_version, devel_debug=devel_debug From 1691547f900352a112789b5b6cd2bc7455d3b8b5 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 11:52:43 -0400 Subject: [PATCH 081/124] Further type fixes --- dandi/files/bases.py | 19 +++++++++++++------ dandi/files/bids.py | 14 ++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 95af1544c..270f602c5 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -181,12 +181,19 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - lgr.warning( - "Validation error for %s: %s", - self.filepath, - e, - extra={"validating": True}, - ) + return [ + ValidationResult( + origin=ValidationOrigin( + name="dandischema", + version=dandischema.__version__, + ), + severity=Severity.ERROR, + id="dandischema.validationerror", + scope=Scope.FILE, + path=self.filepath, # note that it is not relative .path + message=str(e), + ) + ] # TODO: how do we get **all** errors from validation - there must be a way return [ ValidationResult( diff --git a/dandi/files/bids.py b/dandi/files/bids.py index f2f5dcd7c..814df1309 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -38,7 +38,7 @@ class BIDSDatasetDescriptionAsset(LocalFileAsset): #: A list of validation error messages for individual assets in the #: dataset, keyed by `bids_path` properties; populated by `_validate()` - _asset_errors: Optional[dict[str, list[str]]] = None + _asset_errors: Optional[dict[str, list[ValidationResult]]] = None #: Asset metadata (in the form of a `dict` of BareAsset fields) for #: individual assets in the dataset, keyed by `bids_path` properties; @@ -67,7 +67,9 @@ def _validate(self) -> None: ] results = validate_bids(*bids_paths) self._dataset_errors: list[ValidationResult] = [] - self._asset_errors: dict[str, list[ValidationResult]] = defaultdict(list) + self._asset_errors: 
dict[str, list[ValidationResult]] = defaultdict( + list + ) self._asset_metadata = defaultdict(dict) for result in results: if result.id in BIDS_ASSET_ERRORS: @@ -83,12 +85,12 @@ def _validate(self) -> None: result.metadata ) - def get_asset_errors(self, asset: BIDSAsset) -> list[str]: + def get_asset_errors(self, asset: BIDSAsset) -> list[ValidationResult]: """:meta private:""" self._validate() - errors: list[str] = [] + errors: list[ValidationResult] = [] if self._dataset_errors: - errors.append("BIDS dataset is invalid") + errors.extend(self._dataset_errors) assert self._asset_errors is not None errors.extend(self._asset_errors[asset.bids_path]) return errors @@ -179,7 +181,7 @@ def get_validation_errors( self, schema_version: Optional[str] = None, devel_debug: bool = False, - ) -> list[str]: + ) -> list[ValidationResult]: return NWBAsset.get_validation_errors( self, schema_version, devel_debug ) + BIDSAsset.get_validation_errors(self) From 889d2e78cbf11bd5d22b0a94ea5848268e5481ae Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 13:53:24 -0400 Subject: [PATCH 082/124] Attempting to fix typing errors --- dandi/files/bids.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 814df1309..d53e13d8f 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -34,7 +34,7 @@ class BIDSDatasetDescriptionAsset(LocalFileAsset): #: A list of validation error messages pertaining to the dataset as a #: whole, populated by `_validate()` - _dataset_errors: Optional[list[str]] = None + _dataset_errors: Optional[list[ValidationResult]] = None #: A list of validation error messages for individual assets in the #: dataset, keyed by `bids_path` properties; populated by `_validate()` @@ -108,7 +108,10 @@ def get_validation_errors( ) -> list[ValidationResult]: self._validate() assert self._dataset_errors is not None - return self._dataset_errors + self._asset_errors.values() + if 
self._asset_errors is not None: + return self._dataset_errors + [*self._asset_errors.values()] + else: + return self._dataset_errors # get_metadata(): inherit use of default metadata from LocalFileAsset From dd03eb583e70224e299603c4f60cb29f05b71875 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 14:35:55 -0400 Subject: [PATCH 083/124] Fixed more typing errors --- dandi/files/bases.py | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 270f602c5..d2bf67bd8 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -181,19 +181,6 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - return [ - ValidationResult( - origin=ValidationOrigin( - name="dandischema", - version=dandischema.__version__, - ), - severity=Severity.ERROR, - id="dandischema.validationerror", - scope=Scope.FILE, - path=self.filepath, # note that it is not relative .path - message=str(e), - ) - ] # TODO: how do we get **all** errors from validation - there must be a way return [ ValidationResult( @@ -557,7 +544,6 @@ def get_validation_errors( raise # TODO: might reraise instead of making it into an error return _pydantic_errors_to_validation_results(e, self.filepath) - errors.append(f"Failed to inspect NWBFile: {e}") return errors @@ -667,7 +653,7 @@ def _upload_blob_part( def _check_required_fields( d: dict, required: list[str], file_path: str ) -> list[ValidationResult]: - errors: list[str] = [] + errors: list[ValidationResult] = [] for f in required: v = d.get(f, None) if not v or (isinstance(v, str) and not v.strip()): @@ -703,7 +689,7 @@ def _check_required_fields( return errors -_current_nwbinspector_version: str = None +_current_nwbinspector_version: str = "" def _get_nwb_inspector_version(): @@ -743,16 +729,14 @@ def _pydantic_errors_to_validation_results( """Convert list of dict from pydantic into our custom object.""" out = [] for e in 
errors: - id = dict( - id=":".join( - filter( - bool, - ( - "dandischema", - e.get("type", "UNKNOWN"), - "+".join(e.get("loc", [])), - ), - ) + id = ":".join( + filter( + bool, + ( + "dandischema", + e.get("type", "UNKNOWN"), + "+".join(e.get("loc", [])), + ), ) ) out.append( From b2f4c2e7c43bf568565ea699815a82b0ab04abed Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 14:43:34 -0400 Subject: [PATCH 084/124] Fixed list unpacking issue discussed in meeting --- dandi/files/bids.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index d53e13d8f..7806f80a2 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -109,7 +109,9 @@ def get_validation_errors( self._validate() assert self._dataset_errors is not None if self._asset_errors is not None: - return self._dataset_errors + [*self._asset_errors.values()] + return self._dataset_errors + [ + i for j in self._asset_errors.values() for i in j + ] else: return self._dataset_errors From 0d77a0d0e39148a117b6adf16fba372691c7f770 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 14:54:11 -0400 Subject: [PATCH 085/124] Further typing fixes --- dandi/files/bases.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index d2bf67bd8..176937934 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -127,7 +127,7 @@ def get_validation_errors( except ValidationError as e: if devel_debug: raise - return _pydantic_errors_to_validation_results(e, self.filepath) + return _pydantic_errors_to_validation_results(e, str(self.filepath)) except Exception as e: if devel_debug: raise @@ -543,7 +543,7 @@ def get_validation_errors( if devel_debug: raise # TODO: might reraise instead of making it into an error - return _pydantic_errors_to_validation_results(e, self.filepath) + return _pydantic_errors_to_validation_results(e, str(self.filepath)) return errors @@ -723,7 
+723,7 @@ def _get_nwb_inspector_version(): def _pydantic_errors_to_validation_results( - errors: list[dict], + errors: Any[list[dict], Exception], file_path: str, ) -> list[ValidationResult]: """Convert list of dict from pydantic into our custom object.""" From e263bf483e1e4892bec208a48ac1fcb1ea1c2f6a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 15:00:07 -0400 Subject: [PATCH 086/124] Defined missing and removed unused variable --- dandi/files/zarr.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 0b04a9d83..088bdab6e 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -11,7 +11,6 @@ from time import sleep from typing import Any, Optional, cast -from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult from dandischema.digests.zarr import get_checksum from dandischema.models import BareAsset, DigestType import requests @@ -37,6 +36,7 @@ from dandi.utils import chunked, pluralize from .bases import LocalDirectoryAsset +from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -188,56 +188,57 @@ def get_validation_errors( ) -> list[ValidationResult]: try: data = zarr.open(self.filepath) - except Exception as e: + except Exception: if devel_debug: raise return [ ValidationResult( origin=ValidationOrigin( name="zarr", - version= zarr.version.version, + version=zarr.version.version, ), severity=Severity.ERROR, id="zarr.cannot_open", scope=Scope.FILE, path=self.filepath, message="Error opening file.", - ) - ] + ) + ] if isinstance(data, zarr.Group) and not data: return [ ValidationResult( origin=ValidationOrigin( name="zarr", - version= zarr.version.version, + version=zarr.version.version, ), severity=Severity.ERROR, id="zarr.empty_group", scope=Scope.FILE, path=self.filepath, message="Zarr group is empty.", - ) - ] + ) + ] try: next(self.filepath.glob(f"*{os.sep}" + 
os.sep.join(["*"] * MAX_ZARR_DEPTH))) except StopIteration: pass else: + msg = f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep" if devel_debug: raise ValueError(msg) return [ ValidationResult( origin=ValidationOrigin( name="zarr", - version= zarr.version.version, + version=zarr.version.version, ), severity=Severity.ERROR, id="zarr.tree_depth_exceeded", scope=Scope.FILE, path=self.filepath, - message=f"Zarr directory tree more than {MAX_ZARR_DEPTH} directories deep", - ) - ] + message=msg, + ) + ] # TODO: Should this be appended to the above errors? return super().get_validation_errors( schema_version=schema_version, devel_debug=devel_debug From 8b58dce30e5b27b226299c54f95b4d1af817cd0c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 16:19:32 -0400 Subject: [PATCH 087/124] Trying to standardize error output --- dandi/pynwb_utils.py | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index ab0759d51..ebf78e6ec 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -23,7 +23,7 @@ metadata_nwb_subject_fields, ) from .utils import get_module_version, is_url -from .validate_types import ValidationResult +from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult lgr = get_logger() @@ -333,22 +333,39 @@ def validate( path: str or Path """ path = str(path) # Might come in as pathlib's PATH - errors: List[str] + errors: List[ValidationResult] = [] try: with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader: - errors = pynwb.validate(reader) + error_outputs = pynwb.validate(reader) # TODO: return ValidationResult structs - lgr.warning( - "pynwb validation errors for %s: %s", - path, - errors, - extra={"validating": True}, - ) + for error_output in error_outputs: + errors.append = ValidationResult( + origin=ValidationOrigin( + name="pynwb", + version=pynwb.__version__, + ), + 
severity=Severity.WARNING, + id=f"pywnb.{error_output}", + scope=Scope.FILE, + path=Path(path), + message="Failed to validate.", + ) except Exception as exc: if devel_debug: raise - lgr.warning("Failed to validate %s: %s", path, exc, extra={"validating": True}) - errors = [f"Failed to validate {path}: {exc}"] + errors.extend = [ + ValidationResult( + origin=ValidationOrigin( + name="pynwb", + version=pynwb.__version__, + ), + severity=Severity.ERROR, + id=f"pywnb.{error_output}", + scope=Scope.FILE, + path=Path(path), + message=f"{exc}", + ) + ] # To overcome # https://github.com/NeurodataWithoutBorders/pynwb/issues/1090 @@ -366,6 +383,7 @@ def validate( if version is not None: # Explicitly sanitize so we collect warnings. # TODO: later cast into proper ERRORs + # Do we really need to do these acrobatics? string comparison works fine... version = _sanitize_nwb_version(version, log=errors.append) try: v = semantic_version.Version(version) @@ -373,7 +391,8 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [e for e in errors if not re_ok_prior_210.search(str(e))] + errors = [e for e in errors if not re_ok_prior_210.search(e.message)] + # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( "Filtered out %d validation errors on %s", From f2f577d40e5ae4dd34c511ebc1ae141461619125 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 3 Oct 2022 16:42:58 -0400 Subject: [PATCH 088/124] Fixed syntax --- dandi/pynwb_utils.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index ebf78e6ec..3398d4363 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -339,33 +339,37 @@ def validate( error_outputs = pynwb.validate(reader) # TODO: return ValidationResult structs for error_output in error_outputs: - errors.append = ValidationResult( + 
errors.append( + ValidationResult( + origin=ValidationOrigin( + name="pynwb", + version=pynwb.__version__, + ), + severity=Severity.WARNING, + id=f"pywnb.{error_output}", + scope=Scope.FILE, + path=Path(path), + message="Failed to validate.", + ) + ) + except Exception as exc: + if devel_debug: + raise + errors.extend( + [ + ValidationResult( origin=ValidationOrigin( name="pynwb", version=pynwb.__version__, ), - severity=Severity.WARNING, + severity=Severity.ERROR, id=f"pywnb.{error_output}", scope=Scope.FILE, path=Path(path), - message="Failed to validate.", + message=f"{exc}", ) - except Exception as exc: - if devel_debug: - raise - errors.extend = [ - ValidationResult( - origin=ValidationOrigin( - name="pynwb", - version=pynwb.__version__, - ), - severity=Severity.ERROR, - id=f"pywnb.{error_output}", - scope=Scope.FILE, - path=Path(path), - message=f"{exc}", - ) - ] + ] + ) # To overcome # https://github.com/NeurodataWithoutBorders/pynwb/issues/1090 From 5c1890c61701bf785cb8b46103d80e7062d65a8b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 09:52:35 -0400 Subject: [PATCH 089/124] Expanded list comprehension to assert attribute presence --- dandi/pynwb_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 3398d4363..dd5532559 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -395,7 +395,15 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [e for e in errors if not re_ok_prior_210.search(e.message)] + errors = [] + for e in errors: + try: + assert e.message + if not re_ok_prior_210.search(e.message): + errors.append(e) + except AssertionError: + errors.append(e) + # errors = [e for e in errors if not re_ok_prior_210.search(e.message)] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From e30809d619c1765602c7323661e3aad8ae7b71ae 
Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:00:08 -0400 Subject: [PATCH 090/124] No more typing errors --- dandi/files/bases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 176937934..0c513b7bd 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -519,7 +519,7 @@ def get_validation_errors( # `importance_threshold` currently used to filter DANDI-level CRITICAL # evaluations as errors vs. validation results severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance.name] - kw = {} + kw: Any = {} if error.location: kw["within_asset_paths"] = { error.file_path: error.location, From 87048c73b219105a45b703ad569e6f4acb533bb9 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:03:08 -0400 Subject: [PATCH 091/124] Removed unused imports --- dandi/validate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/validate.py b/dandi/validate.py index 4430d4dc2..85b155a1c 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -2,7 +2,7 @@ import os from pathlib import Path -from typing import Iterator, List, Optional, Tuple, Union +from typing import Iterator, Optional, Union import appdirs @@ -163,5 +163,5 @@ def validate( """ for df in find_dandi_files(*paths, dandiset_path=None, allow_all=allow_any_path): yield from df.get_validation_errors( - schema_version=schema_version, devel_debug=devel_debug - ) + schema_version=schema_version, devel_debug=devel_debug + ) From d4b35b4b3050b62110a4a294889e72df929a2309 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:08:08 -0400 Subject: [PATCH 092/124] Fixed weird typing issues only appearing on GH --- dandi/validate_types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/validate_types.py b/dandi/validate_types.py index 24d775204..dfd920e11 100644 --- a/dandi/validate_types.py +++ b/dandi/validate_types.py @@ -1,7 +1,7 @@ 
from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Optional +from typing import Dict, List, Optional @dataclass @@ -31,10 +31,10 @@ class ValidationResult: # {"path": "task-broken_bold.json", # "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json", # "sub-02/func/sub-02_task-broken_bold.json"]} - asset_paths: Optional[list[str]] = None + asset_paths: Optional[List[str]] = None # e.g. path within hdf5 file hierarchy # As a dict we will map asset_paths into location within them - within_asset_paths: Optional[dict[str, str]] = None + within_asset_paths: Optional[Dict[str, str]] = None dandiset_path: Optional[Path] = None dataset_path: Optional[Path] = None # TODO: locations analogous to nwbinspector.InspectorMessage.location From acd78b774a236673814cadd73594f5ede1bb8ee4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 10:15:49 -0400 Subject: [PATCH 093/124] Placeholder for undefined variable string --- dandi/pynwb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index dd5532559..dddedef3e 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -363,7 +363,7 @@ def validate( version=pynwb.__version__, ), severity=Severity.ERROR, - id=f"pywnb.{error_output}", + id="pywnb.GENERIC", scope=Scope.FILE, path=Path(path), message=f"{exc}", From cc2ad2610ec43ad4b3092ce31c2a0d7249ae2da6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 11:33:01 -0400 Subject: [PATCH 094/124] Fixed assertion in test to match object output --- dandi/tests/test_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 2a333aca8..86a40c44b 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -232,7 +232,7 @@ def test_validate_simple1(simple1_nwb): def test_validate_simple1_no_subject(simple1_nwb): errors = 
dandi_file(simple1_nwb).get_validation_errors() - assert errors == ["Subject is missing."] + assert [e.message for e in errors] == ["Subject is missing."] def test_validate_simple2(simple2_nwb): @@ -251,7 +251,7 @@ def test_validate_simple2_new(simple2_nwb): def test_validate_simple3_no_subject_id(simple3_nwb): errors = dandi_file(simple3_nwb).get_validation_errors() - assert errors == ["subject_id is missing."] + assert [e.message for e in errors] == ["subject_id is missing."] def test_validate_bogus(tmp_path): From b5e16b19204edc16c3f3246210929819a3c75571 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 13:58:44 -0400 Subject: [PATCH 095/124] Integrate OSError into object-based validation reporting --- dandi/files/bases.py | 31 +++++++++++++++++++------------ dandi/tests/test_files.py | 2 +- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 0c513b7bd..5c5371627 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -543,7 +543,7 @@ def get_validation_errors( if devel_debug: raise # TODO: might reraise instead of making it into an error - return _pydantic_errors_to_validation_results(e, str(self.filepath)) + return _pydantic_errors_to_validation_results([e], str(self.filepath)) return errors @@ -729,16 +729,23 @@ def _pydantic_errors_to_validation_results( """Convert list of dict from pydantic into our custom object.""" out = [] for e in errors: - id = ":".join( - filter( - bool, - ( - "dandischema", - e.get("type", "UNKNOWN"), - "+".join(e.get("loc", [])), - ), + if isinstance(e, OSError): + id = "OSError" + message = e.__str__() + scope = Scope.FILE + else: + id = ":".join( + filter( + bool, + ( + "dandischema", + e.get("type", "UNKNOWN"), + "+".join(e.get("loc", [])), + ), + ) ) - ) + message = e.get("message", None) + scope = Scope.DANDISET out.append( ValidationResult( origin=ValidationOrigin( @@ -747,9 +754,9 @@ def _pydantic_errors_to_validation_results( ), 
severity=Severity.ERROR, id=id, - scope=Scope.DANDISET, + scope=scope, path=Path(file_path), - message=e.get("message", None), + message=message, # TODO? dataset_path=dataset_path, # TODO? dandiset_path=dandiset_path, ) diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 86a40c44b..3d9859f83 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -263,7 +263,7 @@ def test_validate_bogus(tmp_path): errors = dandi_file(path).get_validation_errors() # ATM we would get 2 errors -- since could not be open in two places, # but that would be too rigid to test. Let's just see that we have expected errors - assert any(e.startswith("Failed to inspect NWBFile") for e in errors) + assert any(e.message.startswith("Unable to open file") for e in errors) def test_upload_zarr(new_dandiset, tmp_path): From 9f49cc9882596c850d13b103221bf33e869e5cd6 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 15:00:39 -0400 Subject: [PATCH 096/124] Standardized dandi import --- dandi/files/bases.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 5c5371627..4a87fde11 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -26,7 +26,6 @@ import requests import dandi -from dandi import get_logger from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient from dandi.metadata import get_default_metadata, nwb2asset from dandi.misctypes import DUMMY_DIGEST, Digest, P @@ -35,7 +34,7 @@ from dandi.utils import yaml_load from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult -lgr = get_logger() +lgr = dandi.get_logger() # TODO -- should come from schema. 
This is just a simplistic example for now _required_dandiset_metadata_fields = ["identifier", "name", "description"] From 382c3aef1885ebd5ffe27159e1db97e0d129cf9e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 16:25:10 -0400 Subject: [PATCH 097/124] General-purpose exception catching --- dandi/files/bases.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 4a87fde11..95c3637ea 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -728,9 +728,12 @@ def _pydantic_errors_to_validation_results( """Convert list of dict from pydantic into our custom object.""" out = [] for e in errors: - if isinstance(e, OSError): - id = "OSError" - message = e.__str__() + if isinstance(e, Exception): + if hasattr(e, "message"): + message = e.message + else: + message = str(e) + id = "exception" scope = Scope.FILE else: id = ":".join( From 8d7148370991bce10885b6d278d421da229a5a9e Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 4 Oct 2022 16:27:09 -0400 Subject: [PATCH 098/124] More standardized error IDs --- dandi/files/bases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 95c3637ea..b469df812 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -736,7 +736,7 @@ def _pydantic_errors_to_validation_results( id = "exception" scope = Scope.FILE else: - id = ":".join( + id = ".".join( filter( bool, ( From 2d4e284247b0a17650f2f1805eada7fd8e53f17a Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 14:09:00 -0400 Subject: [PATCH 099/124] Typing fix for exception message --- dandi/files/bases.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b469df812..1bfbcfe62 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -730,6 +730,7 @@ def _pydantic_errors_to_validation_results( for e in errors: if isinstance(e, Exception): 
if hasattr(e, "message"): + assert e.message message = e.message else: message = str(e) From 2c8d034c628167283ae7ca2eb70150a6253402c4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 14:54:27 -0400 Subject: [PATCH 100/124] Description/testing of command line validation `--grouping` option. --- dandi/cli/cmd_validate.py | 2 +- dandi/cli/tests/test_cmd_validate.py | 21 +++++++++++++++++++++ dandi/files/bases.py | 1 - 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index d1b3351ea..37dd54c99 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -24,7 +24,7 @@ @click.option( "--grouping", "-g", - help="Write report under path, this option implies `--report/-r`.", + help="How to group error/warning reporting.", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @map_to_click_exceptions diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 095d21907..fed388e2d 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -22,3 +22,24 @@ def test_validate_bids_error(bids_error_examples, dataset): # Does it detect all errors? for key in expected_errors: assert key in r.output + + +def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): + """ + This is currently a placeholder test, and should be updated once we have paths with + multiple errors. + """ + import json + + from ..cmd_validate import validate_bids + + broken_dataset = os.path.join(bids_error_examples, dataset) + with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: + expected_errors = json.load(f) + r = CliRunner().invoke(validate_bids, ["--grouping=path", broken_dataset]) + # Does it break? + assert r.exit_code == 1 + + # Does it detect all errors? 
+ for key in expected_errors: + assert key in r.output diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 1bfbcfe62..b469df812 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -730,7 +730,6 @@ def _pydantic_errors_to_validation_results( for e in errors: if isinstance(e, Exception): if hasattr(e, "message"): - assert e.message message = e.message else: message = str(e) From 80db60e052bce7ab3b78b5d6c9ccad78b4417428 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 5 Oct 2022 14:46:33 -0400 Subject: [PATCH 101/124] Workaround e.message typechecking dillema --- dandi/files/bases.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b469df812..b5c5e627a 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -729,10 +729,7 @@ def _pydantic_errors_to_validation_results( out = [] for e in errors: if isinstance(e, Exception): - if hasattr(e, "message"): - message = e.message - else: - message = str(e) + message = getattr(e, "message", str(e)) id = "exception" scope = Scope.FILE else: From af48872e1babb9d2cf84eff35756f125ee416072 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 16:19:15 -0400 Subject: [PATCH 102/124] Removed unneeded imports, added tests for broken nwb dataset --- dandi/cli/tests/test_cmd_validate.py | 51 +++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index fed388e2d..72b6e9633 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -1,3 +1,4 @@ +import json import os from click.testing import CliRunner @@ -8,7 +9,6 @@ "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] ) def test_validate_bids_error(bids_error_examples, dataset): - import json from ..cmd_validate import validate_bids @@ -24,22 +24,55 @@ def test_validate_bids_error(bids_error_examples, 
dataset): assert key in r.output -def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): +def test_validate_bids_error_grouping_nwb( + bids_error_examples, dataset="invalid_asl003" +): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. """ - import json from ..cmd_validate import validate_bids - broken_dataset = os.path.join(bids_error_examples, dataset) - with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: - expected_errors = json.load(f) - r = CliRunner().invoke(validate_bids, ["--grouping=path", broken_dataset]) + dataset = os.path.join(bids_error_examples, dataset) + r = CliRunner().invoke(validate_bids, ["--grouping=path", dataset]) # Does it break? assert r.exit_code == 1 # Does it detect all errors? - for key in expected_errors: - assert key in r.output + assert dataset in r.output + + +def test_validate_bids_error_grouping_bids(simple3_nwb): + """ + This is currently a placeholder test, and should be updated once we have paths with + multiple errors. + """ + + from ..cmd_validate import validate_bids + + r = CliRunner().invoke(validate_bids, ["--grouping=path", simple3_nwb]) + # Does it break? + assert r.exit_code == 1 + + # Does it detect all errors? + assert simple3_nwb in r.output + + +def test_validate_bids_error_grouping_notification( + bids_error_examples, dataset="invalid_asl003" +): + """Test user notification for unimplemented parameter value.""" + + from ..cmd_validate import validate_bids + + broken_dataset = os.path.join(bids_error_examples, dataset) + r = CliRunner().invoke(validate_bids, ["--grouping=error", broken_dataset]) + # Does it break? + assert r.exit_code == 1 + + # Does it notify the user correctly? 
+ notification_substring = ( + "`grouping` parameter values currently supported are path or None" + ) + assert notification_substring in r.output From 15ce3c7bbe4d1e664d217522e2abb621087febb7 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 16:21:38 -0400 Subject: [PATCH 103/124] Removed deprecated comment --- dandi/files/bases.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b5c5e627a..7732732ec 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -491,8 +491,6 @@ def get_validation_errors( errors: list[ValidationResult] = pynwb_validate( self.filepath, devel_debug=devel_debug ) - # Q gh-943 do we want to manage this thing as an error? - # It should be a separate thing possibly via logging IMHO. if schema_version is not None: errors.extend( super().get_validation_errors( From 8c430f64f7e59d1e5e05e4d17371a5148bc9b4b4 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 16:51:27 -0400 Subject: [PATCH 104/124] Added click decorator for grouping parameter in nwb validator --- dandi/cli/cmd_validate.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 37dd54c99..fc400733a 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -106,6 +106,11 @@ def validate_bids( default=False, is_flag=True, ) +@click.option( + "--grouping", + "-g", + help="How to group error/warning reporting.", +) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions From 6348f644831239a7429e8e30b249477d30f538f0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 17:33:06 -0400 Subject: [PATCH 105/124] Standardized CLI NWB validator wrt BIDS validator draft and added tests Still using 2 separate functions, merging of validation functions best for subsequent PR --- dandi/cli/cmd_validate.py | 70 ++++++++++++++-------------- 
dandi/cli/tests/test_cmd_validate.py | 17 ++++--- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index fc400733a..342d9eadb 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -129,6 +129,7 @@ def validate( """ from ..pynwb_utils import ignore_benign_pynwb_warnings from ..validate import validate as validate_ + from ..validate_types import Severity # Don't log validation warnings, as this command reports them to the user # anyway: @@ -144,51 +145,50 @@ def validate( # way to get relevant warnings (not errors) from PyNWB ignore_benign_pynwb_warnings() - all_files_error_messages = {} - nfiles = 0 - for path, messages in validate_( + validator_result = validate_( *paths, schema_version=schema, devel_debug=devel_debug, allow_any_path=allow_any_path, - ): - nfiles += 1 - if not grouping: - error_paths = [path] * len(messages) - errors = ["NWBError"] * len(messages) - severities = [""] * len(messages) - display_errors(error_paths, errors, severities, messages) - elif grouping == "path": - error_paths = [path] - errors = ["NWBError"] * len(messages) - severities = [""] * len(messages) - display_errors(error_paths, errors, severities, messages) + ) + issues = [] + for i in validator_result: + if not i.severity: + continue else: - raise NotImplementedError( - "The `grouping` parameter values currently supported are " - "path or None." 
- ) - - all_files_error_messages[path] = messages - - files_with_errors = [f for f, errors in all_files_error_messages.items() if errors] + issues.append(i) - if files_with_errors: - click.secho( - "Summary: Validation errors in {} out of {} files".format( - len(files_with_errors), nfiles - ), - bold=True, - fg="red", + purviews = [ + list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues + ] + if not grouping: + display_errors( + purviews, + [i.id for i in issues], + [i.severity for i in issues], + [i.message for i in issues], ) - raise SystemExit(1) + elif grouping == "path": + for purview in purviews: + applies_to = [ + i for i in issues if purview in [i.path, i.path_regex, i.dataset_path] + ] + display_errors( + [purview], + [i.id for i in applies_to], + [i.severity for i in applies_to], + [i.message for i in applies_to], + ) else: - click.secho( - "Summary: No validation errors among {} file(s)".format(nfiles), - bold=True, - fg="green", + raise NotImplementedError( + "The `grouping` parameter values currently supported are " "path or None." ) + validation_errors = [i for i in issues if i.severity == Severity.ERROR] + + if validation_errors: + raise SystemExit(1) + def _get_severity_color(severities): from ..validate_types import Severity diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 72b6e9633..d6c9b91ca 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -24,9 +24,7 @@ def test_validate_bids_error(bids_error_examples, dataset): assert key in r.output -def test_validate_bids_error_grouping_nwb( - bids_error_examples, dataset="invalid_asl003" -): +def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. 
@@ -43,20 +41,21 @@ def test_validate_bids_error_grouping_nwb( assert dataset in r.output -def test_validate_bids_error_grouping_bids(simple3_nwb): +def test_validate_nwb_grouping_severity(simple3_nwb): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. """ - from ..cmd_validate import validate_bids + from ..cmd_validate import validate - r = CliRunner().invoke(validate_bids, ["--grouping=path", simple3_nwb]) - # Does it break? - assert r.exit_code == 1 + r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) + # Does it pass? + assert r.exit_code == 0 - # Does it detect all errors? + # Does it give required warnings for required path? assert simple3_nwb in r.output + assert "NWBI.check_subject_id_exists" in r.output def test_validate_bids_error_grouping_notification( From ed5f3534846bf87e0ee6ee9952c1adcd98ea4119 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 5 Oct 2022 17:48:08 -0400 Subject: [PATCH 106/124] Reporting lack of errors explicitly in CLI validation --- dandi/cli/cmd_validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 342d9eadb..99047245f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -96,6 +96,8 @@ def validate_bids( if validation_errors: raise SystemExit(1) + else: + click.secho("No errors found.", fg="green") @click.command() @@ -188,6 +190,8 @@ def validate( if validation_errors: raise SystemExit(1) + else: + click.secho("No errors found.", fg="green") def _get_severity_color(severities): From 65ae2c98fe99b68091d9c550b6894dd3fcadca64 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 10 Oct 2022 13:59:35 -0400 Subject: [PATCH 107/124] Simplified git command execution --- dandi/tests/fixtures.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index bc000ba64..448b152ba 100644 --- 
a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -245,7 +245,7 @@ def fixture( skipif.no_git() path = tmp_path_factory.mktemp("gitrepo") lgr.debug("Cloning %r into %r", url, path) - runout = run( + run( [ "git", "clone", @@ -255,10 +255,8 @@ def fixture( url, str(path), ], - capture_output=True, + check=True, ) - if runout.returncode: - raise RuntimeError(f"Failed to clone {url} into {path}") # cwd specification is VERY important, not only to achieve the correct # effects, but also to avoid dropping files from your repository if you # were to run `git sparse-checkout` inside the software repo. From b4d5dcdbeeb290d4a27bfc55c4ec9e88246da8b0 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 11:54:14 -0400 Subject: [PATCH 108/124] Using click option specification --- dandi/cli/cmd_validate.py | 6 ++++-- dandi/cli/tests/test_cmd_validate.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 99047245f..df8ada8c0 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -25,6 +25,8 @@ "--grouping", "-g", help="How to group error/warning reporting.", + type=click.Choice(['none', 'path'], case_sensitive=False), + default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @map_to_click_exceptions @@ -33,7 +35,7 @@ def validate_bids( schema, report, report_path, - grouping=None, + grouping="none", ): """Validate BIDS paths. 
@@ -69,7 +71,7 @@ def validate_bids( purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues ] - if not grouping: + if grouping == "none": display_errors( purviews, [i.id for i in issues], diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index d6c9b91ca..b96c86559 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -68,10 +68,10 @@ def test_validate_bids_error_grouping_notification( broken_dataset = os.path.join(bids_error_examples, dataset) r = CliRunner().invoke(validate_bids, ["--grouping=error", broken_dataset]) # Does it break? - assert r.exit_code == 1 + assert r.exit_code == 2 # Does it notify the user correctly? notification_substring = ( - "`grouping` parameter values currently supported are path or None" + "Invalid value for '--grouping'" ) assert notification_substring in r.output From 624e3b0ab8417eb01d3031d30dc43584a95aba26 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 11:56:48 -0400 Subject: [PATCH 109/124] List comprehension for more compact code. 
--- dandi/cli/cmd_validate.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index df8ada8c0..ff901b318 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -61,12 +61,7 @@ def validate_bids( schema_version=schema, ) - issues = [] - for i in validator_result: - if not i.severity: - continue - else: - issues.append(i) + issues = [i for i in validator_result if i.severity] purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues @@ -155,12 +150,7 @@ def validate( devel_debug=devel_debug, allow_any_path=allow_any_path, ) - issues = [] - for i in validator_result: - if not i.severity: - continue - else: - issues.append(i) + issues = [i for i in validator_result if i.severity] purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues From 41df99d9d862543a894eafff6fee1a0e0ca52dcd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 11:58:03 -0400 Subject: [PATCH 110/124] Removed superfluous list bracketing --- dandi/cli/cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index ff901b318..230f8201c 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -217,7 +217,7 @@ def display_errors(purviews, errors, severities, messages): by assert if this won't ever be the case. 
""" - if all([len(i) == 1 for i in [purviews, errors, severities, messages]]): + if all(len(i) == 1 for i in [purviews, errors, severities, messages]): fg = _get_severity_color(severities) error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}" click.secho(error_message, fg=fg) From 09e526b5a23206d6f061b0de13ddf0f2f895502d Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 12:06:00 -0400 Subject: [PATCH 111/124] clarified comment, removed commented code --- dandi/pynwb_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index dddedef3e..c2fbf0771 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -387,7 +387,7 @@ def validate( if version is not None: # Explicitly sanitize so we collect warnings. # TODO: later cast into proper ERRORs - # Do we really need to do these acrobatics? string comparison works fine... + # Do we really need this custom internal function? string comparison works fine.. 
version = _sanitize_nwb_version(version, log=errors.append) try: v = semantic_version.Version(version) @@ -403,7 +403,6 @@ def validate( errors.append(e) except AssertionError: errors.append(e) - # errors = [e for e in errors if not re_ok_prior_210.search(e.message)] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From e38577b0844774d7b69c1b0e16d6480f5f001908 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 13:27:37 -0400 Subject: [PATCH 112/124] Extracted validation variant common code into internal function and renamed tests for more consistency --- dandi/cli/cmd_validate.py | 45 +++++----------------------- dandi/cli/tests/test_cmd_validate.py | 5 ++-- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 230f8201c..11fe3ce21 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -52,7 +52,6 @@ def validate_bids( """ from ..validate import validate_bids as validate_bids_ - from ..validate_types import Severity validator_result = validate_bids_( # Controller *paths, @@ -61,40 +60,7 @@ def validate_bids( schema_version=schema, ) - issues = [i for i in validator_result if i.severity] - - purviews = [ - list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues - ] - if grouping == "none": - display_errors( - purviews, - [i.id for i in issues], - [i.severity for i in issues], - [i.message for i in issues], - ) - elif grouping == "path": - for purview in purviews: - applies_to = [ - i for i in issues if purview in [i.path, i.path_regex, i.dataset_path] - ] - display_errors( - [purview], - [i.id for i in applies_to], - [i.severity for i in applies_to], - [i.message for i in applies_to], - ) - else: - raise NotImplementedError( - "The `grouping` parameter values currently supported are " "path or None." 
- ) - - validation_errors = [i for i in issues if i.severity == Severity.ERROR] - - if validation_errors: - raise SystemExit(1) - else: - click.secho("No errors found.", fg="green") + _process_issues(validator_result, grouping) @click.command() @@ -128,7 +94,6 @@ def validate( """ from ..pynwb_utils import ignore_benign_pynwb_warnings from ..validate import validate as validate_ - from ..validate_types import Severity # Don't log validation warnings, as this command reports them to the user # anyway: @@ -150,12 +115,18 @@ def validate( devel_debug=devel_debug, allow_any_path=allow_any_path, ) + + _process_issues(validator_result, grouping) + + +def _process_issues(validator_result, grouping): + from ..validate_types import Severity issues = [i for i in validator_result if i.severity] purviews = [ list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues ] - if not grouping: + if grouping == "none": display_errors( purviews, [i.id for i in issues], diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index b96c86559..d832cadab 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -24,7 +24,7 @@ def test_validate_bids_error(bids_error_examples, dataset): assert key in r.output -def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl003"): +def test_validate_bids_grouping_error(bids_error_examples, dataset="invalid_asl003"): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. @@ -41,7 +41,7 @@ def test_validate_bids_error_grouping(bids_error_examples, dataset="invalid_asl0 assert dataset in r.output -def test_validate_nwb_grouping_severity(simple3_nwb): +def test_validate_nwb_path_grouping(simple3_nwb): """ This is currently a placeholder test, and should be updated once we have paths with multiple errors. 
@@ -51,6 +51,7 @@ def test_validate_nwb_grouping_severity(simple3_nwb): r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) # Does it pass? + print(r.output) assert r.exit_code == 0 # Does it give required warnings for required path? From 456fafd1444d5c0bb48090df8c4ff537443299a2 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 13:30:35 -0400 Subject: [PATCH 113/124] Importing outside of test functions --- dandi/cli/tests/test_cmd_validate.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index d832cadab..37be9ac25 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -4,14 +4,13 @@ from click.testing import CliRunner import pytest +from ..cmd_validate import validate, validate_bids @pytest.mark.parametrize( "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] ) def test_validate_bids_error(bids_error_examples, dataset): - from ..cmd_validate import validate_bids - broken_dataset = os.path.join(bids_error_examples, dataset) with open(os.path.join(broken_dataset, ".ERRORS.json")) as f: expected_errors = json.load(f) @@ -30,8 +29,6 @@ def test_validate_bids_grouping_error(bids_error_examples, dataset="invalid_asl0 multiple errors. """ - from ..cmd_validate import validate_bids - dataset = os.path.join(bids_error_examples, dataset) r = CliRunner().invoke(validate_bids, ["--grouping=path", dataset]) # Does it break? @@ -47,8 +44,6 @@ def test_validate_nwb_path_grouping(simple3_nwb): multiple errors. """ - from ..cmd_validate import validate - r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) # Does it pass? 
print(r.output) @@ -64,8 +59,6 @@ def test_validate_bids_error_grouping_notification( ): """Test user notification for unimplemented parameter value.""" - from ..cmd_validate import validate_bids - broken_dataset = os.path.join(bids_error_examples, dataset) r = CliRunner().invoke(validate_bids, ["--grouping=error", broken_dataset]) # Does it break? From 857d7b8f9026f24894d35c8b874fb2f28c688819 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Tue, 11 Oct 2022 13:44:23 -0400 Subject: [PATCH 114/124] Updated grouping for vanilla validator --- dandi/cli/cmd_validate.py | 4 +++- dandi/pynwb_utils.py | 9 +-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 11fe3ce21..f60ac3d2f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -75,12 +75,14 @@ def validate_bids( "--grouping", "-g", help="How to group error/warning reporting.", + type=click.Choice(['none', 'path'], case_sensitive=False), + default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions def validate( - paths, schema=None, devel_debug=False, allow_any_path=False, grouping=None + paths, schema=None, devel_debug=False, allow_any_path=False, grouping="none", ): """Validate files for NWB and DANDI compliance. 
diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index c2fbf0771..5ed0e90fd 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -395,14 +395,7 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [] - for e in errors: - try: - assert e.message - if not re_ok_prior_210.search(e.message): - errors.append(e) - except AssertionError: - errors.append(e) + errors = [e for e in errors if not re_ok_prior_210.search(cast(str, getattr(e, "message", "")))] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From 56973d3df7d330c1ac5be7b47a4a8d5fcaff9d3f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:18:17 -0400 Subject: [PATCH 115/124] Linting fixes --- dandi/cli/tests/test_cmd_validate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 37be9ac25..ac9a2fe60 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -6,6 +6,7 @@ from ..cmd_validate import validate, validate_bids + @pytest.mark.parametrize( "dataset", ["invalid_asl003", "invalid_eeg_cbm", "invalid_pet001"] ) @@ -65,7 +66,5 @@ def test_validate_bids_error_grouping_notification( assert r.exit_code == 2 # Does it notify the user correctly? 
- notification_substring = ( - "Invalid value for '--grouping'" - ) + notification_substring = "Invalid value for '--grouping'" assert notification_substring in r.output From 4eb0c703d93c77fb481087b14fe784dd00440094 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:36:01 -0400 Subject: [PATCH 116/124] Importing cast function from typing --- dandi/pynwb_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 5ed0e90fd..f4d809664 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -3,7 +3,7 @@ import os.path as op from pathlib import Path import re -from typing import Any, Dict, List, Tuple, TypeVar, Union +from typing import Any, Dict, List, Tuple, TypeVar, Union, cast import warnings import dandischema @@ -395,7 +395,11 @@ def validate( v = None if v is not None and v < semantic_version.Version("2.1.0"): errors_ = errors[:] - errors = [e for e in errors if not re_ok_prior_210.search(cast(str, getattr(e, "message", "")))] + errors = [ + e + for e in errors + if not re_ok_prior_210.search(cast(str, getattr(e, "message", ""))) + ] # This is not an error, just logging about the process, hence logging: if errors != errors_: lgr.debug( From 404968b8a4fda0c9d182f9f5c1df750a6d1b8a23 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:53:38 -0400 Subject: [PATCH 117/124] Type annotations --- dandi/cli/cmd_validate.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index f60ac3d2f..0c264b9af 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -25,7 +25,7 @@ "--grouping", "-g", help="How to group error/warning reporting.", - type=click.Choice(['none', 'path'], case_sensitive=False), + type=click.Choice(["none", "path"], case_sensitive=False), default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, 
dir_okay=True)) @@ -75,14 +75,18 @@ def validate_bids( "--grouping", "-g", help="How to group error/warning reporting.", - type=click.Choice(['none', 'path'], case_sensitive=False), + type=click.Choice(["none", "path"], case_sensitive=False), default="none", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) @devel_debug_option() @map_to_click_exceptions def validate( - paths, schema=None, devel_debug=False, allow_any_path=False, grouping="none", + paths, + schema=None, + devel_debug=False, + allow_any_path=False, + grouping="none", ): """Validate files for NWB and DANDI compliance. @@ -123,6 +127,7 @@ def validate( def _process_issues(validator_result, grouping): from ..validate_types import Severity + issues = [i for i in validator_result if i.severity] purviews = [ @@ -170,7 +175,12 @@ def _get_severity_color(severities): return "blue" -def display_errors(purviews, errors, severities, messages): +def display_errors( + purviews: list[str], + errors: list[str], + severities: list[str], + messages: list[str], +) -> None: """ Unified error display for validation CLI, which auto-resolves grouping logic based on the length of input lists. 
From 6ac5b999016611c42a4c489a8c72f2163fcdd91c Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 17 Oct 2022 08:59:08 -0400 Subject: [PATCH 118/124] Fixed typing --- dandi/cli/cmd_validate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 0c264b9af..cb209b2cc 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -1,5 +1,6 @@ import logging import os +from typing import List import click @@ -176,10 +177,10 @@ def _get_severity_color(severities): def display_errors( - purviews: list[str], - errors: list[str], - severities: list[str], - messages: list[str], + purviews: List[str], + errors: List[str], + severities: List[str], + messages: List[str], ) -> None: """ Unified error display for validation CLI, which auto-resolves grouping logic based on From abc8e8ded048ad0d72c9c134486b1a718dbd79a1 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 14:38:20 -0400 Subject: [PATCH 119/124] Removed superfluous print call --- dandi/cli/tests/test_cmd_validate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index ac9a2fe60..4298a8bc8 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -47,7 +47,6 @@ def test_validate_nwb_path_grouping(simple3_nwb): r = CliRunner().invoke(validate, ["--grouping=path", simple3_nwb]) # Does it pass? - print(r.output) assert r.exit_code == 0 # Does it give required warnings for required path? 
From f80b5b498592c1852597d2c63985217ab0290d21 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 15:41:04 -0400 Subject: [PATCH 120/124] Fixes proposed by Yaro --- dandi/cli/cmd_validate.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index cb209b2cc..08fe14638 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -1,11 +1,12 @@ import logging import os -from typing import List +from typing import List, cast import click from .base import devel_debug_option, devel_option, map_to_click_exceptions from ..utils import pluralize +from ..validate_types import Severity @click.command() @@ -40,12 +41,6 @@ def validate_bids( ): """Validate BIDS paths. - Parameters - ---------- - - grouping : str, optional - A string which is either "", "error", or "path" — "error" implemented. - Notes ----- Used from bash, eg: @@ -127,7 +122,6 @@ def validate( def _process_issues(validator_result, grouping): - from ..validate_types import Severity issues = [i for i in validator_result if i.severity] @@ -166,7 +160,6 @@ def _process_issues(validator_result, grouping): def _get_severity_color(severities): - from ..validate_types import Severity if Severity.ERROR in severities: return "red" @@ -179,20 +172,13 @@ def _get_severity_color(severities): def display_errors( purviews: List[str], errors: List[str], - severities: List[str], + severities: List[Severity], messages: List[str], ) -> None: """ Unified error display for validation CLI, which auto-resolves grouping logic based on the length of input lists. - Parameters - ---------- - purviews: list of str - errors: list of str - severities: list of dandi.validate_types.Severity - messages: list of str - Notes ----- @@ -201,7 +187,7 @@ def display_errors( by assert if this won't ever be the case. 
""" - if all(len(i) == 1 for i in [purviews, errors, severities, messages]): + if all(len(cast(list, i)) == 1 for i in [purviews, errors, severities, messages]): fg = _get_severity_color(severities) error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}" click.secho(error_message, fg=fg) From 011b45db992930d9cd13581c2c276ae83660188f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 17:33:49 -0400 Subject: [PATCH 121/124] Not duplicating code for error subclass --- dandi/files/bases.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 7732732ec..94138c887 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -123,10 +123,6 @@ def get_validation_errors( ) try: DandisetMeta(**meta) - except ValidationError as e: - if devel_debug: - raise - return _pydantic_errors_to_validation_results(e, str(self.filepath)) except Exception as e: if devel_debug: raise From 76ddf068d6c3b3aa01ddfd83969a099d712c7177 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 19 Oct 2022 19:59:38 -0400 Subject: [PATCH 122/124] Orthography --- dandi/cli/cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 08fe14638..bd050e469 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -11,7 +11,7 @@ @click.command() @click.option( - "--schema", help="Validate against new BIDS schema version", metavar="VERSION" + "--schema", help="Validate against new BIDS schema version.", metavar="VERSION" ) @click.option( "--report-path", From 2c7dd6abafb1c5404e2cfc8a2edeb1da3fa1684f Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 27 Oct 2022 02:13:51 -0400 Subject: [PATCH 123/124] We need newer bidsschematools --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 270525efb..7b00e6d8b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ project_urls 
= python_requires = >=3.7 install_requires = appdirs - bidsschematools ~= 0.4.0 + bidsschematools >= 0.5.0 click click-didyoumean dandischema ~= 0.7.0 From 620510922ee6762aaad3d3a38981d6d59f0a9680 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 27 Oct 2022 16:33:42 -0400 Subject: [PATCH 124/124] Type fix --- dandi/tests/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 448b152ba..ef92d5266 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -262,7 +262,7 @@ def fixture( # were to run `git sparse-checkout` inside the software repo. run(["git", "sparse-checkout", "init", "--cone"], cwd=path, check=True) run(["git", "sparse-checkout", "set"] + whitelist, cwd=path, check=True) - yield path + yield str(path) return fixture