Merge pull request #2372 from AllenInstitute/vbn_nwb_writer

Vbn nwb writer
AllenInstitute · Apr 22, 2022 · cb59b50
2 parents 1691921 + 1092def
commit cb59b50
Show file tree

Hide file tree

Showing 15 changed files with 388 additions and 51 deletions.
diff --git a/allensdk/brain_observatory/behavior/behavior_session.py b/allensdk/brain_observatory/behavior/behavior_session.py
@@ -1373,7 +1373,7 @@ def _get_metadata(self, behavior_metadata: BehaviorMetadata) -> dict:
         }
 
     def _get_identifier(self) -> str:
-        return str(self._behavior_session_id)
+        return str(self._behavior_session_id.value)
 
     def _get_session_type(self) -> str:
         return self._metadata.session_type

diff --git a/allensdk/brain_observatory/behavior/data_objects/eye_tracking/eye_tracking_table.py b/allensdk/brain_observatory/behavior/data_objects/eye_tracking/eye_tracking_table.py
@@ -207,7 +207,7 @@ def from_data_file(cls, data_file: EyeTrackingFile,
         drop_frames : List[int], optional
             List of frame indices to be dropped from the table.
             If provided, will drop the corresponding frame frame times read
-            from the sync file to syncronize frame times and frames.
+            from the sync file to synchronize frame times and frames.
         z_threshold : float, optional
             See EyeTracking.from_lims
         dilation_frames : int, optional

diff --git a/allensdk/brain_observatory/behavior/write_behavior_nwb/__main__.py b/allensdk/brain_observatory/behavior/write_behavior_nwb/__main__.py
@@ -7,8 +7,8 @@
 
 from allensdk.brain_observatory.behavior.behavior_session import (
     BehaviorSession)
-from allensdk.brain_observatory.behavior.write_behavior_nwb._schemas import (
-    InputSchema, OutputSchema)
+from allensdk.brain_observatory.behavior.write_behavior_nwb.schemas import (
+    BehaviorInputSchema, OutputSchema)
 from allensdk.brain_observatory.argschema_utilities import (
     write_or_print_outputs)
 from allensdk.brain_observatory.session_api_utils import sessions_are_equal
@@ -71,7 +71,7 @@ def main():
     try:
         parser = argschema.ArgSchemaParser(
             args=args,
-            schema_type=InputSchema,
+            schema_type=BehaviorInputSchema,
             output_schema_type=OutputSchema,
         )
         logging.info('Input successfully parsed')

diff --git a/...y/behavior/write_behavior_nwb/_schemas.py → ...ry/behavior/write_behavior_nwb/schemas.py b/...y/behavior/write_behavior_nwb/_schemas.py → ...ry/behavior/write_behavior_nwb/schemas.py
@@ -1,13 +1,14 @@
+import argschema.fields
 from argschema import ArgSchema
-from argschema.fields import (LogLevel, String, Int, Nested, List, Float)
+from argschema.fields import (LogLevel, String, Int, Nested, List)
 import marshmallow as mm
 import pandas as pd
 
 from allensdk.brain_observatory.argschema_utilities import (
-    check_read_access, check_write_access_overwrite, RaisingSchema)
+    check_write_access_overwrite, RaisingSchema)
 
 
-class BehaviorSessionData(RaisingSchema):
+class BaseBehaviorSessionDataSchema(RaisingSchema):
     behavior_session_id = Int(required=True,
                               description=("Unique identifier for the "
                                            "behavior session to write into "
@@ -17,9 +18,11 @@ class BehaviorSessionData(RaisingSchema):
                                       "session"))
     driver_line = List(String,
                        required=True,
+                       cli_as_single_argument=True,
                        description='Genetic driver line(s) of subject')
     reporter_line = List(String,
                          required=True,
+                         cli_as_single_argument=True,
                          description='Genetic reporter line(s) of subject')
     full_genotype = String(required=True,
                            description='Full genotype of subject')
@@ -32,18 +35,19 @@ class BehaviorSessionData(RaisingSchema):
                                               "format"))
     external_specimen_name = Int(required=True,
                                  description='LabTracks ID of the subject')
-    behavior_stimulus_file = String(required=True,
-                                    validate=check_read_access,
-                                    description=("Path of behavior_stimulus "
-                                                 "camstim *.pkl file"))
+    behavior_stimulus_file = argschema.fields.InputFile(
+        required=True,
+        description=("Path of behavior_stimulus "
+                     "camstim *.pkl file"))
     date_of_birth = String(required=True, description="Subject date of birth")
     sex = String(required=True, description="Subject sex")
     age = String(required=True, description="Subject age")
+
+
+class BehaviorSessionData(BaseBehaviorSessionDataSchema):
     stimulus_name = String(required=True,
                            description=("Name of stimulus presented during "
                                         "behavior session"))
-    monitor_delay = Float(required=False,
-                          description=("Value of delay to adjust timestamps"))
 
     @mm.pre_load
     def set_stimulus_name(self, data, **kwargs):
@@ -63,7 +67,7 @@ def set_stimulus_name(self, data, **kwargs):
         return data
 
 
-class InputSchema(ArgSchema):
+class BehaviorInputSchema(ArgSchema):
     class Meta:
         unknown = mm.RAISE
     log_level = LogLevel(default='INFO',
@@ -77,7 +81,7 @@ class Meta:
 
 
 class OutputSchema(RaisingSchema):
-    input_parameters = Nested(InputSchema)
+    input_parameters = Nested(BehaviorInputSchema)
     output_path = String(required=True,
                          validate=check_write_access_overwrite,
                          description='Path of output.json to be written')
diff --git a/allensdk/brain_observatory/ecephys/behavior_ecephys_session.py b/allensdk/brain_observatory/ecephys/behavior_ecephys_session.py
@@ -287,7 +287,8 @@ def from_json(
             cls,
             session_data: dict,
             stimulus_presentation_exclude_columns: Optional[List[str]] = None,
-            running_speed_load_from_multiple_stimulus_files: bool = True
+            running_speed_load_from_multiple_stimulus_files: bool = True,
+            skip_probes: Optional[List[str]] = None
     ) -> "BehaviorEcephysSession":
         """
 
@@ -296,8 +297,11 @@ def from_json(
         session_data: Dict of input data necessary to construct a session
         stimulus_presentation_exclude_columns:  Optional list of columns to
             exclude from stimulus presentations table
-        Whether to load running speed from multiple stimulus files
+        running_speed_load_from_multiple_stimulus_files:
+            Whether to load running speed from multiple stimulus files
             If False, will just load from a single behavior stimulus file
+        skip_probes: Names of probes to exclude (due to known bad data
+            for example)
 
         Returns
         -------
@@ -315,7 +319,8 @@ def from_json(
             running_speed_load_from_multiple_stimulus_files=(
                 running_speed_load_from_multiple_stimulus_files)
         )
-        probes = Probes.from_json(probes=session_data['probes'])
+        probes = Probes.from_json(probes=session_data['probes'],
+                                  skip_probes=skip_probes)
         optotagging_table = OptotaggingTable.from_json(dict_repr=session_data)
 
         return BehaviorEcephysSession(

diff --git a/allensdk/brain_observatory/ecephys/probes.py b/allensdk/brain_observatory/ecephys/probes.py
@@ -1,3 +1,4 @@
+import logging
 from typing import List, Dict, Any, Optional
 
 import numpy as np
@@ -16,7 +17,8 @@ class Probes(DataObject, JsonReadableInterface, NwbReadableInterface,
              NwbWritableInterface):
     """Probes"""
 
-    def __init__(self, probes: List[Probe]):
+    def __init__(self,
+                 probes: List[Probe]):
         """
 
         Parameters
@@ -136,7 +138,32 @@ def get_units_table(
         return units_table
 
     @classmethod
-    def from_json(cls, probes: List[Dict[str, Any]]) -> "Probes":
+    def from_json(
+            cls,
+            probes: List[Dict[str, Any]],
+            skip_probes: Optional[List[str]] = None
+    ) -> "Probes":
+        """Construct `Probes` from json probe records, ordered by name
+
+        Parameters
+        ----------
+        probes: Probe records; each is looked up by its 'name' key
+        skip_probes: Names of probes to exclude (e.g. due to known bad data)
+
+        Returns
+        -------
+        `Probes` instance
+        """
+        skip_probes = [] if skip_probes is None else skip_probes
+        known_names = [p['name'] for p in probes]
+        invalid_skip_probes = set(skip_probes).difference(known_names)
+        if invalid_skip_probes:
+            raise ValueError(
+                f'You passed invalid probes to skip: {invalid_skip_probes} '
+                f'are not valid probe names')
+        for skipped in skip_probes:
+            logging.info(f'Skipping {skipped}')
+        probes = [p for p in probes if p['name'] not in skip_probes]
         probes = sorted(probes, key=lambda probe: probe['name'])
         probes = [Probe.from_json(probe=probe) for probe in probes]
         return Probes(probes=probes)
@@ -189,3 +216,7 @@ def to_nwb(self, nwbfile: NWBFile) -> NWBFile:
         )
 
         return nwbfile
+
+    def __iter__(self):
+        """Yield each probe held in `self.probes`, in order"""
+        yield from self.probes
diff --git a/allensdk/brain_observatory/ecephys/write_nwb/__main__.py b/allensdk/brain_observatory/ecephys/write_nwb/__main__.py
@@ -1,3 +1,4 @@
+"""Module for writing NWB files for the VCN project"""
 import logging
 import sys
 from typing import Any, Dict, List, Tuple
@@ -21,7 +22,7 @@
 from allensdk.brain_observatory.ecephys.probes import Probes
 from allensdk.config.manifest import Manifest
 
-from ._schemas import InputSchema, OutputSchema
+from .schemas import VCNInputSchema, OutputSchema
 from allensdk.brain_observatory.nwb import (
     add_stimulus_timestamps,
     add_invalid_times,
@@ -623,7 +624,7 @@ def main():
 
     parser = optional_lims_inputs(
         sys.argv,
-        InputSchema,
+        VCNInputSchema,
         OutputSchema,
         get_inputs_from_lims
     )

diff --git a/...observatory/ecephys/write_nwb/_schemas.py → ..._observatory/ecephys/write_nwb/schemas.py b/...observatory/ecephys/write_nwb/_schemas.py → ..._observatory/ecephys/write_nwb/schemas.py
@@ -1,3 +1,4 @@
+import argschema.fields
 import marshmallow as mm
 import numpy as np
 
@@ -172,7 +173,34 @@ class SessionMetadata(RaisingSchema):
     donor_id = Int(required=True)
 
 
-class InputSchema(ArgSchema):
+class BaseNeuropixelsSchema(ArgSchema):
+    """Base schema for writing NWB files for projects with
+    behavior + ecephys. Holds the fields `VCNInputSchema` inherits"""
+    probes = Nested(
+        Probe,
+        many=True,
+        required=True,
+        help="records of the individual probes used for this experiment",
+    )
+    optotagging_table_path = argschema.fields.InputFile(
+        required=False,
+        help="""file at this path contains information about the optogenetic
+                stimulation applied during this experiment"""
+    )
+    running_speed_path = String(  # plain String: no validator attached
+        required=True,
+        help="""data collected about the running behavior of the experiment's
+                subject""",
+    )
+    eye_tracking_rig_geometry = Dict(
+        required=False,
+        help="""Mapping containing information about session rig geometry used
+                for eye gaze mapping."""
+    )
+
+
+class VCNInputSchema(BaseNeuropixelsSchema):
+    """Input schema for visual coding neuropixels project"""
     class Meta:
         unknown = mm.RAISE
 
@@ -202,17 +230,6 @@ class Meta:
         required=True,
         help="epochs with invalid data"
     )
-    probes = Nested(
-        Probe,
-        many=True,
-        required=True,
-        help="records of the individual probes used for this experiment",
-    )
-    running_speed_path = String(
-        required=True,
-        help="""data collected about the running behavior of the experiment's
-                subject""",
-    )
     session_sync_path = String(
         required=True,
         validate=check_read_access,
@@ -224,17 +241,6 @@ class Meta:
         default=3,
         help="number of child processes used to write probewise lfp files"
     )
-    optotagging_table_path = String(
-        required=False,
-        validate=check_read_access,
-        help="""file at this path contains information about the optogenetic
-                stimulation applied during this experiment"""
-    )
-    eye_tracking_rig_geometry = Dict(
-        required=False,
-        help="""Mapping containing information about session rig geometry used
-                for eye gaze mapping."""
-    )
     eye_dlc_ellipses_path = String(
         required=False,
         validate=check_read_access,

diff --git a/allensdk/brain_observatory/ecephys/write_nwb/vbn/__init__.py b/allensdk/brain_observatory/ecephys/write_nwb/vbn/__init__.py
diff --git a/allensdk/brain_observatory/ecephys/write_nwb/vbn/__main__.py b/allensdk/brain_observatory/ecephys/write_nwb/vbn/__main__.py
@@ -0,0 +1,44 @@
+"""Module for writing NWB files for the Visual Behavior Neuropixels project"""
+
+import logging
+import sys
+import argschema
+import marshmallow
+from allensdk.brain_observatory.ecephys.behavior_ecephys_session import \
+    BehaviorEcephysSession
+from allensdk.brain_observatory.nwb.nwb_utils import NWBWriter
+from allensdk.brain_observatory.ecephys.write_nwb.vbn._schemas import \
+    VBNInputSchema, OutputSchema
+
+
+def main():
+    """Parse the VBN writer inputs and write the session to an NWB file."""
+    args = sys.argv[1:]
+    try:
+        parser = argschema.ArgSchemaParser(
+            args=args,
+            schema_type=VBNInputSchema,
+            output_schema_type=OutputSchema,
+        )
+        logging.info('Input successfully parsed')
+    except marshmallow.exceptions.ValidationError:
+        # Log with the traceback, then re-raise the original exception
+        logging.exception('Parsing failure')
+        raise
+
+    nwb_writer = NWBWriter(
+        nwb_filepath=parser.args['output_path'],
+        session_data=parser.args['session_data'],
+        serializer=BehaviorEcephysSession
+    )
+
+    try:
+        nwb_writer.write_nwb(skip_probes=parser.args['skip_probes'])
+        logging.info('File successfully created')
+    except Exception:
+        logging.exception('NWB write failure')
+        raise
+
+
+if __name__ == "__main__":
+    main()