Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ticket/2453/bad/mouse #2455

Merged
merged 5 commits into from
Jun 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion allensdk/brain_observatory/behavior/data_files/stimulus_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,16 @@ def session_duration(self) -> float:
-------
session duration in seconds
"""
delta = self.data['stop_time'] - self.data['start_time']
start_time = self.data['start_time']
stop_time = self.data['stop_time']

if not isinstance(start_time, datetime.datetime):
start_time = datetime.datetime.fromtimestamp(start_time)
if not isinstance(stop_time, datetime.datetime):
stop_time = datetime.datetime.fromtimestamp(stop_time)

delta = stop_time - start_time

return delta.total_seconds()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ def session_input_from_ecephys_session_id_list(

session_table = _ecephys_summary_table_from_ecephys_session_id_list(
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list)
ecephys_session_id_list=ecephys_session_id_list,
failed_ecephys_session_id_list=None)

# get date_of_acquisition from the pickle file by nulling out the
dates of acquisition from any sessions with behavior_session_ids,
Expand Down
108 changes: 102 additions & 6 deletions allensdk/brain_observatory/vbn_2022/metadata_writer/lims_queries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Tuple, Dict, Any, Optional
import pandas as pd
import numpy as np
import logging

from allensdk.api.queries.donors_queries import get_death_date_for_mouse_ids
Expand All @@ -9,6 +10,9 @@
_sanitize_uuid_list,
build_in_list_selector_query)

from allensdk.internal.api.queries.ecephys_lims_queries import (
donor_id_lookup_from_ecephys_session_ids)

from allensdk.internal.api.queries.behavior_lims_queries import (
foraging_id_map_from_behavior_session_id)

Expand Down Expand Up @@ -401,9 +405,56 @@ def channels_table_from_ecephys_session_id_list(
return channels_table


def _merge_ecephys_id_and_failed(
        lims_connection: PostgresQueryMixin,
        ecephys_session_id_list: List[int],
        failed_ecephys_session_id_list: List[int]) -> List[int]:
    """
    Combine passed and failed ecephys session IDs into one sorted list.

    A failed session is kept only if its donor_id also appears among
    the donors of the passed sessions; failed sessions belonging to
    mice with no passed sessions are dropped.

    Parameters
    ----------
    lims_connection: PostgresQueryMixin

    ecephys_session_id_list: List[int]
        The passed sessions

    failed_ecephys_session_id_list: List[int]
        The failed sessions

    Returns
    -------
    merged_ecephys_session_id_list: List[int]
        Sorted union of the passed sessions and the relevant
        failed sessions
    """
    passed_lookup = donor_id_lookup_from_ecephys_session_ids(
        lims_connection=lims_connection,
        session_id_list=ecephys_session_id_list)

    relevant_donors = set(passed_lookup.donor_id.values)

    failed_lookup = donor_id_lookup_from_ecephys_session_ids(
        lims_connection=lims_connection,
        session_id_list=failed_ecephys_session_id_list)

    # keep only those failed sessions whose mouse also has a
    # passed session in the release
    kept_failed = [
        int(session_id)
        for session_id, donor_id in zip(failed_lookup.ecephys_session_id,
                                        failed_lookup.donor_id)
        if donor_id in relevant_donors]

    return sorted(set(ecephys_session_id_list) | set(kept_failed))


def _ecephys_summary_table_from_ecephys_session_id_list(
lims_connection: PostgresQueryMixin,
ecephys_session_id_list: List[int]) -> pd.DataFrame:
ecephys_session_id_list: List[int],
failed_ecephys_session_id_list: Optional[List[int]]) -> pd.DataFrame:
"""
Perform the database query that will return the session summary table.

Expand All @@ -415,6 +466,14 @@ def _ecephys_summary_table_from_ecephys_session_id_list(
The list of ecephys_sessions.id values of the
ecephys sessions for which to construct the units table

failed_ecephys_session_id_list: Optional[List[int]]
A list of ecephys_sessions that are ultimately failed
and should not be included in the release. The purpose
of this list is so that these sessions can be accounted
for in the various dataframe manipulations that calculate
a mouse's history passing through the apparatus (i.e. if
the day 1 session is failed but the day 2 session is not)

Returns
-------
summary_table: pd.DataFrame
Expand All @@ -433,7 +492,26 @@ def _ecephys_summary_table_from_ecephys_session_id_list(
date_of_birth -- pd.Timestamp
equipment_id -- int


Note
-----
The returned dataframe will contain data for all sessions (passed and
failed) involving the mice from the passed sessions. This is so that
we can reconstruct each mouse's history passing through the apparatus,
even if an early session in that mouse's history is marked as "failed."

Subsequent processing steps will need to trim out sessions marked as
"failed" from the dataframe.
"""

if failed_ecephys_session_id_list is not None:
query_id_list = _merge_ecephys_id_and_failed(
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list,
failed_ecephys_session_id_list=failed_ecephys_session_id_list)
else:
query_id_list = ecephys_session_id_list

query = """
SELECT
ecephys_sessions.id AS ecephys_session_id
Expand Down Expand Up @@ -467,7 +545,7 @@ def _ecephys_summary_table_from_ecephys_session_id_list(

query += build_in_list_selector_query(
col='ecephys_sessions.id',
valid_list=ecephys_session_id_list,
valid_list=query_id_list,
operator='WHERE',
valid=True)

Expand Down Expand Up @@ -648,8 +726,10 @@ def _filter_on_death_date(
how='left')

behavior_session_df = behavior_session_df[
behavior_session_df['date_of_acquisition'] <=
behavior_session_df['death_on']
np.logical_or(
behavior_session_df['date_of_acquisition'] <=
behavior_session_df['death_on'],
behavior_session_df['death_on'].isna())
]

behavior_session_df.drop(
Expand Down Expand Up @@ -813,6 +893,7 @@ def session_tables_from_ecephys_session_id_list(
lims_connection: PostgresQueryMixin,
mtrain_connection: PostgresQueryMixin,
ecephys_session_id_list: List[int],
failed_ecephys_session_id_list: Optional[List[int]],
probe_ids_to_skip: Optional[List[int]],
logger: Optional[logging.Logger] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
Expand All @@ -829,6 +910,14 @@ def session_tables_from_ecephys_session_id_list(
The list of ecephys_sessions.id values of the
ecephys sessions for which to construct the units table

failed_ecephys_session_id_list: Optional[List[int]]
A list of ecephys_sessions that are ultimately failed
and should not be included in the release. The purpose
of this list is so that these sessions can be accounted
for in the various dataframe manipulations that calculate
a mouse's history passing through the apparatus (i.e. if
the day 1 session is failed but the day 2 session is not)

probe_ids_to_skip: Optional[List[int]]
The IDs of probes not being released

Expand Down Expand Up @@ -891,8 +980,9 @@ def session_tables_from_ecephys_session_id_list(
logger=logger)

summary_tbl = _ecephys_summary_table_from_ecephys_session_id_list(
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list)
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list,
failed_ecephys_session_id_list=failed_ecephys_session_id_list)

# patch date_of_acquisition and session_type from beh_table,
# which read them directly from the pickle file
Expand Down Expand Up @@ -967,4 +1057,10 @@ def session_tables_from_ecephys_session_id_list(
'session_type',
'image_set']]

# pare back down to only passed sessions
if failed_ecephys_session_id_list is not None:
sessions_table = sessions_table[
[eid in set(ecephys_session_id_list)
for eid in sessions_table.ecephys_session_id]]

return sessions_table, beh_table
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,15 @@ def run(self):
df=channels_table,
output_path=self.args['channels_path'])

failed_session_list = self.args[
'failed_ecephys_session_id_list']

(ecephys_session_table,
behavior_session_table) = session_tables_from_ecephys_session_id_list(
lims_connection=lims_connection,
mtrain_connection=mtrain_connection,
ecephys_session_id_list=session_id_list,
failed_ecephys_session_id_list=failed_session_list,
probe_ids_to_skip=probe_ids_to_skip,
logger=self.logger)

Expand Down
11 changes: 11 additions & 0 deletions allensdk/brain_observatory/vbn_2022/metadata_writer/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ class VBN2022MetadataWriterInputSchema(argschema.ArgSchema):
description=("List of ecephys_sessions.id values "
"of sessions to be released"))

failed_ecephys_session_id_list = argschema.fields.List(
argschema.fields.Int,
required=False,
default=None,
allow_none=True,
description=("List of ecephys_sessions.id values "
"associated with this release that were "
"failed. These are required to "
"self-consistently construct the history of "
"each mouse passing through the apparatus."))

probes_to_skip = argschema.fields.List(
argschema.fields.Nested(ProbeToSkip),
required=False,
Expand Down
32 changes: 26 additions & 6 deletions allensdk/internal/api/queries/ecephys_lims_queries.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
from typing import List
import pandas as pd
import numpy as np
from allensdk.internal.api import PostgresQueryMixin
from allensdk.internal.api.queries.utils import build_in_list_selector_query


def donor_id_list_from_ecephys_session_ids(
def donor_id_lookup_from_ecephys_session_ids(
lims_connection: PostgresQueryMixin,
session_id_list: List[int]) -> List[int]:
session_id_list: List[int]) -> pd.DataFrame:
"""
Get the list of donor IDs associated with a list
of ecephys_session_ids
Return a dataframe with columns
ecephys_session_id
donor_id
from a specified list of ecephys_session_ids
"""
query = f"""
SELECT DISTINCT(donors.id) as donor_id
SELECT
donors.id as donor_id
,ecephys_sessions.id as ecephys_session_id
FROM donors
JOIN specimens ON
specimens.donor_id = donors.id
Expand All @@ -23,4 +29,18 @@ def donor_id_list_from_ecephys_session_ids(
)}
"""
result = lims_connection.select(query)
return list(result.donor_id)
return result


def donor_id_list_from_ecephys_session_ids(
        lims_connection: PostgresQueryMixin,
        session_id_list: List[int]) -> List[int]:
    """
    Return the unique donor IDs associated with a list of
    ecephys_session_ids (sorted ascending, as np.unique sorts
    its output).
    """
    lookup = donor_id_lookup_from_ecephys_session_ids(
        lims_connection=lims_connection,
        session_id_list=session_id_list)

    unique_donors = np.unique(lookup.donor_id)
    return list(unique_donors)
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,19 @@ def smoketest_config_fixture():
return config


@pytest.fixture
def smoketest_with_failed_sessions_config_fixture():
    """
    Config parameters for the on-prem metadata writer smoketest
    that exercises failed_ecephys_session_id_list
    """
    return {
        "ecephys_session_id_list": [1051155866],
        "failed_ecephys_session_id_list": [1050962145],
        "probes_to_skip": [{"session": 1115077618, "probe": "probeC"}]
    }


@pytest.fixture
def patching_pickle_file_fixture(
helper_functions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,53 @@ def test_metadata_writer_smoketest(

helper_functions.windows_safe_cleanup_dir(
dir_path=output_dir)


@pytest.mark.requires_bamboo
def test_with_failed_sessions(
        smoketest_with_failed_sessions_config_fixture,
        tmp_path_factory,
        helper_functions):
    """
    Test that metadata writer CLI can handle
    failed_ecephys_session_id_list
    """
    output_dir = pathlib.Path(tmp_path_factory.mktemp('failed_session_test'))
    output_json_path = output_dir / 'output.json'

    config = copy.deepcopy(smoketest_with_failed_sessions_config_fixture)
    out_dir_str = str(output_dir.resolve().absolute())
    config['clobber'] = False
    config['output_dir'] = out_dir_str
    config['on_missing_file'] = 'warn'
    config['ecephys_nwb_prefix'] = 'not_here'
    config['ecephys_nwb_dir'] = out_dir_str
    config['output_json'] = str(output_json_path.resolve().absolute())

    writer = VBN2022MetadataWriterClass(args=[], input_data=config)
    writer.run()

    expected_files = ('behavior_sessions.csv',
                      'ecephys_sessions.csv',
                      'channels.csv',
                      'units.csv',
                      'probes.csv')
    for fname in expected_files:
        assert (output_dir / fname).is_file()

    ecephys_sessions_df = pd.read_csv(output_dir / 'ecephys_sessions.csv')
    session_ids = ecephys_sessions_df.ecephys_session_id.values

    assert len(ecephys_sessions_df) == 1

    # failed sessions must have been pared out of the released table
    for bad_session_id in config['failed_ecephys_session_id_list']:
        assert bad_session_id not in session_ids

    # every passed session must still be present
    for good_session_id in config['ecephys_session_id_list']:
        assert good_session_id in session_ids

    # make sure this session was recorded with session_number = 2
    assert ecephys_sessions_df.session_number.values[0] == 2

    helper_functions.windows_safe_cleanup_dir(
        dir_path=output_dir)
Loading