Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ticket/2453/bad/mouse #2455

Merged
merged 5 commits into from
Jun 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion allensdk/brain_observatory/behavior/data_files/stimulus_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,16 @@ def session_duration(self) -> float:
-------
session duration in seconds
"""
delta = self.data['stop_time'] - self.data['start_time']
start_time = self.data['start_time']
stop_time = self.data['stop_time']

if not isinstance(start_time, datetime.datetime):
start_time = datetime.datetime.fromtimestamp(start_time)
if not isinstance(stop_time, datetime.datetime):
stop_time = datetime.datetime.fromtimestamp(stop_time)

delta = stop_time - start_time

return delta.total_seconds()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ def session_input_from_ecephys_session_id_list(

session_table = _ecephys_summary_table_from_ecephys_session_id_list(
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list)
ecephys_session_id_list=ecephys_session_id_list,
failed_ecephys_session_id_list=None)

# get date_of_acquisition from the pickle file by nulling out the
dates of acquisition from any sessions with behavior_session_ids,
Expand Down
108 changes: 102 additions & 6 deletions allensdk/brain_observatory/vbn_2022/metadata_writer/lims_queries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Tuple, Dict, Any, Optional
import pandas as pd
import numpy as np
import logging

from allensdk.api.queries.donors_queries import get_death_date_for_mouse_ids
Expand All @@ -9,6 +10,9 @@
_sanitize_uuid_list,
build_in_list_selector_query)

from allensdk.internal.api.queries.ecephys_lims_queries import (
donor_id_lookup_from_ecephys_session_ids)

from allensdk.internal.api.queries.behavior_lims_queries import (
foraging_id_map_from_behavior_session_id)

Expand Down Expand Up @@ -401,9 +405,56 @@ def channels_table_from_ecephys_session_id_list(
return channels_table


def _merge_ecephys_id_and_failed(
        lims_connection: PostgresQueryMixin,
        ecephys_session_id_list: List[int],
        failed_ecephys_session_id_list: List[int]) -> List[int]:
    """
    Combine passed and failed ecephys session IDs into one sorted list.

    A failed session is kept only if its donor_id also appears among
    the donors of the passed sessions; failed sessions belonging to
    mice with no passed sessions are dropped.

    Parameters
    ----------
    lims_connection: PostgresQueryMixin

    ecephys_session_id_list: List[int]
        The passed sessions

    failed_ecephys_session_id_list: List[int]
        The failed sessions

    Returns
    -------
    merged_ecephys_session_id_list: List[int]
        Sorted union of the passed sessions and the relevant
        failed sessions
    """
    passed_lookup = donor_id_lookup_from_ecephys_session_ids(
        lims_connection=lims_connection,
        session_id_list=ecephys_session_id_list)

    relevant_donors = set(passed_lookup.donor_id.values)

    failed_lookup = donor_id_lookup_from_ecephys_session_ids(
        lims_connection=lims_connection,
        session_id_list=failed_ecephys_session_id_list)

    # keep only those failed sessions whose mouse also has a
    # passed session in the release
    kept_failed = [
        int(session_id)
        for session_id, donor_id in zip(failed_lookup.ecephys_session_id,
                                        failed_lookup.donor_id)
        if donor_id in relevant_donors]

    return sorted(set(ecephys_session_id_list) | set(kept_failed))


def _ecephys_summary_table_from_ecephys_session_id_list(
lims_connection: PostgresQueryMixin,
ecephys_session_id_list: List[int]) -> pd.DataFrame:
ecephys_session_id_list: List[int],
failed_ecephys_session_id_list: Optional[List[int]]) -> pd.DataFrame:
"""
Perform the database query that will return the session summary table.

Expand All @@ -415,6 +466,14 @@ def _ecephys_summary_table_from_ecephys_session_id_list(
The list of ecephys_sessions.id values of the
ecephys sessions for which to construct the units table

failed_ecephys_session_id_list: Optional[List[int]]
A list of ecephys_sessions that are ultimately failed
and should not be included in the release. The purpose
of this list is so that these sessions can be accounted
for in the various dataframe manipulations that calculate
a mouse's history passing through the apparatus (i.e. if
the day 1 session is failed but the day 2 session is not)

Returns
-------
summary_table: pd.DataFrame
Expand All @@ -433,7 +492,26 @@ def _ecephys_summary_table_from_ecephys_session_id_list(
date_of_birth -- pd.Timestamp
equipment_id -- int


Note
-----
The returned dataframe will contain data for all sessions (passed and
failed) involving the mice from the passed sessions. This is so that
we can reconstruct each mouse's history passing through the apparatus,
even if an early session in that mouse's history is marked as "failed."

Subsequent processing steps will need to trim out sessions marked as
"failed" from the dataframe.
"""

if failed_ecephys_session_id_list is not None:
query_id_list = _merge_ecephys_id_and_failed(
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list,
failed_ecephys_session_id_list=failed_ecephys_session_id_list)
else:
query_id_list = ecephys_session_id_list

query = """
SELECT
ecephys_sessions.id AS ecephys_session_id
Expand Down Expand Up @@ -467,7 +545,7 @@ def _ecephys_summary_table_from_ecephys_session_id_list(

query += build_in_list_selector_query(
col='ecephys_sessions.id',
valid_list=ecephys_session_id_list,
valid_list=query_id_list,
operator='WHERE',
valid=True)

Expand Down Expand Up @@ -648,8 +726,10 @@ def _filter_on_death_date(
how='left')

behavior_session_df = behavior_session_df[
behavior_session_df['date_of_acquisition'] <=
behavior_session_df['death_on']
np.logical_or(
behavior_session_df['date_of_acquisition'] <=
behavior_session_df['death_on'],
behavior_session_df['death_on'].isna())
]

behavior_session_df.drop(
Expand Down Expand Up @@ -813,6 +893,7 @@ def session_tables_from_ecephys_session_id_list(
lims_connection: PostgresQueryMixin,
mtrain_connection: PostgresQueryMixin,
ecephys_session_id_list: List[int],
failed_ecephys_session_id_list: Optional[List[int]],
probe_ids_to_skip: Optional[List[int]],
logger: Optional[logging.Logger] = None
) -> Tuple[pd.DataFrame, pd.DataFrame]:
Expand All @@ -829,6 +910,14 @@ def session_tables_from_ecephys_session_id_list(
The list of ecephys_sessions.id values of the
ecephys sessions for which to construct the units table

failed_ecephys_session_id_list: Optional[List[int]]
A list of ecephys_sessions that are ultimately failed
and should not be included in the release. The purpose
of this list is so that these sessions can be accounted
for in the various dataframe manipulations that calculate
a mouse's history passing through the apparatus (i.e. if
the day 1 session is failed but the day 2 session is not)

probe_ids_to_skip: Optional[List[int]]
The IDs of probes not being released

Expand Down Expand Up @@ -891,8 +980,9 @@ def session_tables_from_ecephys_session_id_list(
logger=logger)

summary_tbl = _ecephys_summary_table_from_ecephys_session_id_list(
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list)
lims_connection=lims_connection,
ecephys_session_id_list=ecephys_session_id_list,
failed_ecephys_session_id_list=failed_ecephys_session_id_list)

# patch date_of_acquisition and session_type from beh_table,
# which read them directly from the pickle file
Expand Down Expand Up @@ -967,4 +1057,10 @@ def session_tables_from_ecephys_session_id_list(
'session_type',
'image_set']]

# pare back down to only passed sessions
if failed_ecephys_session_id_list is not None:
sessions_table = sessions_table[
[eid in set(ecephys_session_id_list)
for eid in sessions_table.ecephys_session_id]]

return sessions_table, beh_table
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,15 @@ def run(self):
df=channels_table,
output_path=self.args['channels_path'])

failed_session_list = self.args[
'failed_ecephys_session_id_list']

(ecephys_session_table,
behavior_session_table) = session_tables_from_ecephys_session_id_list(
lims_connection=lims_connection,
mtrain_connection=mtrain_connection,
ecephys_session_id_list=session_id_list,
failed_ecephys_session_id_list=failed_session_list,
probe_ids_to_skip=probe_ids_to_skip,
logger=self.logger)

Expand Down
11 changes: 11 additions & 0 deletions allensdk/brain_observatory/vbn_2022/metadata_writer/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ class VBN2022MetadataWriterInputSchema(argschema.ArgSchema):
description=("List of ecephys_sessions.id values "
"of sessions to be released"))

failed_ecephys_session_id_list = argschema.fields.List(
argschema.fields.Int,
required=False,
default=None,
allow_none=True,
description=("List of ecephys_sessions.id values "
"associated with this release that were "
"failed. These are required to "
"self-consistently construct the history of "
"each mouse passing through the apparatus."))

probes_to_skip = argschema.fields.List(
argschema.fields.Nested(ProbeToSkip),
required=False,
Expand Down
32 changes: 26 additions & 6 deletions allensdk/internal/api/queries/ecephys_lims_queries.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
from typing import List
import pandas as pd
import numpy as np
from allensdk.internal.api import PostgresQueryMixin
from allensdk.internal.api.queries.utils import build_in_list_selector_query


def donor_id_list_from_ecephys_session_ids(
def donor_id_lookup_from_ecephys_session_ids(
lims_connection: PostgresQueryMixin,
session_id_list: List[int]) -> List[int]:
session_id_list: List[int]) -> pd.DataFrame:
"""
Get the list of donor IDs associated with a list
of ecephys_session_ids
Return a dataframe with columns
ecephys_session_id
donor_id
from a specified list of ecephys_session_ids
"""
query = f"""
SELECT DISTINCT(donors.id) as donor_id
SELECT
donors.id as donor_id
,ecephys_sessions.id as ecephys_session_id
FROM donors
JOIN specimens ON
specimens.donor_id = donors.id
Expand All @@ -23,4 +29,18 @@ def donor_id_list_from_ecephys_session_ids(
)}
"""
result = lims_connection.select(query)
return list(result.donor_id)
return result


def donor_id_list_from_ecephys_session_ids(
        lims_connection: PostgresQueryMixin,
        session_id_list: List[int]) -> List[int]:
    """
    Return the unique donor IDs associated with a list of
    ecephys_session_ids (sorted ascending, as np.unique sorts
    its output).
    """
    lookup = donor_id_lookup_from_ecephys_session_ids(
        lims_connection=lims_connection,
        session_id_list=session_id_list)

    unique_donors = np.unique(lookup.donor_id)
    return list(unique_donors)
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,19 @@ def smoketest_config_fixture():
return config


@pytest.fixture
def smoketest_with_failed_sessions_config_fixture():
    """
    Config parameters for the on-prem metadata writer smoketest
    that exercises failed_ecephys_session_id_list
    """
    return {
        "ecephys_session_id_list": [1051155866],
        "failed_ecephys_session_id_list": [1050962145],
        "probes_to_skip": [{"session": 1115077618, "probe": "probeC"}]
    }


@pytest.fixture
def patching_pickle_file_fixture(
helper_functions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,53 @@ def test_metadata_writer_smoketest(

helper_functions.windows_safe_cleanup_dir(
dir_path=output_dir)


@pytest.mark.requires_bamboo
def test_with_failed_sessions(
        smoketest_with_failed_sessions_config_fixture,
        tmp_path_factory,
        helper_functions):
    """
    Test that metadata writer CLI can handle
    failed_ecephys_session_id_list
    """
    output_dir = pathlib.Path(tmp_path_factory.mktemp('failed_session_test'))
    output_json_path = output_dir / 'output.json'

    config = copy.deepcopy(smoketest_with_failed_sessions_config_fixture)
    out_dir_str = str(output_dir.resolve().absolute())
    config['clobber'] = False
    config['output_dir'] = out_dir_str
    config['on_missing_file'] = 'warn'
    config['ecephys_nwb_prefix'] = 'not_here'
    config['ecephys_nwb_dir'] = out_dir_str
    config['output_json'] = str(output_json_path.resolve().absolute())

    writer = VBN2022MetadataWriterClass(args=[], input_data=config)
    writer.run()

    expected_files = ('behavior_sessions.csv',
                      'ecephys_sessions.csv',
                      'channels.csv',
                      'units.csv',
                      'probes.csv')
    for fname in expected_files:
        assert (output_dir / fname).is_file()

    ecephys_sessions_df = pd.read_csv(output_dir / 'ecephys_sessions.csv')
    session_ids = ecephys_sessions_df.ecephys_session_id.values

    assert len(ecephys_sessions_df) == 1

    # failed sessions must have been pared out of the released table
    for bad_session_id in config['failed_ecephys_session_id_list']:
        assert bad_session_id not in session_ids

    # every passed session must still be present
    for good_session_id in config['ecephys_session_id_list']:
        assert good_session_id in session_ids

    # make sure this session was recorded with session_number = 2
    assert ecephys_sessions_df.session_number.values[0] == 2

    helper_functions.windows_safe_cleanup_dir(
        dir_path=output_dir)
Loading