Skip to content

Commit

Permalink
Merge branch 'ticket/ecephys/33a/metadata/writer' into vbn_2022_dev
Browse files Browse the repository at this point in the history
  • Loading branch information
danielsf committed May 9, 2022
2 parents c687f1c + e8be731 commit fe68a15
Show file tree
Hide file tree
Showing 31 changed files with 2,948 additions and 148 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
from allensdk.internal.api import db_connection_creator
from allensdk.brain_observatory.ecephys.ecephys_project_api.http_engine \
import (HttpEngine)
from allensdk.core.typing import SupportsStr
from allensdk.core.authentication import DbCredentials
from allensdk.core.auth_config import (
MTRAIN_DB_CREDENTIAL_MAP, LIMS_DB_CREDENTIAL_MAP)
from allensdk.internal.api.queries.utils import (
build_in_list_selector_query)
from allensdk.internal.api.queries.behavior_lims_queries import (
foraging_id_map_from_behavior_session_id)
from allensdk.internal.api.queries.mtrain_queries import (
session_stage_from_foraging_id)


class BehaviorProjectLimsApi(BehaviorProjectBase):
Expand Down Expand Up @@ -114,35 +119,6 @@ def default(
return cls(lims_engine, mtrain_engine, app_engine,
data_release_date=data_release_date)

@staticmethod
def _build_in_list_selector_query(
        col,
        valid_list: Optional[SupportsStr] = None,
        operator: str = "WHERE") -> str:
    """Build a SQL clause restricting a column to a list of values.

    Returns an empty string when `valid_list` is empty or None.
    NOTE: if string ids are used, then the strings in `valid_list` must
    be enclosed in single quotes, or else the query will throw a column
    does not exist error. E.g. ["'mystringid1'", "'mystringid2'"...]

    :param col: name of column to compare if in a list
    :type col: str
    :param valid_list: iterable of values that can be mapped to str
        (e.g. string, int, float).
    :type valid_list: list
    :param operator: SQL operator to start the clause. Default="WHERE".
        Valid inputs: "AND", "OR", "WHERE" (not case-sensitive).
    :type operator: str
    """
    if not valid_list:
        return ""
    # deduplicate and sort so the generated clause is deterministic
    unique_values = sorted(set(map(str, valid_list)))
    return f"{operator} {col} IN ({','.join(unique_values)})"

def _build_experiment_from_session_query(self) -> str:
"""Aggregate sql sub-query to get all ophys_experiment_ids associated
with a single ophys_session_id."""
Expand Down Expand Up @@ -243,51 +219,6 @@ def _get_behavior_summary_table(self) -> pd.DataFrame:
self.logger.debug(f"get_behavior_session_table query: \n{query}")
return self.lims_engine.select(query)

def _get_foraging_ids_from_behavior_session(
        self, behavior_session_ids: List[int]) -> List[str]:
    """Query LIMS for the foraging_ids associated with a set of
    behavior sessions.

    :param behavior_session_ids: behavior session ids used to restrict
        the query
    :type behavior_session_ids: List[int]
    :returns: foraging_id values returned by LIMS; rows whose
        foraging_id is NULL are excluded by the query itself
    """
    # Extra AND-clause restricting rows to the requested ids; this is
    # an empty string when behavior_session_ids is empty, in which
    # case all non-NULL foraging ids are returned.
    behav_ids = self._build_in_list_selector_query("id",
                                                   behavior_session_ids,
                                                   operator="AND")
    forag_ids_query = f"""
        SELECT foraging_id
        FROM behavior_sessions
        WHERE foraging_id IS NOT NULL
        {behav_ids};
        """
    self.logger.debug("get_foraging_ids_from_behavior_session query: \n"
                      f"{forag_ids_query}")
    foraging_ids = self.lims_engine.fetchall(forag_ids_query)

    self.logger.debug(f"Retrieved {len(foraging_ids)} foraging ids for"
                      f" behavior stage query. Ids = {foraging_ids}")
    return foraging_ids

def _get_behavior_stage_table(
        self,
        behavior_session_ids: Optional[List[int]] = None):
    """Fetch a table mapping foraging_id to session_type (stage name)
    from the mtrain database.

    :param behavior_session_ids: if provided, restrict the mtrain query
        to the foraging ids belonging to these LIMS behavior sessions;
        otherwise the full stage table is fetched
    :returns: result of ``self.mtrain_engine.select`` with columns
        session_type and foraging_id
    """
    # Select fewer rows if possible via behavior_session_id
    if behavior_session_ids:
        foraging_ids = self._get_foraging_ids_from_behavior_session(
            behavior_session_ids)
        # mtrain ids are strings, so each must be single-quoted
        # before being spliced into the IN (...) clause
        foraging_ids = [f"'{fid}'" for fid in foraging_ids]
    # Otherwise just get the full table from mtrain
    else:
        foraging_ids = None

    # WHERE-clause fragment; empty string when foraging_ids is None
    foraging_ids_query = self._build_in_list_selector_query(
        "bs.id", foraging_ids)

    query = f"""
        SELECT
            stages.name as session_type,
            bs.id AS foraging_id
        FROM behavior_sessions bs
        JOIN stages ON stages.id = bs.state_id
        {foraging_ids_query};
        """
    self.logger.debug(f"_get_behavior_stage_table query: \n {query}")
    return self.mtrain_engine.select(query)

def get_behavior_stage_parameters(self,
foraging_ids: List[str]) -> pd.Series:
"""Gets the stage parameters for each foraging id from mtrain
Expand All @@ -302,7 +233,7 @@ def get_behavior_stage_parameters(self,
---------
Series with index of foraging id and values stage parameters
"""
foraging_ids_query = self._build_in_list_selector_query(
foraging_ids_query = build_in_list_selector_query(
"bs.id", foraging_ids)

query = f"""
Expand Down Expand Up @@ -514,8 +445,19 @@ def get_behavior_session_table(self) -> pd.DataFrame:
:rtype: pd.DataFrame
"""
summary_tbl = self._get_behavior_summary_table()
stimulus_names = self._get_behavior_stage_table(
behavior_session_ids=summary_tbl.index.tolist())

foraging_id_map = foraging_id_map_from_behavior_session_id(
lims_engine=self.lims_engine,
behavior_session_ids=summary_tbl.behavior_session_id.tolist(),
logger=self.logger)

foraging_ids = list(foraging_id_map.foraging_id)

stimulus_names = session_stage_from_foraging_id(
mtrain_engine=self.mtrain_engine,
foraging_ids=foraging_ids,
logger=self.logger)

return (summary_tbl.merge(stimulus_names,
on=["foraging_id"], how="left")
.set_index("behavior_session_id"))
Expand Down Expand Up @@ -602,19 +544,19 @@ def _get_behavior_session_release_filter(self):
release_behavior_only_session_ids + \
release_behavior_with_ophys_session_ids

return self._build_in_list_selector_query(
return build_in_list_selector_query(
"bs.id", release_behavior_session_ids)

def _get_ophys_session_release_filter(self):
    """Build the SQL filter clause restricting a query to behavior
    sessions that have a released BehaviorOphysNwb file.

    :returns: a (possibly empty) WHERE-clause fragment on ``bs.id``
        produced by ``build_in_list_selector_query``
    """
    release_files = self.get_release_files(
        file_type='BehaviorOphysNwb')
    # use the shared module-level helper, consistent with the other
    # release-filter builders in this class
    return build_in_list_selector_query(
        "bs.id", release_files['behavior_session_id'].tolist())

def _get_ophys_experiment_release_filter(self):
    """Build the SQL filter clause restricting a query to ophys
    experiments that have a released BehaviorOphysNwb file.

    :returns: a (possibly empty) WHERE-clause fragment on ``oe.id``
        produced by ``build_in_list_selector_query``
    """
    release_files = self.get_release_files(
        file_type='BehaviorOphysNwb')
    # use the shared module-level helper, consistent with the other
    # release-filter builders in this class
    return build_in_list_selector_query(
        "oe.id", release_files.index.tolist())

def get_natural_movie_template(self, number: int) -> Iterable[bytes]:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Optional
import pandas as pd
import re


def get_image_set(df: pd.DataFrame) -> pd.Series:
    """Return the image-set letter for each row's session type.

    The image set here is the letter part of the session type,
    i.e. for session type OPHYS_1_images_B it would be "B".
    Some session types don't have an image set name, such as
    gratings, which will be set to null.

    Parameters
    ----------
    df
        The session df

    Returns
    --------
    Series with index same as df whose values are image_set
    """
    def _image_set_from_session_type(stype: Optional[str]):
        # anchored match with greedy '.*' picks out the image-set
        # letter; no match (e.g. gratings) maps to None
        found = re.match(r'.*images_(?P<image_set>\w)', stype)
        return None if found is None else found.group('image_set')

    non_null_types = df['session_type'][df['session_type'].notnull()]
    return non_null_types.apply(_image_set_from_session_type)
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import re
from typing import Optional

import pandas as pd

from allensdk.brain_observatory.behavior.behavior_project_cache.project_apis.data_io import BehaviorProjectLimsApi # noqa: E501
from allensdk.brain_observatory.behavior.behavior_project_cache \
.tables.util.image_presentation_utils import (
get_image_set)


def get_prior_exposures_to_session_type(df: pd.DataFrame) -> pd.Series:
Expand Down Expand Up @@ -40,16 +40,7 @@ def get_prior_exposures_to_image_set(df: pd.DataFrame) -> pd.Series:
--------
Series with index same as df and values prior exposure counts to image set
"""

def __get_image_set_name(session_type: Optional[str]):
match = re.match(r'.*images_(?P<image_set>\w)', session_type)
if match is None:
return None
return match.group('image_set')

session_type = df['session_type'][
df['session_type'].notnull()]
image_set = session_type.apply(__get_image_set_name)
image_set = get_image_set(df=df)
return __get_prior_exposure_count(df=df, to=image_set)


Expand Down
58 changes: 58 additions & 0 deletions allensdk/brain_observatory/behavior/data_files/stimulus_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from cachetools import cached, LRUCache
from cachetools.keys import hashkey

import datetime
import pandas as pd
import copy

from allensdk.internal.api import PostgresQueryMixin
from allensdk.internal.core.lims_utilities import safe_system_path
Expand Down Expand Up @@ -142,6 +144,62 @@ def num_frames(self) -> int:
self._validate_frame_data()
return len(self.data['items']['behavior']['intervalsms']) + 1

@property
def date_of_acquisition(self) -> datetime.datetime:
    """
    The acquisition timestamp read from self.data['start_time'].

    A deep copy is returned so callers cannot mutate the cached
    pickle data.

    Raises
    ------
    KeyError
        If the pickle data has no 'start_time' entry.
    """
    assert isinstance(self.data, dict)
    if 'start_time' in self.data:
        return copy.deepcopy(self.data['start_time'])
    raise KeyError(
        "No 'start_time' listed in pickle file "
        f"{self.filepath}")

@property
def session_type(self) -> str:
    """
    The session type (behavior stage name) read from the pickle file.

    The value may appear under either
    data['items']['behavior']['params']['stage'] or
    data['items']['behavior']['cl_params']['stage'].

    Raises
    ------
    RuntimeError
        If the stage is absent from both locations, or if both are
        present and disagree.
    """
    behavior = self.data['items']['behavior']

    param_value = None
    if 'stage' in behavior.get('params', {}):
        param_value = behavior['params']['stage']

    cl_value = None
    if 'stage' in behavior.get('cl_params', {}):
        cl_value = behavior['cl_params']['stage']

    if param_value is None and cl_value is None:
        raise RuntimeError("Could not find stage in pickle file "
                           f"{self.filepath}")

    if param_value is not None and cl_value is not None \
            and cl_value != param_value:
        raise RuntimeError(
            "Conflicting session_types in pickle file "
            f"{self.filepath}\n"
            f"cl_params: {cl_value}\n"
            f"params: {param_value}\n")

    # at least one source is set; prefer params when both agree
    return param_value if param_value is not None else cl_value


class ReplayStimulusFile(_StimulusFile):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def get_frame_indices(
each event occured on. Indexes will be chosen to be the
first index satisfying
frame_timestamps[event_indices] <= event_timestamps
frame_timestamps[event_indices] >= event_timestamps
event_timestamps < frame_timestamps[event_indices+1]
Parameters
Expand Down
1 change: 1 addition & 0 deletions allensdk/brain_observatory/data_release_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# empty
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# empty
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pathlib


class FileIDGenerator(object):
    """
    Generate a unique integer ID for each file in the data release.

    IDs are assigned sequentially the first time a path is seen and
    are keyed on the file's resolved absolute path, so repeated
    lookups of the same file return the same ID.
    """

    def __init__(self):
        # maps resolved absolute path (str) -> assigned integer ID
        self._id_lookup = dict()
        # next ID to hand out
        self._next_id = 0
        # sentinel reserved for files missing from the release
        self._dummy_value = -999

    @property
    def dummy_value(self) -> int:
        """
        Value reserved for files that are missing from the
        release
        """
        return self._dummy_value

    def id_from_path(self,
                     file_path: pathlib.Path) -> int:
        """
        Return the unique ID for `file_path`.

        If the path was already assigned an ID, that ID is returned;
        otherwise a fresh one is assigned first.

        :raises ValueError: if `file_path` is not a pathlib.Path, does
            not point at an existing file, or is a symlink
        """
        if not isinstance(file_path, pathlib.Path):
            raise ValueError(
                "file_path must be a pathlib.Path (this is so "
                "we can resolve it into an absolute path). You passed "
                f"in a {type(file_path)}")

        if not file_path.is_file():
            raise ValueError(f"{file_path} is not a file")

        if file_path.is_symlink():
            raise ValueError(
                f"{file_path} is a symlink; must be an actual path")

        key = str(file_path.resolve().absolute())
        if key not in self._id_lookup:
            self._id_lookup[key] = self._next_id
            self._next_id += 1
            # defensively skip the sentinel so it can never be handed
            # out as a real ID
            while self._next_id == self.dummy_value:
                self._next_id += 1

        return self._id_lookup[key]
Loading

0 comments on commit fe68a15

Please sign in to comment.