From cf3070b241b0a6aebe4e928babad80ccd54b9ba9 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 15 Feb 2024 13:14:40 -0600 Subject: [PATCH 1/4] fix(reader): bugfix csv reader dropping column when the csv file is empty --- aeon/io/reader.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/aeon/io/reader.py b/aeon/io/reader.py index 23ccdb6b..34f258cd 100644 --- a/aeon/io/reader.py +++ b/aeon/io/reader.py @@ -126,7 +126,13 @@ def __init__(self, pattern, columns, dtype=None, extension="csv"): def read(self, file): """Reads data from the specified CSV text file.""" - return pd.read_csv(file, header=0, names=self.columns, dtype=self.dtype, index_col=0) + return pd.read_csv( + file, + header=0, + names=self.columns, + dtype=self.dtype, + index_col=0 if file.stat().st_size else None, + ) class Subject(Csv): From 8da259ceb7490e67327ce82e76376096a67a951f Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 15 Feb 2024 13:22:32 -0600 Subject: [PATCH 2/4] feat(block_analysis): add BlockSubject analysis based on Jai's initial work --- aeon/dj_pipeline/analysis/block_analysis.py | 142 +++++++++++++++++++- 1 file changed, 137 insertions(+), 5 deletions(-) diff --git a/aeon/dj_pipeline/analysis/block_analysis.py b/aeon/dj_pipeline/analysis/block_analysis.py index cc27766e..fd72f8fd 100644 --- a/aeon/dj_pipeline/analysis/block_analysis.py +++ b/aeon/dj_pipeline/analysis/block_analysis.py @@ -5,6 +5,7 @@ import pandas as pd import plotly.express as px import plotly.graph_objs as go +from matplotlib import path as mpl_path from aeon.analysis import utils as analysis_utils from aeon.dj_pipeline import acquisition, fetch_stream, get_schema_name, streams, tracking @@ -226,14 +227,145 @@ class Patch(dj.Part): in_patch_time: float # total seconds spent in this patch for this block pellet_count: int pellet_timestamps: longblob - wheel_distance_travelled: longblob # wheel's cumulative distance travelled - wheel_timestamps: longblob - cumulative_sum_preference: longblob - windowed_sum_preference: longblob + wheel_cumsum_distance_travelled: longblob # wheel's cumulative distance travelled + """ + + class Preference(dj.Part): + definition = """ # Measure of preference for a particular patch from a particular subject + -> master + -> BlockAnalysis.Patch + -> BlockAnalysis.Subject + --- + cumulative_preference_by_wheel: longblob + windowed_preference_by_wheel: longblob + cumulative_preference_by_pellet: longblob + windowed_preference_by_pellet: longblob + cumulative_preference_by_time: longblob + windowed_preference_by_time: longblob + preference_score: float # one representative preference score for the entire block """ def make(self, key): - pass + block_patches = (BlockAnalysis.Patch & key).fetch(as_dict=True) + block_subjects = (BlockAnalysis.Subject & key).fetch(as_dict=True) + subject_names = [s["subject_name"] for s in block_subjects] + # Construct subject position dataframe + subjects_positions_df = pd.concat( + [ + pd.DataFrame( + {"subject_name": [s["subject_name"]] * len(s["position_timestamps"])} + | { + k: s[k] + for k in ( + "position_timestamps", + "position_x", + "position_y", + "position_likelihood", + ) + } + ) + for s in block_subjects + ] + ) + subjects_positions_df.set_index("position_timestamps", inplace=True) + + self.insert1(key) + for i, patch in enumerate(block_patches): + cum_wheel_dist = pd.Series( + index=patch["wheel_timestamps"], data=patch["wheel_cumsum_distance_travelled"] + ) + # Get distance-to-patch at each pose data timestep + patch_region = ( + acquisition.EpochActiveRegion.Region + & key + & {"region_name": f"{patch['patch_name']}Region"} + & f'epoch_start < "{key["block_start"]}"' + ).fetch("region_data", order_by="epoch_start DESC", limit=1)[0] + patch_xy = list(zip(*[(int(p["X"]), int(p["Y"])) for p in patch_region["ArrayOfPoint"]])) + patch_center = np.mean(patch_xy[0]).astype(np.uint32), np.mean(patch_xy[1]).astype(np.uint32) + subjects_xy = subjects_positions_df[["position_x", "position_y"]].values + dist_to_patch = np.sqrt(np.sum((subjects_xy - patch_center) ** 2, axis=1).astype(float)) + dist_to_patch_df = subjects_positions_df[["subject_name"]].copy() + dist_to_patch_df["dist_to_patch"] = dist_to_patch + # Assign pellets and wheel timestamps to subjects + if len(block_subjects) == 1: + cum_wheel_dist_dm = cum_wheel_dist.to_frame(name=subject_names[0]) + patch_df_for_pellets_df = pd.DataFrame( + index=patch["pellet_timestamps"], data={"subject_name": subject_names[0]} + ) + else: + # Assign id based on which subject was closest to patch at time of event + # Get distance-to-patch at each wheel ts and pel del ts, organized by subject + dist_to_patch_wheel_ts_id_df = pd.DataFrame( + index=cum_wheel_dist.index, columns=subject_names + ) + dist_to_patch_pel_ts_id_df = pd.DataFrame( + index=patch["pellet_timestamps"], columns=subject_names + ) + for subject_name in subject_names: + # Find closest match between pose_df indices and wheel indices + dist_to_patch_wheel_ts_subj = pd.merge_asof( + left=dist_to_patch_wheel_ts_id_df[subject_name], + right=dist_to_patch_df[dist_to_patch_df["subject_name"] == subject_name], + left_index=True, + right_index=True, + direction="forward", + tolerance=pd.Timedelta("100ms"), + ) + dist_to_patch_wheel_ts_id_df[subject_name] = dist_to_patch_wheel_ts_subj[ + "dist_to_patch" + ] + # Find closest match between pose_df indices and pel indices + dist_to_patch_pel_ts_subj = pd.merge_asof( + left=dist_to_patch_pel_ts_id_df[subject_name], + right=dist_to_patch_df[dist_to_patch_df["subject_name"] == subject_name], + left_index=True, + right_index=True, + direction="forward", + tolerance=pd.Timedelta("200ms"), + ) + dist_to_patch_pel_ts_id_df[subject_name] = dist_to_patch_pel_ts_subj["dist_to_patch"] + # Get closest subject to patch at each pel del timestep + patch_df_for_pellets_df = pd.DataFrame( + index=patch["pellet_timestamps"], + data={"subject_name": dist_to_patch_pel_ts_id_df.idxmin(axis=1).values}, + ) + + # Get closest subject to patch at each wheel timestep + cum_wheel_dist_subj_df = pd.DataFrame( + index=cum_wheel_dist.index, columns=subject_names, data=0.0 + ) + closest_subjects = dist_to_patch_wheel_ts_id_df.idxmin(axis=1) + wheel_dist = cum_wheel_dist.diff().fillna(cum_wheel_dist.iloc[0]) + # Assign wheel dist to closest subject for each wheel timestep + for subject_name in subject_names: + subj_idxs = cum_wheel_dist_subj_df[closest_subjects == subject_name].index + cum_wheel_dist_subj_df.loc[subj_idxs, subject_name] = wheel_dist[subj_idxs] + cum_wheel_dist_dm = cum_wheel_dist_subj_df.cumsum(axis=0) + + # In Patch Time + patch_bbox = mpl_path.Path(list(zip(*patch_xy))) + in_patch = subjects_positions_df.apply( + lambda row: patch_bbox.contains_point((row["position_x"], row["position_y"])), axis=1 + ) + # Insert data + for subject_name in subject_names: + pellets = patch_df_for_pellets_df[patch_df_for_pellets_df["subject_name"] == subject_name] + subject_in_patch = subjects_positions_df[ + in_patch & (subjects_positions_df["subject_name"] == subject_name) + ] + self.Patch.insert1( + key + | dict( + patch_name=patch["patch_name"], + subject_name=subject_name, + in_patch_timestamps=subject_in_patch.index.values, + in_patch_time=len(subject_in_patch), + pellet_count=len(pellets), + pellet_timestamps=pellets.index.values, + wheel_cumsum_distance_travelled=cum_wheel_dist_dm[subject_name].values, + ) + ) @schema From 080819a1118217ee88dc5a99d28b133f668835d1 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 15 Feb 2024 15:27:38 -0600 Subject: [PATCH 3/4] feat(metadata): explicitly extract and store frame rate for video source --- aeon/dj_pipeline/utils/load_metadata.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/aeon/dj_pipeline/utils/load_metadata.py b/aeon/dj_pipeline/utils/load_metadata.py index b2334516..18f0f690 100644 --- a/aeon/dj_pipeline/utils/load_metadata.py +++ b/aeon/dj_pipeline/utils/load_metadata.py @@ -186,6 +186,9 @@ def ingest_epoch_metadata(experiment_name, devices_schema, metadata_yml_filepath device_type_mapper, _ = get_device_mapper(devices_schema, metadata_yml_filepath) + # Retrieve video controller + video_controller = epoch_config["metadata"].pop("VideoController", {}) + # Insert into each device table epoch_device_types = [] device_list = [] @@ -221,6 +224,14 @@ def ingest_epoch_metadata(experiment_name, devices_schema, metadata_yml_filepath } for attribute_name, attribute_value in device_config.items() ] + if "TriggerFrequency" in device_config: + table_attribute_entry.append( + { + **table_entry, + "attribute_name": "SamplingFrequency", + "attribute_value": video_controller[device_config["TriggerFrequency"]], + } + ) """Check if this device is currently installed. If the same device serial number is currently installed check for any changes in configuration. If not, skip this""" current_device_query = table - table.RemovalTime & experiment_key & device_key From 9aaae2bd317f9361a8b906bc305a9e0843995bfd Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 15 Feb 2024 15:41:12 -0600 Subject: [PATCH 4/4] fix(block_analysis): minor bugfix --- aeon/dj_pipeline/analysis/block_analysis.py | 56 +++++++++++++-------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/aeon/dj_pipeline/analysis/block_analysis.py b/aeon/dj_pipeline/analysis/block_analysis.py index fd72f8fd..08cab6e4 100644 --- a/aeon/dj_pipeline/analysis/block_analysis.py +++ b/aeon/dj_pipeline/analysis/block_analysis.py @@ -268,6 +268,16 @@ def make(self, key): ] ) subjects_positions_df.set_index("position_timestamps", inplace=True) + # Get frame rate of CameraTop + camera_fps = int( + ( + streams.SpinnakerVideoSource * streams.SpinnakerVideoSource.Attribute + & key + & 'attribute_name = "SamplingFrequency"' + & 'spinnaker_video_source_name = "CameraTop"' + & f'spinnaker_video_source_install_time < "{key["block_start"]}"' + ).fetch("attribute_value", order_by="spinnaker_video_source_install_time DESC", limit=1)[0] + ) self.insert1(key) for i, patch in enumerate(block_patches): @@ -304,27 +314,31 @@ def make(self, key): ) for subject_name in subject_names: # Find closest match between pose_df indices and wheel indices - dist_to_patch_wheel_ts_subj = pd.merge_asof( - left=dist_to_patch_wheel_ts_id_df[subject_name], - right=dist_to_patch_df[dist_to_patch_df["subject_name"] == subject_name], - left_index=True, - right_index=True, - direction="forward", - tolerance=pd.Timedelta("100ms"), - ) - dist_to_patch_wheel_ts_id_df[subject_name] = dist_to_patch_wheel_ts_subj[ - "dist_to_patch" - ] + if not dist_to_patch_wheel_ts_id_df.empty: + dist_to_patch_wheel_ts_subj = pd.merge_asof( + left=dist_to_patch_wheel_ts_id_df[subject_name], + right=dist_to_patch_df[dist_to_patch_df["subject_name"] == subject_name], + left_index=True, + right_index=True, + direction="forward", + tolerance=pd.Timedelta("100ms"), + ) + dist_to_patch_wheel_ts_id_df[subject_name] = dist_to_patch_wheel_ts_subj[ + "dist_to_patch" + ] # Find closest match between pose_df indices and pel indices - dist_to_patch_pel_ts_subj = pd.merge_asof( - left=dist_to_patch_pel_ts_id_df[subject_name], - right=dist_to_patch_df[dist_to_patch_df["subject_name"] == subject_name], - left_index=True, - right_index=True, - direction="forward", - tolerance=pd.Timedelta("200ms"), - ) - dist_to_patch_pel_ts_id_df[subject_name] = dist_to_patch_pel_ts_subj["dist_to_patch"] + if not dist_to_patch_pel_ts_id_df.empty: + dist_to_patch_pel_ts_subj = pd.merge_asof( + left=dist_to_patch_pel_ts_id_df[subject_name], + right=dist_to_patch_df[dist_to_patch_df["subject_name"] == subject_name], + left_index=True, + right_index=True, + direction="forward", + tolerance=pd.Timedelta("200ms"), + ) + dist_to_patch_pel_ts_id_df[subject_name] = dist_to_patch_pel_ts_subj[ + "dist_to_patch" + ] # Get closest subject to patch at each pel del timestep patch_df_for_pellets_df = pd.DataFrame( index=patch["pellet_timestamps"], @@ -360,7 +374,7 @@ def make(self, key): patch_name=patch["patch_name"], subject_name=subject_name, in_patch_timestamps=subject_in_patch.index.values, - in_patch_time=len(subject_in_patch), + in_patch_time=len(subject_in_patch) / camera_fps, pellet_count=len(pellets), pellet_timestamps=pellets.index.values, wheel_cumsum_distance_travelled=cum_wheel_dist_dm[subject_name].values,