From 05d5f02a8d078b1244b421c99ee1c498e725aff2 Mon Sep 17 00:00:00 2001
From: Kat Schelonka <kschelon@gmail.com>
Date: Fri, 6 Mar 2020 13:39:54 -0800
Subject: [PATCH] GH-1404: Handle monitor delay errors

In order to compute monitor delay, a sync square monitored
by a photodiode changes state every 60 frames ('sentinel
event'). Each transition from a rise/fall to a fall/rise
('photodiode event') represents a change in the state of
the sync square.
Previously only consecutive falling edges of the photodiode
were used to determine whether the photodiode events triggered
by sentinel frames had started. Only a falling edge was allowed
to be the final event in a photodiode stream, even though it
is possible that the last valid event could be a rising edge.
This led sometimes to missing valid events, and resulted
in an error (because the number of sentinel frames should match
the number of photodiode events).
The algorithm has been updated to examine consecutive events
instead of relying on a specific edge type. A transition marks
a sentinel frame if the next photodiode transition is roughly
1.0s after the previous or 1.0s before the next. Due to noisy
data we can't just look for all contiguous events of this type.
Instad we mark the beginning and end of the photodiode event
stream as follows:
1. The first valid photodiode event is the first event where
the time between the next two consecutive events are ~1.0s
2. The last valid photodiode event is the final event where
the last event where the time between the previous two
consecutive events are ~1.0s

Update the logging to show the min, max, and std delay.

No longer use a default value, but throw an error if there are
data issues or the value computed is outside expected range.
---
 CHANGELOG.md                                  |   2 +
 .../internal/brain_observatory/time_sync.py   | 146 +++++++++++-------
 .../brain_observatory/test_time_sync.py       | 141 ++++++++++++++---
 doc_template/index.rst                        |   1 +
 4 files changed, 217 insertions(+), 73 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 07b7fd3305..c63faf9bb0 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file.
 - A new mixin for managing processing parameters for Session objects
 
 ### Changed
+- Monitor delay calculation is updated to properly handle photodiode streams that end
+on a rising edge. We are no longer providing a default delay value in case of error.
 
 ### Bug Fixes
 - experiment\_table from behavior project cache has NaNs in the 'imaging\_depth' column for MultiScope experiments due to incorrect join in behavior\_project\_lims\_api.py and 4 other places where ophys\_sessions was incorrectly queried for imaging\_depth\_id
diff --git a/allensdk/internal/brain_observatory/time_sync.py b/allensdk/internal/brain_observatory/time_sync.py
index 3133fb3e82..696862d658 100644
--- a/allensdk/internal/brain_observatory/time_sync.py
+++ b/allensdk/internal/brain_observatory/time_sync.py
@@ -1,3 +1,5 @@
+from collections import deque
+
 import numpy as np
 import h5py
 from allensdk.brain_observatory.sync_dataset import Dataset
@@ -9,14 +11,11 @@
     cv2 = None
 
 TRANSITION_FRAME_INTERVAL = 60
-SHORT_PHOTODIODE_MIN = 0.1 # seconds
-SHORT_PHOTODIODE_MAX = 0.5 # seconds
-REG_PHOTODIODE_MIN = 1.9 # seconds
-REG_PHOTODIODE_MAX = 2.1 # seconds
-PHOTODIODE_ANOMALY_THRESHOLD = 0.5 # seconds
-LONG_STIM_THRESHOLD = 0.2 # seconds
-ASSUMED_DELAY = 0.0215 # seconds
-MAX_MONITOR_DELAY = 0.07 # seconds
+REG_PHOTODIODE_INTERVAL = 1.0     # seconds
+REG_PHOTODIODE_STD = 0.05    # seconds
+PHOTODIODE_ANOMALY_THRESHOLD = 0.5     # seconds
+LONG_STIM_THRESHOLD = 0.2     # seconds
+MAX_MONITOR_DELAY = 0.07     # seconds
 
 VERSION_1_KEYS = {
     "photodiode": "stim_photodiode",
@@ -53,58 +52,101 @@ def get_keys(sync_dset):
 
 def monitor_delay(sync_dset, stim_times, photodiode_key,
                   transition_frame_interval=TRANSITION_FRAME_INTERVAL,
-                  max_monitor_delay=MAX_MONITOR_DELAY,
-                  assumed_delay=ASSUMED_DELAY):
+                  max_monitor_delay=MAX_MONITOR_DELAY):
     """Calculate monitor delay."""
-    try:
-        transitions = stim_times[::transition_frame_interval]
-        photodiode_events = get_real_photodiode_events(sync_dset, photodiode_key)
-        transition_events = photodiode_events[0:len(transitions)]
-
-        delay = np.mean(transition_events-transitions)
-        logging.info("Calculated monitor delay: %s", delay)
-
-        if delay < 0 or delay > max_monitor_delay:
-            delay = assumed_delay
-            logging.warning("Setting delay to assumed value: %s", delay)
-    except (IndexError, ValueError) as e:
-        logging.error(e)
-        delay = assumed_delay
-        logging.warning("Bad photodiode signal, setting delay to assumed "
-                        "value: %s", delay)
-
+    transitions = stim_times[::transition_frame_interval]
+    photodiode_events = get_real_photodiode_events(sync_dset, photodiode_key)
+    transition_events = photodiode_events[0:len(transitions)]
+
+    delays = transition_events - transitions
+    delay = np.mean(delays)
+    logging.info(f"Calculated monitor delay: {delay}. \n "
+                 f"Max monitor delay: {np.max(delays)}. \n "
+                 f"Min monitor delay: {np.min(delays)}.\n "
+                 f"Std monitor delay: {np.std(delays)}.")
+
+    if delay < 0 or delay > max_monitor_delay:
+        raise ValueError("Delay ({delay}s) falls outside expected value "
+                         "range.")
     return delay
 
 
-def get_photodiode_events(sync_dset, photodiode_key):
-    """Returns the photodiode events with the start/stop indicators and
-    the window init flash stripped off.
+def _find_last_n(arr, n, cond):
     """
-    all_events = sync_dset.get_events_by_line(photodiode_key)
-    pdr = sync_dset.get_rising_edges(photodiode_key)
-    pdf = sync_dset.get_falling_edges(photodiode_key)
-
-    all_events_sec = all_events/sync_dset.sample_freq
-    pdr_sec = pdr/sync_dset.sample_freq
-    pdf_sec = pdf/sync_dset.sample_freq
-
-    pdf_diff = np.ediff1d(pdf_sec, to_end=0)
-    pdr_diff = np.ediff1d(pdr_sec, to_end=0)
-
-    reg_pd_falling = pdf_sec[(pdf_diff >= REG_PHOTODIODE_MIN) &
-                             (pdf_diff <= REG_PHOTODIODE_MAX)]
+    Find the final index where the prior `n` values in an array meet
+    the condition `cond` (inclusive).
+    Parameters
+    ==========
+    arr: numpy.1darray
+    n: int
+    cond: Callable that returns True if condition is met, False
+    otherwise. Should be able to be applied to the array elements  
+    without any additional arguments.
+    """
+    reversed_ix = _find_n(arr[::-1], n, cond)
+    if reversed_ix is not None:
+        reversed_ix = len(arr) - reversed_ix - 1
+    return reversed_ix
 
-    short_pd_rising = pdr_sec[(pdr_diff >= SHORT_PHOTODIODE_MIN) &
-                              (pdr_diff <= SHORT_PHOTODIODE_MAX)]
 
-    first_falling = reg_pd_falling[0]
-    last_falling = reg_pd_falling[-1]
+def _find_n(arr, n, cond):
+    """
+    Find the index where the next `n` values in an array meet the 
+    condition `cond` (inclusive).
+    Parameters
+    ==========
+    arr: numpy.1darray
+    n: int
+    cond: Callable that returns True if condition is met, False
+    otherwise. Should be able to be applied to the array elements  
+    without any additional arguments.
+    """
+    if len(arr) < n:
+        return None
+    queue = deque(np.apply_along_axis(cond, 0, arr[:n]), maxlen=n)
+    i = 0
+    while queue.count(True) < n:
+        try:
+            i += 1
+            queue.append(cond(arr[i+n-1]))
+        except IndexError:
+            return None
+    return i
 
-    end_indicators = short_pd_rising[short_pd_rising > last_falling]
-    first_end_indicator = end_indicators[0]
 
-    pd_events =  all_events_sec[(all_events_sec >= first_falling) &
-                                (all_events_sec < first_end_indicator)]
+def get_photodiode_events(sync_dset, photodiode_key):
+    """Returns the photodiode events with the start/stop indicators and
+    the window init flash stripped off. These transitions occur roughly
+    ~1.0s apart, since the sync square changes state every N frames
+    (where N = 60, and frame rate is 60 Hz). Because there are no
+    markers for when the first transition of this type started, we
+    estimate based on the event intervals. For the first valid event,
+    find the first two events that both meet the following criteria:
+        The next event occurs ~1.0s later
+    First the last valid event, find the first two events that both meet
+    the following criteria:
+        The last valid event occured ~1.0s before
+    """
+    all_events = sync_dset.get_events_by_line(photodiode_key, units="seconds")
+    all_events_diff = np.ediff1d(all_events, to_begin=0, to_end=0)
+    all_events_diff_prev = all_events_diff[:-1]
+    all_events_diff_next = all_events_diff[1:]
+    min_interval = REG_PHOTODIODE_INTERVAL - REG_PHOTODIODE_STD
+    max_interval = REG_PHOTODIODE_INTERVAL + REG_PHOTODIODE_STD
+    if not len(all_events):
+        raise ValueError("No photodiode events found. Please check "
+                         "the input data for errors. ")
+    first_valid_index = _find_n(
+        all_events_diff_next, 2,
+        lambda x: (x >= min_interval) & (x <= max_interval))
+    last_valid_index = _find_last_n(
+        all_events_diff_prev, 2,
+        lambda x: (x >= min_interval) & (x <= max_interval))
+    if first_valid_index is None:
+        raise ValueError("Can't find valid start event")
+    if last_valid_index is None:
+        raise ValueError("Can't find valid end event")
+    pd_events = all_events[first_valid_index:last_valid_index+1]
     return pd_events
 
 
@@ -283,7 +325,7 @@ def corrected_stim_timestamps(self):
 
         photodiode_key = self._keys["photodiode"]
         delay = monitor_delay(self.dataset, timestamps, photodiode_key)
-        
+
         return timestamps + delay, delta, delay
 
     @property
diff --git a/allensdk/test/internal/brain_observatory/test_time_sync.py b/allensdk/test/internal/brain_observatory/test_time_sync.py
index 46f6aeb1a6..b37fbe6140 100644
--- a/allensdk/test/internal/brain_observatory/test_time_sync.py
+++ b/allensdk/test/internal/brain_observatory/test_time_sync.py
@@ -7,6 +7,11 @@
 from mock import patch
 from allensdk.internal.brain_observatory import time_sync as ts
 from allensdk.internal.pipeline_modules import run_ophys_time_sync
+from allensdk.brain_observatory.sync_dataset import Dataset
+
+
+ASSUMED_DELAY = 0.0351
+
 
 data_file = resource_filename(__name__, "time_sync_test_data.json")
 test_data = json.load(open(data_file, "r"))
@@ -20,6 +25,24 @@
 MIN_BOUND = .03
 MAX_BOUND = .04
 
+
+class MockSyncDataset(Dataset):
+    """
+    Mock the Dataset class so it doesn't load an h5 file upon
+    initialization.
+    """
+    def __init__(self, data):
+        self.dfile = data
+
+
+def mock_get_real_photodiode_events(data, key):
+    return data
+
+
+def mock_get_events_by_line(line, units="seconds"):
+    return line
+
+
 def calculate_stimulus_alignment(stim_time, valid_twop_vsync_fall):
     stimulus_alignment = np.empty(len(stim_time))
 
@@ -312,7 +335,7 @@ def test_get_corrected_stim_times(stim_data_length, start_delay):
             aligner = ts.OphysTimeAligner("test")
 
     aligner.stim_data_length = stim_data_length
-    with patch.object(ts, "monitor_delay", return_value=ts.ASSUMED_DELAY):
+    with patch.object(ts, "monitor_delay", return_value=ASSUMED_DELAY):
         with patch.object(ts.Dataset, "get_falling_edges",
                           return_value=true_falling) as mock_falling:
             with patch.object(ts.Dataset, "get_rising_edges",
@@ -329,15 +352,15 @@ def test_get_corrected_stim_times(stim_data_length, start_delay):
         assert mock_log.call_count == 2
         assert len(times) == len(true_falling) - 1
         assert delta == len(true_falling) - 1 - stim_data_length
-        assert np.all(times == true_falling[1:] + ts.ASSUMED_DELAY)
+        assert np.all(times == true_falling[1:] + ASSUMED_DELAY)
     elif stim_data_length != len(true_falling):
         mock_rising.assert_called_once()
         mock_log.assert_called_once()
         assert delta == len(true_falling) - stim_data_length
-        assert np.all(times == true_falling + ts.ASSUMED_DELAY)
+        assert np.all(times == true_falling + ASSUMED_DELAY)
     else:
         assert mock_rising.call_count == 0
-        assert np.all(times == true_falling + ts.ASSUMED_DELAY)
+        assert np.all(times == true_falling + ASSUMED_DELAY)
         assert mock_log.call_count == 0
         assert delta == 0
 
@@ -413,23 +436,99 @@ def test_module(input_json):
                            equal_nan=True)
 
 
-@pytest.mark.skipif(data_skip, reason="No sync or data")
-def test_monitor_delay(scientifica_input):
-    sync_file = scientifica_input.pop("sync_file")
-    dset = ts.Dataset(sync_file)
-    stim_times = dset.get_falling_edges("stim_vsync")
-
-    with patch("numpy.mean", side_effect=ValueError()) as mock_mean:
-        delay = ts.monitor_delay(dset, stim_times, "stim_photodiode",
-                                 assumed_delay=20)
-        assert delay == 20
-        mock_mean.assert_called_once()
-
-    with patch.object(ts, "get_real_photodiode_events",
-                      side_effect=IndexError()) as mock_events:
-        delay = ts.monitor_delay(dset, stim_times, "stim_photodiode",
-                                 assumed_delay=30)
-        assert delay == 30
+@pytest.mark.parametrize(
+    "sync_dset,stim_times,transition_interval,expected",
+    [
+        (np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+         np.array([0.99, 1.99, 2.99, 3.99, 4.99]), 1, 0.01),
+        (np.array([1.0, 2.0, 3.0, 4.0]),
+         np.array([0.95, 2.0, 2.95, 4.0]), 1, 0.025),
+        (np.array([1.0]), np.array([1.0]), 1, 0.0)
+    ],
+)
+def test_monitor_delay(sync_dset, stim_times, transition_interval, expected,
+                       monkeypatch):
+    monkeypatch.setattr(ts, "get_real_photodiode_events",
+                        mock_get_real_photodiode_events)
+    pytest.approx(expected, ts.monitor_delay(sync_dset, stim_times, "key",
+                                             transition_interval))
+
+
+@pytest.mark.parametrize(
+    "sync_dset,stim_times,transition_interval",
+    [
+        (np.array([1.0, 2.0, 3.0]), np.array([0.9, 1.9, 2.9]), 1,),    # negative
+        (np.array([1.0, 2.0, 3.0, 4.0]), np.array([1.1, 2.1, 3.1, 4.1]), 1,),  # too big
+    ],
+)
+def test_monitor_delay_raises_error(
+        sync_dset, stim_times, transition_interval,
+        monkeypatch):
+    monkeypatch.setattr(ts, "get_real_photodiode_events",
+                        mock_get_real_photodiode_events)
+    with pytest.raises(ValueError):
+        ts.monitor_delay(sync_dset, stim_times, "key", transition_interval)
+
+
+@pytest.mark.parametrize(
+    "arr,cond,n,expected",
+    [
+        (np.array([1, 1, 1, 2]), lambda x: x < 2, 3, 0),
+        (np.array([2, 1, 1, 1]), lambda x: x < 2, 3, 1),
+        (np.array([1, 2, 2, 1, 1]), lambda x: x >= 1, 2, 0),
+        (np.array([]), lambda x: x < 1, 1, None),
+        (np.array([1, 2, 3]), lambda x: x < 3, 4, None),
+        (np.array([1, 2, 2, 3, 2]), lambda x: x == 2, 3, None),
+    ]
+)
+def test_find_n(arr, cond, n, expected):
+    assert expected == ts._find_n(arr, n, cond)
+
+
+@pytest.mark.parametrize(
+    "arr,cond,n,expected",
+    [
+        (np.array([1, 1, 1, 2]), lambda x: x < 2, 3, 2),
+        (np.array([2, 1, 1, 1]), lambda x: x < 2, 3, 3),
+        (np.array([1, 2, 2, 1, 1]), lambda x: x >= 1, 2, 4),
+        (np.array([]), lambda x: x < 1, 1, None),
+        (np.array([1, 2, 3]), lambda x: x < 3, 4, None),
+        (np.array([1, 2, 2, 3, 2]), lambda x: x == 2, 3, None),
+    ]
+)
+def test_find_last_n(arr, cond, n, expected):
+    assert expected == ts._find_last_n(arr, n, cond)
+
+
+@pytest.mark.parametrize(
+    "sync_dset,expected",
+    [
+        ([0.25, 0.5, 0.75, 1., 2., 3., 5., 5.75], [1., 2., 3.]),
+        ([1., 2., 3., 4.], [1., 2., 3., 4.]),
+        ([0.25, 1., 2., 2.1, 2.2, 3., 4., 5.], [3., 4., 5.]),   # false alarm start
+        ([0.25, 1., 2., 3., 4., 4.5, 5.1, 6.1], [1., 2., 3., 4.]),  # false alarm end
+    ],
+)
+def test_get_photodiode_events(sync_dset, expected, monkeypatch):
+    ds = MockSyncDataset(sync_dset)
+    monkeypatch.setattr(ds, "get_events_by_line", mock_get_events_by_line)
+    np.testing.assert_array_equal(
+        expected, ts.get_photodiode_events(ds, sync_dset))
+
+
+@pytest.mark.parametrize(
+    "sync_dset,",
+    [
+        ([]),
+        ([0.25, 0.25]),
+        ([1., 2.]),
+    ]
+)
+def test_photodiode_events_error_if_none_found(sync_dset, monkeypatch):
+    ds = MockSyncDataset(sync_dset)
+    monkeypatch.setattr(ds, "get_events_by_line", mock_get_events_by_line)
+    with pytest.raises(ValueError):
+        ts.get_photodiode_events(ds, sync_dset)
 
 
 @pytest.mark.parametrize("deserialized_pkl,expected", [
diff --git a/doc_template/index.rst b/doc_template/index.rst
index bd2408059f..e9516d696b 100644
--- a/doc_template/index.rst
+++ b/doc_template/index.rst
@@ -97,6 +97,7 @@ As of the 1.7.0 release:
 
 - Added functionality so internal users can now access `eye_tracking` ellipse fit data from behavior + ophys Session objects
 - Added a new mixin for managing processing parameters for Session objects
+- Update the monitor delay calculation to better handle edge cases; no longer provide a default delay value if encounter an error
 
 What's New - 1.6.0 (March 23, 2020)
 -----------------------------------------------------------------------