From f916a0c6b6963854f4bba7b79e7670414b0166be Mon Sep 17 00:00:00 2001
From: Jacob <jgayers@ucsd.edu>
Date: Sat, 20 Feb 2021 22:36:10 -0800
Subject: [PATCH] Added comments and improved readability of isolate. Laid out
 the plan for changes to be made to the isolate function in order to perform
 gradient descent.

---
 microfaune_local_score.py | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/microfaune_local_score.py b/microfaune_local_score.py
index b3e086d..55d8b23 100644
--- a/microfaune_local_score.py
+++ b/microfaune_local_score.py
@@ -17,44 +17,53 @@
 
 # Gabriel's original moment-to-moment classification tool. Reworked to output
 # a Pandas DataFrame.
-def isolate(scores, samples, sample_rate, audio_dir, filename):
+# TODO rework isolate in a way that allows a user to input a dictionary where they can modulate different
+# parameters involved in Gabriel's algorithm. We can set the default of this dict to be what he originally chose for now.
+# Some ideas for how to change the parameters are to allow for different modifications of the threshold. We would want to be able
+# to modify the bird presence threshold to be a pure value. This will allow us to build ROC curves. Another would be to allow for a
+# selection of how many standard deviations away from the mean the threshold sits. Another would be, instead of a median, allow standard deviation and mean as
+# alternatives. Another option would be to allow for curve smoothing on the local score array that is being passed in. This could come in
+# the form of a high-order polynomial fit or possibly testing out my curve smoothing algorithm that uses a bell-curve distribution to
+# loop around and average each sample with its surrounding samples over many iterations. We could also play around with filtering.
+
+def isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename):
     # calculate original duration
-    old_duration = len(samples) / sample_rate
+    old_duration = len(SIGNAL) / SAMPLE_RATE
 
     # create entry for audio clip
     entry = {'FOLDER'  : audio_dir,
              'IN FILE'    : filename,
              'CHANNEL' : 0,
              'CLIP LENGTH': old_duration,
-             'SAMPLE RATE': sample_rate,
+             'SAMPLE RATE': SAMPLE_RATE,
              'OFFSET'  : [],
              'MANUAL ID'  : []}
 
     # Variable to modulate when encapsulating this function.
     # treshold is 'thresh_mult' times above median score value
     thresh_mult = 2
-    thresh = np.median(scores) * thresh_mult
+    thresh = np.median(local_scores) * thresh_mult
 
 
     # how many samples one score represents
     # Scores meaning local scores
-    samples_per_score = len(samples) // len(scores)
+    samples_per_score = len(SIGNAL) // len(local_scores)
 
     # isolate samples that produce a score above thresh
     isolated_samples = np.empty(0, dtype=np.int16)
     prev_cap = 0        # sample idx of previously captured
-    for i in range(len(scores)):
+    for i in range(len(local_scores)):
         # if a score hits or surpasses thresh, capture 1s on both sides of it
-        if scores[i] >= thresh:
+        if local_scores[i] >= thresh:
             # score_pos is the sample index that the score corresponds to
             score_pos = i * samples_per_score
 
             # upper and lower bound of captured call
             # sample rate is # of samples in 1 second: +-1 second
-            lo_idx = max(0, score_pos - sample_rate)
-            hi_idx = min(len(samples), score_pos + sample_rate)
-            lo_time = lo_idx / sample_rate
-            hi_time = hi_idx / sample_rate
+            lo_idx = max(0, score_pos - SAMPLE_RATE)
+            hi_idx = min(len(SIGNAL), score_pos + SAMPLE_RATE)
+            lo_time = lo_idx / SAMPLE_RATE
+            hi_time = hi_idx / SAMPLE_RATE
 
             # calculate start and end stamps
             # create new sample if not overlapping or if first stamp
@@ -72,7 +81,7 @@ def isolate(scores, samples, sample_rate, audio_dir, filename):
 
             # add to isolated samples
             # sub-clip numpy array
-            isolated_samples = np.append(isolated_samples,samples[lo_idx:hi_idx])
+            isolated_samples = np.append(isolated_samples,SIGNAL[lo_idx:hi_idx])
 
 
     entry = pd.DataFrame.from_dict(entry)