From 8a03ffd6bc32912faba2c0293c17789a726d626c Mon Sep 17 00:00:00 2001
From: Jacob
Date: Sat, 27 Mar 2021 14:25:03 -0700
Subject: [PATCH] Add new "chunk" isolation technique

- The new technique should be handy in situations where audio annotations
  have a discrete length, such as BirdNET outputs and the BirdCLEF2020
  labels.
- Also fixed a bug related to IoU scores in bird_label_scores()
---
 microfaune_local_score.py | 66 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/microfaune_local_score.py b/microfaune_local_score.py
index 2517627..2540669 100644
--- a/microfaune_local_score.py
+++ b/microfaune_local_score.py
@@ -36,6 +36,8 @@ def isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename,isolation_par
         isolation_df = steinberg_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename,isolation_parameters, manual_id = "bird")
     elif isolation_parameters["technique"] == "stack":
         isolation_df = stack_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename, isolation_parameters, manual_id = "bird")
+    elif isolation_parameters["technique"] == "chunk":
+        isolation_df = chunk_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename, isolation_parameters, manual_id = "bird")
 
     return isolation_df
 
@@ -293,6 +295,67 @@ def stack_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename, isolat
     # returning pandas dataframe from dictionary constructed with all of the annotations
     return pd.DataFrame.from_dict(entry)
 
+# Isolation technique that breaks an audio clip into chunks of a user-defined
+# duration, then compares the max local score within each chunk to a threshold
+# to decide whether the chunk contains the vocalization of interest.
+# TODO
+# Give the user the option of an overlap between consecutive chunks.
+# Let the user choose how many samples must exceed the threshold for a chunk to count as a detection.
+# Add an option to merge annotations that follow one another.
+def chunk_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename, isolation_parameters, manual_id = "bird"):
+    # configuring the threshold based on isolation parameters
+    if isolation_parameters["threshold_type"] == "median":
+        thresh = np.median(local_scores) * isolation_parameters["threshold_const"]
+    elif isolation_parameters["threshold_type"] == "mean" or isolation_parameters["threshold_type"] == "average":
+        thresh = np.mean(local_scores) * isolation_parameters["threshold_const"]
+    elif isolation_parameters["threshold_type"] == "standard deviation":
+        thresh = np.mean(local_scores) + (np.std(local_scores) * isolation_parameters["threshold_const"])
+    elif isolation_parameters["threshold_type"] == "pure":
+        thresh = isolation_parameters["threshold_const"]
+        if thresh < 0:
+            print("Threshold is less than zero, setting to zero.")
+            thresh = 0
+        elif thresh > 1:
+            print("Threshold is greater than one, setting to one.")
+            thresh = 1
+
+    # calculating the original duration of the clip
+    old_duration = len(SIGNAL) / SAMPLE_RATE
+
+    # initializing the dictionary for the output pandas dataframe
+    entry = {'FOLDER' : audio_dir,
+             'IN FILE' : filename,
+             'CHANNEL' : 0,
+             'CLIP LENGTH': old_duration,
+             'SAMPLE RATE': SAMPLE_RATE,
+             'OFFSET' : [],
+             'DURATION' : [],
+             'MANUAL ID' : manual_id}
+
+    # calculating the number of chunks that make up the audio clip
+    chunk_count = math.ceil(len(SIGNAL)/(isolation_parameters["chunk_size"]*SAMPLE_RATE))
+    # calculating the number of local scores per second
+    scores_per_second = len(local_scores)/old_duration
+    # calculating the chunk size with respect to the local score array
+    local_scores_per_chunk = scores_per_second * isolation_parameters["chunk_size"]
+    # looping through each chunk
+    for ndx in range(chunk_count):
+        # finding the start of a chunk
+        chunk_start = ndx*local_scores_per_chunk
+        # finding the end of a chunk
+        chunk_end = min((ndx+1)*local_scores_per_chunk,len(local_scores))
+        # breaking up the local_score array into a chunk.
+        chunk = local_scores[int(chunk_start):int(chunk_end)]
+        # comparing the largest local score value to the threshold.
+        # the case where we label the chunk as an annotation
+        if max(chunk) >= thresh:
+            annotation_start = chunk_start/scores_per_second
+            annotation_end = chunk_end/scores_per_second
+            entry["OFFSET"].append(annotation_start)
+            entry["DURATION"].append(annotation_end - annotation_start)
+
+    return pd.DataFrame.from_dict(entry)
+
 
 ## Function that applies the moment to moment labeling system to a directory full of wav files.
 def generate_automated_labels(bird_dir, isolation_parameters, weight_path=None, Normalized_Sample_Rate = 44100, normalize_local_scores = False):
@@ -310,6 +373,8 @@ def generate_automated_labels(bird_dir, isolation_parameters, weight_path=None,
 
     # init detector
     # Use Default Microfaune Detector
+    # TODO
+    # Expand to neural networks beyond just microfaune
     if weight_path is None:
         detector = RNNDetector()
     # Use Custom weights for Microfaune Detector
@@ -596,6 +661,7 @@ def bird_label_scores(automated_df,human_df,plot_fig = False, save_fig = False):
         f1 = 0
         precision = 0
         recall = 0
+        IoU = 0
 
     # Creating a Dictionary which will be turned into a Pandas Dataframe
     entry = {'FOLDER' : folder_name,
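
Usage note (for reviewers; not part of the diff): a minimal sketch of driving
the new "chunk" technique through generate_automated_labels(). The parameter
keys match the ones chunk_isolate() reads in this patch; the directory path
and the threshold values are placeholders, and it is assumed, as the per-file
isolate() helpers suggest, that the function returns the aggregated
annotation DataFrame.

    from microfaune_local_score import generate_automated_labels

    # Keys below are the ones chunk_isolate() looks up in this patch.
    isolation_parameters = {
        "technique": "chunk",        # routes isolate() to chunk_isolate()
        "threshold_type": "median",  # or "mean"/"average", "standard deviation", "pure"
        "threshold_const": 2.0,      # placeholder multiplier on the median local score
        "chunk_size": 3,             # placeholder chunk duration, in seconds
    }

    # Expect one row per chunk whose max local score clears the threshold.
    automated_df = generate_automated_labels("./wavs/", isolation_parameters)
    print(automated_df[["OFFSET", "DURATION"]].head())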
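
Worked example of the chunk bookkeeping in chunk_isolate(), using the same
formulas as the diff (the clip length and local score count are hypothetical,
chosen only for illustration):

    import math

    SAMPLE_RATE = 44100                    # samples per second
    signal_len = 60 * SAMPLE_RATE          # a hypothetical 60-second clip
    n_scores = 2584                        # hypothetical local score count
    chunk_size = 3                         # seconds per chunk

    old_duration = signal_len / SAMPLE_RATE                           # 60.0 s
    chunk_count = math.ceil(signal_len / (chunk_size * SAMPLE_RATE))  # 20 chunks
    scores_per_second = n_scores / old_duration                       # ~43.07 scores/s
    local_scores_per_chunk = scores_per_second * chunk_size           # ~129.2 scores/chunk

Since local_scores_per_chunk is generally fractional, the chunk boundaries
are truncated with int() when slicing, and chunk_end is clamped to
len(local_scores) so the final chunk never reads past the score array.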