Merge branch 'tweetynet_integrations' into TweetyNet_integrations_2_main
mugen13lue authored May 26, 2022
2 parents 97faf00 + 5564bd4 commit e55baa3
Showing 25 changed files with 3,755 additions and 24 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
.ipynb_checkpoints
PyHa/__pycache__
PyHa/microfaune_package/microfaune/__pycache__
PyHa/tweetynet_package/tweetynet/__pycache__
216 changes: 193 additions & 23 deletions PyHa/IsoAutio.py
@@ -1,5 +1,9 @@
#from PyHa.tweetynet_package.tweetynet.network import TweetyNet
from .microfaune_package.microfaune.detection import RNNDetector
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
import torch
import pandas as pd
import scipy.signal as scipy_signal
import numpy as np
@@ -256,7 +260,6 @@ def steinberg_isolate(
"""
# calculate original duration
old_duration = len(SIGNAL) / SAMPLE_RATE

# create entry for audio clip
entry = {'FOLDER': audio_dir,
'IN FILE': filename,
@@ -269,7 +272,6 @@
    # calculating threshold that will define how labels are created in current
    # audio clip
    thresh = threshold(local_scores, isolation_parameters)

    # how many samples one local score represents
    samples_per_score = len(SIGNAL) // len(local_scores)

@@ -316,12 +318,12 @@
            # sub-clip numpy array
            isolated_samples = np.append(
                isolated_samples, SIGNAL[lo_idx:hi_idx])

    entry = pd.DataFrame.from_dict(entry)
    # TODO: when rebuilding this isolate function as an optimization problem,
    # rework the algorithm so that it builds the dataframe correctly to save time.

    OFFSET = entry['OFFSET'].str[0]
    DURATION = entry['OFFSET'].str[1]
    DURATION = DURATION - OFFSET
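The two .str accessors above work because the OFFSET column initially holds (start, stop) pairs; DURATION is then rewritten as stop minus start. A minimal sketch of that transform, with hypothetical values:

import pandas as pd

# OFFSET starts as (start, stop) pairs, one per detection (hypothetical values)
entry = pd.DataFrame({'OFFSET': [(1.0, 3.5), (6.0, 7.25)]})
OFFSET = entry['OFFSET'].str[0]             # start times: 1.0, 6.0
DURATION = entry['OFFSET'].str[1] - OFFSET  # lengths: 2.5, 1.25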
@@ -668,6 +670,8 @@
def generate_automated_labels_microfaune(
        audio_dir,
        isolation_parameters,
        ml_model="microfaune",
        tweety_output=False,
        manual_id="bird",
        weight_path=None,
        normalized_sample_rate=44100,
@@ -703,11 +707,19 @@ def generate_automated_labels_microfaune(
    # Use Default Microfaune Detector
    # TODO
    # Expand to neural networks beyond just microfaune
    # The ml_model flag selects which detector to build.
    if ml_model == "microfaune":
        if weight_path is None:
            detector = RNNDetector()
        # Use custom weights for the Microfaune detector
        else:
            detector = RNNDetector(weight_path)
    elif ml_model == "tweetynet":
        device = torch.device('cpu')
        detector = TweetyNetModel(2, (1, 86, 86), 86, device, binary=False)
    else:
        print("model \"{}\" does not exist".format(ml_model))
        return None

    # init labels dataframe
    annotations = pd.DataFrame()
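With the new ml_model flag, the same entry point can drive either detector. A hedged usage sketch; the directory, weight file, and isolation_parameters values below are placeholders, not values from this commit:

from PyHa.IsoAutio import generate_automated_labels_microfaune

# Placeholder isolation parameters; substitute a real technique and threshold.
isolation_parameters = {"technique": "simple",
                        "threshold_type": "median",
                        "threshold_const": 2.0}

# Default Microfaune RNN detector
labels = generate_automated_labels_microfaune("./wavs/", isolation_parameters)

# TweetyNet detector, keeping its raw output rather than re-isolating
labels = generate_automated_labels_microfaune(
    "./wavs/", isolation_parameters,
    ml_model="tweetynet", tweety_output=True, weight_path="./weights.pt")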
@@ -747,13 +759,19 @@
        # Might want to compare to just taking the first set of data.
        if len(SIGNAL.shape) == 2:
            SIGNAL = SIGNAL.sum(axis=1) / 2

        # detection
        try:
            # Compute features and predict with whichever model was selected.
            if ml_model == "microfaune":
                microfaune_features = detector.compute_features([SIGNAL])
                global_score, local_scores = detector.predict(microfaune_features)
            elif ml_model == "tweetynet":
                # Convert the signal into a spectrogram and window it for TweetyNet.
                tweetynet_features = compute_features([SIGNAL])
                predictions, local_scores = detector.predict(
                    tweetynet_features, model_weights=weight_path)
        except BaseException as e:
            print("Error in detection, skipping", audio_file)
            print(e)
            continue

        # get duration of clip
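The comment in the tweetynet branch above notes that the signal must be converted into a spectrogram and windowed; compute_features presumably handles this. As a rough illustration only, a windowing sketch that matches the (1, 86, 86) input shape passed to TweetyNetModel (the mel-spectrogram settings here are assumptions, not the package's actual parameters):

import numpy as np
import librosa

def signal_to_windows(signal, sr=44100, n_mels=86, win_frames=86):
    # Mel spectrogram, then non-overlapping 86-frame windows -> (N, 1, 86, 86)
    spec = librosa.feature.melspectrogram(y=signal.astype(float), sr=sr, n_mels=n_mels)
    n_windows = spec.shape[1] // win_frames
    windows = [spec[:, i * win_frames:(i + 1) * win_frames]
               for i in range(n_windows)]
    return np.stack(windows)[:, np.newaxis, :, :]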
@@ -762,22 +780,33 @@
        try:
            # Running moment to moment algorithm and appending to a master
            # dataframe.
            # Use TweetyNet's raw output instead of the isolation functions.
            if tweety_output:
                local_scores = [np.array(predictions["pred"].values)]
                new_entry = predictions_to_kaleidoscope(
                    predictions, SIGNAL, audio_dir, audio_file,
                    manual_id, SAMPLE_RATE)
            else:
                new_entry = isolate(
                    local_scores[0],
                    SIGNAL,
                    SAMPLE_RATE,
                    audio_dir,
                    audio_file,
                    isolation_parameters,
                    manual_id=manual_id,
                    normalize_local_scores=normalize_local_scores)
            # print(new_entry)
            if annotations.empty:
                annotations = new_entry
            else:
                annotations = annotations.append(new_entry)
        except BaseException as e:
            print("Error in isolating bird calls from", audio_file)
            print(e)
            continue
    # Quick fix to indexing
    annotations.reset_index(inplace=True, drop=True)
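Whichever branch runs, new_entry comes back as a Kaleidoscope-style dataframe. A sketch of its rough shape, based on the entry dict built in the isolate step; values are hypothetical and predictions_to_kaleidoscope may emit additional columns:

import pandas as pd

annotations = pd.DataFrame({
    'FOLDER':    ['./wavs/'],    # audio_dir
    'IN FILE':   ['XC101.wav'],  # audio_file
    'OFFSET':    [1.0],          # detection start, in seconds
    'DURATION':  [2.5],          # detection length, in seconds
    'MANUAL ID': ['bird'],       # manual_id
})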
@@ -917,8 +946,6 @@ def generate_automated_labels(
    # return None
    return annotations

def kaleidoscope_conversion(df):
    """
    Function that strips away Pandas Dataframe columns necessary for PyHa
@@ -969,3 +996,146 @@ def kaleidoscope_conversion(df):


# annotation = annotation + annotation_chain_count - 1

def generate_automated_labels_final(audio_dir,
                                    isolation_parameters,
                                    tweety_output=False,
                                    manual_id="bird",
                                    weight_path=None,
                                    Normalized_Sample_Rate=44100,
                                    normalize_local_scores=False):
    return generate_automated_labels_tweetynet(
        audio_dir,
        isolation_parameters,
        tweety_output,
        manual_id,
        weight_path,
        Normalized_Sample_Rate,
        normalize_local_scores)
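generate_automated_labels_final is a thin pass-through, so as the code stands the two calls below take the same path (arguments are placeholders):

isolation_parameters = {"technique": "simple"}  # placeholder
labels = generate_automated_labels_final("./wavs/", isolation_parameters)
# ...is currently equivalent to:
labels = generate_automated_labels_tweetynet("./wavs/", isolation_parameters)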

def generate_automated_labels_tweetynet(
        audio_dir,
        isolation_parameters,
        tweety_output=False,
        manual_id="bird",
        weight_path=None,
        Normalized_Sample_Rate=44100,
        normalize_local_scores=False):
"""
Function that applies isolation technique determined by
isolation_parameters dictionary across a folder of audio clips.
Args:
audio_dir (string)
- Directory with wav audio files.
isolation_parameters (dict)
- Python Dictionary that controls the various label creation
techniques.
tweety_output (boolean) # may want to incorporate into isolation parameters
- True to use tweetynet's original output, or False to use
isolation techniques.
manual_id (string)
- controls the name of the class written to the pandas dataframe.
weight_path (string)
- File path of weights to be used by TweetyNet for
determining presence of bird sounds.
Normalized_Sample_Rate (int)
- Sampling rate that the audio files should all be normalized to.
normalize_local_scores (boolean) # may want to incorporate into isolation parameters
- Flag to normalize the local scores.
Returns:
Dataframe of automated labels for the audio clips in audio_dir.
"""

    # init detector
    # This function always builds the TweetyNet model; it runs on CPU here.
    device = torch.device('cpu')
    detector = TweetyNetModel(2, (1, 86, 86), 86, device)

    # init labels dataframe
    annotations = pd.DataFrame()
    # generate local scores for every bird file in chosen directory
    for audio_file in os.listdir(audio_dir):
        # skip directories
        if os.path.isdir(audio_dir + audio_file):
            continue

        # It is a bit awkward to rely on Microfaune's wave file reading here
        # when we want to expand to other frameworks; we will likely want to
        # change that in the future. Librosa had some troubles.

        # Reading in the wave audio files
        try:
            SAMPLE_RATE, SIGNAL = audio.load_wav(audio_dir + audio_file)
        except BaseException:
            print("Failed to load", audio_file)
            continue

        # downsample the audio if the sample rate isn't 44.1 kHz
        # Force everything into the human hearing range.
        # May consider reworking this function so that it upsamples as well
        if SAMPLE_RATE != Normalized_Sample_Rate:
            rate_ratio = Normalized_Sample_Rate / SAMPLE_RATE
            SIGNAL = scipy_signal.resample(
                SIGNAL, int(len(SIGNAL) * rate_ratio))
            SAMPLE_RATE = Normalized_Sample_Rate
            # resample produces unreadable float32 array so convert back
            # SIGNAL = np.asarray(SIGNAL, dtype=np.int16)

        # convert stereo to mono if needed
        # Might want to compare to just taking the first set of data.
        if len(SIGNAL.shape) == 2:
            SIGNAL = SIGNAL.sum(axis=1) / 2

        # detection
        try:
            # Convert the signal into windowed spectrogram features for TweetyNet.
            tweetynet_features = compute_features([SIGNAL])
            predictions, local_scores = detector.predict(
                tweetynet_features, model_weights=weight_path)
        except BaseException as e:
            print("Error in detection, skipping", audio_file)
            print(e)
            continue

        try:
            # Running moment to moment algorithm and appending to a master
            # dataframe.
            # Use TweetyNet's raw output instead of the isolation functions.
            if tweety_output:
                new_entry = predictions_to_kaleidoscope(
                    predictions,
                    SIGNAL,
                    audio_dir,
                    audio_file,
                    manual_id,
                    SAMPLE_RATE)
            else:
                new_entry = isolate(
                    local_scores[0],
                    SIGNAL,
                    SAMPLE_RATE,
                    audio_dir,
                    audio_file,
                    isolation_parameters,
                    manual_id=manual_id,
                    normalize_local_scores=normalize_local_scores)
            # print(new_entry)
            if annotations.empty:
                annotations = new_entry
            else:
                annotations = annotations.append(new_entry)
        except BaseException as e:
            print("Error in isolating bird calls from", audio_file)
            print(e)
            continue
    # Quick fix to indexing
    annotations.reset_index(inplace=True, drop=True)
    return annotations
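A hedged end-to-end sketch of the function above; the path and weight file are placeholders, and tweety_output=True keeps TweetyNet's own segmentation instead of running an isolation technique:

from PyHa.IsoAutio import generate_automated_labels_tweetynet

isolation_parameters = {"technique": "simple",
                        "threshold_type": "median",
                        "threshold_const": 2.0}

annotations = generate_automated_labels_tweetynet(
    "./wavs/",
    isolation_parameters,
    tweety_output=True,
    weight_path="./tweetynet_weights.pt")
print(annotations.head())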
25 changes: 25 additions & 0 deletions PyHa/tweetynet_package/tweetynet/CustomAudioDataset.py
@@ -0,0 +1,25 @@
from torch.utils.data import Dataset

class CustomAudioDataset(Dataset):
    def __init__(self, spec, annotations, uids, transform=None, target_transform=None):
        self.img_labels = annotations
        self.spec = spec
        self.uids = uids
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        # Spectrogram windows are precomputed, so nothing needs to be read from disk.
        image = self.spec[idx]
        label = self.img_labels[idx]
        # transform should be a spectrogram transform from librosa,
        # unless we do that beforehand.
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label, self.uids[idx]
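A minimal usage sketch for this dataset with a PyTorch DataLoader; the spectrogram windows, labels, and uids below are fabricated placeholders:

import numpy as np
from torch.utils.data import DataLoader
from PyHa.tweetynet_package.tweetynet.CustomAudioDataset import CustomAudioDataset

spec = np.random.rand(10, 1, 86, 86).astype(np.float32)  # fake spectrogram windows
labels = np.zeros(10, dtype=np.int64)                    # fake annotations
uids = ["clip_{}".format(i) for i in range(10)]

dataset = CustomAudioDataset(spec, labels, uids)
loader = DataLoader(dataset, batch_size=4, shuffle=True)
for image, label, uid in loader:
    print(image.shape, label.shape, uid)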
