Updated files and checking results on Windows

UCSD-E4E · May 11, 2022 · 4893339 · 4893339
1 parent e3fbbba
commit 4893339
Show file tree

Hide file tree

Showing 5 changed files with 201 additions and 43 deletions.
diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
@@ -9,7 +9,7 @@
 
 import torch
 from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
-from .tweetynet_package.tweetynet.Load_data_functions import compute_features
+from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
 
 def build_isolation_parameters(
         technique,
@@ -670,6 +670,7 @@ def generate_automated_labels(
         audio_dir,
         isolation_parameters,
         ml_model = "microfaune",
+        tweety_output = False,
         manual_id="bird",
         weight_path=None,
         Normalized_Sample_Rate=44100,
@@ -761,13 +762,11 @@ def generate_automated_labels(
                 global_score, local_scores = detector.predict(microfaune_features)
             elif ml_model == "tweetynet":
                 #need a function to convert a signal into a spectrogram and then window it
-                tweetynet_features = compute_features(SIGNAL)
+                tweetynet_features = compute_features([SIGNAL])
                 predictions, local_scores = detector.predict(tweetynet_features, model_weights=weight_path)
-            #print(len(local_scores[0]))
-            #print(local_scores)
-            #print(min(local_scores[0]), max(local_scores[0]))
-        except BaseException:
+        except BaseException as e:
             print("Error in detection, skipping", audio_file)
+            print(e)
             continue
 
         # get duration of clip
@@ -776,22 +775,33 @@ def generate_automated_labels(
         try:
             # Running moment to moment algorithm and appending to a master
             # dataframe.
-            new_entry = isolate(
-                local_scores[0],
-                SIGNAL,
-                SAMPLE_RATE,
-                audio_dir,
-                audio_file,
-                isolation_parameters,
-                manual_id=manual_id,
-                normalize_local_scores=normalize_local_scores)
+            #Add tweetynet without isolation functions here 
+            if tweety_output:
+                    local_scores = [np.array(predictions["pred"].values)]
+                    print(local_scores)
+                    print(predictions)
+                    print("here", audio_file)
+                    predictions.to_csv(audio_file + ".csv")
+                    print("saved_csv")
+                    new_entry = predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manual_id, SAMPLE_RATE)
+            else:
+                new_entry = isolate(
+                    local_scores[0],
+                    SIGNAL,
+                    SAMPLE_RATE,
+                    audio_dir,
+                    audio_file,
+                    isolation_parameters,
+                    manual_id=manual_id,
+                    normalize_local_scores=normalize_local_scores)
             # print(new_entry)
             if annotations.empty:
                 annotations = new_entry
             else:
                 annotations = annotations.append(new_entry)
-        except BaseException:
+        except BaseException as e:
             print("Error in isolating bird calls from", audio_file)
+            print(e)
             continue
     # Quick fix to indexing
     annotations.reset_index(inplace=True, drop=True)

diff --git a/PyHa/tweetynet_package/tweetynet/Load_data_functions.py b/PyHa/tweetynet_package/tweetynet/Load_data_functions.py
@@ -175,8 +175,8 @@ def load_signal2spec(signal, SR, n_mels, frame_size, hop_length):
     time_bins = dataset['time_bins']
     return X, Y, uids, time_bins
 
-def compute_features(signal, SR=44100, n_mels=86, frame_size=2048, hop_length=1024, windowsize=1):
-    x, y, uids, time_bins = load_signal2spec(signal, SR, n_mels, frame_size, hop_length)
+def compute_features(signal, SR=44100, n_mels=86, frame_size=2048, hop_length=1024, windowsize=2):
+    x, y, uids, time_bins = load_signal2spec(signal[0], SR, n_mels, frame_size, hop_length)
     dataset = window_data(x, y, uids, time_bins, windowsize)
     X = np.array(dataset['X'])
     X = X.reshape(X.shape[0], 1, X.shape[1], X.shape[2])
@@ -186,3 +186,30 @@ def compute_features(signal, SR=44100, n_mels=86, frame_size=2048, hop_length=10
     UIDS = UIDS.reshape(UIDS.shape[1])
     tweetynet_features = CustomAudioDataset(X, Y, UIDS)
     return tweetynet_features
+
+def predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manual_id, sample_rate):
+    # Build a Kaleidoscope-style annotation table from per-frame predictions.
+    #
+    # Args:
+    #   predictions: DataFrame; only its "pred" and "time_bins" columns are read here.
+    #   SIGNAL: audio samples; only len(SIGNAL) is used, for the "CLIP LENGTH" column.
+    #   audio_dir: value written to the "FOLDER" column.
+    #   audio_file: value written to the "IN FILE" column.
+    #   manual_id: value written to the "MANUAL ID" column.
+    #   sample_rate: written to "SAMPLE RATE" and used as the divisor for "CLIP LENGTH".
+    #
+    # Returns:
+    #   DataFrame with columns OFFSET, DURATION, FOLDER, IN FILE, CHANNEL,
+    #   CLIP LENGTH, SAMPLE RATE and MANUAL ID.
+    #
+    # NOTE(review): presumably pred == 0 marks frames without a detection, so the
+    # gaps between such frames are the annotated segments - confirm.
+
+    # NOTE(review): this is the first row's time stamp, not a bin width. The tail
+    # segment below uses predictions.iloc[1]["time_bins"] as the width instead. If the
+    # first time stamp is 0, the ">= 2*time_bin_seconds" filter accepts every
+    # non-NaN duration - confirm which value was intended.
+    time_bin_seconds = predictions.iloc[0]["time_bins"]
+    # Rows whose prediction is 0.
+    zero_sorted_filtered_df = predictions[predictions["pred"] == 0]
+    offset = zero_sorted_filtered_df["time_bins"]
+    # diff() then shift(-1): each row holds the time until the next pred == 0 row;
+    # the last row ends up NaN.
+    duration = zero_sorted_filtered_df["time_bins"].diff().shift(-1)
+    intermediary_df = pd.DataFrame({"OFFSET": offset, "DURATION": duration})
+    #need to fill out df. 
+    # Debug output left in place.
+    print("made it")
+    # Collects partial frames; concatenated into one DataFrame further down.
+    kaliedoscope_df = []
+    # NOTE(review): offset.iloc[0] raises IndexError when no row has pred == 0.
+    # Leading segment: from time 0 up to the first pred == 0 row.
+    if offset.iloc[0] != 0:
+        kaliedoscope_df.append(pd.DataFrame({"OFFSET": [0], "DURATION": [offset.iloc[0]]}))
+    # Keep gaps of at least 2*time_bin_seconds; the NaN duration of the last row
+    # fails the comparison and is dropped.
+    kaliedoscope_df.append(intermediary_df[intermediary_df["DURATION"] >= 2*time_bin_seconds])
+    # Trailing segment: starts at the last pred == 0 row when that row is not the final one.
+    # NOTE(review): DURATION is (last time stamp + second row's time stamp) and is not
+    # reduced by OFFSET - looks like it should be the clip end minus OFFSET; confirm.
+    if offset.iloc[-1] < predictions.iloc[-1]["time_bins"]:
+        kaliedoscope_df.append(pd.DataFrame({"OFFSET": [offset.iloc[-1]], "DURATION": [predictions.iloc[-1]["time_bins"] + 
+                                predictions.iloc[1]["time_bins"]]}))
+    kaliedoscope_df = pd.concat(kaliedoscope_df)
+    kaliedoscope_df = kaliedoscope_df.reset_index(drop=True)
+    # Constant metadata columns, identical for every row.
+    kaliedoscope_df["FOLDER"] = audio_dir
+    kaliedoscope_df["IN FILE"] = audio_file
+    kaliedoscope_df["CHANNEL"] = 0
+    kaliedoscope_df["CLIP LENGTH"] = len(SIGNAL)/sample_rate
+    kaliedoscope_df["SAMPLE RATE"] = sample_rate
+    kaliedoscope_df["MANUAL ID"] = manual_id
+
+    return kaliedoscope_df
+
diff --git a/PyHa/tweetynet_package/tweetynet/TweetyNetModel.py b/PyHa/tweetynet_package/tweetynet/TweetyNetModel.py
@@ -69,37 +69,134 @@ def test_path(self, wav_path, n_mels):
         test_out = self.test_a_file(test_data_loader)
         return test_out
 
-    def predict(self, test_dataset, model_weights=None, norm=False, batch_size=1, window_size=1):
+    def predict(self, test_dataset, model_weights=None, norm=False, batch_size=1, window_size=2):
+        # Run the model over test_dataset and collect per-frame predictions and scores.
+        #
+        # Args:
+        #   test_dataset: dataset handed to DataLoader; each batch is unpacked as
+        #       (inputs, labels, uids).
+        #   model_weights: path passed to torch.load; when None, a default weight file
+        #       is loaded from a relative path (resolved against the current working directory).
+        #   norm: when truthy, the scores go through normalize(local_score, 0, 1).
+        #   batch_size: DataLoader batch size. NOTE(review): the output[0, ...] indexing,
+        #       uids[0] and the reshape(shape[1]) calls below look like they assume
+        #       batch_size == 1 - confirm.
+        #   window_size: numerator of the time-bin width and multiplier for the
+        #       per-window time offset (presumably seconds - confirm).
+        #
+        # Returns:
+        #   (predictions, [local_score]): a DataFrame with columns uid, pred, label,
+        #   time_bins and local_score, plus a one-element list wrapping the score array.
         if model_weights != None:
+            # NOTE(review): no map_location here, unlike the default branch below.
             self.model.load_state_dict(torch.load(model_weights))
         else:
-            self.model.load_state_dict(torch.load(r"E:\PyHa\PyHa\tweetynet_package\tweetynet\config\model_weights_test.h5"))
+            self.model.load_state_dict(torch.load(os.path.join("PyHa","tweetynet_package","tweetynet","config","tweetynet_weights.h5"), map_location=torch.device('cpu')))
         test_data_loader = DataLoader(test_dataset, batch_size=batch_size)
         predictions = pd.DataFrame()
         self.model.eval()
         local_score = []
+        # Peek at the first batch only to learn how many label entries a window has.
+        # NOTE(review): relies on the iterator exposing .next(); newer PyTorch releases
+        # may only support next(dataiter) - confirm against the installed version.
+        dataiter = iter(test_data_loader)
+        _, label, uid = dataiter.next()
+        # Width of one time bin: window_size divided by the label length along axis 1.
+        time_bin = float(window_size)/label.shape[1]
+        # Start time of every bin relative to the start of its window.
+        st_time = np.array([time_bin*n for n in range(label.shape[1])])
+
         with torch.no_grad():
             for i, data in enumerate(test_data_loader):
                 inputs, labels, uids = data
                 #inputs = inputs.reshape(inputs.shape[0], 1, inputs.shape[0], inputs.shape[1])
                 inputs, labels = inputs.to(self.device), labels.to(self.device)
                 output = self.model(inputs, inputs.shape[0], inputs.shape[0])
+                #Add 0 predictions and 1 predictions
+                ####                
+                #### Ways to interpret the bidirectional output of Tweetynet. 
+                #local_score.extend(np.median(output[0, 0, :], output[0, 1, :]))
+                #local_score.extend(np.mean(output[0, 0, :], output[0, 1, :]))
+                #local_score.extend(np.subtract(output[0, 0, :], output[0, 1, :]))
+                #local_score.extend(np.add(output[0, 0, :], output[0, 1, :]))
+                # Scores are taken from index 1 along dim 1 of the first batch element.
                 local_score.extend([x for x in output[0, 1, :]])
                 #add option to normalize
                 #be able to create df if interested
-                pred = torch.argmax(output, dim=1)
+                # Index of the maximum along dim 1, converted to a NumPy array.
+                pred = torch.max(output, dim=1)[1].cpu().detach().numpy()
+                #pred = torch.argmax(output, dim=1)
                 pred = pred.reshape(pred.shape[1])
                 labels = labels.reshape(labels.shape[1])
                 #print(uids.shape, pred.shape, labels.shape)
-                d = {"uid": uids[0], "pred": pred, "label": labels}
+                #print(int(uids[0].split("_")[0])
+                # The uid text before the first "_" is parsed as an integer and scaled by
+                # window_size to shift the bin start times for this window.
+                bins = st_time + (int(uids[0].split("_")[0])*window_size)
+                d = {"uid": uids[0], "pred": pred, "label": labels, "time_bins": bins}
                 new_preds = pd.DataFrame(d)
+                # NOTE(review): DataFrame.append was removed in pandas 2.0; confirm the
+                # pinned pandas version still provides it.
                 predictions = predictions.append(new_preds)
         if norm:
             local_score = normalize(local_score, 0, 1)
         local_score = np.array(local_score)
+        # Debug output left in place.
+        print(local_score.shape)
         predictions["local_score"] = local_score
         return predictions, [local_score]
 
+def testing_step(self, test_loader, hop_length, sr, window_size):
+        # Run the model over test_loader and build a per-frame prediction table.
+        #
+        # NOTE(review): declared at column 0 while taking self and using an 8-space body,
+        # so as written this is a module-level function rather than a method - confirm
+        # the intended placement.
+        #
+        # Args:
+        #   test_loader: iterable of batches, each unpacked as (inputs, labels, uids).
+        #   hop_length, sr: forwarded to get_time (not defined in the visible code).
+        #   window_size: multiplied by the integer prefix of each uid to offset frame times.
+        #
+        # Returns:
+        #   (predictions, time_secs): predictions sorted by "file" and
+        #   "overall frame number", and a one-column DataFrame
+        #   ("temporal_frame_start_times") built from st_time.
+
+        predictions = pd.DataFrame()
+        self.model.eval()
+
+        st_time = []
+        # NOTE(review): relies on the iterator exposing .next() - confirm against the
+        # installed PyTorch version.
+        dataiter = iter(test_loader)
+        # NOTE(review): the first element of the batch is bound to "label" here, but the
+        # loop below unpacks the same tuple as (inputs, labels, uids) - confirm which
+        # element's last dimension is meant.
+        label, _, _ = dataiter.next()
+        # print(label.shape)
+        for i in range(label.shape[-1]): # will change to be more general, does it only for one trainfile?
+            st_time.append(get_time(i, hop_length, sr))
+        st_time = np.array(st_time)
+        with torch.no_grad():
+            for i, data in enumerate(test_loader):
+                inputs, labels, uids = data
+                #inputs = inputs.reshape(inputs.shape[0], 1, inputs.shape[1], inputs.shape[2])
+                #print(labels.dtype)
+                #labels = labels.long()
+                #print(labels.dtype)
+
+                inputs, labels = inputs.to(self.device), labels.to(self.device)
+
+                output = self.model(inputs, inputs.shape[0], labels.shape[0]) # what is this output look like?
+                #print(output)
+
+                # Per-batch accumulators for the columns of the output table.
+                temp_uids = []
+                files = []
+                window_file = []
+                window_number = []
+                frame_number = []
+                overall_frame_number = []
+                st_batch_times = []
+                if self.binary: # weakly labeled
+                    # Repeat each label and uid once per output frame.
+                    labels = torch.from_numpy((np.array([[x] * output.shape[-1] for x in labels])))
+                    temp_uids = np.array([[x] * output.shape[-1] for x in uids])
+                    # NOTE(review): "u" is only assigned by the loop in the else branch, so
+                    # this looks like a NameError (or a stale value) on this path. The other
+                    # accumulators also stay plain lists here, yet .flatten() is called on
+                    # them below - confirm this branch is exercised.
+                    files.append(u)
+                else:  # in the case of strongly labeled data
+                    for u in uids:
+                        # Integer prefix of the uid (text before the first "_") selects the
+                        # window; it offsets both the start times and the frame numbering.
+                        st_batch_times.extend(st_time + (window_size*int(u.split("_")[0])))
+                        for j in range(output.shape[-1]):
+                             temp_uids.append(str(j + (output.shape[-1]*int(u.split("_")[0]))) + "_" + u)
+                             window_file.append(u)
+                             frame_number.append(j)
+                             overall_frame_number.append(j+ (output.shape[-1]*int(u.split("_")[0])))
+                             window_number.append(int(u.split("_")[0]))
+                             files.append("_".join(u.split("_")[1:]))
+                    temp_uids = np.array(temp_uids)
+                    window_file = np.array(window_file)
+                    window_number = np.array(window_number)
+                    frame_number = np.array(frame_number)
+                    overall_frame_number = np.array(overall_frame_number)
+                    st_batch_times = np.array(st_batch_times)
+                # Index 0 and index 1 along dim 1 of the model output, kept as separate columns.
+                zero_pred = output[:, 0, :]
+                one_pred = output[:, 1, :]
+
+                pred = torch.argmax(output, dim=1) 
+                d = {"uid": temp_uids.flatten(), "window file": window_file.flatten(), "file":files, 
+                        "overall frame number": overall_frame_number, "frame number": frame_number, "window number": window_number, 
+                        "zero_pred": zero_pred.flatten(), "one_pred": one_pred.flatten(), 
+                        "pred": pred.flatten(),"label": labels.flatten(), "temporal_frame_start_times": st_batch_times.flatten()}
+                new_preds = pd.DataFrame(d)
+                # NOTE(review): DataFrame.append was removed in pandas 2.0; confirm the
+                # pinned pandas version still provides it.
+                predictions = predictions.append(new_preds)
+
+                #tim = {"temporal_frame_start_times": st_time}
+                #time_secs = pd.DataFrame(tim)
+
+                #nu_time = pd.concat([time_secs]*425, ignore_index=True)
+
+                #extracted_col = nu_time["temporal_frame_start_times"]
+
+                #predictions_timed = predictions.join(extracted_col)
+
+        #predictions = prediction_fix(predictions, label.shape[-1])
+        predictions = predictions.sort_values(["file", "overall frame number"])
+        predictions = predictions.reset_index(drop=True)
+        # Start times for a single window, returned alongside the predictions.
+        tim = {"temporal_frame_start_times": st_time}
+        time_secs = pd.DataFrame(tim)
+        print('Finished Testing')
+        return predictions, time_secs
+
+
 def normalize(arr, t_min, t_max):
     norm_arr = []
     diff = t_max - t_min

diff --git a/PyHa/tweetynet_package/tweetynet/config/tweetynet_weights.h5 b/PyHa/tweetynet_package/tweetynet/config/tweetynet_weights.h5
diff --git a/PyHa/visualizations.py b/PyHa/visualizations.py
@@ -7,6 +7,9 @@
 import seaborn as sns
 from .IsoAutio import *
 
+import torch
+from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
+from .tweetynet_package.tweetynet.Load_data_functions import compute_features
 
 def local_line_graph(
         local_scores,
@@ -81,6 +84,7 @@ def local_line_graph(
     fig.suptitle("Spectrogram and Local Scores for " + clip_name)
     # score line plot - top plot
     axs[0].plot(time_stamps, local_scores)
+    #Look into this and their relation.
     axs[0].set_xlim(0, duration)
     if log_scale:
         axs[0].set_yscale('log')
@@ -139,6 +143,8 @@ def local_line_graph(
 
 def local_score_visualization(
         clip_path,
+        ml_model="tweetynet",
+        tweety_output=False,
         weight_path=None,
         premade_annotations_df=None,
         premade_annotations_label="Human Labels",
@@ -205,21 +211,35 @@ def local_score_visualization(
         SIGNAL = SIGNAL.sum(axis=1) / 2
 
     # Initializing the detector to baseline or with retrained weights
-    if weight_path is None:
-        # Microfaune RNNDetector class
-        detector = RNNDetector()
+    if ml_model == "microfaune":
+        if weight_path is None:
+            # Microfaune RNNDetector class
+            detector = RNNDetector()
+        else:
+            try:
+                # Initializing Microfaune hybrid CNN-RNN with new weights
+                detector = RNNDetector(weight_path)
+            except BaseException:
+                print("Error in weight path:", weight_path)
+                return
+    elif ml_model == "tweetynet":
+        device = torch.device('cpu')
+        detector = TweetyNetModel(2, (1, 86, 43), 43, device, binary = False)
     else:
-        try:
-            # Initializing Microfaune hybrid CNN-RNN with new weights
-            detector = RNNDetector(weight_path)
-        except BaseException:
-            print("Error in weight path:", weight_path)
-            return
+        print("model \"{}\" does not exist".format(ml_model))
+        return None
     try:
-        # Computing Mel Spectrogram of the audio clip
-        microfaune_features = detector.compute_features([SIGNAL])
-        # Running the Mel Spectrogram through the RNN
-        global_score, local_score = detector.predict(microfaune_features)
+        if ml_model == "microfaune":
+            # Computing Mel Spectrogram of the audio clip
+            microfaune_features = detector.compute_features([SIGNAL])
+            # Running the Mel Spectrogram through the RNN
+            global_score, local_score = detector.predict(microfaune_features)
+        elif ml_model == "tweetynet":
+            #need a function to convert a signal into a spectrogram and then window it
+            tweetynet_features = compute_features([SIGNAL])
+            predictions, local_score = detector.predict(tweetynet_features, model_weights=weight_path)
+        #if tweety_output:
+            #    local_score = [np.array(predictions["pred"].values)]
     except BaseException:
         print(
             "Skipping " +
@@ -230,14 +250,18 @@ def local_score_visualization(
     if premade_annotations_df is None:
         premade_annotations_df = pd.DataFrame()
     if automated_df:
-        automated_df = isolate(
-            local_score[0],
-            SIGNAL,
-            SAMPLE_RATE,
-            "Doesn't",
-            "Matter",
-            isolation_parameters,
-            normalize_local_scores=normalize_local_scores)
+        if tweety_output:
+                local_scores = [np.array(predictions["pred"].values)]
+                automated_df = predictions_to_kaleidoscope(predictions, SIGNAL, "Doesn't", "Doesn't", "Matter", SAMPLE_RATE)
+        else:
+            automated_df = isolate(
+                local_score[0],
+                SIGNAL,
+                SAMPLE_RATE,
+                "Doesn't",
+                "Matter",
+                isolation_parameters,
+                normalize_local_scores=normalize_local_scores)
     else:
         automated_df = pd.DataFrame()