diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index 7968fa1..55a595a 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -9,7 +9,7 @@
 import torch
 from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
-from .tweetynet_package.tweetynet.Load_data_functions import compute_features
+from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
 
 
 def build_isolation_parameters(
         technique,
@@ -670,6 +670,7 @@ def generate_automated_labels(
         audio_dir,
         isolation_parameters,
         ml_model = "microfaune",
+        tweety_output = False,
         manual_id="bird",
         weight_path=None,
         Normalized_Sample_Rate=44100,
@@ -761,13 +762,11 @@ def generate_automated_labels(
             global_score, local_scores = detector.predict(microfaune_features)
         elif ml_model == "tweetynet":
             #need a function to convert a signal into a spectrogram and then window it
-            tweetynet_features = compute_features(SIGNAL)
+            tweetynet_features = compute_features([SIGNAL])
             predictions, local_scores = detector.predict(tweetynet_features, model_weights=weight_path)
-            #print(len(local_scores[0]))
-            #print(local_scores)
-            #print(min(local_scores[0]), max(local_scores[0]))
-        except BaseException:
+        except BaseException as e:
             print("Error in detection, skipping", audio_file)
+            print(e)
             continue
 
         # get duration of clip
@@ -776,22 +775,29 @@
         try:
             # Running moment to moment algorithm and appending to a master
             # dataframe.
-            new_entry = isolate(
-                local_scores[0],
-                SIGNAL,
-                SAMPLE_RATE,
-                audio_dir,
-                audio_file,
-                isolation_parameters,
-                manual_id=manual_id,
-                normalize_local_scores=normalize_local_scores)
+            # Bypass the isolation functions and use TweetyNet's binary
+            # frame-level predictions directly.
+            if tweety_output:
+                local_scores = [np.array(predictions["pred"].values)]
+                new_entry = predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manual_id, SAMPLE_RATE)
+            else:
+                new_entry = isolate(
+                    local_scores[0],
+                    SIGNAL,
+                    SAMPLE_RATE,
+                    audio_dir,
+                    audio_file,
+                    isolation_parameters,
+                    manual_id=manual_id,
+                    normalize_local_scores=normalize_local_scores)
             # print(new_entry)
             if annotations.empty:
                 annotations = new_entry
             else:
                 annotations = annotations.append(new_entry)
-        except BaseException:
+        except BaseException as e:
             print("Error in isolating bird calls from", audio_file)
+            print(e)
             continue
     # Quick fix to indexing
     annotations.reset_index(inplace=True, drop=True)
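
Note: the new `tweety_output` flag makes generate_automated_labels skip the score-thresholding isolation step and hand TweetyNet's binary frame predictions straight to predictions_to_kaleidoscope. A minimal usage sketch, assuming a ./TEST/ directory of clips; the path and the isolation_parameters keys are illustrative, not part of this patch:

    from PyHa.IsoAutio import generate_automated_labels

    # isolation_parameters is only consulted when tweety_output is False.
    isolation_parameters = {
        "technique": "steinberg",
        "threshold_type": "median",
        "threshold_const": 2.0,
    }

    annotations = generate_automated_labels(
        "./TEST/",              # hypothetical audio directory
        isolation_parameters,
        ml_model="tweetynet",
        tweety_output=True)     # bypass isolate(), keep frame predictions
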
diff --git a/PyHa/tweetynet_package/tweetynet/Load_data_functions.py b/PyHa/tweetynet_package/tweetynet/Load_data_functions.py
index eba7967..56d744f 100644
--- a/PyHa/tweetynet_package/tweetynet/Load_data_functions.py
+++ b/PyHa/tweetynet_package/tweetynet/Load_data_functions.py
@@ -175,8 +175,8 @@ def load_signal2spec(signal, SR, n_mels, frame_size, hop_length):
     time_bins = dataset['time_bins']
     return X, Y, uids, time_bins
 
-def compute_features(signal, SR=44100, n_mels=86, frame_size=2048, hop_length=1024, windowsize=1):
-    x, y, uids, time_bins = load_signal2spec(signal, SR, n_mels, frame_size, hop_length)
+def compute_features(signal, SR=44100, n_mels=86, frame_size=2048, hop_length=1024, windowsize=2):
+    x, y, uids, time_bins = load_signal2spec(signal[0], SR, n_mels, frame_size, hop_length)
     dataset = window_data(x, y, uids, time_bins, windowsize)
     X = np.array(dataset['X'])
     X = X.reshape(X.shape[0], 1, X.shape[1], X.shape[2])
@@ -186,3 +186,33 @@ def compute_features(signal, SR=44100, n_mels=86, frame_size=2048, hop_length=10
     UIDS = UIDS.reshape(UIDS.shape[1])
     tweetynet_features = CustomAudioDataset(X, Y, UIDS)
     return tweetynet_features
+
+def predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manual_id, sample_rate):
+    # The first time bin starts at 0, so the second entry gives the width of
+    # one TweetyNet time bin in seconds.
+    time_bin_seconds = predictions.iloc[1]["time_bins"]
+    zero_sorted_filtered_df = predictions[predictions["pred"] == 0]
+    offset = zero_sorted_filtered_df["time_bins"]
+    duration = zero_sorted_filtered_df["time_bins"].diff().shift(-1)
+    intermediary_df = pd.DataFrame({"OFFSET": offset, "DURATION": duration})
+    kaleidoscope_df = []
+    if offset.iloc[0] != 0:
+        # The clip opens on pred == 1 frames.
+        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [0], "DURATION": [offset.iloc[0]]}))
+    kaleidoscope_df.append(intermediary_df[intermediary_df["DURATION"] >= 2*time_bin_seconds])
+    if offset.iloc[-1] < predictions.iloc[-1]["time_bins"]:
+        # The clip ends on pred == 1 frames: extend the final annotation to
+        # the end of the clip (last bin start plus one bin width).
+        kaleidoscope_df.append(pd.DataFrame({"OFFSET": [offset.iloc[-1]],
+                                             "DURATION": [predictions.iloc[-1]["time_bins"]
+                                                          + time_bin_seconds - offset.iloc[-1]]}))
+    kaleidoscope_df = pd.concat(kaleidoscope_df)
+    kaleidoscope_df = kaleidoscope_df.reset_index(drop=True)
+    kaleidoscope_df["FOLDER"] = audio_dir
+    kaleidoscope_df["IN FILE"] = audio_file
+    kaleidoscope_df["CHANNEL"] = 0
+    kaleidoscope_df["CLIP LENGTH"] = len(SIGNAL)/sample_rate
+    kaleidoscope_df["SAMPLE RATE"] = sample_rate
+    kaleidoscope_df["MANUAL ID"] = manual_id
+
+    return kaleidoscope_df
\ No newline at end of file
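
Note: the OFFSET/DURATION logic above hinges on `diff().shift(-1)`: adjacent `pred == 0` bins yield gaps of one bin width, while a run of `pred == 1` frames between two zero bins yields a gap of at least two bin widths, which survives the filter and becomes an annotation. A standalone sketch with toy 0.5 s bins (values illustrative):

    import pandas as pd

    predictions = pd.DataFrame({
        "pred":      [0,   1,   1,   0,   0,   1,   0],
        "time_bins": [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0],
    })
    zeros = predictions[predictions["pred"] == 0]
    gaps = pd.DataFrame({
        "OFFSET": zeros["time_bins"],
        "DURATION": zeros["time_bins"].diff().shift(-1),
    })
    # Only the gaps spanning at least two bins contain pred == 1 frames:
    print(gaps[gaps["DURATION"] >= 2 * 0.5])
    # -> OFFSET 0.0 / DURATION 1.5 and OFFSET 2.0 / DURATION 1.0
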
diff --git a/PyHa/tweetynet_package/tweetynet/TweetyNetModel.py b/PyHa/tweetynet_package/tweetynet/TweetyNetModel.py
index e446c1f..890fd2b 100644
--- a/PyHa/tweetynet_package/tweetynet/TweetyNetModel.py
+++ b/PyHa/tweetynet_package/tweetynet/TweetyNetModel.py
@@ -69,37 +69,134 @@ def test_path(self, wav_path, n_mels):
         test_out = self.test_a_file(test_data_loader)
         return test_out
 
-    def predict(self, test_dataset, model_weights=None, norm=False, batch_size=1, window_size=1):
+    def predict(self, test_dataset, model_weights=None, norm=False, batch_size=1, window_size=2):
         if model_weights != None:
             self.model.load_state_dict(torch.load(model_weights))
         else:
-            self.model.load_state_dict(torch.load(r"E:\PyHa\PyHa\tweetynet_package\tweetynet\config\model_weights_test.h5"))
+            self.model.load_state_dict(torch.load(os.path.join("PyHa", "tweetynet_package", "tweetynet", "config", "tweetynet_weights.h5"), map_location=torch.device('cpu')))
 
         test_data_loader = DataLoader(test_dataset, batch_size=batch_size)
         predictions = pd.DataFrame()
         self.model.eval()
         local_score = []
+        # Peek at one window to get the number of output time bins and the
+        # start time of each bin within a window.
+        dataiter = iter(test_data_loader)
+        _, label, uid = next(dataiter)
+        time_bin = float(window_size)/label.shape[1]
+        st_time = np.array([time_bin*n for n in range(label.shape[1])])
+
         with torch.no_grad():
             for i, data in enumerate(test_data_loader):
                 inputs, labels, uids = data
                 #inputs = inputs.reshape(inputs.shape[0], 1, inputs.shape[0], inputs.shape[1])
                 inputs, labels = inputs.to(self.device), labels.to(self.device)
                 output = self.model(inputs, inputs.shape[0], inputs.shape[0])
+                # Other ways to collapse TweetyNet's two output channels into
+                # a single local score, kept for reference:
+                #local_score.extend(np.median(output[0, 0, :], output[0, 1, :]))
+                #local_score.extend(np.mean(output[0, 0, :], output[0, 1, :]))
+                #local_score.extend(np.subtract(output[0, 0, :], output[0, 1, :]))
+                #local_score.extend(np.add(output[0, 0, :], output[0, 1, :]))
                 local_score.extend([x for x in output[0, 1, :]])
                 #add option to normalize
                 #be able to create df if interested
-                pred = torch.argmax(output, dim=1)
+                pred = torch.max(output, dim=1)[1].cpu().detach().numpy()
                 pred = pred.reshape(pred.shape[1])
                 labels = labels.reshape(labels.shape[1])
                 #print(uids.shape, pred.shape, labels.shape)
-                d = {"uid": uids[0], "pred": pred, "label": labels}
+                # uids look like "<window index>_<file>"; the window index
+                # shifts this window's bin times into clip time.
+                bins = st_time + (int(uids[0].split("_")[0])*window_size)
+                d = {"uid": uids[0], "pred": pred, "label": labels, "time_bins": bins}
                 new_preds = pd.DataFrame(d)
                 predictions = predictions.append(new_preds)
 
         if norm:
             local_score = normalize(local_score, 0, 1)
         local_score = np.array(local_score)
         predictions["local_score"] = local_score
         return predictions, [local_score]
 
+    def testing_step(self, test_loader, hop_length, sr, window_size):
+        predictions = pd.DataFrame()
+        self.model.eval()
+
+        # Build the start time of every output frame from the first batch.
+        # TODO: generalize; this assumes every window has the same shape.
+        st_time = []
+        dataiter = iter(test_loader)
+        _, label, _ = next(dataiter)
+        for i in range(label.shape[-1]):
+            st_time.append(get_time(i, hop_length, sr))
+        st_time = np.array(st_time)
+        with torch.no_grad():
+            for i, data in enumerate(test_loader):
+                inputs, labels, uids = data
+                inputs, labels = inputs.to(self.device), labels.to(self.device)
+
+                output = self.model(inputs, inputs.shape[0], labels.shape[0])
+
+                temp_uids = []
+                files = []
+                window_file = []
+                window_number = []
+                frame_number = []
+                overall_frame_number = []
+                st_batch_times = []
+                if self.binary:  # weakly labeled
+                    labels = torch.from_numpy((np.array([[x] * output.shape[-1] for x in labels])))
+                    temp_uids = np.array([[x] * output.shape[-1] for x in uids])
+                    files.extend(uids)
+                else:  # in the case of strongly labeled data
+                    for u in uids:
+                        st_batch_times.extend(st_time + (window_size*int(u.split("_")[0])))
+                        for j in range(output.shape[-1]):
+                            temp_uids.append(str(j + (output.shape[-1]*int(u.split("_")[0]))) + "_" + u)
+                            window_file.append(u)
+                            frame_number.append(j)
+                            overall_frame_number.append(j + (output.shape[-1]*int(u.split("_")[0])))
+                            window_number.append(int(u.split("_")[0]))
+                            files.append("_".join(u.split("_")[1:]))
+                temp_uids = np.array(temp_uids)
+                window_file = np.array(window_file)
+                window_number = np.array(window_number)
+                frame_number = np.array(frame_number)
+                overall_frame_number = np.array(overall_frame_number)
+                st_batch_times = np.array(st_batch_times)
+                zero_pred = output[:, 0, :]
+                one_pred = output[:, 1, :]
+
+                pred = torch.argmax(output, dim=1)
+                d = {"uid": temp_uids.flatten(), "window file": window_file.flatten(), "file": files,
+                     "overall frame number": overall_frame_number, "frame number": frame_number, "window number": window_number,
+                     "zero_pred": zero_pred.flatten(), "one_pred": one_pred.flatten(),
+                     "pred": pred.flatten(), "label": labels.flatten(), "temporal_frame_start_times": st_batch_times.flatten()}
+                new_preds = pd.DataFrame(d)
+                predictions = predictions.append(new_preds)
+
+        predictions = predictions.sort_values(["file", "overall frame number"])
+        predictions = predictions.reset_index(drop=True)
+        tim = {"temporal_frame_start_times": st_time}
+        time_secs = pd.DataFrame(tim)
+        print('Finished Testing')
+        return predictions, time_secs
+
 
 def normalize(arr, t_min, t_max):
     norm_arr = []
     diff = t_max - t_min
diff --git a/PyHa/tweetynet_package/tweetynet/config/tweetynet_weights.h5 b/PyHa/tweetynet_package/tweetynet/config/tweetynet_weights.h5
new file mode 100644
index 0000000..277da3c
Binary files /dev/null and b/PyHa/tweetynet_package/tweetynet/config/tweetynet_weights.h5 differ
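
Note: predict now timestamps every output frame. Each window covers window_size seconds, each of its label.shape[1] bins gets a start time inside the window, and the window index encoded in the uid ("<window index>_<file>") shifts those times into clip coordinates. A small numeric sketch using the 43-bin, 2 s windows this patch configures elsewhere:

    import numpy as np

    window_size = 2                         # seconds per window
    n_bins = 43                             # output frames per window
    time_bin = float(window_size) / n_bins  # ~0.0465 s per frame
    st_time = np.array([time_bin * n for n in range(n_bins)])

    # uid "3_clip.wav" marks the fourth window, so its bins start 6 s in:
    bins = st_time + 3 * window_size
    print(bins[0], bins[-1])                # 6.0 ... ~7.95
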
diff --git a/PyHa/visualizations.py b/PyHa/visualizations.py
index 3f35980..cdadf3e 100644
--- a/PyHa/visualizations.py
+++ b/PyHa/visualizations.py
@@ -7,6 +7,9 @@
 import seaborn as sns
 from .IsoAutio import *
 
+import torch
+from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
+from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
 
 def local_line_graph(
         local_scores,
@@ -81,6 +84,7 @@ def local_line_graph(
     fig.suptitle("Spectrogram and Local Scores for " + clip_name)
     # score line plot - top plot
     axs[0].plot(time_stamps, local_scores)
+    # TODO: check how time_stamps here relates to the x-limits set below.
     axs[0].set_xlim(0, duration)
     if log_scale:
         axs[0].set_yscale('log')
@@ -139,6 +143,8 @@ def local_line_graph(
 
 def local_score_visualization(
         clip_path,
+        ml_model="tweetynet",
+        tweety_output=False,
         weight_path=None,
         premade_annotations_df=None,
         premade_annotations_label="Human Labels",
@@ -205,21 +211,33 @@ def local_score_visualization(
        SIGNAL = SIGNAL.sum(axis=1) / 2
 
     # Initializing the detector to baseline or with retrained weights
-    if weight_path is None:
-        # Microfaune RNNDetector class
-        detector = RNNDetector()
+    if ml_model == "microfaune":
+        if weight_path is None:
+            # Microfaune RNNDetector class
+            detector = RNNDetector()
+        else:
+            try:
+                # Initializing Microfaune hybrid CNN-RNN with new weights
+                detector = RNNDetector(weight_path)
+            except BaseException:
+                print("Error in weight path:", weight_path)
+                return
+    elif ml_model == "tweetynet":
+        device = torch.device('cpu')
+        detector = TweetyNetModel(2, (1, 86, 43), 43, device, binary=False)
     else:
-        try:
-            # Initializing Microfaune hybrid CNN-RNN with new weights
-            detector = RNNDetector(weight_path)
-        except BaseException:
-            print("Error in weight path:", weight_path)
-            return
+        print("model \"{}\" does not exist".format(ml_model))
+        return None
     try:
-        # Computing Mel Spectrogram of the audio clip
-        microfaune_features = detector.compute_features([SIGNAL])
-        # Running the Mel Spectrogram through the RNN
-        global_score, local_score = detector.predict(microfaune_features)
+        if ml_model == "microfaune":
+            # Computing Mel Spectrogram of the audio clip
+            microfaune_features = detector.compute_features([SIGNAL])
+            # Running the Mel Spectrogram through the RNN
+            global_score, local_score = detector.predict(microfaune_features)
+        elif ml_model == "tweetynet":
+            # Convert the signal into a windowed spectrogram dataset
+            tweetynet_features = compute_features([SIGNAL])
+            predictions, local_score = detector.predict(tweetynet_features, model_weights=weight_path)
     except BaseException:
         print(
             "Skipping " +
@@ -230,14 +250,20 @@
     if premade_annotations_df is None:
         premade_annotations_df = pd.DataFrame()
     if automated_df:
-        automated_df = isolate(
-            local_score[0],
-            SIGNAL,
-            SAMPLE_RATE,
-            "Doesn't",
-            "Matter",
-            isolation_parameters,
-            normalize_local_scores=normalize_local_scores)
+        if tweety_output:
+            # Plot TweetyNet's binary predictions and convert them straight
+            # to a Kaleidoscope-style dataframe.
+            local_score = [np.array(predictions["pred"].values)]
+            automated_df = predictions_to_kaleidoscope(predictions, SIGNAL, "Doesn't", "Doesn't", "Matter", SAMPLE_RATE)
+        else:
+            automated_df = isolate(
+                local_score[0],
+                SIGNAL,
+                SAMPLE_RATE,
+                "Doesn't",
+                "Matter",
+                isolation_parameters,
+                normalize_local_scores=normalize_local_scores)
     else:
         automated_df = pd.DataFrame()
 
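
Note: putting the visualization changes together, a usage sketch of the reworked entry point; the clip path is illustrative, and isolation_parameters is only needed when tweety_output is False:

    from PyHa.visualizations import local_score_visualization

    # Plots the spectrogram with TweetyNet's binary frame predictions as the
    # score line and overlays the Kaleidoscope-style annotations.
    local_score_visualization(
        "./TEST/example.wav",   # hypothetical clip
        ml_model="tweetynet",
        tweety_output=True,
        automated_df=True)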