diff --git a/PyHa/IsoAutio.py b/PyHa/IsoAutio.py
index 9ead972..742df2c 100644
--- a/PyHa/IsoAutio.py
+++ b/PyHa/IsoAutio.py
@@ -250,6 +250,7 @@ def simple_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename, isola
     annotation_start = 0
     call_start = 0
+    call_stop = 0
     # looping through all of the local scores
     for ndx in range(len(local_scores)):
         current_score = local_scores[ndx]
@@ -333,6 +334,7 @@ def stack_isolate(local_scores, SIGNAL, SAMPLE_RATE, audio_dir, filename, isolat
     stack_counter = 0
     annotation_start = 0
     call_start = 0
+    call_stop = 0
     # looping through every local score array value
     for ndx in range(len(local_scores)):
         # the case for the end of the local score array and the stack isn't empty.
@@ -515,6 +517,8 @@ def generate_automated_labels(audio_dir, isolation_parameters, manual_id = "bird
             print("Error in detection, skipping", audio_file)
             continue
 
+        # get duration of clip
+        duration = len(SIGNAL) / SAMPLE_RATE
 
         try:
             # Running moment to moment algorithm and appending to a master dataframe.
diff --git a/PyHa/statistics.py b/PyHa/statistics.py
index 4233754..d0ea63b 100644
--- a/PyHa/statistics.py
+++ b/PyHa/statistics.py
@@ -171,32 +171,22 @@ def automated_labeling_statistics(automated_df,manual_df,stats_type = "IoU", thr
     statistics_df.reset_index(inplace = True, drop = True)
     return statistics_df
 
-# Function that will provide statistics across a whole dataset, inclusive of all classes
-def dataset_statistics(statistics_df, stats_type):
-    return pd.DataFrame.from_dict([entry])
-
-# Function that summarizes statistics across all classes
-def global_dataset_statistics(statistics_df, stats_type):
-
-    return pd.DataFrame.from_dict([entry])
-
-def class_general_statistics(statistics_df):
+def global_dataset_statistics(statistics_df):
     """
-    Function that takes in a dataframe of statistics for multiple clips and outputs the same statistics, but across summed across the dataset.
+    Function that takes in a dataframe of efficiency statistics for multiple clips and outputs their global values.
 
     Args:
         statistics_df (Dataframe) - Dataframe of statistics value for multiple audio clips as returned by the function automated_labelling_statistics.
+
     Returns:
-        Dataframe of global statistics for the efficacy of automated labels across multiple audio clips.
+        Dataframe of global statistics for the multiple audio clips' labelling.
     """
-    # Determining which class the clips are derived from
     class_id = statistics_df["MANUAL ID"][0]
-    # summing all of the
     tp_sum = statistics_df["TRUE POSITIVE"].sum()
     fp_sum = statistics_df["FALSE POSITIVE"].sum()
     fn_sum = statistics_df["FALSE NEGATIVE"].sum()
-    #tn_sum = statistics_df["TRUE NEGATIVE"].sum()
+    tn_sum = statistics_df["TRUE NEGATIVE"].sum()
     union_sum = statistics_df["UNION"].sum()
     precision = tp_sum/(tp_sum + fp_sum)
     recall = tp_sum/(tp_sum + fn_sum)
@@ -351,7 +341,7 @@ def clip_catch(automated_df,manual_df):
     manual_df.reset_index(inplace = True, drop = True)
     # figuring out how many automated labels and human labels exist
     manual_row_count = manual_df.shape[0]
-    #automated_row_count = automated_df.shape[0]
+    automated_row_count = automated_df.shape[0]
     # finding the length of the clip as well as the sampling frequency.
     duration = automated_df["CLIP LENGTH"].to_list()[0]
     SAMPLE_RATE = automated_df["SAMPLE RATE"].to_list()[0]
@@ -463,7 +453,7 @@ def clip_catch(automated_df,manual_df):
 #    IoU_Statistics.reset_index(inplace = True, drop = True)
 #    return IoU_Statistics
 
-def class_IoU_statistics(statistics_df):
+def global_IoU_Statistics(statistics_df):
     """
     Function that takes the output of dataset_IoU Statistics and outputs a global count of true positives and false positives, as well as computing the precision, recall, and f1 metrics across the dataset.
@@ -541,7 +531,7 @@ def dataset_Catch(automated_df,manual_df):
 
 # I am going to wait on showing off this function since we don't have any multi-class classifiers yet.
 def dataset_IoU_Statistics(automated_df, manual_df, threshold = 0.5):
-    # Building a list of unique classes
+    # finding the number of unique classes.
     class_list = manual_df["MANUAL ID"].to_list()
     class_list = list(dict.fromkeys(class_list))
 
@@ -551,13 +541,13 @@ def dataset_IoU_Statistics(automated_df, manual_df, threshold = 0.5):
 
     # Looping through each class
     for class_id in class_list:
-        # Isolating the class from the passed in dataframe
+        # Isolating the class from the
        class_automated_df = automated_df[automated_df["MANUAL_ID"] == class_id]
        class_manual_df = manual_df[manual_df["MANUAL ID"] == class_id]
        class_stats_df = class_IoU_Statistics(class_automated_df,class_manual_df, threshold = threshold)
-        class_global_stats_df = class_IoU_statistics(class_stats_df)
+        class_global_stats_df = global_IoU_Statistics(class_stats_df)
        if master_clip_stats_df.empty:
-            master_clip_stats_df = class_stats_df
+            master_clips_stats_df = class_stats_df
        if master_global_stats_df.empty:
            master_global_stats_df = class_global_stats_df
        else:
diff --git a/PyHa/visualizations.py b/PyHa/visualizations.py
index de495e3..b2840ea 100644
--- a/PyHa/visualizations.py
+++ b/PyHa/visualizations.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import scipy.signal as scipy_signal
 import numpy as np
-from .IsoAutio import isolate
+from .IsoAutio import *
 
 
@@ -181,7 +181,7 @@ def plot_bird_label_scores(automated_df,human_df,save_fig = False):
     human_arr = np.zeros((int(SAMPLE_RATE*duration),))
     bot_arr = np.zeros((int(SAMPLE_RATE*duration),))
 
-    #folder_name = automated_df["FOLDER"].to_list()[0]
+    folder_name = automated_df["FOLDER"].to_list()[0]
     clip_name = automated_df["IN FILE"].to_list()[0]
     # Placing 1s wherever the au
     for row in automated_df.index:
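
The renamed roll-up mirrors how dataset_IoU_Statistics already calls it in the hunk above. A minimal usage sketch, assuming single-class, PyHa-format label dataframes and the signatures shown in this diff; the CSV file names are placeholders:

    import pandas as pd
    from PyHa.statistics import class_IoU_Statistics, global_IoU_Statistics

    # Hypothetical inputs: automated and human labels in PyHa's dataframe format.
    automated_df = pd.read_csv("automated_labels.csv")
    manual_df = pd.read_csv("manual_labels.csv")

    # Per-clip IoU statistics, then the dataset-wide precision/recall/f1 roll-up.
    clip_stats_df = class_IoU_Statistics(automated_df, manual_df, threshold=0.5)
    global_stats_df = global_IoU_Statistics(clip_stats_df)
    print(global_stats_df)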