Skip to content

Commit

Permalink
Merge pull request #191 from UCSD-E4E/local_score_confidene
Browse files Browse the repository at this point in the history
First pass of confidence column from max local score array values
  • Loading branch information
JacobGlennAyers authored May 2, 2024
2 parents 42da684 + cec1213 commit 510aa0b
Show file tree
Hide file tree
Showing 3 changed files with 460 additions and 357 deletions.
34 changes: 34 additions & 0 deletions PyHa/IsoAutio.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,36 @@ def build_isolation_parameters_microfaune(

return isolation_parameters

def write_confidence(local_score_arr, automated_labels_df):
    """
    Function that adds a "CONFIDENCE" column to a clip dataframe that has had
    automated labels generated. Each annotation (row) receives the maximum
    local score found inside the span of the local score array that the
    annotation covers.

    Args:
        local_score_arr (np.ndarray or list of floats)
            - Array of small predictions of bird presence for a single clip
        automated_labels_df (pd.DataFrame)
            - labels derived from the local_score_arr from the def isolate()
              method for the "IN FILE" column clip. Must contain the
              "CLIP LENGTH", "OFFSET" and "DURATION" columns, with "OFFSET"
              and "DURATION" expressed in the same time unit as
              "CLIP LENGTH" (presumably seconds).

    returns:
        The same Pandas DataFrame object (modified in place) with an
        additional "CONFIDENCE" column of the confidence scores from the
        local score array.

    Raises:
        AssertionError if the argument types are wrong or the dataframe
        has no rows.
        ValueError if local_score_arr is empty.
    """
    assert isinstance(local_score_arr, np.ndarray) or isinstance(local_score_arr, list)
    assert isinstance(automated_labels_df, pd.DataFrame)
    assert len(automated_labels_df) > 0

    num_scores = len(local_score_arr)
    if num_scores == 0:
        # No score can be attached to any annotation; fail with a clear
        # message instead of numpy's "zero-size array" reduction error.
        raise ValueError("local_score_arr must not be empty")

    # Number of local score samples per unit of clip time. Positional access
    # (.iloc) is used because the dataframe index is not guaranteed to
    # contain the label 0 (e.g. after filtering or concatenation).
    time_ratio = num_scores / automated_labels_df["CLIP LENGTH"].iloc[0]

    confidences = []
    for offset, duration in zip(automated_labels_df["OFFSET"],
                                automated_labels_df["DURATION"]):
        # Keep the window start inside the array so that an annotation that
        # begins at (or rounds to) the very end of the clip still maps to
        # the last available score.
        start_ndx = min(max(int(offset * time_ratio), 0), num_scores - 1)
        # Always look at one sample or more: annotations shorter than one
        # score step would otherwise yield an empty slice and make np.max
        # raise.
        end_ndx = max(start_ndx + int(duration * time_ratio), start_ndx + 1)
        confidences.append(np.max(local_score_arr[start_ndx:end_ndx]))

    automated_labels_df["CONFIDENCE"] = confidences
    return automated_labels_df


def isolate(
local_scores,
Expand Down Expand Up @@ -225,6 +255,10 @@ def isolate(
filename,
isolation_parameters,
manual_id=manual_id)

if "write_confidence" in isolation_parameters.keys():
if isolation_parameters["write_confidence"]:
isolation_df = write_confidence(local_scores, isolation_df)

return isolation_df

Expand Down
20 changes: 16 additions & 4 deletions PyHa/annotation_post_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,23 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
kaleidoscope_df (Dataframe)
- Dataframe of annotations in kaleidoscope format
chunk_length (int)
chunk_length (int, float)
- duration to set all annotation chunks
Returns:
Dataframe of labels with chunk_length duration
(elements in "OFFSET" are divisible by chunk_length).
"""

assert isinstance(kaleidoscope_df, pd.DataFrame)
assert isinstance(chunk_length, int) or isinstance(chunk_length, float)
assert chunk_length > 0
#Init list of clips to cycle through and output dataframe
clips = kaleidoscope_df["IN FILE"].unique()
df_columns = {'IN FILE' :'str', 'CLIP LENGTH' : 'float64', 'CHANNEL' : 'int64', 'OFFSET' : 'float64',
'DURATION' : 'float64', 'SAMPLE RATE' : 'int64','MANUAL ID' : 'str'}
set_confidence = False
if "CONFIDENCE" in kaleidoscope_df.keys():
df_columns["CONFIDENCE"] = 'float64'
set_confidence = True
output_df = pd.DataFrame({c: pd.Series(dtype=t) for c, t in df_columns.items()})

# going through each clip
Expand Down Expand Up @@ -57,14 +63,18 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
1000,
0))
# Placing the label relative to the clip
human_arr[minval:maxval] = 1
if set_confidence:
human_arr[minval:maxval] = species_df["CONFIDENCE"][annotation]
else:
human_arr[minval:maxval] = 1
# performing the chunk isolation technique on the human array

for index in range(potential_annotation_count):
chunk_start = index * (chunk_length*1000)
chunk_end = min((index+1)*chunk_length*1000,arr_len)
chunk = human_arr[int(chunk_start):int(chunk_end)]
if max(chunk) >= 0.5:
chunk_max = max(chunk)
if chunk_max > 1e-4:
row = pd.DataFrame(index = [0])
annotation_start = chunk_start / 1000
#updating the dictionary
Expand All @@ -75,5 +85,7 @@ def annotation_chunker(kaleidoscope_df, chunk_length):
row["SAMPLE RATE"] = sr
row["MANUAL ID"] = bird
row["CHANNEL"] = 0
if set_confidence:
row["CONFIDENCE"] = chunk_max
output_df = pd.concat([output_df,row], ignore_index=True)
return output_df
Loading

0 comments on commit 510aa0b

Please sign in to comment.