Skip to content

Commit

Permalink
Merge pull request #1099 from kitzeslab/issue_1098_annotation_col_names
Browse files Browse the repository at this point in the history
enable passing list of options for annotation_column
  • Loading branch information
sammlapp authored Feb 6, 2025
2 parents 244e879 + b208702 commit 70b8d97
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 6 deletions.
42 changes: 36 additions & 6 deletions opensoundscape/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,16 @@ def from_raven_files(
Args:
raven_files: list or iterable of raven .txt file paths (as str or pathlib.Path),
or a single file path (str or pathlib.Path). Eg ['path1.txt','path2.txt']
annotation_column: string name or integer position of column containing annotations
annotation_column: column name(s) or integer position to use as the annotations
- pass `None` to load the Raven file without explicitly
assigning a column as the annotation column. The resulting
object's `.df` will have an `annotation` column with nan values!
- if a string is passed, the column with this name will be used as the annotations.
- if an integer is passed, the column at that position will be used as the annotation column.
NOTE: column positions are ordered increasingly starting at 0.
NOTE: column positions are ordered increasingly starting at 0.
- if a list/tuple is passed, find a column matching any value in the list
NOTE: if multiple columns match, an error will be raised
Example: ['annotation','label','Species'] will find a column with any of these names
audio_files: (list) optionally specify audio files corresponding to each
raven file (length should match raven_files) Eg ['path1.txt','path2.txt']
- if None (default), .clip_labels() will not be able to
Expand Down Expand Up @@ -195,6 +198,10 @@ def from_raven_files(
but their lengths did not match.
"""

assert isinstance(
annotation_column, (str, int, type(None), list, tuple)
), "Annotation column index has to be a string, integer, list, tuple, or None."

all_file_dfs = []

# mapping of Raven file columns to standard opensoundscape names
Expand All @@ -214,10 +221,7 @@ def from_raven_files(
warnings.warn(f"{raven_file} has zero rows.")
continue

assert isinstance(
annotation_column, (str, int, type(None))
), "Annotation column index has to be a string, integer, or None."

# handle various options for specifying the annotation column
if isinstance(annotation_column, str):
# annotation_column is a string that is present in the annotation file's header
try:
Expand Down Expand Up @@ -251,6 +255,32 @@ def from_raven_files(
},
errors="raise",
)
elif isinstance(annotation_column, (list, tuple)):
annotation_column = list(annotation_column)
# make sure exactly one value from annotation_column is in the df.columns
matching_cols = [col for col in annotation_column if col in df.columns]
if len(matching_cols) == 0:
raise KeyError(
f"None of the specified annotation columns, {annotation_column}, "
f"match any of the column names in the annotation file: {list(df.columns)} "
f"when attempting to load {raven_file}. "
f"Please ensure all raven files contain one of the specified annotation_column values."
)
elif len(matching_cols) > 1:
raise KeyError(
f"Multiple columns in the annotation file match the specified annotation columns: "
f"{matching_cols}. when attempting to load {raven_file}. "
"Please ensure only one column in each raven file matches a value listed in annotation_columns"
)
else:
# rename the column to 'annotation'
df = df.rename(
columns={
matching_cols[0]: "annotation",
},
errors="raise",
)

else:
# None was passed to annotation_column
# we'll create an empty `annotation` column
Expand Down
2 changes: 2 additions & 0 deletions tests/raven_annots/raven_with_Annotation_col.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Selection View Channel Begin Time (s) End Time (s) Low Freq (Hz) High Freq (Hz) Annotation
1 Spectrogram 1 1 1.897648165999982 4.110570810999974 1326.3 3266.5 CSWA
36 changes: 36 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ def raven_file():
return "tests/raven_annots/MSD-0003_20180427_2minstart00.Table.1.selections.txt"


@pytest.fixture()
def raven_file_Annotation_col():
    """Path to the Raven selection table whose label column is named 'Annotation'."""
    annotation_col_table = "tests/raven_annots/raven_with_Annotation_col.txt"
    return annotation_col_table


@pytest.fixture()
def audio_2min():
    """Path to the MSD-0003_20180427_2minstart00.wav test audio file."""
    wav_path = "tests/audio/MSD-0003_20180427_2minstart00.wav"
    return wav_path
Expand Down Expand Up @@ -242,6 +247,37 @@ def test_load_raven_annotation_column_name(raven_file):
a = BoxedAnnotations.from_raven_files([raven_file], annotation_column=-1)


def test_from_raven_files_list_of_annotation_column(
    raven_file, raven_file_Annotation_col
):
    """Check `annotation_column` given as a list/tuple of candidate column names.

    Two Raven files are loaded together. A list or tuple of candidates must
    load both files and yield the "CSWA" and "WOTH" labels; candidate sets
    that match no column, or more than one column, of a file must raise
    KeyError.
    """

    def load_both(column_options):
        # build a fresh list of paths on every call, as each load is independent
        return BoxedAnnotations.from_raven_files(
            [raven_file, raven_file_Annotation_col],
            annotation_column=column_options,
        )

    expected_labels = ("CSWA", "WOTH")

    # the candidates may be given as a list or, equivalently, as a tuple
    for column_options in (["Species", "Annotation"], ("Species", "Annotation")):
        boxed = load_both(column_options)
        assert all(label in boxed.unique_labels() for label in expected_labels)

    # raises an exception if no matching column is found
    with pytest.raises(KeyError):
        load_both(["Species", "notacolumn"])

    # raises an exception if multiple matching columns are found
    with pytest.raises(KeyError):
        load_both(["Species", "Selection"])


def test_load_raven_annotations_empty(raven_file_empty):
a = BoxedAnnotations.from_raven_files([raven_file_empty], None)
assert len(a.df) == 0
Expand Down

0 comments on commit 70b8d97

Please sign in to comment.