Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Birdnet integration #112

Merged
merged 15 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 160 additions & 23 deletions PyHa/IsoAutio.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@
import numpy as np
import math
import os
from .birdnet_lite.analyze import analyze
from copy import deepcopy


def build_isolation_parameters(
def build_isolation_parameters_microfaune(
technique,
threshold_type,
threshold_const,
threshold_min=0,
window_size=1.0,
chunk_size=2.0):
"""
Wrapper function for all of the audio isolation techniques (Steinberg,
Simple, Stack, Chunk). Will call the respective function of
each technique based on isolation_parameters "technique" key.
Wrapper function for all of Microfaune's audio isolation techniques
(Steinberg, Simple, Stack, Chunk). Will call the respective function
of each technique based on isolation_parameters "technique" key.

Args:
technique (string)
Expand Down Expand Up @@ -52,7 +53,7 @@ def build_isolation_parameters(
"""
isolation_parameters = {
"technique": technique,
"treshold_type": threshold_type,
"threshold_type": threshold_type,
"threshold_const": threshold_const,
"threshold_min": threshold_min,
"window_size": window_size,
Expand All @@ -79,9 +80,9 @@ def isolate(
manual_id="bird",
normalize_local_scores=False):
"""
Wrapper function for all of the audio isolation techniques (Steinberg,
Simple, Stack, Chunk). Will call the respective function of
each technique based on isolation_parameters "technique" key.
Wrapper function for all of Microfaune's audio isolation techniques
(Steinberg, Simple, Stack, Chunk). Will call the respective function of each technique based on
isolation_parameters "technique" key.

Args:
local_scores (list of floats)
Expand Down Expand Up @@ -128,7 +129,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)
elif isolation_parameters["technique"] == "steinberg":
isolation_df = steinberg_isolate(
local_scores,
Expand All @@ -137,7 +138,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)
elif isolation_parameters["technique"] == "stack":
isolation_df = stack_isolate(
local_scores,
Expand All @@ -146,7 +147,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)
elif isolation_parameters["technique"] == "chunk":
isolation_df = chunk_isolate(
local_scores,
Expand All @@ -155,7 +156,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)

return isolation_df

Expand Down Expand Up @@ -535,7 +536,7 @@ def stack_isolate(
# increasing this stack counter will be referred to as "pushing"
stack_counter = stack_counter + 1

# when a score is below the treshold
# when a score is below the threshold
else:
# the case where it is the end of an annotation
if stack_counter == 0 and annotation_start == 1:
Expand Down Expand Up @@ -650,7 +651,7 @@ def chunk_isolate(
chunk_end = min((ndx + 1) * local_scores_per_chunk, len(local_scores))
# breaking up the local_score array into a chunk.
chunk = local_scores[int(chunk_start):int(chunk_end)]
# comparing the largest local score value to the treshold.
# comparing the largest local score value to the threshold.
# the case for if we label the chunk as an annotation
if max(chunk) >= thresh and max(
chunk) >= isolation_parameters["threshold_min"]:
Expand All @@ -664,16 +665,17 @@ def chunk_isolate(
return pd.DataFrame.from_dict(entry)


def generate_automated_labels(
def generate_automated_labels_microfaune(
audio_dir,
isolation_parameters,
manual_id="bird",
weight_path=None,
Normalized_Sample_Rate=44100,
normalized_sample_rate=44100,
normalize_local_scores=False):
"""
Function that applies isolation technique determined by
isolation_parameters dictionary across a folder of audio clips.
Function that applies isolation technique on the local scores generated
by the Microfaune mode across a folder of audio clips. It is determined
by the isolation_parameters dictionary.

Args:
audio_dir (string)
Expand All @@ -690,7 +692,7 @@ def generate_automated_labels(
- File path of weights to be used by the RNNDetector for
determining presence of bird sounds.

Normalized_Sample_Rate (int)
normalized_sample_rate (int)
- Sampling rate that the audio files should all be normalized to.

Returns:
Expand Down Expand Up @@ -730,11 +732,11 @@ def generate_automated_labels(
# Force everything into the human hearing range.
# May consider reworking this function so that it upsamples as well
try:
if SAMPLE_RATE != Normalized_Sample_Rate:
rate_ratio = Normalized_Sample_Rate / SAMPLE_RATE
if SAMPLE_RATE != normalized_sample_rate:
rate_ratio = normalized_sample_rate / SAMPLE_RATE
SIGNAL = scipy_signal.resample(
SIGNAL, int(len(SIGNAL) * rate_ratio))
SAMPLE_RATE = Normalized_Sample_Rate
SAMPLE_RATE = normalized_sample_rate
except:
print("Failed to Downsample" + audio_file)
# resample produces unreadable float32 array so convert back
Expand Down Expand Up @@ -781,6 +783,141 @@ def generate_automated_labels(
annotations.reset_index(inplace=True, drop=True)
return annotations

def generate_automated_labels_birdnet(audio_dir, isolation_parameters):
    """
    Function that generates the bird labels for an audio file or across a
    folder using the BirdNET-Lite model.

    Args:
        audio_dir (string)
            - Directory with wav audio files. Can be an individual file
              as well.

        isolation_parameters (dict)
            - Python Dictionary that controls the various label creation
              techniques. Every key is forwarded verbatim as a keyword
              argument to BirdNET-Lite's analyze(), so it must only
              contain keys analyze() accepts. The keys it accepts are:

            - output_path (string)
                - Path to output folder. By default results are written
                  into the input folder
                - default: None

            - lat (float)
                - Recording location latitude
                - default: -1 (ignore)

            - lon (float)
                - Recording location longitude
                - default: -1 (ignore)

            - week (int)
                - Week of the year when the recording was made
                - Values in [1, 48] (4 weeks per month)
                - default: -1 (ignore)

            - overlap (float)
                - Overlap in seconds between extracted spectrograms
                - Values in [0.0, 2.9]
                - default: 0.0

            - sensitivity (float)
                - Detection sensitivity. Higher values result in higher
                  sensitivity
                - Values in [0.5, 1.5]
                - default: 1.0

            - min_conf (float)
                - Minimum confidence threshold
                - Values in [0.01, 0.99]
                - default: 0.1

            - custom_list (string)
                - Path to text file containing a list of species
                - default: '' (not used if not provided)

            - filetype (string)
                - Filetype of soundscape recordings
                - default: 'wav'

            - num_predictions (int)
                - Defines maximum number of written predictions in a
                  given 3s segment
                - default: 10

            - write_to_csv (boolean)
                - Set whether or not to write output to CSV
                - default: False

    Returns:
        Dataframe of automated labels for the audio clip(s) in audio_dir.
    """
    # Unpack the remaining isolation parameters directly into analyze();
    # callers are responsible for stripping keys analyze() does not accept.
    annotations = analyze(audio_path=audio_dir, **isolation_parameters)
    return annotations

def generate_automated_labels(
        audio_dir,
        isolation_parameters,
        manual_id="bird",
        weight_path=None,
        normalized_sample_rate=44100,
        normalize_local_scores=False):
    """
    Function that generates the bird labels across a folder of audio clips
    given the isolation_parameters.

    Dispatches to the model-specific label generator selected by
    isolation_parameters["model"].

    Args:
        audio_dir (string)
            - Directory with wav audio files.

        isolation_parameters (dict)
            - Python Dictionary that controls the various label creation
              techniques. Must contain a "model" key; recognized values
              are 'microfaune', 'birdnet', and 'tweetynet'.

        manual_id (string)
            - controls the name of the class written to the pandas dataframe

        weight_path (string)
            - File path of weights to be used by the model for
              determining presence of bird sounds.

        normalized_sample_rate (int)
            - Sampling rate that the audio files should all be normalized to.
              Used only for the Microfaune model.

        normalize_local_scores (boolean)
            - Set whether or not to normalize the local scores.

    Returns:
        Dataframe of automated labels for the audio clips in audio_dir,
        or None if the requested model is unrecognized or not yet
        supported.
    """
    model = isolation_parameters["model"]
    if model == 'microfaune':
        return generate_automated_labels_microfaune(
            audio_dir=audio_dir,
            isolation_parameters=isolation_parameters,
            manual_id=manual_id,
            weight_path=weight_path,
            normalized_sample_rate=normalized_sample_rate,
            normalize_local_scores=normalize_local_scores)
    if model == 'birdnet':
        # The remaining keys are unpacked as keyword arguments to BirdNET's
        # analyze(), so delete the keys it does not accept. Work on a deep
        # copy to avoid mutating the caller's dictionary.
        birdnet_parameters = deepcopy(isolation_parameters)
        keys_to_delete = ['model', 'technique', 'threshold_type',
                          'threshold_const', 'chunk_size']
        for key in keys_to_delete:
            birdnet_parameters.pop(key, None)
        return generate_automated_labels_birdnet(audio_dir, birdnet_parameters)
    if model == 'tweetynet':
        # TweetyNET support is not implemented yet. Returning None
        # explicitly avoids the UnboundLocalError the previous fall-through
        # to `return annotations` produced.
        return None
    print("{model_name} model does not exist".format(model_name=model))
    return None



def kaleidoscope_conversion(df):
"""
Expand Down
95 changes: 95 additions & 0 deletions PyHa/birdnet_lite/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# BirdNET-Lite
TFLite version of BirdNET. Bird sound recognition for more than 6,000 species worldwide.

Center for Conservation Bioacoustics, Cornell Lab of Ornithology, Cornell University

Go to https://birdnet.cornell.edu to learn more about the project.

Want to use BirdNET to analyze a large dataset? Don't hesitate to contact us: ccb-birdnet@cornell.edu

# Setup (Ubuntu 18.04)

TFLite for x86 platforms comes with the standard Tensorflow package. If you are on a different platform, you need to install a dedicated version of TFLite (e.g., a pre-compiled version for Raspberry Pi).

We need to setup TF2.3+ for BirdNET. First, we install Python 3 and pip:

```
sudo apt-get update
sudo apt-get install python3-dev python3-pip
sudo pip3 install --upgrade pip
```

Then, we can install Tensorflow with:

```
sudo pip3 install tensorflow
```

TFLite on x86 platform currently only supports CPUs.

Note: Make sure to set `CUDA_VISIBLE_DEVICES=""` in your environment variables. Or set `os.environ['CUDA_VISIBLE_DEVICES'] = ''` at the top of your Python script.

In this example, we use Librosa to open audio files. Install Librosa with:

```
sudo pip3 install librosa
sudo apt-get install ffmpeg
```

You can use any other audio lib if you like, or pass raw audio signals to the model.

If you don't use Librosa, make sure to install NumPy:

```
sudo pip3 install numpy
```

Note: BirdNET expects 3-second chunks of raw audio data, sampled at 48 kHz.

# Usage

You can run BirdNET via the command line. You can add a few parameters that affect the output.

The input parameters include:

```
--i, Path to input folder. All the nested folders will also be processed.
--o, Path to output folder. By default results are written into the input folder.
--lat, Recording location latitude. Set -1 to ignore.
--lon, Recording location longitude. Set -1 to ignore.
--week, Week of the year when the recording was made. Values in [1, 48] (4 weeks per month). Set -1 to ignore.
--overlap, Overlap in seconds between extracted spectrograms. Values in [0.0, 2.9]. Defaults to 0.0.
--sensitivity, Detection sensitivity; Higher values result in higher sensitivity. Values in [0.5, 1.5]. Defaults to 1.0.
--min_conf, Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1.
--custom_list, Path to text file containing a list of species. Not used if not provided.
--filetype, Filetype of soundscape recordings. Defaults to 'wav'.
```

Note: A custom species list needs to contain one species label per line. Take a look at the `model/label.txt` for the correct species label. Only labels from this text file are valid. You can find an example of a valid custom list in the 'example' folder.

Here are two example commands to run this BirdNET version:

```

python3 analyze.py --i 'example/XC558716 - Soundscape.mp3' --lat 35.4244 --lon -120.7463 --week 18

python3 analyze.py --i 'example/XC563936 - Soundscape.mp3' --lat 47.6766 --lon -122.294 --week 11 --overlap 1.5 --min_conf 0.25 --sensitivity 1.25 --custom_list 'example/custom_species_list.txt'

```

Note: Please make sure to provide lat, lon, and week. BirdNET will work without these values, but the results might be less reliable.

The results of the analysis will be stored in a result file in CSV format. All confidence values are raw prediction scores and should be post-processed to eliminate occasional false-positive results.

# Contact us

Please don't hesitate to contact us if you have any issues with the code or if you have any other remarks or questions.

Our e-mail address: ccb-birdnet@cornell.edu

We are always open to collaborating with you.

# Funding

This project is supported by Jake Holshuh (Cornell class of ’69). The Arthur Vining Davis Foundations also kindly support our efforts.

Binary file not shown.
Binary file not shown.
Loading