
Added BirdNet Pipeline
shreyasar2202 committed Apr 29, 2022
1 parent 9de4cce commit e3804e2
Showing 13 changed files with 7,697 additions and 973 deletions.
113 changes: 105 additions & 8 deletions PyHa/IsoAutio.py
@@ -5,7 +5,8 @@
import numpy as np
import math
import os

from .birdnet_lite.analyze import analyze
from copy import deepcopy

def build_isolation_parameters(
technique,
@@ -128,7 +129,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)
elif isolation_parameters["technique"] == "steinberg":
isolation_df = steinberg_isolate(
local_scores,
@@ -137,7 +138,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)
elif isolation_parameters["technique"] == "stack":
isolation_df = stack_isolate(
local_scores,
@@ -146,7 +147,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)
elif isolation_parameters["technique"] == "chunk":
isolation_df = chunk_isolate(
local_scores,
@@ -155,7 +156,7 @@ def isolate(
audio_dir,
filename,
isolation_parameters,
manual_id="bird")
manual_id=manual_id)

return isolation_df

@@ -664,16 +665,17 @@ def chunk_isolate(
return pd.DataFrame.from_dict(entry)


def generate_automated_labels(
def generate_automated_labels_microfaune(
audio_dir,
isolation_parameters,
manual_id="bird",
weight_path=None,
Normalized_Sample_Rate=44100,
normalize_local_scores=False):
"""
Function that applies isolation technique determined by
isolation_parameters dictionary across a folder of audio clips.
Function that applies the isolation technique to the local scores generated
by the Microfaune model across a folder of audio clips. The technique is
determined by the isolation_parameters dictionary.
Args:
audio_dir (string)
@@ -781,6 +783,101 @@ def generate_automated_labels(
annotations.reset_index(inplace=True, drop=True)
return annotations
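
For reference, a minimal usage sketch of this helper; the parameter values below are illustrative and only use keys that appear elsewhere in this file:

```
from PyHa.IsoAutio import generate_automated_labels_microfaune

# Illustrative isolation parameters; "steinberg" is one of the techniques
# dispatched by isolate() above, and the threshold values are placeholders.
isolation_parameters = {
    "technique": "steinberg",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "chunk_size": 3.0
}

annotations = generate_automated_labels_microfaune(
    audio_dir="./TEST/",
    isolation_parameters=isolation_parameters,
    manual_id="bird")
```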

def generate_automated_labels_birdnet(audio_dir, isolation_parameters):
"""
Function that generates the bird labels for an audio file or across a
folder using the BirdNET-Lite model.
Args:
audio_dir (string)
- Directory with wav audio files. Can be an individual file
as well.
isolation_parameters (dict)
- Python Dictionary that controls the various label creation
techniques. The keys it accepts are:
- output_path
- lat
- lon
- week
- overlap
- sensitivity
- min_conf
- custom_list
- filetype
- num_predictions
- write_to_csv
Returns:
Dataframe of automated labels for the audio clip(s) in audio_dir.
"""
annotations = analyze(audio_path=audio_dir, **isolation_parameters)
return annotations
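
A minimal usage sketch for this helper; only a subset of the accepted keys is shown, with illustrative values, and omitted keys presumably fall back to the defaults defined in analyze():

```
from PyHa.IsoAutio import generate_automated_labels_birdnet

# Illustrative values; -1 for lat, lon, and week tells BirdNET-Lite to
# ignore location and season filtering (see the birdnet_lite README).
birdnet_parameters = {
    "lat": -1,
    "lon": -1,
    "week": -1,
    "min_conf": 0.25,
    "filetype": "wav"
}

annotations = generate_automated_labels_birdnet("./TEST/", birdnet_parameters)
```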

def generate_automated_labels(
audio_dir,
isolation_parameters,
manual_id="bird",
weight_path=None,
Normalized_Sample_Rate=44100,
normalize_local_scores=False):
"""
Function that generates the bird labels across a folder of audio clips,
given the isolation_parameters.
Args:
audio_dir (string)
- Directory with wav audio files.
isolation_parameters (dict)
- Python Dictionary that controls the various label creation
techniques.
manual_id (string)
- controls the name of the class written to the pandas dataframe
weight_path (string)
- File path of weights to be used by the model for
determining presence of bird sounds.
Normalized_Sample_Rate (int)
- Sampling rate that the audio files should all be normalized to.
Used only for the Microfaune model.
Returns:
Dataframe of automated labels for the audio clips in audio_dir.
"""

#try:
if(isolation_parameters["model"] == 'microfaune'):
annotations = generate_automated_labels_microfaune(
audio_dir=audio_dir,
isolation_parameters=isolation_parameters,
manual_id=manual_id,
weight_path=weight_path,
Normalized_Sample_Rate=Normalized_Sample_Rate,
normalize_local_scores=normalize_local_scores)
elif(isolation_parameters["model"] == 'birndet'):
# We need to delete some keys from the isolation_parameters
# because the remaining entries are unpacked as keyword arguments
birdnet_parameters = deepcopy(isolation_parameters)
keys_to_delete = ['model', 'technique', 'threshold_type',
'threshold_const', 'chunk_size']
for key in keys_to_delete:
birdnet_parameters.pop(key, None)
annotations = generate_automated_labels_birdnet(
audio_dir, birdnet_parameters)
elif(isolation_parameters['model'] == 'tweetynet'):
pass
else:
print("{model_name} model does not exist"\
.format(model_name=isolation_parameters["model"]))
# except:
# print("Error. Check your isolation_parameters")
# return None
return annotations
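
A minimal usage sketch of this dispatcher; when "model" is set to "birdnet", the model-selection and thresholding keys listed above are stripped and the remaining keys are forwarded to BirdNET-Lite (values are illustrative):

```
from PyHa.IsoAutio import generate_automated_labels

isolation_parameters = {
    "model": "birdnet",   # selects the BirdNET-Lite branch above
    "min_conf": 0.25,     # forwarded to BirdNET-Lite
    "filetype": "wav"     # forwarded to BirdNET-Lite
}

annotations = generate_automated_labels("./TEST/", isolation_parameters)
```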



def kaleidoscope_conversion(df):
"""
95 changes: 95 additions & 0 deletions PyHa/birdnet_lite/README.md
@@ -0,0 +1,95 @@
# BirdNET-Lite
TFLite version of BirdNET. Bird sound recognition for more than 6,000 species worldwide.

Center for Conservation Bioacoustics, Cornell Lab of Ornithology, Cornell University

Go to https://birdnet.cornell.edu to learn more about the project.

Want to use BirdNET to analyze a large dataset? Don't hesitate to contact us: ccb-birdnet@cornell.edu

# Setup (Ubuntu 18.04)

TFLite for x86 platforms comes with the standard Tensorflow package. If you are on a different platform, you need to install a dedicated version of TFLite (e.g., a pre-compiled version for Raspberry Pi).

We need to set up TF 2.3+ for BirdNET. First, we install Python 3 and pip:

```
sudo apt-get update
sudo apt-get install python3-dev python3-pip
sudo pip3 install --upgrade pip
```

Then, we can install Tensorflow with:

```
sudo pip3 install tensorflow
```

TFLite on x86 platforms currently only supports CPUs.

Note: Make sure to set `CUDA_VISIBLE_DEVICES=""` in your environment variables. Or set `os.environ['CUDA_VISIBLE_DEVICES'] = ''` at the top of your Python script.

In this example, we use Librosa to open audio files. Install Librosa with:

```
sudo pip3 install librosa
sudo apt-get install ffmpeg
```

You can use any other audio lib if you like, or pass raw audio signals to the model.

If you don't use Librosa, make sure to install NumPy:

```
sudo pip3 install numpy
```

Note: BirdNET expects 3-second chunks of raw audio data, sampled at 48 kHz.
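
For example, a rough sketch of how a recording could be resampled and chunked with Librosa before being fed to the model (the preprocessing in analyze.py may differ in detail):

```
import librosa
import numpy as np

# Load the recording as 48 kHz mono, then split it into 3-second chunks
# (3 * 48000 = 144,000 samples), zero-padding the final chunk.
sig, rate = librosa.load('example/XC558716 - Soundscape.mp3', sr=48000, mono=True)
chunk_len = 3 * rate
chunks = []
for start in range(0, len(sig), chunk_len):
    chunk = sig[start:start + chunk_len]
    if len(chunk) < chunk_len:
        chunk = np.pad(chunk, (0, chunk_len - len(chunk)))
    chunks.append(chunk)
```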

# Usage

You can run BirdNET via the command line. You can add a few parameters that affect the output.

The input parameters include:

```
--i, Path to input folder. All the nested folders will also be processed.
--o, Path to output folder. By default results are written into the input folder.
--lat, Recording location latitude. Set -1 to ignore.
--lon, Recording location longitude. Set -1 to ignore.
--week, Week of the year when the recording was made. Values in [1, 48] (4 weeks per month). Set -1 to ignore.
--overlap, Overlap in seconds between extracted spectrograms. Values in [0.0, 2.9]. Defaults to 0.0.
--sensitivity, Detection sensitivity; higher values result in higher sensitivity. Values in [0.5, 1.5]. Defaults to 1.0.
--min_conf, Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1.
--custom_list, Path to text file containing a list of species. Not used if not provided.
--filetype, Filetype of soundscape recordings. Defaults to 'wav'.
```

Note: A custom species list needs to contain one species label per line. Take a look at `model/label.txt` for the correct species labels. Only labels from this text file are valid. You can find an example of a valid custom list in the 'example' folder.

Here are two example commands to run this BirdNET version:

```
python3 analyze.py --i 'example/XC558716 - Soundscape.mp3' --lat 35.4244 --lon -120.7463 --week 18
python3 analyze.py --i 'example/XC563936 - Soundscape.mp3' --lat 47.6766 --lon -122.294 --week 11 --overlap 1.5 --min_conf 0.25 --sensitivity 1.25 --custom_list 'example/custom_species_list.txt'
```

Note: Please make sure to provide lat, lon, and week. BirdNET will work without these values, but the results might be less reliable.

The results of the analysis will be stored in a result file in CSV format. All confidence values are raw prediction scores and should be post-processed to eliminate occasional false-positive results.
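
For instance, a minimal post-processing sketch with pandas; the file name and the 'Confidence' column name are assumptions here, so check the header of the generated CSV:

```
import pandas as pd

# Keep only detections above a stricter confidence threshold.
# 'result.csv' and the 'Confidence' column are assumed names.
results = pd.read_csv('result.csv')
filtered = results[results['Confidence'] >= 0.5]
filtered.to_csv('result_filtered.csv', index=False)
```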

# Contact us

Please don't hesitate to contact us if you have any issues with the code or if you have any other remarks or questions.

Our e-mail address: ccb-birdnet@cornell.edu

We are always open to collaborating with you.

# Funding

This project is supported by Jake Holshuh (Cornell class of ’69). The Arthur Vining Davis Foundations also kindly support our efforts.

Binary file not shown.
Binary file not shown.

1 comment on commit e3804e2

@JacobGlennAyers
Contributor


Looks good to me! Do you have an updated conda environment that works on your local machine? What OS do you work with?
