Conform tweetynet to recent PyHa refactor #113

Merged
merged 42 commits on Jun 28, 2022
Changes from 14 commits
Commits
42 commits
e3fbbba
adding tweetynet model to pyha
mugen13lue Apr 12, 2022
4893339
updated files and checking results on windows
mugen13lue May 11, 2022
7907803
adding changes for another file
mugen13lue May 11, 2022
ea35605
local commit of file
mugen13lue May 11, 2022
fd8e6ff
adding environment with pytorch for tweetynet and fixed pyha tutorial…
mugen13lue May 11, 2022
0aa91d3
got the visualizations to work and fixed the local score output to be…
mugen13lue May 19, 2022
59b2579
fixed merge conflict
mugen13lue May 19, 2022
dbe2252
The visualization and creating the automated dataframe works and will…
mugen13lue May 24, 2022
1be6fea
adding conda environment for macOS
mugen13lue May 26, 2022
fb68481
adding environment for windows 10
mugen13lue May 26, 2022
9cda0e7
updating gitignore to ignore pycache in tweetynet_package
mugen13lue May 26, 2022
90c0414
update from microfaune to tweetynet in error message
mugen13lue May 26, 2022
5564bd4
updating notebook with tweetynet example
mugen13lue May 26, 2022
e55baa3
Merge branch 'tweetynet_integrations' into TweetyNet_integrations_2_main
mugen13lue May 26, 2022
1ac5554
Refactor TweetyNET automated label gen/visualizations
sprestrelski Jun 1, 2022
46b5e13
Updated Tutorial Notebook
JacobGlennAyers Jun 1, 2022
7c1a806
Updated Most Recent Ubuntu Conda Environment with PyTorch
JacobGlennAyers Jun 1, 2022
72a2dec
Meant to be in the conda_environments folder
JacobGlennAyers Jun 3, 2022
361d27d
added batch testing
Vanessa-Salgado Jun 27, 2022
ca4d4f5
Fixed FutureError warnings
Sean1572 Jun 27, 2022
6ca7ff4
added model comparison and spectrogram vis testing
sprestrelski Jun 27, 2022
5f6ce9f
Added chunk isolate to testing suite
Sean1572 Jun 27, 2022
2d672f0
Fixed spectrogram_graph visualization to readme
Sean1572 Jun 27, 2022
b113ac1
added tweetynet documentation
Vanessa-Salgado Jun 27, 2022
d1b2fbd
Merge branch 'TweetyNet-Testing' of https://github.com/UCSD-E4E/PyHa …
Vanessa-Salgado Jun 27, 2022
20c1536
Moved testing scripts to testing folder
Sean1572 Jun 27, 2022
4dbef91
Merge branch 'TweetyNet-Testing' of https://github.com/UCSD-E4E/PyHa …
Sean1572 Jun 27, 2022
e3a25d8
Merge pull request #119 from UCSD-E4E/TweetyNet-Testing
Sean1572 Jun 27, 2022
a9a9f7f
Removed testing folder because that was a bad idea
Sean1572 Jun 27, 2022
b937c2e
documentation update, renamed testing scripts
sprestrelski Jun 27, 2022
b0aca41
Use tweetynet normalization
Sean1572 Jun 28, 2022
62f2729
Merge branch 'TweetyNet_integrations_2_main' of https://github.com/UC…
Sean1572 Jun 28, 2022
b3846ff
Uncommented old normalize code
Sean1572 Jun 28, 2022
eeea93c
Fixed gitignore
Sean1572 Jun 28, 2022
5f00a09
Optimize panda use on steinburg to fix bug
Sean1572 Jun 28, 2022
6f2da05
Fixed bug with new steinburg fix
Sean1572 Jun 28, 2022
08d5170
Cleaned up steinburg code
Sean1572 Jun 28, 2022
0690ab9
Improved error messaging on Pyha
Sean1572 Jun 28, 2022
a67d9a5
Update .gitignore
Sean1572 Jun 28, 2022
b376ce5
Update .gitignore
Sean1572 Jun 28, 2022
2509e74
Update .gitignore
sprestrelski Jun 28, 2022
4763ee5
Merge branch 'TweetyNet_integrations_2_main' of https://github.com/UC…
sprestrelski Jun 28, 2022
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
.ipynb_checkpoints
PyHa/__pycache__
PyHa/microfaune_package/microfaune/__pycache__
PyHa/microfaune_package/microfaune/__pycache__
PyHa/tweetynet_package/tweetynet/__pycache__
216 changes: 193 additions & 23 deletions PyHa/IsoAutio.py
@@ -1,5 +1,9 @@
#from PyHa.tweetynet_package.tweetynet.network import TweetyNet
from .microfaune_package.microfaune.detection import RNNDetector
from .microfaune_package.microfaune import audio
from .tweetynet_package.tweetynet.TweetyNetModel import TweetyNetModel
from .tweetynet_package.tweetynet.Load_data_functions import compute_features, predictions_to_kaleidoscope
import torch
import pandas as pd
import scipy.signal as scipy_signal
import numpy as np
@@ -256,7 +260,6 @@ def steinberg_isolate(
"""
# calculate original duration
old_duration = len(SIGNAL) / SAMPLE_RATE

# create entry for audio clip
entry = {'FOLDER': audio_dir,
'IN FILE': filename,
@@ -269,7 +272,6 @@
# calculating threshold that will define how labels are created in current
# audio clip
thresh = threshold(local_scores, isolation_parameters)

# how many samples one local score represents
samples_per_score = len(SIGNAL) // len(local_scores)

@@ -316,12 +318,12 @@
# sub-clip numpy array
isolated_samples = np.append(
isolated_samples, SIGNAL[lo_idx:hi_idx])

entry = pd.DataFrame.from_dict(entry)
# TODO, when you go through the process of rebuilding this isolate function
# as a potential optimization problem
# rework the algorithm so that it builds the dataframe correctly to save
# time.

OFFSET = entry['OFFSET'].str[0]
DURATION = entry['OFFSET'].str[1]
DURATION = DURATION - OFFSET
@@ -668,6 +670,8 @@ def chunk_isolate(
def generate_automated_labels_microfaune(
audio_dir,
isolation_parameters,
ml_model = "microfaune",
tweety_output = False,
manual_id="bird",
weight_path=None,
normalized_sample_rate=44100,
@@ -703,11 +707,19 @@
# Use Default Microfaune Detector
# TODO
# Expand to neural networks beyond just microfaune
if weight_path is None:
detector = RNNDetector()
# Use Custom weights for Microfaune Detector
# The ml_model flag selects between the Microfaune and TweetyNet detectors.
if ml_model == "microfaune":
if weight_path is None:
detector = RNNDetector()
# Use Custom weights for Microfaune Detector
else:
detector = RNNDetector(weight_path)
elif ml_model == "tweetynet":
device = torch.device('cpu')
detector = TweetyNetModel(2, (1, 86, 86), 86, device, binary = False)
else:
detector = RNNDetector(weight_path)
print("model \"{}\" does not exist".format(ml_model))
return None

# init labels dataframe
annotations = pd.DataFrame()
@@ -747,13 +759,19 @@
# Might want to compare to just taking the first set of data.
if len(SIGNAL.shape) == 2:
SIGNAL = SIGNAL.sum(axis=1) / 2

# detection
try:
microfaune_features = detector.compute_features([SIGNAL])
global_score, local_scores = detector.predict(microfaune_features)
except BaseException:
# Compute features and predict with whichever model was selected.
if ml_model == "microfaune":
microfaune_features = detector.compute_features([SIGNAL])
global_score, local_scores = detector.predict(microfaune_features)
elif ml_model == "tweetynet":
# Convert the signal into windowed spectrogram features for TweetyNet.
tweetynet_features = compute_features([SIGNAL])
predictions, local_scores = detector.predict(tweetynet_features, model_weights=weight_path)
except BaseException as e:
print("Error in detection, skipping", audio_file)
print(e)
continue

# get duration of clip
@@ -762,22 +780,33 @@
try:
# Running moment to moment algorithm and appending to a master
# dataframe.
new_entry = isolate(
local_scores[0],
SIGNAL,
SAMPLE_RATE,
audio_dir,
audio_file,
isolation_parameters,
manual_id=manual_id,
normalize_local_scores=normalize_local_scores)
# When tweety_output is set, use TweetyNet's own segmentation and skip the isolation functions.
if tweety_output:
local_scores = [np.array(predictions["pred"].values)]
print(local_scores)
print(predictions)
print("here", audio_file)
predictions.to_csv(audio_file + ".csv")
print("saved_csv")
new_entry = predictions_to_kaleidoscope(predictions, SIGNAL, audio_dir, audio_file, manual_id, SAMPLE_RATE)
else:
new_entry = isolate(
local_scores[0],
SIGNAL,
SAMPLE_RATE,
audio_dir,
audio_file,
isolation_parameters,
manual_id=manual_id,
normalize_local_scores=normalize_local_scores)
# print(new_entry)
if annotations.empty:
annotations = new_entry
else:
annotations = annotations.append(new_entry)
except BaseException:
except BaseException as e:
print("Error in isolating bird calls from", audio_file)
print(e)
continue
# Quick fix to indexing
annotations.reset_index(inplace=True, drop=True)
@@ -917,8 +946,6 @@ def generate_automated_labels(
# return None
return annotations



def kaleidoscope_conversion(df):
"""
Function that strips away Pandas Dataframe columns necessary for PyHa
@@ -969,3 +996,146 @@ def kaleidoscope_conversion(df):


# annotation = annotation + annotation_chain_count - 1

def generate_automated_labels_final(audio_dir,
isolation_parameters,
tweety_output = False,
manual_id="bird",
weight_path=None,
Normalized_Sample_Rate=44100,
normalize_local_scores=False):
return generate_automated_labels_tweetynet(
audio_dir,
isolation_parameters,
tweety_output,
manual_id,
weight_path,
Normalized_Sample_Rate,
normalize_local_scores)

def generate_automated_labels_tweetynet(
audio_dir,
isolation_parameters,
tweety_output = False,
manual_id="bird",
weight_path=None,
Normalized_Sample_Rate=44100,
normalize_local_scores=False):
"""
Function that applies isolation technique determined by
isolation_parameters dictionary across a folder of audio clips.

Args:
audio_dir (string)
- Directory with wav audio files.

isolation_parameters (dict)
- Python Dictionary that controls the various label creation
techniques.

tweety_output (boolean) # may want to incorporate into isolation parameters
- True to use tweetynet's original output, or False to use
isolation techniques.

manual_id (string)
- controls the name of the class written to the pandas dataframe.

weight_path (string)
- File path of weights to be used by TweetyNet for
determining presence of bird sounds.

Normalized_Sample_Rate (int)
- Sampling rate that the audio files should all be normalized to.

normalize_local_scores (boolean) # may want to incorporate into isolation parameters
- Flag to normalize the local scores.

Returns:
Dataframe of automated labels for the audio clips in audio_dir.
"""

# init detector
# Build the default TweetyNet detector (CPU-only for now)
# TODO
# Expand to neural networks beyond just TweetyNet
device = torch.device('cpu')
detector = TweetyNetModel(2, (1, 86, 86), 86, device)

# init labels dataframe
annotations = pd.DataFrame()
# generate local scores for every bird file in chosen directory
for audio_file in os.listdir(audio_dir):
# skip directories
if os.path.isdir(audio_dir + audio_file):
continue

# It is a bit awkward to rely on Microfaune's wave file reading here when
# we want to expand to other frameworks; we will likely want to change
# that in the future. Librosa had some troubles.

# Reading in the wave audio files
try:
SAMPLE_RATE, SIGNAL = audio.load_wav(audio_dir + audio_file)
except BaseException:
print("Failed to load", audio_file)
continue

# downsample the audio if the sample rate isn't 44.1 kHz
# Force everything into the human hearing range.
# May consider reworking this function so that it upsamples as well
if SAMPLE_RATE != Normalized_Sample_Rate:
rate_ratio = Normalized_Sample_Rate / SAMPLE_RATE
SIGNAL = scipy_signal.resample(
SIGNAL, int(len(SIGNAL) * rate_ratio))
SAMPLE_RATE = Normalized_Sample_Rate
# resample produces unreadable float32 array so convert back
# SIGNAL = np.asarray(SIGNAL, dtype=np.int16)

# convert stereo to mono if needed
# Might want to compare to just taking the first set of data.
if len(SIGNAL.shape) == 2:
SIGNAL = SIGNAL.sum(axis=1) / 2
# detection
try:
#Add flag to work with creating features for tweetynet.
tweetynet_features = compute_features([SIGNAL])
predictions, local_scores = detector.predict(tweetynet_features, model_weights=weight_path)
except BaseException as e:
print("Error in detection, skipping", audio_file)
print(e)
continue

try:
# Running moment to moment algorithm and appending to a master
# dataframe.
#Add tweetynet without isolation functions here
if tweety_output:
new_entry = predictions_to_kaleidoscope(
predictions,
SIGNAL,
audio_dir,
audio_file,
manual_id,
SAMPLE_RATE)
else:
new_entry = isolate(
local_scores[0],
SIGNAL,
SAMPLE_RATE,
audio_dir,
audio_file,
isolation_parameters,
manual_id=manual_id,
normalize_local_scores=normalize_local_scores)
# print(new_entry)
if annotations.empty:
annotations = new_entry
else:
annotations = annotations.append(new_entry)
except BaseException:
print("Error in isolating bird calls from", audio_file)
continue
# Quick fix to indexing
annotations.reset_index(inplace=True, drop=True)
return annotations
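
For reference, a minimal usage sketch of the two entry points touched by this diff (illustrative only, not part of the PR). It assumes PyHa is importable, a local ./TEST/ directory of wav files, and placeholder isolation_parameters values rather than tuned settings.

from PyHa.IsoAutio import generate_automated_labels_microfaune, generate_automated_labels_tweetynet

# Placeholder isolation settings; the exact keys and values depend on the
# technique chosen and are not prescribed by this PR.
isolation_parameters = {
    "technique": "steinberg",
    "threshold_type": "median",
    "threshold_const": 2.0,
    "window_size": 2.0,
}

# Existing Microfaune path, now selected explicitly via the ml_model flag.
microfaune_labels = generate_automated_labels_microfaune(
    "./TEST/", isolation_parameters, ml_model="microfaune")

# New TweetyNet path; tweety_output=True keeps TweetyNet's own segmentation
# instead of running an isolation technique over the local scores.
tweetynet_labels = generate_automated_labels_tweetynet(
    "./TEST/", isolation_parameters, tweety_output=True)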
25 changes: 25 additions & 0 deletions PyHa/tweetynet_package/tweetynet/CustomAudioDataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from torch.utils.data import Dataset

class CustomAudioDataset(Dataset):
def __init__(self, spec, annotations, uids, transform=None, target_transform=None):
self.img_labels = annotations
self.spec = spec
self.uids = uids
self.transform = transform
self.target_transform = target_transform

def __len__(self):
return len(self.img_labels)

def __getitem__(self, idx):
#img_path = os.path.join(self.img_dir, self.img_labels[idx, 0])
#Read_audio
image = self.spec[idx]#read_image(img_path)
label = self.img_labels[idx]
# transform should be spectrogram from librosa # unless we do that beforehand.
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, label, self.uids[idx]
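
Illustrative only (not part of the diff): a short sketch of how CustomAudioDataset could be fed to a PyTorch DataLoader, with made-up spectrogram shapes and labels standing in for real TweetyNet inputs.

import numpy as np
from torch.utils.data import DataLoader
from PyHa.tweetynet_package.tweetynet.CustomAudioDataset import CustomAudioDataset

# Made-up data: 10 spectrogram windows of shape (1, 86, 86), with one
# frame-level label vector and one uid string per window.
specs = [np.random.rand(1, 86, 86).astype(np.float32) for _ in range(10)]
labels = [np.zeros(86, dtype=np.int64) for _ in range(10)]
uids = ["clip_{}".format(i) for i in range(10)]

dataset = CustomAudioDataset(specs, labels, uids)
loader = DataLoader(dataset, batch_size=4)

for image, label, uid in loader:
    # Default collation stacks the numpy arrays into tensors:
    # image -> (4, 1, 86, 86), label -> (4, 86), uid -> tuple of 4 strings.
    print(image.shape, label.shape, uid)
    break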
