Skip to content

Commit

Permalink
refactor timestamp management (#210)
Browse files Browse the repository at this point in the history
* rename build files test

* add unit tests for audio file check util

* move audio file tests to specific test module

* add datetime template validation function

* add datetime regex builder

* add extract timestamp from filename with integration tests

* add test ids and check exception messages

* move regex dictionary to module constants

* add docstrings

* add associate_timestamps function

* add strftime to osmose format

* use string interpolation in test result

* backwards compatibility of timestamp.csv labels

* use pandas.to_datetime for strptime operations

* support timezone in strptime

* use new timestamps utils in build method

* remove obsolete test module

* format with black

* rename filename to text in strptime parser util
---------

Co-authored-by: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com>
  • Loading branch information
Gautzilla and mathieudpnt authored Oct 22, 2024
1 parent 0eb129c commit 4138c80
Show file tree
Hide file tree
Showing 8 changed files with 574 additions and 222 deletions.
43 changes: 30 additions & 13 deletions src/OSmOSE/Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
import pandas as pd
import numpy as np
from tqdm import tqdm
from OSmOSE.utils.timestamp_utils import check_epoch
from OSmOSE.utils.audio_utils import get_all_audio_files
from OSmOSE.utils.timestamp_utils import (
check_epoch,
associate_timestamps,
strftime_osmose_format,
)
from OSmOSE.utils.core_utils import check_n_files, set_umask, chmod_if_needed
from OSmOSE.utils.path_utils import make_path
from OSmOSE.timestamps import write_timestamp
from OSmOSE.config import DPDEFAULT, FPDEFAULT, OSMOSE_PATH, TIMESTAMP_FORMAT_AUDIO_FILE


Expand Down Expand Up @@ -354,20 +358,18 @@ def build(
if original_folder is not None
else self._find_or_create_original_folder()
)
path_timestamp_formatted = path_raw_audio.joinpath("timestamp.csv")

resume_test_anomalies = path_raw_audio.joinpath("resume_test_anomalies.txt")
path_timestamp_formatted = path_raw_audio.joinpath("timestamp.csv")
user_timestamp = (
path_timestamp_formatted.exists()
) # TODO: Formatting audio files beforehand will make this obsolete

if not path_timestamp_formatted.exists():
user_timestamp = False
write_timestamp(
audio_path=path_raw_audio,
date_template=date_template,
timezone=self.timezone,
verbose=False,
if not user_timestamp:
self._write_timestamp_csv_from_audio_files(
audio_path=path_raw_audio, date_template=date_template
)
else:
user_timestamp = True

resume_test_anomalies = path_raw_audio.joinpath("resume_test_anomalies.txt")

# read the timestamp.csv file
timestamp_csv = pd.read_csv(path_timestamp_formatted)["timestamp"].values
Expand Down Expand Up @@ -655,6 +657,21 @@ def build(

print("\n DONE ! your dataset is on OSmOSE platform !")

def _write_timestamp_csv_from_audio_files(
    self, audio_path: Path, date_template: str
):
    """Write ``timestamp.csv`` in ``audio_path`` from the audio file names.

    The names of the files returned by ``get_all_audio_files(audio_path)``
    are handed to ``associate_timestamps`` together with ``date_template``.
    Each value of the resulting ``"timestamp"`` column is then converted with
    ``strftime_osmose_format`` before the table is saved.

    Parameters
    ----------
    audio_path: Path
        Folder that is scanned for audio files and that receives the
        ``timestamp.csv`` file.
    date_template: str
        Datetime template forwarded to ``associate_timestamps`` as
        ``datetime_template``.
    """
    timestamp_csv_path = audio_path / "timestamp.csv"

    audio_file_names = [
        audio_file.name for audio_file in get_all_audio_files(audio_path)
    ]
    # NOTE(review): presumably a pandas DataFrame with a "timestamp" column;
    # confirm against associate_timestamps.
    timestamps = associate_timestamps(
        audio_files=audio_file_names, datetime_template=date_template
    )
    timestamps["timestamp"] = timestamps["timestamp"].apply(strftime_osmose_format)

    # Missing values are written out explicitly as "NaN", without the index.
    timestamps.to_csv(timestamp_csv_path, index=False, na_rep="NaN")
    os.chmod(timestamp_csv_path, mode=FPDEFAULT)

def _format_timestamp(
self,
cur_timestamp_not_formatted: str,
Expand Down
2 changes: 0 additions & 2 deletions src/OSmOSE/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from OSmOSE.Dataset import Dataset
from OSmOSE.timestamps import write_timestamp
from OSmOSE.job import Job_builder
from OSmOSE.Spectrogram import Spectrogram
import OSmOSE.utils as utils
Expand All @@ -8,7 +7,6 @@
__all__ = [
"Auxiliary",
"Dataset",
"write_timestamp",
"Job_builder",
"Spectrogram",
"utils",
Expand Down
182 changes: 0 additions & 182 deletions src/OSmOSE/timestamps.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/OSmOSE/utils/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path


def is_audio(filename: Path) -> bool:
def is_supported_audio_format(filename: Path) -> bool:
"""
Check if a given file is a supported audio file based on its extension.
Expand All @@ -17,7 +17,7 @@ def is_audio(filename: Path) -> bool:
True if the file has an extension that matches a supported audio format,
False otherwise.
"""
return filename.suffix in SUPPORTED_AUDIO_FORMAT
return filename.suffix.lower() in SUPPORTED_AUDIO_FORMAT


def get_all_audio_files(directory: Path) -> list[Path]:
Expand Down
Loading

0 comments on commit 4138c80

Please sign in to comment.