Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor timestamps managing #210

Merged
merged 25 commits into main from refactor/audio_files_build
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ebd8fe6
rename build files test
Gautzilla Oct 15, 2024
a2de7e3
add unit tests for audio file check util
Gautzilla Oct 15, 2024
59dc323
move audio file tests to specific test module
Gautzilla Oct 15, 2024
0dc28fc
add datetime template validation function
Gautzilla Oct 15, 2024
e9a3406
add datetime regex builder
Gautzilla Oct 15, 2024
08390ef
add extract timestamp from filename with integration tests
Gautzilla Oct 15, 2024
e99ec18
add tests ids and check exception messages
Gautzilla Oct 16, 2024
cc9b5fd
move regex dictionary to module constants
Gautzilla Oct 16, 2024
08e9f5d
add docstrings
Gautzilla Oct 16, 2024
45b577c
add associate_timestamps function
Gautzilla Oct 16, 2024
4c746ae
add strftime to osmose format
Gautzilla Oct 17, 2024
6350e9c
use string interpolation in test result
Gautzilla Oct 17, 2024
ed21535
backwards compatibility of timestamp.csv labels
Gautzilla Oct 17, 2024
cc4fe1b
use pandas.to_datetime for strptime operations
Gautzilla Oct 17, 2024
c8325e3
support timezone in strptime
Gautzilla Oct 17, 2024
879a159
use new timestamps utils in build method
Gautzilla Oct 17, 2024
d6f6887
remove obsolete test module
Gautzilla Oct 17, 2024
eec9a13
format with black
Gautzilla Oct 17, 2024
43d20b6
fix missing imports
Gautzilla Oct 17, 2024
015454a
resolve tests warnings
Gautzilla Oct 17, 2024
35f124a
resolve regex escape warnings
Gautzilla Oct 17, 2024
9c215aa
rename filename to text in strptime parser util
Gautzilla Oct 17, 2024
bcbc890
Merge branch 'main' into refactor/audio_files_build
Gautzilla Oct 18, 2024
71f8c37
Merge branch 'main' into refactor/audio_files_build
mathieudpnt Oct 22, 2024
d1ee2d8
fix typo
Gautzilla Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions src/OSmOSE/Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
import pandas as pd
import numpy as np
from tqdm import tqdm
from OSmOSE.utils.timestamp_utils import check_epoch
from OSmOSE.utils.audio_utils import get_all_audio_files
from OSmOSE.utils.timestamp_utils import (
check_epoch,
associate_timestamps,
strftime_osmose_format,
)
from OSmOSE.utils.core_utils import check_n_files, set_umask, chmod_if_needed
from OSmOSE.utils.path_utils import make_path
from OSmOSE.timestamps import write_timestamp
from OSmOSE.config import DPDEFAULT, FPDEFAULT, OSMOSE_PATH, TIMESTAMP_FORMAT_AUDIO_FILE


Expand Down Expand Up @@ -354,20 +358,18 @@ def build(
if original_folder is not None
else self._find_or_create_original_folder()
)
path_timestamp_formatted = path_raw_audio.joinpath("timestamp.csv")

resume_test_anomalies = path_raw_audio.joinpath("resume_test_anomalies.txt")
path_timestamp_formatted = path_raw_audio.joinpath("timestamp.csv")
user_timestamp = (
path_timestamp_formatted.exists()
) # TODO: Formatting audio files beforehand will make this obsolete

if not path_timestamp_formatted.exists():
user_timestamp = False
write_timestamp(
audio_path=path_raw_audio,
date_template=date_template,
timezone=self.timezone,
verbose=False,
if not user_timestamp:
self._write_timestamp_csv_from_audio_files(
audio_path=path_raw_audio, date_template=date_template
)
else:
user_timestamp = True

resume_test_anomalies = path_raw_audio.joinpath("resume_test_anomalies.txt")

# read the timestamp.csv file
timestamp_csv = pd.read_csv(path_timestamp_formatted)["timestamp"].values
Expand Down Expand Up @@ -655,6 +657,21 @@ def build(

print("\n DONE ! your dataset is on OSmOSE platform !")

def _write_timestamp_csv_from_audio_files(
    self, audio_path: Path, date_template: str
):
    """Create ``timestamp.csv`` in ``audio_path`` from the audio file names.

    The names of the files returned by ``get_all_audio_files(audio_path)``
    are passed to ``associate_timestamps`` together with ``date_template``.
    The ``"timestamp"`` column of the resulting table is then converted with
    ``strftime_osmose_format`` before the table is written to disk.

    Parameters
    ----------
    audio_path: Path
        Folder that is scanned for audio files and in which
        ``timestamp.csv`` is written.
    date_template: str
        Datetime template forwarded to ``associate_timestamps`` as
        ``datetime_template``.
    """
    csv_path = audio_path / "timestamp.csv"
    audio_names = [audio.name for audio in get_all_audio_files(audio_path)]

    timestamps = associate_timestamps(
        audio_files=audio_names, datetime_template=date_template
    )
    # Convert every timestamp to its OSmOSE string representation.
    timestamps["timestamp"] = timestamps["timestamp"].apply(strftime_osmose_format)

    # The index is not written; missing values are serialized as "NaN".
    timestamps.to_csv(csv_path, index=False, na_rep="NaN")
    # Apply the project's default file permissions to the new csv.
    os.chmod(csv_path, mode=FPDEFAULT)

def _format_timestamp(
self,
cur_timestamp_not_formatted: str,
Expand Down
2 changes: 0 additions & 2 deletions src/OSmOSE/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from OSmOSE.Dataset import Dataset
from OSmOSE.timestamps import write_timestamp
from OSmOSE.job import Job_builder
from OSmOSE.Spectrogram import Spectrogram
import OSmOSE.utils as utils
Expand All @@ -8,7 +7,6 @@
__all__ = [
"Auxiliary",
"Dataset",
"write_timestamp",
"Job_builder",
"Spectrogram",
"utils",
Expand Down
182 changes: 0 additions & 182 deletions src/OSmOSE/timestamps.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/OSmOSE/utils/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path


def is_audio(filename: Path) -> bool:
def is_supported_audio_format(filename: Path) -> bool:
"""
Check if a given file is a supported audio file based on its extension.

Expand All @@ -17,7 +17,7 @@ def is_audio(filename: Path) -> bool:
True if the file has an extension that matches a supported audio format,
False otherwise.
"""
return filename.suffix in SUPPORTED_AUDIO_FORMAT
return filename.suffix.lower() in SUPPORTED_AUDIO_FORMAT


def get_all_audio_files(directory: Path) -> list[Path]:
Expand Down
Loading
Loading