From 95aa414a6bc62b4c27ec87538018a1386d21650c Mon Sep 17 00:00:00 2001 From: blissful Date: Thu, 12 Oct 2023 20:59:17 -0400 Subject: [PATCH] Optimize the cache updater to minimize disk accesses The bottleneck is disk accesses, so we rewrote the updater to minimize them. We trade higher memory for reduced disk accesses. We: 1. Execute one big SQL query at the start to fetch the relevant previous caches. 2. Skip reading a file's data if the mtime has not changed since the previous cache update. 3. Only execute a SQLite upsert if the read data differ from the previous caches. With these optimizations, we make a lot of readdir and stat calls, but minimize file and database accesses to solely the files that have updated since the last cache run. --- README.md | 5 +- conftest.py | 68 +- rose/cache.py | 810 ++++++++++++------ rose/cache.sql | 17 +- rose/cache_test.py | 217 ++++- testdata/cache/Test Release 1/ignorethis.file | 0 .../Test Release 2/.rose.ilovecarly.toml | 1 + testdata/cache/Test Release 2/.rose.toml | 2 - 8 files changed, 809 insertions(+), 311 deletions(-) create mode 100644 testdata/cache/Test Release 1/ignorethis.file create mode 100644 testdata/cache/Test Release 2/.rose.ilovecarly.toml delete mode 100644 testdata/cache/Test Release 2/.rose.toml diff --git a/README.md b/README.md index 92f479f..c41b34f 100644 --- a/README.md +++ b/README.md @@ -145,12 +145,14 @@ must be top-level directories inside `music_source_dir`. Each album should also be a single directory in `music_source_dir`. Every directory should follow the format: `$music_source_dir/$album_name/$track.mp3`. +Additional nested directories are not currently supported. So for example: `$music_source_dir/BLACKPINK - 2016. SQUARE ONE/*.mp3`. + ## Filetypes -Rosé supports MP3, M4A, OGG, OPUS, and FLAC audio files. +Rosé supports `.mp3`, `.m4a`, `.ogg` (vorbis), `.opus`, and `.flac` audio files. Rosé also supports JPEG and PNG cover art. The supported cover art file stems are `cover`, `folder`, and `art`.
The supported cover art file extensions are `.jpg`, `.jpeg`, and `.png`. diff --git a/conftest.py b/conftest.py index 9ea2a84..346928a 100644 --- a/conftest.py +++ b/conftest.py @@ -71,37 +71,43 @@ def seeded_cache(config: Config) -> None: with sqlite3.connect(config.cache_database_path) as conn: conn.executescript( f"""\ -INSERT INTO releases (id, source_path, cover_image_path, virtual_dirname, title, release_type, release_year, new) -VALUES ('r1', '{dirpaths[0]}', null, 'r1', 'Release 1', 'album', 2023, true) - , ('r2', '{dirpaths[1]}', '{imagepaths[0]}', 'r2', 'Release 2', 'album', 2021, false); - -INSERT INTO releases_genres (release_id, genre, genre_sanitized) -VALUES ('r1', 'Techno', 'Techno') - , ('r1', 'Deep House', 'Deep House') - , ('r2', 'Classical', 'Classical'); - -INSERT INTO releases_labels (release_id, label, label_sanitized) -VALUES ('r1', 'Silk Music', 'Silk Music') - , ('r2', 'Native State', 'Native State'); - -INSERT INTO tracks (id, source_path, virtual_filename, title, release_id, track_number, disc_number, duration_seconds) -VALUES ('t1', '{musicpaths[0]}', '01.m4a', 'Track 1', 'r1', '01', '01', 120) - , ('t2', '{musicpaths[1]}', '02.m4a', 'Track 2', 'r1', '02', '01', 240) - , ('t3', '{musicpaths[2]}', '01.m4a', 'Track 1', 'r2', '01', '01', 120); - -INSERT INTO releases_artists (release_id, artist, artist_sanitized, role) -VALUES ('r1', 'Techno Man', 'Techno Man', 'main') - , ('r1', 'Bass Man', 'Bass Man', 'main') - , ('r2', 'Violin Woman', 'Violin Woman', 'main') - , ('r2', 'Conductor Woman', 'Conductor Woman', 'guest'); - -INSERT INTO tracks_artists (track_id, artist, artist_sanitized, role) -VALUES ('t1', 'Techno Man', 'Techno Man', 'main') - , ('t1', 'Bass Man', 'Bass Man', 'main') - , ('t2', 'Techno Man', 'Techno Man', 'main') - , ('t2', 'Bass Man', 'Bass Man', 'main') - , ('t3', 'Violin Woman', 'Violin Woman', 'main') - , ('t3', 'Conductor Woman', 'Conductor Woman', 'guest'); +INSERT INTO releases + (id , source_path , cover_image_path , datafile_mtime, virtual_dirname, title , release_type, release_year, multidisc, new , formatted_artists) +VALUES ('r1', '{dirpaths[0]}', null , '999' , 'r1' , 'Release 1', 'album' , 2023 , false , true , 'Techno Man;Bass Man') + , ('r2', '{dirpaths[1]}', '{imagepaths[0]}', '999' , 'r2' , 'Release 2', 'album' , 2021 , false , false, 'Violin Woman feat. Conductor Woman'); + +INSERT INTO releases_genres + (release_id, genre , genre_sanitized) +VALUES ('r1' , 'Techno' , 'Techno') + , ('r1' , 'Deep House', 'Deep House') + , ('r2' , 'Classical' , 'Classical'); + +INSERT INTO releases_labels + (release_id, label , label_sanitized) +VALUES ('r1' , 'Silk Music' , 'Silk Music') + , ('r2' , 'Native State', 'Native State'); + +INSERT INTO tracks + (id , source_path , source_mtime, virtual_filename, title , release_id, track_number, disc_number, duration_seconds, formatted_artists) +VALUES ('t1', '{musicpaths[0]}', '999' , '01.m4a' , 'Track 1', 'r1' , '01' , '01' , 120 , 'Techno Man;Bass Man') + , ('t2', '{musicpaths[1]}', '999' , '02.m4a' , 'Track 2', 'r1' , '02' , '01' , 240 , 'Techno Man;Bass Man') + , ('t3', '{musicpaths[2]}', '999' , '01.m4a' , 'Track 1', 'r2' , '01' , '01' , 120 , 'Violin Woman feat.
Conductor Woman'); + +INSERT INTO releases_artists + (release_id, artist , artist_sanitized , role) +VALUES ('r1' , 'Techno Man' , 'Techno Man' , 'main') + , ('r1' , 'Bass Man' , 'Bass Man' , 'main') + , ('r2' , 'Violin Woman' , 'Violin Woman' , 'main') + , ('r2' , 'Conductor Woman', 'Conductor Woman', 'guest'); + +INSERT INTO tracks_artists + (track_id, artist , artist_sanitized , role) +VALUES ('t1' , 'Techno Man' , 'Techno Man' , 'main') + , ('t1' , 'Bass Man' , 'Bass Man' , 'main') + , ('t2' , 'Techno Man' , 'Techno Man' , 'main') + , ('t2' , 'Bass Man' , 'Bass Man' , 'main') + , ('t3' , 'Violin Woman' , 'Violin Woman' , 'main') + , ('t3' , 'Conductor Woman', 'Conductor Woman', 'guest'); """ # noqa: E501 ) diff --git a/rose/cache.py b/rose/cache.py index bc84879..6d51b00 100644 --- a/rose/cache.py +++ b/rose/cache.py @@ -3,6 +3,7 @@ import logging import os import random +import re import sqlite3 import time from collections.abc import Iterator @@ -119,21 +120,25 @@ class CachedArtist: class CachedRelease: id: str source_path: Path + datafile_mtime: str cover_image_path: Path | None virtual_dirname: str title: str - release_type: str - release_year: int | None + type: str + year: int | None new: bool + multidisc: bool genres: list[str] labels: list[str] artists: list[CachedArtist] + formatted_artists: str @dataclass class CachedTrack: id: str source_path: Path + source_mtime: str virtual_filename: str title: str release_id: str @@ -142,6 +147,12 @@ class CachedTrack: duration_seconds: int artists: list[CachedArtist] + formatted_artists: str + + +@dataclass +class StoredDataFile: + new: bool VALID_COVER_FILENAMES = [ @@ -170,16 +181,23 @@ class CachedTrack: "unknown", ] +STORED_DATA_FILE_REGEX = re.compile(r"\.rose\.([^.]+)\.toml") + def update_cache_for_all_releases(c: Config) -> None: """ - Process and update the cache for all releases. Delete any nonexistent releases. + Update the read cache to match the data for all releases in the music source directory. Delete + any cached releases that are no longer present on disk. """ dirs = [Path(d.path).resolve() for d in os.scandir(c.music_source_dir) if d.is_dir()] logger.info(f"Found {len(dirs)} releases to update") - for d in dirs: - update_cache_for_release(c, d) + update_cache_for_releases(c, dirs) + update_cache_delete_nonexistent_releases(c) + + +def update_cache_delete_nonexistent_releases(c: Config) -> None: logger.info("Deleting cached releases that are not on disk") + dirs = [Path(d.path).resolve() for d in os.scandir(c.music_source_dir) if d.is_dir()] with connect(c) as conn: conn.execute( f""" @@ -190,146 +208,527 @@ def update_cache_for_all_releases(c: Config) -> None: ) -def update_cache_for_release(c: Config, release_dir: Path) -> None: +def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None: """ - Given a release's directory, update the cache entry based on the release's metadata. If this is - a new release or track, update the directory and file names to include the UUIDs. + Update the read cache to match the data for any passed-in releases. If a directory lacks a + .rose.{uuid}.toml datafile, create the datafile for the release and set it to the initial state. + + This is a hot path and is thus performance-optimized. The bottleneck is disk accesses, so we + structure this function in order to minimize them. We trade higher memory for reduced disk + accesses. We: - Returns the new release_dir if a rename occurred; otherwise, returns the same release_dir. + 1. 
Execute one big SQL query at the start to fetch the relevant previous caches. + 2. Skip reading a file's data if the mtime has not changed since the previous cache update. + 3. Only execute a SQLite upsert if the read data differ from the previous caches. + + With these optimizations, we make a lot of readdir and stat calls, but minimize file and + database accesses to solely the files that have updated since the last cache run. """ - logger.info(f"Refreshing cached data for {release_dir.name}") - with connect(c) as conn, transaction(conn) as conn: - # The release will be updated based on the album tags of the first track. - release: CachedRelease | None = None - # But first, parse the release_id from the directory name. If the directory name does not - # contain a release_id, generate one and rename the directory. - stored_release_data = _read_stored_data_file(release_dir) - if not stored_release_data: - stored_release_data = _create_stored_data_file(release_dir) - - # Fetch all track tags from disk. - track_tags: list[tuple[os.DirEntry[str], AudioFile]] = [] - for f in os.scandir(release_dir): - # Skip non-music files. - if any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS): - track_tags.append((f, AudioFile.from_file(Path(f.path)))) - - # Calculate whether this is a multidisc release or not. This will affect the virtual - # filename formatting. - multidisc = len({t.disc_number for _, t in track_tags}) > 1 - - for f, tags in track_tags: - # If this is the first track, upsert the release. - if release is None: - logger.debug("Upserting release from first track's tags") - - # Compute the album's visual directory name. - virtual_dirname = format_artist_string(tags.album_artists, tags.genre) + " - " - if tags.year: - virtual_dirname += str(tags.year) + ". " - virtual_dirname += tags.album or "Unknown Release" - if ( - tags.release_type - and tags.release_type.lower() in SUPPORTED_RELEASE_TYPES - and tags.release_type not in ["album", "unknown"] - ): - virtual_dirname += " - " + tags.release_type.title() - if tags.genre: - virtual_dirname += " [" + ";".join(tags.genre) + "]" - if tags.label: - virtual_dirname += " {" + ";".join(tags.label) + "}" - if stored_release_data.new: - virtual_dirname += " +NEW!+" - virtual_dirname = sanitize_filename(virtual_dirname) - # And in case of a name collision, add an extra number at the end. Iterate to find - # the first unused number. - original_virtual_dirname = virtual_dirname - collision_no = 1 - while True: - collision_no += 1 - cursor = conn.execute( - """ - SELECT EXISTS( - SELECT * FROM releases WHERE virtual_dirname = ? AND id <> ? - ) - """, - (virtual_dirname, stored_release_data.uuid), - ) - if not cursor.fetchone()[0]: - break - virtual_dirname = f"{original_virtual_dirname} [{collision_no}]" - - # Search for cover art. - cover_image_path = None - for cn in VALID_COVER_FILENAMES: - p = release_dir / cn - if p.is_file(): - cover_image_path = p.resolve() - break + logger.info(f"Refreshing cached data for {', '.join([r.name for r in release_dirs])}") + + # First, call readdir on every release directory. We store the results in a list of + # (Release Path, Release ID if exists, File DirEntries) tuples.
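+ # For example (hypothetical path and ID): + # [(PosixPath('/music/Artist - 2023. Album'), '018b6021-...', [<DirEntry '01.m4a'>, <DirEntry 'cover.jpg'>])]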
+ dir_tree: list[tuple[Path, str | None, list[os.DirEntry[str]]]] = [] + release_uuids: list[str] = [] + for rd in release_dirs: + release_id = None + files: list[os.DirEntry[str]] = [] + for f in os.scandir(str(rd)): + if m := STORED_DATA_FILE_REGEX.match(f.name): + release_id = m[1] + continue + files.append(f) + dir_tree.append((rd.resolve(), release_id, files)) + if release_id is not None: + release_uuids.append(release_id) + + # Then batch query for all metadata associated with the discovered IDs. This pulls all data into + # memory for fast access throughout this function. We do this in two passes (and two queries!): + # 1. Fetch all releases. + # 2. Fetch all tracks in a single query, and then associate each track with a release. + # The tracks are stored as a dict of source_path -> Track. + cached_releases: dict[str, tuple[CachedRelease, dict[str, CachedTrack]]] = {} + with connect(c) as conn: + cursor = conn.execute( + rf""" + WITH genres AS ( + SELECT + release_id, + GROUP_CONCAT(genre, ' \\ ') AS genres + FROM releases_genres + GROUP BY release_id + ), labels AS ( + SELECT + release_id, + GROUP_CONCAT(label, ' \\ ') AS labels + FROM releases_labels + GROUP BY release_id + ), artists AS ( + SELECT + release_id, + GROUP_CONCAT(artist, ' \\ ') AS names, + GROUP_CONCAT(role, ' \\ ') AS roles + FROM releases_artists + GROUP BY release_id + ) + SELECT + r.id + , r.source_path + , r.cover_image_path + , r.datafile_mtime + , r.virtual_dirname + , r.title + , r.release_type + , r.release_year + , r.multidisc + , r.new + , r.formatted_artists + , COALESCE(g.genres, '') AS genres + , COALESCE(l.labels, '') AS labels + , COALESCE(a.names, '') AS artist_names + , COALESCE(a.roles, '') AS artist_roles + FROM releases r + LEFT JOIN genres g ON g.release_id = r.id + LEFT JOIN labels l ON l.release_id = r.id + LEFT JOIN artists a ON a.release_id = r.id + WHERE r.id IN ({','.join(['?'] * len(release_uuids))}) + """, + release_uuids, + ) + for row in cursor: + release_artists: list[CachedArtist] = [] + for n, r in zip(row["artist_names"].split(r" \\ "), row["artist_roles"].split(r" \\ ")): + release_artists.append(CachedArtist(name=n, role=r)) + cached_releases[row["id"]] = ( + CachedRelease( + id=row["id"], + source_path=Path(row["source_path"]), + cover_image_path=Path(row["cover_image_path"]) + if row["cover_image_path"] + else None, + datafile_mtime=row["datafile_mtime"], + virtual_dirname=row["virtual_dirname"], + title=row["title"], + type=row["release_type"], + year=row["release_year"], + multidisc=bool(row["multidisc"]), + new=bool(row["new"]), + genres=row["genres"].split(r" \\ "), + labels=row["labels"].split(r" \\ "), + artists=release_artists, + formatted_artists=row["formatted_artists"], + ), + {}, + ) + + logger.debug(f"Found {len(cached_releases)}/{len(release_dirs)} releases in cache") + + cursor = conn.execute( + rf""" + WITH artists AS ( + SELECT + track_id, + GROUP_CONCAT(artist, ' \\ ') AS names, + GROUP_CONCAT(role, ' \\ ') AS roles + FROM tracks_artists + GROUP BY track_id + ) + SELECT + t.id + , t.source_path + , t.source_mtime + , t.virtual_filename + , t.title + , t.release_id + , t.track_number + , t.disc_number + , t.duration_seconds + , t.formatted_artists + , COALESCE(a.names, '') AS artist_names + , COALESCE(a.roles, '') AS artist_roles + FROM tracks t + JOIN releases r ON r.id = t.release_id + LEFT JOIN artists a ON a.track_id = t.id + WHERE r.id IN ({','.join(['?'] * len(release_uuids))}) + """, + release_uuids,
) + num_tracks_found = 0 + for row in cursor: + track_artists: list[CachedArtist] = [] + for n, r in zip(row["artist_names"].split(r" \\ "), row["artist_roles"].split(r" \\ ")): + track_artists.append(CachedArtist(name=n, role=r)) + cached_releases[row["release_id"]][1][row["source_path"]] = CachedTrack( + id=row["id"], + source_path=Path(row["source_path"]), + source_mtime=row["source_mtime"], + virtual_filename=row["virtual_filename"], + title=row["title"], + release_id=row["release_id"], + track_number=row["track_number"], + disc_number=row["disc_number"], + duration_seconds=row["duration_seconds"], + artists=track_artists, + formatted_artists=row["formatted_artists"], + ) + num_tracks_found += 1 + + logger.debug(f"Found {num_tracks_found} tracks in cache") - # Construct the cached release. - release = CachedRelease( - id=stored_release_data.uuid, - source_path=release_dir.resolve(), - cover_image_path=cover_image_path, - virtual_dirname=virtual_dirname, - title=tags.album or "Unknown Release", - release_type=( + # Now iterate over all releases in the source directory. Leverage mtime from stat to determine + # whether to even check the file tags or not. Only perform database updates if necessary. + for source_path, preexisting_release_id, files in dir_tree: + logger.debug( + f"Processing release {source_path} with {len(files)} " + f"files and preexisting id {preexisting_release_id}" + ) + # Check to see if we should even process the directory. If the directory does not have any + # tracks, skip it; and if it is nonetheless still in the cache, remove it from the cache. + for f in files: + if any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS): + break + else: + logger.debug(f"Did not find any audio files in release {source_path}, skipping") + logger.debug(f"Running cache deletion for empty directory release {source_path}") + with connect(c) as conn: + conn.execute("DELETE FROM releases WHERE source_path = ?", (str(source_path),)) + continue + + # This value is used to track whether to update the database for this release. If this is + # False at the end of this loop body, we can save a database update call. + release_dirty = False + + # Fetch the release from the cache. We will be updating this value on-the-fly, so + # instantiate it to zero values if the release is not already in the cache. + try: + release, cached_tracks = cached_releases[preexisting_release_id or ""] + except KeyError: + logger.debug( + f"First-time unidentified release found at release {source_path}, " + "writing UUID and new" + ) + release_dirty = True + release = CachedRelease( + id=preexisting_release_id or "", + source_path=source_path, + datafile_mtime="", + cover_image_path=None, + virtual_dirname="", + title="", + type="", + year=None, + new=True, + multidisc=False, + genres=[], + labels=[], + artists=[], + formatted_artists="", + ) + cached_tracks = {} + + # Handle source path change; if it's changed, update the release. + if source_path != release.source_path: + logger.debug(f"Source path change detected for release {source_path}, updating") + release.source_path = source_path + release_dirty = True + + # The directory does not have a release ID, so create the stored data file.
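+ # The new UUID is embedded in the datafile's name, .rose.{uuid}.toml, so future scans can + # recover the release ID from readdir alone, without opening the file.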
+ if not preexisting_release_id: + logger.debug(f"Creating new stored data file for release {source_path}") + stored_release_data = StoredDataFile(new=True) + new_release_id = str(uuid6.uuid7()) + datafile_path = source_path / f".rose.{new_release_id}.toml" + with datafile_path.open("wb") as fp: + tomli_w.dump(asdict(stored_release_data), fp) + release.id = new_release_id + release.new = stored_release_data.new + release.datafile_mtime = str(os.stat(datafile_path).st_mtime) + release_dirty = True + else: + # Otherwise, check to see if the mtime changed from what we know. If it has, read from + # the datafile. + datafile_path = source_path / f".rose.{preexisting_release_id}.toml" + datafile_mtime = str(os.stat(datafile_path).st_mtime) + if datafile_mtime != release.datafile_mtime: + logger.debug(f"Datafile mtime changed for release {source_path}, updating") + release.datafile_mtime = datafile_mtime + release_dirty = True + with datafile_path.open("rb") as fp: + diskdata = tomllib.load(fp) + datafile = StoredDataFile(new=diskdata.get("new", True)) + release.new = datafile.new + # And then write the data back to disk if it changed. This allows us to update + # datafiles to contain newer default values. + new_resolved_data = asdict(datafile) + if new_resolved_data != diskdata: + logger.debug(f"Updating values in stored data file for release {source_path}") + with datafile_path.open("wb") as fp: + tomli_w.dump(new_resolved_data, fp) + + # Handle cover art change. + try: + cover = next(Path(f.path).resolve() for f in files if f.name in VALID_COVER_FILENAMES) + except StopIteration: # No cover art in directory. + cover = None + if cover != release.cover_image_path: + logger.debug(f"Cover art file for release {source_path} updated to path {cover}") + release.cover_image_path = cover + release_dirty = True + + # Now we'll switch over to processing the tracks. We need track metadata in order to + # calculate some fields of the release, so we'll first compute the valid set of + # CachedTracks, and then we will finalize the release and execute any required database + # operations for the release and tracks. + + # We want to know which cached tracks are no longer on disk. By the end of the following + # loop, this set should contain only such tracks, which will be deleted in the database + # execution handling step. + unknown_cached_tracks: set[str] = set(cached_tracks.keys()) + # Next, we will construct the list of tracks that are on the release. We will also leverage + # mtimes and such to avoid unnecessary recomputations. If a track has changed and should + # be updated in the database, we add its ID to track_ids_to_upsert, which will be used in + # the database execution step. + # + # Note that we do NOT calculate the virtual_filename in this loop, because we need to know + # whether the release is multidisc to do that. But we only know whether a release is + # multidisc after having all the track metadata. So we do the virtual_filename calculation + # in a follow-up loop. + tracks: list[CachedTrack] = [] + track_ids_to_upsert: set[str] = set() + # This value is set to true if we read an AudioFile and used it to confirm the release tags. + # If this value is false after the following loop, we will use the cached values instead.
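+ # (It stays false when every track is an mtime cache hit; in that case the cached release + # fields are already correct and nothing needs recomputing.)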
+ pulled_release_tags = False + with connect(c) as conn, transaction(conn) as conn: + for f in files: + if not any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS): + continue + track_path = Path(f.path).resolve() + cached_track = cached_tracks.get(str(track_path), None) + track_mtime = str(os.stat(track_path).st_mtime) + # Skip re-read if we can reuse a cached entry. + if cached_track and track_mtime == cached_track.source_mtime: + logger.debug(f"Track cache hit (mtime) for {f}, reusing cached data") + tracks.append(cached_track) + unknown_cached_tracks.remove(str(track_path)) + continue + + # Otherwise, read tags from disk and construct a new cached_track. + logger.debug(f"Track cache miss for {f}, reading tags from disk") + tags = AudioFile.from_file(track_path) + + # Now that we're here, pull the release tags. We also need them to compute the + # formatted artist string. + if not pulled_release_tags: + pulled_release_tags = True + release_title = tags.album or "Unknown Release" + if release_title != release.title: + logger.debug(f"Release title change detected for {source_path}, updating") + release.title = release_title + release_dirty = True + + release_type = ( tags.release_type.lower() if tags.release_type and tags.release_type.lower() in SUPPORTED_RELEASE_TYPES else "unknown" - ), - release_year=tags.year, - new=True, - genres=tags.genre, - labels=tags.label, + ) + if release_type != release.type: + logger.debug(f"Release type change detected for {source_path}, updating") + release.type = release_type + release_dirty = True + + if tags.year != release.year: + logger.debug(f"Release year change detected for {source_path}, updating") + release.year = tags.year + release_dirty = True + + if set(tags.genre) != set(release.genres): + logger.debug(f"Release genre change detected for {source_path}, updating") + release.genres = tags.genre + release_dirty = True + + if set(tags.label) != set(release.labels): + logger.debug(f"Release label change detected for {source_path}, updating") + release.labels = tags.label + release_dirty = True + + release_artists = [] + for role, names in asdict(tags.album_artists).items(): + for name in names: + release_artists.append(CachedArtist(name=name, role=role)) + if release_artists != release.artists: + logger.debug(f"Release artists change detected for {source_path}, updating") + release.artists = release_artists + release_dirty = True + + release_formatted_artists = format_artist_string( + tags.album_artists, release.genres + ) + if release_formatted_artists != release.formatted_artists: + logger.debug( + f"Release formatted artists change detected for {source_path}, updating" + ) + release.formatted_artists = release_formatted_artists + release_dirty = True + + # Calculate the release's virtual dirname. + release_virtual_dirname = release.formatted_artists + " - " + if release.year: + release_virtual_dirname += str(release.year) + ". " + release_virtual_dirname += release.title + if release.type not in ["album", "unknown"]: + release_virtual_dirname += " - " + release.type.title() + if release.genres: + release_virtual_dirname += " [" + ";".join(release.genres) + "]" + if release.labels: + release_virtual_dirname += " {" + ";".join(release.labels) + "}" + if release.new: + release_virtual_dirname += " +NEW!+" + release_virtual_dirname = sanitize_filename(release_virtual_dirname) + # And in case of a name collision, add an extra number at the end. Iterate to + # find the first unused number.
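+ # (Suffixes start at [2]; a [1] suffix is never generated.)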
+ original_virtual_dirname = release_virtual_dirname + collision_no = 1 + while True: + collision_no += 1 + cursor = conn.execute( + "SELECT EXISTS(SELECT * FROM releases WHERE virtual_dirname = ? AND id <> ?)", # noqa: E501 + (release_virtual_dirname, release.id), + ) + if not cursor.fetchone()[0]: + break + release_virtual_dirname = f"{original_virtual_dirname} [{collision_no}]" + + if release_virtual_dirname != release.virtual_dirname: + logger.debug( + f"Release virtual dirname change detected for {source_path}, updating" + ) + release.virtual_dirname = release_virtual_dirname + release_dirty = True + + # And now create the cached track. + track = CachedTrack( + id=str(uuid6.uuid7()), + source_path=track_path, + source_mtime=track_mtime, + virtual_filename="", + title=tags.title or "Unknown Title", + release_id=release.id, + track_number=tags.track_number or "1", + disc_number=tags.disc_number or "1", + duration_seconds=tags.duration_sec, artists=[], + formatted_artists=format_artist_string(tags.artists, release.genres), ) - for role, names in asdict(tags.album_artists).items(): + tracks.append(track) + for role, names in asdict(tags.artists).items(): for name in names: - release.artists.append(CachedArtist(name=name, role=role)) + track.artists.append(CachedArtist(name=name, role=role)) + track_ids_to_upsert.add(track.id) + + # Now calculate whether this release is multidisc, and then assign virtual_filenames for + # each track that lacks one. + multidisc = len({t.disc_number for t in tracks}) > 1 + if release.multidisc != multidisc: + logger.debug(f"Release multidisc change detected for {source_path}, updating") + release_dirty = True + release.multidisc = multidisc + # Use this set to avoid name collisions. + seen_track_names: set[str] = set() + for i, t in enumerate(tracks): + virtual_filename = "" + if multidisc and t.disc_number: + virtual_filename += f"{t.disc_number:0>2}-" + if t.track_number: + virtual_filename += f"{t.track_number:0>2}. " + virtual_filename += t.title or "Unknown Title" + if release.type in ["compilation", "soundtrack", "remix", "djmix", "mixtape"]: + virtual_filename += f" (by {t.formatted_artists})" + virtual_filename += t.source_path.suffix + virtual_filename = sanitize_filename(virtual_filename) + # And in case of a name collision, add an extra number at the end. Iterate to find + # the first unused number. + original_virtual_filename = virtual_filename + collision_no = 1 + while True: + collision_no += 1 + if virtual_filename not in seen_track_names: + break + virtual_filename = f"{original_virtual_filename} [{collision_no}]" + seen_track_names.add(virtual_filename) + if virtual_filename != t.virtual_filename: + tracks[i].virtual_filename = virtual_filename + track_ids_to_upsert.add(t.id) + + # Database executions. + logger.debug(f"Deleting {len(unknown_cached_tracks)} unknown tracks from cache") + conn.execute( + f""" + DELETE FROM tracks + WHERE release_id = ? + AND source_path IN ({','.join(['?']*len(unknown_cached_tracks))}) + """, + [release.id, *unknown_cached_tracks], + ) - # Upsert the release. + if release_dirty: + logger.debug(f"Upserting dirty release in database: {source_path}") conn.execute( """ - INSERT INTO releases - (id, source_path, cover_image_path, virtual_dirname, title, release_type, - release_year, new) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ INSERT INTO releases ( + id + , source_path + , cover_image_path + , datafile_mtime + , virtual_dirname + , title + , release_type + , release_year + , multidisc + , new + , formatted_artists + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (id) DO UPDATE SET - source_path = ?, - cover_image_path = ?, - virtual_dirname = ?, - title = ?, - release_type = ?, - release_year = ?, - new = ? + source_path = ? + , cover_image_path = ? + , datafile_mtime = ? + , virtual_dirname = ? + , title = ? + , release_type = ? + , release_year = ? + , multidisc = ? + , new = ? + , formatted_artists = ? """, ( release.id, str(release.source_path), - str(release.cover_image_path), + str(release.cover_image_path) if release.cover_image_path else None, + release.datafile_mtime, release.virtual_dirname, release.title, - release.release_type, - release.release_year, + release.type, + release.year, + release.multidisc, release.new, + release.formatted_artists, str(release.source_path), - str(release.cover_image_path), + str(release.cover_image_path) if release.cover_image_path else None, + release.datafile_mtime, release.virtual_dirname, release.title, - release.release_type, - release.release_year, + release.type, + release.year, + release.multidisc, release.new, + release.formatted_artists, ), ) for genre in release.genres: conn.execute( """ INSERT INTO releases_genres (release_id, genre, genre_sanitized) - VALUES (?, ?, ?) - ON CONFLICT (release_id, genre) DO NOTHING + VALUES (?, ?, ?) ON CONFLICT (release_id, genre) DO NOTHING """, (release.id, genre, sanitize_filename(genre)), ) @@ -337,8 +736,7 @@ def update_cache_for_release(c: Config, release_dir: Path) -> None: conn.execute( """ INSERT INTO releases_labels (release_id, label, label_sanitized) - VALUES (?, ?, ?) - ON CONFLICT (release_id, label) DO NOTHING + VALUES (?, ?, ?) ON CONFLICT (release_id, label) DO NOTHING """, (release.id, label, sanitize_filename(label)), ) @@ -346,146 +744,56 @@ conn.execute( """ INSERT INTO releases_artists (release_id, artist, artist_sanitized, role) - VALUES (?, ?, ?, ?) - ON CONFLICT (release_id, artist) DO UPDATE SET role = ? + VALUES (?, ?, ?, ?) ON CONFLICT (release_id, artist) DO UPDATE SET role = ? """, (release.id, art.name, sanitize_filename(art.name), art.role, art.role), ) - # Now process the track. Release is guaranteed to exist here. - filepath = Path(f.path) + for track in tracks: + if track.id not in track_ids_to_upsert: + continue - # Track ID is transient with the cache; we don't put it in any persistent stores. - cursor = conn.execute( - "SELECT id FROM tracks WHERE release_id = ? AND source_path = ?", - (release.id, str(filepath)), - ) - track_id = row["id"] if (row := cursor.fetchone()) else str(uuid6.uuid7()) - - virtual_filename = "" - if multidisc and tags.disc_number: - virtual_filename += f"{tags.disc_number:0>2}-" - if tags.track_number: - virtual_filename += f"{tags.track_number:0>2}. " - virtual_filename += tags.title or "Unknown Title" - if tags.release_type in ["compilation", "soundtrack", "remix", "djmix", "mixtape"]: - virtual_filename += " (by " + format_artist_string(tags.artists, tags.genre) + ")" - virtual_filename += filepath.suffix - virtual_filename = sanitize_filename(virtual_filename) - # And in case of a name collision, add an extra number at the end. Iterate to find - # the first unused number. - original_virtual_filename = virtual_filename - collision_no = 1 - while True: - collision_no += 1 - cursor = conn.execute( - """ - SELECT EXISTS( - SELECT * FROM tracks - WHERE virtual_filename = ? AND release_id = ? AND id <> ?
- ) - """, - (virtual_filename, release.id, track_id), - ) - if not cursor.fetchone()[0]: - break - virtual_filename = f"{original_virtual_filename} [{collision_no}]" - - track = CachedTrack( - id=track_id, - source_path=filepath, - virtual_filename=virtual_filename, - title=tags.title or "Unknown Title", - release_id=release.id, - track_number=tags.track_number or "1", - disc_number=tags.disc_number or "1", - duration_seconds=tags.duration_sec, - artists=[], - ) - for role, names in asdict(tags.artists).items(): - for name in names: - track.artists.append(CachedArtist(name=name, role=role)) - conn.execute( - """ - INSERT INTO tracks - (id, source_path, virtual_filename, title, release_id, - track_number, disc_number, duration_seconds) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT (id) DO UPDATE SET - source_path = ?, - virtual_filename = ?, - title = ?, - release_id = ?, - track_number = ?, - disc_number = ?, - duration_seconds = ? - """, - ( - track.id, - str(track.source_path), - track.virtual_filename, - track.title, - track.release_id, - track.track_number, - track.disc_number, - track.duration_seconds, - str(track.source_path), - track.virtual_filename, - track.title, - track.release_id, - track.track_number, - track.disc_number, - track.duration_seconds, - ), - ) - for art in track.artists: + # There should never be an upsert case, because when a track goes bad, we delete it + # from the database. We don't update it in place. This is because we lack stable IDs + # for tracks across refreshes. + logger.debug(f"Inserting dirty track in database: {track.source_path}") conn.execute( """ - INSERT INTO tracks_artists (track_id, artist, artist_sanitized, role) - VALUES (?, ?, ?, ?) - ON CONFLICT (track_id, artist) DO UPDATE SET role = ? + INSERT INTO tracks ( + id + , source_path + , source_mtime + , virtual_filename + , title + , release_id + , track_number + , disc_number + , duration_seconds + , formatted_artists + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, - (track.id, art.name, sanitize_filename(art.name), art.role, art.role), + ( + track.id, + str(track.source_path), + track.source_mtime, + track.virtual_filename, + track.title, + track.release_id, + track.track_number, + track.disc_number, + track.duration_seconds, + track.formatted_artists, + ), ) - - -STORED_DATA_FILE_NAME = ".rose.toml" - - -@dataclass -class StoredDataFile: - uuid: str - new: bool - - -def _read_stored_data_file(path: Path) -> StoredDataFile | None: - for f in path.iterdir(): - if f.name == STORED_DATA_FILE_NAME: - logger.debug(f"Found stored data file for {path}") - with f.open("rb") as fp: - diskdata = tomllib.load(fp) - datafile = StoredDataFile( - uuid=diskdata.get("uuid", str(uuid6.uuid7())), - new=diskdata.get("new", True), - ) - resolveddata = asdict(datafile) - if resolveddata != diskdata: - logger.debug(f"Setting new default values in stored data file for {path}") - with f.open("wb") as fp: - tomli_w.dump(resolveddata, fp) - return datafile - return None - - -def _create_stored_data_file(path: Path) -> StoredDataFile: - logger.debug(f"Creating stored data file for {path}") - data = StoredDataFile( - uuid=str(uuid6.uuid7()), - new=True, - ) - with (path / ".rose.toml").open("wb") as fp: - tomli_w.dump(asdict(data), fp) - return data + for art in track.artists: + conn.execute( + """ + INSERT INTO tracks_artists (track_id, artist, artist_sanitized, role) + VALUES (?, ?, ?, ?) ON CONFLICT (track_id, artist) DO UPDATE SET role = ? 
+ """, + (track.id, art.name, sanitize_filename(art.name), art.role, art.role), + ) def list_releases( @@ -520,11 +828,14 @@ def list_releases( r.id , r.source_path , r.cover_image_path + , r.datafile_mtime , r.virtual_dirname , r.title , r.release_type , r.release_year + , r.multidisc , r.new + , r.formatted_artists , COALESCE(g.genres, '') AS genres , COALESCE(l.labels, '') AS labels , COALESCE(a.names, '') AS artist_names @@ -570,14 +881,17 @@ def list_releases( id=row["id"], source_path=Path(row["source_path"]), cover_image_path=Path(row["cover_image_path"]) if row["cover_image_path"] else None, + datafile_mtime=row["datafile_mtime"], virtual_dirname=row["virtual_dirname"], title=row["title"], - release_type=row["release_type"], - release_year=row["release_year"], + type=row["release_type"], + year=row["release_year"], + multidisc=bool(row["multidisc"]), new=bool(row["new"]), genres=row["genres"].split(r" \\ "), labels=row["labels"].split(r" \\ "), artists=artists, + formatted_artists=row["formatted_artists"], ) @@ -604,12 +918,14 @@ def get_release_files(c: Config, release_virtual_dirname: str) -> ReleaseFiles: SELECT t.id , t.source_path + , t.source_mtime , t.virtual_filename , t.title , t.release_id , t.track_number , t.disc_number , t.duration_seconds + , t.formatted_artists , COALESCE(a.names, '') AS artist_names , COALESCE(a.roles, '') AS artist_roles FROM tracks t @@ -627,12 +943,14 @@ def get_release_files(c: Config, release_virtual_dirname: str) -> ReleaseFiles: CachedTrack( id=row["id"], source_path=Path(row["source_path"]), + source_mtime=row["source_mtime"], virtual_filename=row["virtual_filename"], title=row["title"], release_id=row["release_id"], track_number=row["track_number"], disc_number=row["disc_number"], duration_seconds=row["duration_seconds"], + formatted_artists=row["formatted_artists"], artists=artists, ) ) diff --git a/rose/cache.sql b/rose/cache.sql index 950f8fd..53af653 100644 --- a/rose/cache.sql +++ b/rose/cache.sql @@ -16,11 +16,16 @@ CREATE TABLE releases ( id TEXT PRIMARY KEY, source_path TEXT NOT NULL UNIQUE, cover_image_path TEXT, + datafile_mtime TEXT NOT NULL, virtual_dirname TEXT NOT NULL UNIQUE, title TEXT NOT NULL, release_type TEXT NOT NULL REFERENCES release_type_enum(value), release_year INTEGER, - new BOOLEAN NOT NULL DEFAULT true + multidisc BOOLEAN NOT NULL, + new BOOLEAN NOT NULL DEFAULT true, + -- This is its own state because ordering matters--we preserve the ordering in the tags. + -- However, the one-to-many table does not have ordering. + formatted_artists TEXT NOT NULL ); CREATE INDEX releases_source_path ON releases(source_path); CREATE INDEX releases_release_year ON releases(release_year); @@ -48,12 +53,16 @@ CREATE INDEX releases_labels_label_sanitized ON releases_labels(label_sanitized) CREATE TABLE tracks ( id TEXT PRIMARY KEY, source_path TEXT NOT NULL UNIQUE, + source_mtime TEXT NOT NULL, virtual_filename TEXT NOT NULL, title TEXT NOT NULL, release_id TEXT NOT NULL REFERENCES releases(id) ON DELETE CASCADE, track_number TEXT NOT NULL, disc_number TEXT NOT NULL, duration_seconds INTEGER NOT NULL, + -- This is its own state because ordering matters--we preserve the ordering in the tags. + -- However, the one-to-many table does not have ordering. 
+ formatted_artists TEXT NOT NULL, UNIQUE (release_id, virtual_filename) ); CREATE INDEX tracks_source_path ON tracks(source_path); @@ -94,7 +103,8 @@ CREATE INDEX tracks_artists_artist_sanitized ON tracks_artists(artist_sanitized) CREATE TABLE collections ( id TEXT PRIMARY KEY, name TEXT NOT NULL, - source_path TEXT UNIQUE NOT NULL + source_path TEXT UNIQUE NOT NULL, + source_mtime TEXT NOT NULL ); CREATE INDEX collections_source_path ON collections(source_path); @@ -110,7 +120,8 @@ CREATE UNIQUE INDEX collections_releases_collection_position ON collections_rele CREATE TABLE playlists ( id TEXT PRIMARY KEY, name TEXT NOT NULL, - source_path TEXT UNIQUE NOT NULL + source_path TEXT UNIQUE NOT NULL, + source_mtime TEXT NOT NULL ); CREATE INDEX playlists_source_path ON playlists(source_path); diff --git a/rose/cache_test.py b/rose/cache_test.py index afd590e..13d6316 100644 --- a/rose/cache_test.py +++ b/rose/cache_test.py @@ -3,11 +3,10 @@ from pathlib import Path import pytest -import tomllib from rose.cache import ( CACHE_SCHEMA_PATH, - STORED_DATA_FILE_NAME, + STORED_DATA_FILE_REGEX, CachedArtist, CachedRelease, CachedTrack, @@ -24,8 +23,9 @@ migrate_database, release_exists, track_exists, + update_cache_delete_nonexistent_releases, update_cache_for_all_releases, - update_cache_for_release, + update_cache_for_releases, ) from rose.config import Config @@ -65,14 +65,13 @@ def test_migration(config: Config) -> None: def test_update_cache_for_release(config: Config) -> None: release_dir = config.music_source_dir / TEST_RELEASE_1.name shutil.copytree(TEST_RELEASE_1, release_dir) - update_cache_for_release(config, release_dir) + update_cache_for_releases(config, [release_dir]) # Check that the release directory was given a UUID. release_id: str | None = None for f in release_dir.iterdir(): - if f.name == STORED_DATA_FILE_NAME: - with f.open("rb") as fp: - release_id = tomllib.load(fp)["uuid"] + if m := STORED_DATA_FILE_REGEX.match(f.name): + release_id = m[1] assert release_id is not None # Assert that the release metadata was read correctly. @@ -157,22 +156,138 @@ } -def test_update_cache_with_existing_id(config: Config) -> None: +def test_update_cache_uncached_release_with_existing_id(config: Config) -> None: """Test that IDs in filenames are read and preserved.""" release_dir = config.music_source_dir / TEST_RELEASE_2.name shutil.copytree(TEST_RELEASE_2, release_dir) - update_cache_for_release(config, release_dir) + update_cache_for_releases(config, [release_dir]) # Check that the release directory was given a UUID. release_id: str | None = None for f in release_dir.iterdir(): - if f.name == STORED_DATA_FILE_NAME: - with f.open("rb") as fp: - release_id = tomllib.load(fp)["uuid"] + if m := STORED_DATA_FILE_REGEX.match(f.name): + release_id = m[1] assert release_id == "ilovecarly" # Hardcoded ID for testing. +def test_update_cache_already_fully_cached_release(config: Config) -> None: + """Test that updating a fully cached release is a no-op.""" + release_dir = config.music_source_dir / TEST_RELEASE_1.name + shutil.copytree(TEST_RELEASE_1, release_dir) + update_cache_for_releases(config, [release_dir]) + update_cache_for_releases(config, [release_dir]) + + # Assert that the release metadata was read correctly.
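+    # (The second update_cache_for_releases call should have been a pure cache hit: every + # mtime matched, so no tags were re-read and no rows were rewritten.)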
+ with connect(config) as conn: + cursor = conn.execute( + "SELECT id, source_path, title, release_type, release_year, new FROM releases", + ) + row = cursor.fetchone() + assert row["source_path"] == str(release_dir) + assert row["title"] == "A Cool Album" + assert row["release_type"] == "album" + assert row["release_year"] == 1990 + assert row["new"] + + +def test_update_cache_disk_update_to_cached_release(config: Config) -> None: + """Test that a cached release is updated after a track updates.""" + release_dir = config.music_source_dir / TEST_RELEASE_1.name + shutil.copytree(TEST_RELEASE_1, release_dir) + update_cache_for_releases(config, [release_dir]) + # I'm too lazy to mutagen update the files, so instead we're going to update the database. And + # then touch a file to signify that "we modified it." + with connect(config) as conn: + conn.execute("UPDATE releases SET title = 'An Uncool Album'") + (release_dir / "01.m4a").touch() + update_cache_for_releases(config, [release_dir]) + + # Assert that the release metadata was re-read and updated correctly. + with connect(config) as conn: + cursor = conn.execute( + "SELECT id, source_path, title, release_type, release_year, new FROM releases", + ) + row = cursor.fetchone() + assert row["source_path"] == str(release_dir) + assert row["title"] == "A Cool Album" + assert row["release_type"] == "album" + assert row["release_year"] == 1990 + assert row["new"] + + +def test_update_cache_disk_update_to_datafile(config: Config) -> None: + """Test that a cached release is updated after a datafile updates.""" + release_dir = config.music_source_dir / TEST_RELEASE_1.name + shutil.copytree(TEST_RELEASE_1, release_dir) + update_cache_for_releases(config, [release_dir]) + with connect(config) as conn: + conn.execute("UPDATE releases SET datafile_mtime = '0', new = false") + update_cache_for_releases(config, [release_dir]) + + # Assert that the release metadata was re-read and updated correctly. + with connect(config) as conn: + cursor = conn.execute("SELECT new FROM releases") + row = cursor.fetchone() + assert row["new"] + + +def test_update_cache_disk_upgrade_old_datafile(config: Config) -> None: + """Test that a legacy datafile missing fields is upgraded on update.""" + release_dir = config.music_source_dir / TEST_RELEASE_1.name + shutil.copytree(TEST_RELEASE_1, release_dir) + datafile = release_dir / ".rose.lalala.toml" + datafile.touch() + update_cache_for_releases(config, [release_dir]) + + # Assert that the release metadata was re-read and updated correctly. + with connect(config) as conn: + cursor = conn.execute("SELECT id, new FROM releases") + row = cursor.fetchone() + assert row["id"] == "lalala" + assert row["new"] + with datafile.open("r") as fp: + assert "new = true" in fp.read() + + +def test_update_cache_disk_directory_renamed(config: Config) -> None: + """Test that a cached release is updated after a directory rename.""" + release_dir = config.music_source_dir / TEST_RELEASE_1.name + shutil.copytree(TEST_RELEASE_1, release_dir) + update_cache_for_releases(config, [release_dir]) + moved_release_dir = config.music_source_dir / "moved lol" + release_dir.rename(moved_release_dir) + update_cache_for_releases(config, [moved_release_dir]) + + # Assert that the release metadata was re-read and updated correctly.
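+    # (source_path should now point at the renamed directory. The release keeps its ID because + # the .rose.{uuid}.toml datafile moved along with the directory.)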
+ with connect(config) as conn: + cursor = conn.execute( + "SELECT id, source_path, title, release_type, release_year, new FROM releases", + ) + row = cursor.fetchone() + assert row["source_path"] == str(moved_release_dir) + assert row["title"] == "A Cool Album" + assert row["release_type"] == "album" + assert row["release_year"] == 1990 + assert row["new"] + + +def test_update_cache_delete_nonexistent_releases(config: Config) -> None: + """Test that releases that are no longer on disk are cleared from the cache.""" + with connect(config) as conn: + conn.execute( + """ + INSERT INTO releases (id, source_path, datafile_mtime, virtual_dirname, title, release_type, multidisc, formatted_artists) + VALUES ('aaaaaa', '/nonexistent', '999', 'nonexistent', 'aa', 'unknown', false, 'aa;aa') + """ # noqa: E501 + ) + update_cache_delete_nonexistent_releases(config) + with connect(config) as conn: + cursor = conn.execute("SELECT COUNT(*) FROM releases") + assert cursor.fetchone()[0] == 0 + + def test_update_cache_for_all_releases(config: Config) -> None: + """Test that the update all function works.""" shutil.copytree(TEST_RELEASE_1, config.music_source_dir / TEST_RELEASE_1.name) shutil.copytree(TEST_RELEASE_2, config.music_source_dir / TEST_RELEASE_2.name) @@ -180,9 +295,9 @@ def test_update_cache_for_all_releases(config: Config) -> None: with connect(config) as conn: conn.execute( """ - INSERT INTO releases (id, source_path, virtual_dirname, title, release_type) - VALUES ('aaaaaa', '/nonexistent', 'nonexistent', 'aa', 'unknown') - """ + INSERT INTO releases (id, source_path, datafile_mtime, virtual_dirname, title, release_type, multidisc, formatted_artists) + VALUES ('aaaaaa', '/nonexistent', '999', 'nonexistent', 'aa', 'unknown', false, 'aa;aa') + """ # noqa: E501 ) update_cache_for_all_releases(config) @@ -194,18 +309,44 @@ assert cursor.fetchone()[0] == 4 +def test_update_cache_skips_empty_directory(config: Config) -> None: + """Test that a directory with no audio files is skipped.""" + rd = config.music_source_dir / "lalala" + rd.mkdir() + (rd / "ignoreme.file").touch() + update_cache_for_releases(config, [rd]) + with connect(config) as conn: + cursor = conn.execute("SELECT COUNT(*) FROM releases") + assert cursor.fetchone()[0] == 0 + + +def test_update_cache_uncaches_empty_directory(config: Config) -> None: + """Test that a previously-cached directory that no longer contains audio files is cleared from the cache.""" + release_dir = config.music_source_dir / TEST_RELEASE_1.name + shutil.copytree(TEST_RELEASE_1, release_dir) + update_cache_for_releases(config, [release_dir]) + shutil.rmtree(release_dir) + release_dir.mkdir() + update_cache_for_releases(config, [release_dir]) + with connect(config) as conn: + cursor = conn.execute("SELECT COUNT(*) FROM releases") + assert cursor.fetchone()[0] == 0 + + @pytest.mark.usefixtures("seeded_cache") def test_list_releases(config: Config) -> None: albums = list(list_releases(config)) assert albums == [ CachedRelease( + datafile_mtime=albums[0].datafile_mtime, # IGNORE THIS FIELD.
id="r1", source_path=Path(config.music_source_dir / "r1"), cover_image_path=None, virtual_dirname="r1", title="Release 1", - release_type="album", - release_year=2023, + type="album", + year=2023, + multidisc=False, new=True, genres=["Deep House", "Techno"], labels=["Silk Music"], @@ -213,15 +354,18 @@ def test_list_releases(config: Config) -> None: CachedArtist(name="Techno Man", role="main"), CachedArtist(name="Bass Man", role="main"), ], + formatted_artists="Techno Man;Bass Man", ), CachedRelease( + datafile_mtime=albums[1].datafile_mtime, # IGNORE THIS FIELD. id="r2", source_path=Path(config.music_source_dir / "r2"), cover_image_path=Path(config.music_source_dir / "r2" / "cover.jpg"), virtual_dirname="r2", title="Release 2", - release_type="album", - release_year=2021, + type="album", + year=2021, + multidisc=False, new=False, genres=["Classical"], labels=["Native State"], @@ -229,18 +373,22 @@ def test_list_releases(config: Config) -> None: CachedArtist(name="Violin Woman", role="main"), CachedArtist(name="Conductor Woman", role="guest"), ], + formatted_artists="Violin Woman feat. Conductor Woman", ), ] - assert list(list_releases(config, sanitized_artist_filter="Techno Man")) == [ + albums = list(list_releases(config, sanitized_artist_filter="Techno Man")) + assert albums == [ CachedRelease( + datafile_mtime=albums[0].datafile_mtime, # IGNORE THIS FIELD. id="r1", source_path=Path(config.music_source_dir / "r1"), cover_image_path=None, virtual_dirname="r1", title="Release 1", - release_type="album", - release_year=2023, + type="album", + year=2023, + multidisc=False, new=True, genres=["Deep House", "Techno"], labels=["Silk Music"], @@ -248,18 +396,22 @@ def test_list_releases(config: Config) -> None: CachedArtist(name="Techno Man", role="main"), CachedArtist(name="Bass Man", role="main"), ], + formatted_artists="Techno Man;Bass Man", ), ] - assert list(list_releases(config, sanitized_genre_filter="Techno")) == [ + albums = list(list_releases(config, sanitized_genre_filter="Techno")) + assert albums == [ CachedRelease( + datafile_mtime=albums[0].datafile_mtime, # IGNORE THIS FIELD. id="r1", source_path=Path(config.music_source_dir / "r1"), cover_image_path=None, virtual_dirname="r1", title="Release 1", - release_type="album", - release_year=2023, + type="album", + year=2023, + multidisc=False, new=True, genres=["Deep House", "Techno"], labels=["Silk Music"], @@ -267,18 +419,22 @@ def test_list_releases(config: Config) -> None: CachedArtist(name="Techno Man", role="main"), CachedArtist(name="Bass Man", role="main"), ], + formatted_artists="Techno Man;Bass Man", ), ] - assert list(list_releases(config, sanitized_label_filter="Silk Music")) == [ + albums = list(list_releases(config, sanitized_label_filter="Silk Music")) + assert albums == [ CachedRelease( + datafile_mtime=albums[0].datafile_mtime, # IGNORE THIS FIELD. 
id="r1", source_path=Path(config.music_source_dir / "r1"), cover_image_path=None, virtual_dirname="r1", title="Release 1", - release_type="album", - release_year=2023, + type="album", + year=2023, + multidisc=False, new=True, genres=["Deep House", "Techno"], labels=["Silk Music"], @@ -286,6 +442,7 @@ def test_list_releases(config: Config) -> None: CachedArtist(name="Techno Man", role="main"), CachedArtist(name="Bass Man", role="main"), ], + formatted_artists="Techno Man;Bass Man", ), ] @@ -295,6 +452,7 @@ def test_get_release_files(config: Config) -> None: rf = get_release_files(config, "r1") assert rf.tracks == [ CachedTrack( + source_mtime=rf.tracks[0].source_mtime, # IGNORE THIS FIELD. id="t1", source_path=Path(config.music_source_dir / "r1" / "01.m4a"), virtual_filename="01.m4a", @@ -307,8 +465,10 @@ def test_get_release_files(config: Config) -> None: CachedArtist(name="Techno Man", role="main"), CachedArtist(name="Bass Man", role="main"), ], + formatted_artists="Techno Man;Bass Man", ), CachedTrack( + source_mtime=rf.tracks[1].source_mtime, # IGNORE THIS FIELD. id="t2", source_path=Path(config.music_source_dir / "r1" / "02.m4a"), virtual_filename="02.m4a", @@ -321,6 +481,7 @@ def test_get_release_files(config: Config) -> None: CachedArtist(name="Techno Man", role="main"), CachedArtist(name="Bass Man", role="main"), ], + formatted_artists="Techno Man;Bass Man", ), ] assert rf.cover is None diff --git a/testdata/cache/Test Release 1/ignorethis.file b/testdata/cache/Test Release 1/ignorethis.file new file mode 100644 index 0000000..e69de29 diff --git a/testdata/cache/Test Release 2/.rose.ilovecarly.toml b/testdata/cache/Test Release 2/.rose.ilovecarly.toml new file mode 100644 index 0000000..8d40824 --- /dev/null +++ b/testdata/cache/Test Release 2/.rose.ilovecarly.toml @@ -0,0 +1 @@ +new = true diff --git a/testdata/cache/Test Release 2/.rose.toml b/testdata/cache/Test Release 2/.rose.toml deleted file mode 100644 index 5ec810f..0000000 --- a/testdata/cache/Test Release 2/.rose.toml +++ /dev/null @@ -1,2 +0,0 @@ -uuid = "ilovecarly" -new = true