diff --git a/rose/cache.py b/rose/cache.py
index eeb4220..2a03e38 100644
--- a/rose/cache.py
+++ b/rose/cache.py
@@ -1,8 +1,6 @@
-import binascii
 import hashlib
 import logging
 import os
-import random
 import re
 import sqlite3
 import time
@@ -43,39 +41,6 @@ def connect(c: Config) -> Iterator[sqlite3.Connection]:
         conn.close()
 
 
-@contextmanager
-def transaction(conn: sqlite3.Connection) -> Iterator[sqlite3.Connection]:
-    """
-    A simple context wrapper for a database transaction. If connection is null,
-    a new connection is created.
-    """
-    tx_log_id = binascii.b2a_hex(random.randbytes(8)).decode()
-    start_time = time.time()
-
-    # If we're already in a transaction, don't create a nested transaction.
-    if conn.in_transaction:
-        logger.debug(f"Transaction {tx_log_id}. Starting nested transaction, NoOp.")
-        yield conn
-        logger.debug(
-            f"Transaction {tx_log_id}. End of nested transaction. "
-            f"Duration: {time.time() - start_time}."
-        )
-        return
-
-    logger.debug(f"Transaction {tx_log_id}. Starting transaction from conn.")
-    with conn:
-        # We BEGIN IMMEDIATE to avoid deadlocks, which pisses the hell out of me because no one's
-        # documenting this properly and SQLite just dies without respecting the timeout and without
-        # a reasonable error message. Absurd.
-        # - https://sqlite.org/forum/forumpost/a3db6dbff1cd1d5d
-        conn.execute("BEGIN IMMEDIATE")
-        yield conn
-        logger.debug(
-            f"Transaction {tx_log_id}. End of transaction from conn. "
-            f"Duration: {time.time() - start_time}."
-        )
-
-
 def migrate_database(c: Config) -> None:
     """
     "Migrate" the database. If the schema in the database does not match that on disk, then nuke the
@@ -376,6 +341,7 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None:
 
     # Now iterate over all releases in the source directory. Leverage mtime from stat to determine
    # whether to even check the file tags or not. Only perform database updates if necessary.
+    loop_start = time.time()
     for source_path, preexisting_release_id, files in dir_tree:
         logger.debug(f"Updating release {source_path.name}")
         # Check to see if we should even process the directory. If the directory does not have any
@@ -494,98 +460,94 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None:
         track_ids_to_insert: set[str] = set()
         # This value is set to true if we read an AudioFile and used it to confirm the release tags.
         pulled_release_tags = False
-        with connect(c) as conn, transaction(conn) as conn:
-            for f in files:
-                if not any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS):
-                    continue
-                track_path = Path(f.path).resolve()
-                cached_track = cached_tracks.get(str(track_path), None)
-                track_mtime = str(os.stat(track_path).st_mtime)
-                # Skip re-read if we can reuse a cached entry.
-                if cached_track and track_mtime == cached_track.source_mtime:
-                    logger.debug(f"Track cache hit (mtime) for {f}, reusing cached data")
-                    tracks.append(cached_track)
-                    unknown_cached_tracks.remove(str(track_path))
-                    continue
-
-                # Otherwise, read tags from disk and construct a new cached_track.
-                logger.debug(f"Track cache miss for {f}, reading tags from disk")
-                tags = AudioFile.from_file(track_path)
-
-                # Now that we're here, pull the release tags. We also need them to compute the
-                # formatted artist string.
-                if not pulled_release_tags:
-                    release_title = tags.album or "Unknown Release"
-                    if release_title != release.title:
-                        logger.debug(f"Release title change detected for {source_path}, updating")
-                        release.title = release_title
-                        release_dirty = True
-
-                    release_type = (
-                        tags.release_type.lower()
-                        if tags.release_type
-                        and tags.release_type.lower() in SUPPORTED_RELEASE_TYPES
-                        else "unknown"
-                    )
-                    if release_type != release.type:
-                        logger.debug(f"Release type change detected for {source_path}, updating")
-                        release.type = release_type
-                        release_dirty = True
-
-                    if tags.year != release.year:
-                        logger.debug(f"Release year change detected for {source_path}, updating")
-                        release.year = tags.year
-                        release_dirty = True
-
-                    if set(tags.genre) != set(release.genres):
-                        logger.debug(f"Release genre change detected for {source_path}, updating")
-                        release.genres = tags.genre
-                        release_dirty = True
-
-                    if set(tags.label) != set(release.labels):
-                        logger.debug(f"Release label change detected for {source_path}, updating")
-                        release.labels = tags.label
-                        release_dirty = True
-
-                    release_artists = []
-                    for role, names in asdict(tags.album_artists).items():
-                        for name in names:
-                            release_artists.append(CachedArtist(name=name, role=role))
-                    if release_artists != release.artists:
-                        logger.debug(f"Release artists change detected for {source_path}, updating")
-                        release.artists = release_artists
-                        release_dirty = True
-
-                    release_formatted_artists = format_artist_string(
-                        tags.album_artists, release.genres
+        for f in files:
+            if not any(f.name.endswith(ext) for ext in SUPPORTED_EXTENSIONS):
+                continue
+            track_path = Path(f.path).resolve()
+            cached_track = cached_tracks.get(str(track_path), None)
+            track_mtime = str(os.stat(track_path).st_mtime)
+            # Skip re-read if we can reuse a cached entry.
+            if cached_track and track_mtime == cached_track.source_mtime:
+                logger.debug(f"Track cache hit (mtime) for {f}, reusing cached data")
+                tracks.append(cached_track)
+                unknown_cached_tracks.remove(str(track_path))
+                continue
+
+            # Otherwise, read tags from disk and construct a new cached_track.
+            logger.debug(f"Track cache miss for {f}, reading tags from disk")
+            tags = AudioFile.from_file(track_path)
+
+            # Now that we're here, pull the release tags. We also need them to compute the
+            # formatted artist string.
+            if not pulled_release_tags:
+                release_title = tags.album or "Unknown Release"
+                if release_title != release.title:
+                    logger.debug(f"Release title change detected for {source_path}, updating")
+                    release.title = release_title
+                    release_dirty = True
+
+                release_type = (
+                    tags.release_type.lower()
+                    if tags.release_type and tags.release_type.lower() in SUPPORTED_RELEASE_TYPES
+                    else "unknown"
+                )
+                if release_type != release.type:
+                    logger.debug(f"Release type change detected for {source_path}, updating")
+                    release.type = release_type
+                    release_dirty = True
+
+                if tags.year != release.year:
+                    logger.debug(f"Release year change detected for {source_path}, updating")
+                    release.year = tags.year
+                    release_dirty = True
+
+                if set(tags.genre) != set(release.genres):
+                    logger.debug(f"Release genre change detected for {source_path}, updating")
+                    release.genres = tags.genre
+                    release_dirty = True
+
+                if set(tags.label) != set(release.labels):
+                    logger.debug(f"Release label change detected for {source_path}, updating")
+                    release.labels = tags.label
+                    release_dirty = True
+
+                release_artists = []
+                for role, names in asdict(tags.album_artists).items():
+                    for name in names:
+                        release_artists.append(CachedArtist(name=name, role=role))
+                if release_artists != release.artists:
+                    logger.debug(f"Release artists change detected for {source_path}, updating")
+                    release.artists = release_artists
+                    release_dirty = True
+
+                release_formatted_artists = format_artist_string(tags.album_artists, release.genres)
+                if release_formatted_artists != release.formatted_artists:
+                    logger.debug(
+                        f"Release formatted artists change detected for {source_path}, updating"
                     )
-                    if release_formatted_artists != release.formatted_artists:
-                        logger.debug(
-                            f"Release formatted artists change detected for {source_path}, updating"
-                        )
-                        release.formatted_artists = release_formatted_artists
-                        release_dirty = True
-
-                    # Calculate the release's virtual dirname.
-                    release_virtual_dirname = release.formatted_artists + "-"
-                    if release.year:
-                        release_virtual_dirname += str(release.year) + ". "
-                    release_virtual_dirname += release.title
-                    if release.type not in ["album", "unknown"]:
-                        release_virtual_dirname += " - " + release.type.title()
-                    if release.genres:
-                        release_virtual_dirname += " [" + ";".join(release.genres) + "]"
-                    if release.labels:
-                        release_virtual_dirname += " {" + ";".join(release.labels) + "}"
-                    if release.new:
-                        release_virtual_dirname += " +NEW!+"
-                    release_virtual_dirname = sanitize_filename(release_virtual_dirname)
-                    # And in case of a name collision, add an extra number at the end. Iterate to
-                    # find the first unused number.
-                    original_virtual_dirname = release_virtual_dirname
-                    collision_no = 1
+                    release.formatted_artists = release_formatted_artists
+                    release_dirty = True
+
+                # Calculate the release's virtual dirname.
+                release_virtual_dirname = release.formatted_artists + "-"
+                if release.year:
+                    release_virtual_dirname += str(release.year) + ". "
+                release_virtual_dirname += release.title
+                if release.type not in ["album", "unknown"]:
+                    release_virtual_dirname += " - " + release.type.title()
+                if release.genres:
+                    release_virtual_dirname += " [" + ";".join(release.genres) + "]"
+                if release.labels:
+                    release_virtual_dirname += " {" + ";".join(release.labels) + "}"
+                if release.new:
+                    release_virtual_dirname += " +NEW!+"
+                release_virtual_dirname = sanitize_filename(release_virtual_dirname)
+                # And in case of a name collision, add an extra number at the end. Iterate to
+                # find the first unused number.
+                original_virtual_dirname = release_virtual_dirname
+                collision_no = 2
+                with connect(c) as conn:
                     while True:
-                        collision_no += 1
                         cursor = conn.execute(
                             "SELECT EXISTS(SELECT * FROM releases WHERE virtual_dirname = ? AND id <> ?)",  # noqa: E501
                             (release_virtual_dirname, release.id),
@@ -593,84 +555,89 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None:
                         if not cursor.fetchone()[0]:
                             break
                         release_virtual_dirname = f"{original_virtual_dirname} [{collision_no}]"
+                        collision_no += 1
 
-                    if release_virtual_dirname != release.virtual_dirname:
-                        logger.debug(
-                            f"Release virtual dirname change detected for {source_path}, updating"
-                        )
-                        release.virtual_dirname = release_virtual_dirname
-                        release_dirty = True
-
-                # And now create the cached track.
-                track = CachedTrack(
-                    id=str(uuid6.uuid7()),
-                    source_path=track_path,
-                    source_mtime=track_mtime,
-                    virtual_filename="",
-                    title=tags.title or "Unknown Title",
-                    release_id=release.id,
-                    track_number=tags.track_number or "1",
-                    disc_number=tags.disc_number or "1",
-                    duration_seconds=tags.duration_sec,
-                    artists=[],
-                    formatted_artists=format_artist_string(tags.artists, release.genres),
-                )
-                tracks.append(track)
-                for role, names in asdict(tags.artists).items():
-                    for name in names:
-                        track.artists.append(CachedArtist(name=name, role=role))
-                track_ids_to_insert.add(track.id)
-
-            # Now calculate whether this release is multidisc, and then assign virtual_filenames for
-            # each track that lacks one.
-            multidisc = len({t.disc_number for t in tracks}) > 1
-            if release.multidisc != multidisc:
-                logger.debug(f"Release multidisc change detected for {source_path}, updating")
-                release_dirty = True
-                release.multidisc = multidisc
-            # Use this set to avoid name collisions.
-            seen_track_names: set[str] = set()
-            for i, t in enumerate(tracks):
-                virtual_filename = ""
-                if multidisc and t.disc_number:
-                    virtual_filename += f"{t.disc_number:0>2}-"
-                if t.track_number:
-                    virtual_filename += f"{t.track_number:0>2}. "
-                virtual_filename += t.title or "Unknown Title"
-                if release.type in ["compilation", "soundtrack", "remix", "djmix", "mixtape"]:
-                    virtual_filename += f" (by {t.formatted_artists})"
-                virtual_filename += t.source_path.suffix
-                virtual_filename = sanitize_filename(virtual_filename)
-                # And in case of a name collision, add an extra number at the end. Iterate to find
-                # the first unused number.
-                original_virtual_filename = virtual_filename
-                collision_no = 1
-                while True:
-                    collision_no += 1
-                    if virtual_filename not in seen_track_names:
-                        break
-                    virtual_filename = f"{original_virtual_filename} [{collision_no}]"
-                seen_track_names.add(virtual_filename)
-                if virtual_filename != t.virtual_filename:
+                if release_virtual_dirname != release.virtual_dirname:
                     logger.debug(
-                        f"Track virtual filename change detected for {t.source_path}, updating"
+                        f"Release virtual dirname change detected for {source_path}, updating"
                     )
-                    tracks[i].virtual_filename = virtual_filename
-                    track_ids_to_insert.add(t.id)
-
-            # Database executions.
-            logger.debug(f"Deleting {len(unknown_cached_tracks)} unknown tracks from cache")
-            conn.execute(
-                f"""
-                DELETE FROM tracks
-                WHERE release_id = ?
-                AND source_path IN ({','.join(['?']*len(unknown_cached_tracks))})
-                """,
-                [release.id, *unknown_cached_tracks],
+                    release.virtual_dirname = release_virtual_dirname
+                    release_dirty = True
+
+            # And now create the cached track.
+            track = CachedTrack(
+                id=str(uuid6.uuid7()),
+                source_path=track_path,
+                source_mtime=track_mtime,
+                virtual_filename="",
+                title=tags.title or "Unknown Title",
+                release_id=release.id,
+                track_number=tags.track_number or "1",
+                disc_number=tags.disc_number or "1",
+                duration_seconds=tags.duration_sec,
+                artists=[],
+                formatted_artists=format_artist_string(tags.artists, release.genres),
             )
+            tracks.append(track)
+            for role, names in asdict(tags.artists).items():
+                for name in names:
+                    track.artists.append(CachedArtist(name=name, role=role))
+            track_ids_to_insert.add(track.id)
+
+        # Now calculate whether this release is multidisc, and then assign virtual_filenames for
+        # each track that lacks one.
+        multidisc = len({t.disc_number for t in tracks}) > 1
+        if release.multidisc != multidisc:
+            logger.debug(f"Release multidisc change detected for {source_path}, updating")
+            release_dirty = True
+            release.multidisc = multidisc
+        # Use this set to avoid name collisions.
+        seen_track_names: set[str] = set()
+        for i, t in enumerate(tracks):
+            virtual_filename = ""
+            if multidisc and t.disc_number:
+                virtual_filename += f"{t.disc_number:0>2}-"
+            if t.track_number:
+                virtual_filename += f"{t.track_number:0>2}. "
+            virtual_filename += t.title or "Unknown Title"
+            if release.type in ["compilation", "soundtrack", "remix", "djmix", "mixtape"]:
+                virtual_filename += f" (by {t.formatted_artists})"
+            virtual_filename += t.source_path.suffix
+            virtual_filename = sanitize_filename(virtual_filename)
+            # And in case of a name collision, add an extra number at the end. Iterate to find
+            # the first unused number.
+            original_virtual_filename = virtual_filename
+            collision_no = 2
+            while True:
+                if virtual_filename not in seen_track_names:
+                    break
+                virtual_filename = f"{original_virtual_filename} [{collision_no}]"
+                collision_no += 1
+            seen_track_names.add(virtual_filename)
+            if virtual_filename != t.virtual_filename:
+                logger.debug(
+                    f"Track virtual filename change detected for {t.source_path}, updating"
+                )
+                tracks[i].virtual_filename = virtual_filename
+                track_ids_to_insert.add(t.id)
+
+        # Database executions. Only start a database connection if we actually have operations to
+        # run. Otherwise, continue early.
+        if not unknown_cached_tracks and not release_dirty and not track_ids_to_insert:
+            continue
 
-            if release_dirty or track_ids_to_insert:
-                logger.info(f"Applying cache updates for release {source_path.name}")
+        logger.info(f"Applying cache updates for release {source_path.name}")
+        with connect(c) as conn:
+            if unknown_cached_tracks:
+                logger.debug(f"Deleting {len(unknown_cached_tracks)} unknown tracks from cache")
+                conn.execute(
+                    f"""
+                    DELETE FROM tracks
+                    WHERE release_id = ?
+                    AND source_path IN ({','.join(['?']*len(unknown_cached_tracks))})
+                    """,
+                    [release.id, *unknown_cached_tracks],
+                )
 
             if release_dirty:
                 logger.debug(f"Upserting dirty release in database: {source_path}")
@@ -812,6 +779,8 @@ def update_cache_for_releases(c: Config, release_dirs: list[Path]) -> None:
                     (track.id, art.name, sanitize_filename(art.name), art.role, art.role),
                 )
 
+    logger.debug(f"Update loop time: {time.time() - loop_start}")
+
 
 def list_releases(
     c: Config,
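---

Notes on the patterns above. The sketches below are illustrative, not part of the patch, and all helper names are hypothetical.

The deleted transaction() wrapper reduces to the shape below. This is a minimal sketch, assuming the connection is opened in autocommit mode (isolation_level=None), since an explicit BEGIN would otherwise collide with the implicit transaction the sqlite3 module starts on its own:

    import sqlite3
    from collections.abc import Iterator
    from contextlib import contextmanager

    @contextmanager
    def immediate_transaction(conn: sqlite3.Connection) -> Iterator[sqlite3.Connection]:
        # If a transaction is already open, yield through instead of nesting.
        if conn.in_transaction:
            yield conn
            return
        # BEGIN IMMEDIATE acquires the write lock up front, so a concurrent
        # writer waits at BEGIN, subject to the busy timeout, instead of
        # failing mid-transaction as the deleted comment describes.
        # See https://sqlite.org/forum/forumpost/a3db6dbff1cd1d5d
        with conn:  # commits on success, rolls back on exception
            conn.execute("BEGIN IMMEDIATE")
            yield conn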
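The per-track cache hit in update_cache_for_releases() hinges on comparing a stored st_mtime string against a fresh stat(). A minimal sketch of that invalidation test, where cached_mtime stands in for the source_mtime value persisted by the previous scan; comparing as strings mirrors the diff's str(os.stat(...).st_mtime) and presumably sidesteps float round-tripping through the database:

    import os
    from pathlib import Path

    def track_needs_rescan(path: Path, cached_mtime: str | None) -> bool:
        # True when the file is new to the cache or modified since the last scan.
        return cached_mtime != str(os.stat(path).st_mtime)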
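Both collision loops now initialize collision_no to 2 and increment only after trying a name, so the first duplicate becomes "name [2]"; this is behaviorally equivalent to the old increment-first loops, just easier to follow. The same scheme as a standalone function, where is_taken stands in for the SELECT EXISTS query (dirnames) or the seen_track_names membership test (filenames):

    from collections.abc import Callable

    def dedupe_name(name: str, is_taken: Callable[[str], bool]) -> str:
        # Append " [2]", " [3]", ... until the candidate no longer collides.
        candidate = name
        collision_no = 2
        while is_taken(candidate):
            candidate = f"{name} [{collision_no}]"
            collision_no += 1
        return candidate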
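The DELETE FROM tracks statement builds its IN clause with one bound placeholder per stale path, which keeps the statement fully parameterized for a variable number of values. The general pattern, sketched as a hypothetical helper:

    import sqlite3

    def delete_tracks(conn: sqlite3.Connection, release_id: str, paths: list[str]) -> None:
        placeholders = ",".join("?" * len(paths))  # e.g. "?,?,?" for three paths
        conn.execute(
            f"DELETE FROM tracks WHERE release_id = ? AND source_path IN ({placeholders})",
            [release_id, *paths],
        )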