Skip to content

Commit

Permalink
Cleanup Matching logic + add tests (#1472)
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelveldt authored Jul 8, 2024
1 parent 4e07a5d commit 95a3060
Show file tree
Hide file tree
Showing 22 changed files with 455 additions and 110 deletions.
47 changes: 30 additions & 17 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Module",
"type": "debugpy",
"request": "launch",
"module": "music_assistant",
"justMyCode": false,
"args":[
"--log-level", "debug"
],
"env": {"PYTHONDEVMODE": "1"}
}
]
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Music Assistant: Server",
"type": "debugpy",
"request": "launch",
"module": "music_assistant",
"justMyCode": false,
"args": ["--log-level", "debug"],
"env": { "PYTHONDEVMODE": "1" }
},
{
"name": "Music Assistant: Tests",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"justMyCode": false,
"args": ["tests"]
},
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}
6 changes: 4 additions & 2 deletions music_assistant/common/models/media_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,6 @@ class MediaItemMetadata(DataClassDictMixin):
performers: set[str] | None = None
preview: str | None = None
popularity: int | None = None
# cache_checksum: optional value to (in)validate cache / detect changes (used for playlists)
cache_checksum: str | None = None
# last_refresh: timestamp the (full) metadata was last collected
last_refresh: int | None = None

Expand Down Expand Up @@ -478,6 +476,10 @@ class Playlist(MediaItem):
owner: str = ""
is_editable: bool = False

# cache_checksum: optional value to (in)validate the cache and
# detect changes to the playlist's track listing
cache_checksum: str | None = None


@dataclass(kw_only=True)
class Radio(MediaItem):
Expand Down
2 changes: 1 addition & 1 deletion music_assistant/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

API_SCHEMA_VERSION: Final[int] = 24
MIN_SCHEMA_VERSION: Final[int] = 24
DB_SCHEMA_VERSION: Final[int] = 2


MASS_LOGGER_NAME: Final[str] = "music_assistant"

Expand Down
8 changes: 2 additions & 6 deletions music_assistant/server/controllers/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,7 @@
from music_assistant.common.helpers.json import json_dumps, json_loads
from music_assistant.common.models.config_entries import ConfigEntry, ConfigValueType
from music_assistant.common.models.enums import ConfigEntryType
from music_assistant.constants import (
DB_SCHEMA_VERSION,
DB_TABLE_CACHE,
DB_TABLE_SETTINGS,
MASS_LOGGER_NAME,
)
from music_assistant.constants import DB_TABLE_CACHE, DB_TABLE_SETTINGS, MASS_LOGGER_NAME
from music_assistant.server.helpers.database import DatabaseConnection
from music_assistant.server.models.core_controller import CoreController

Expand All @@ -28,6 +23,7 @@

LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.cache")
CONF_CLEAR_CACHE = "clear_cache"
DB_SCHEMA_VERSION = 1


class CacheController(CoreController):
Expand Down
10 changes: 3 additions & 7 deletions music_assistant/server/controllers/media/albums.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from music_assistant.server.helpers.compare import (
compare_album,
compare_artists,
compare_media_item,
loose_compare_strings,
)

Expand Down Expand Up @@ -119,11 +120,6 @@ async def get(
prov_track = await self.mass.music.tracks.get_provider_item(
track_prov_map.item_id, track_prov_map.provider_instance, force_refresh=True
)
if (
prov_track.metadata.cache_checksum
== prov_album_track.metadata.cache_checksum
):
continue
await self.mass.music.tracks._update_library_item(
prov_album_track.item_id, prov_track, True
)
Expand Down Expand Up @@ -442,9 +438,9 @@ async def find_prov_match(provider: MusicProvider):
for search_result_item in search_result:
if not search_result_item.available:
continue
if not compare_album(db_album, search_result_item):
if not compare_media_item(db_album, search_result_item):
continue
# we must fetch the full album version, search results are simplified objects
# we must fetch the full album version, search results can be simplified objects
prov_album = await self.get_provider_item(
search_result_item.item_id,
search_result_item.provider,
Expand Down
2 changes: 1 addition & 1 deletion music_assistant/server/controllers/media/playlists.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ async def tracks(
lazy=not force_refresh,
)
prov_map = next(x for x in playlist.provider_mappings)
cache_checksum = playlist.metadata.cache_checksum
cache_checksum = playlist.cache_checksum
tracks = await self._get_provider_playlist_tracks(
prov_map.item_id,
prov_map.provider_instance,
Expand Down
5 changes: 3 additions & 2 deletions music_assistant/server/controllers/media/tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)
from music_assistant.server.helpers.compare import (
compare_artists,
compare_media_item,
compare_track,
loose_compare_strings,
)
Expand Down Expand Up @@ -268,9 +269,9 @@ async def _match(self, db_track: Track) -> None:
if not search_result_item.available:
continue
# do a basic compare first
if not compare_track(db_track, search_result_item, strict=False):
if not compare_media_item(db_track, search_result_item, strict=False):
continue
# we must fetch the full version, search results are simplified objects
# we must fetch the full version, search results can be simplified objects
prov_track = await self.get_provider_item(
search_result_item.item_id,
search_result_item.provider,
Expand Down
4 changes: 2 additions & 2 deletions music_assistant/server/controllers/music.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from contextlib import suppress
from itertools import zip_longest
from math import inf
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Final

from music_assistant.common.helpers.datetime import utc_timestamp
from music_assistant.common.helpers.global_cache import get_global_cache_value
Expand All @@ -33,7 +33,6 @@
from music_assistant.common.models.provider import SyncTask
from music_assistant.common.models.streamdetails import LoudnessMeasurement
from music_assistant.constants import (
DB_SCHEMA_VERSION,
DB_TABLE_ALBUM_ARTISTS,
DB_TABLE_ALBUM_TRACKS,
DB_TABLE_ALBUMS,
Expand Down Expand Up @@ -66,6 +65,7 @@
CONF_SYNC_INTERVAL = "sync_interval"
CONF_DELETED_PROVIDERS = "deleted_providers"
CONF_ADD_LIBRARY_ON_PLAY = "add_library_on_play"
DB_SCHEMA_VERSION: Final[int] = 2


class MusicController(CoreController):
Expand Down
100 changes: 69 additions & 31 deletions music_assistant/server/helpers/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
)

IGNORE_VERSIONS = (
"remaster",
"explicit",
"explicit", # explicit is matched separately
"music from and inspired by the motion picture",
"original soundtrack",
"hi-res",
"hi-res", # quality is handled separately
)


Expand Down Expand Up @@ -107,18 +106,21 @@ def compare_album(
# for strict matching we REQUIRE both items to be a real album object
assert isinstance(base_item, Album)
assert isinstance(compare_item, Album)
# compare year
if base_item.year and compare_item.year and base_item.year != compare_item.year:
return False
# compare explicitness
if compare_explicit(base_item.metadata, compare_item.metadata) is False:
return False
# compare album artist
return compare_artists(base_item.artists, compare_item.artists, True)
# compare album artist(s)
return compare_artists(base_item.artists, compare_item.artists, not strict)


def compare_track(
base_item: Track | ItemMapping,
compare_item: Track | ItemMapping,
base_item: Track,
compare_item: Track,
strict: bool = True,
track_albums: list[Album | ItemMapping] | None = None,
track_albums: list[Album] | None = None,
) -> bool:
"""Compare two track items and return True if they match."""
if base_item is None or compare_item is None:
Expand All @@ -142,7 +144,22 @@ def compare_track(
)
if external_id_match is not None:
return external_id_match
# return early on exact albumtrack match = 100% match
if (
base_item.album
and compare_item.album
and compare_album(base_item.album, compare_item.album, False)
and base_item.disc_number
and compare_item.disc_number
and base_item.track_number
and compare_item.track_number
and base_item.disc_number == compare_item.disc_number
and base_item.track_number == compare_item.track_number
):
return True

## fallback to comparing on attributes

# compare name
if not compare_strings(base_item.name, compare_item.name, strict=True):
return False
Expand All @@ -159,26 +176,17 @@ def compare_track(
compare_item.metadata.explicit = compare_item.album.metadata.explicit
if strict and compare_explicit(base_item.metadata, compare_item.metadata) is False:
return False
if not strict and not (base_item.album or track_albums):
# in non-strict mode, the album does not have to match (but duration needs to)
return abs(base_item.duration - compare_item.duration) <= 2
# exact albumtrack match = 100% match
if (
base_item.album
and compare_item.album
and compare_album(base_item.album, compare_item.album, False)
and base_item.disc_number == compare_item.disc_number
and base_item.track_number == compare_item.track_number
):
return True

# fallback: exact album match and (near-exact) track duration match
if (
base_item.album is not None
and compare_item.album is not None
and (base_item.track_number == 0 or compare_item.track_number == 0)
and compare_album(base_item.album, compare_item.album, False)
and abs(base_item.duration - compare_item.duration) <= 3
):
return True

# fallback: additional compare albums provided for base track
if (
compare_item.album is not None
Expand All @@ -188,13 +196,28 @@ def compare_track(
for track_album in track_albums:
if compare_album(track_album, compare_item.album, False):
return True
# accept last resort: albumless track and (near) exact duration
# otherwise fail all other cases
return (

# fallback edge case: albumless track with same duration
if (
base_item.album is None
and compare_item.album is None
and abs(base_item.duration - compare_item.duration) <= 1
)
and base_item.disc_number == 0
and compare_item.disc_number == 0
and base_item.track_number == 0
and compare_item.track_number == 0
and base_item.duration == compare_item.duration
):
return True

if strict:
# in strict mode, we require an exact album match so return False here
return False

# Accept last resort (in non strict mode): (near) exact duration,
# otherwise fail all other cases.
# Note that at this stage, all other info already matches,
# such as title, artist etc.
return abs(base_item.duration - compare_item.duration) <= 2


def compare_playlist(
Expand Down Expand Up @@ -265,14 +288,22 @@ def compare_artists(
any_match: bool = True,
) -> bool:
"""Compare two lists of artists and return True if both lists match (exactly)."""
if not base_items and not compare_items:
return True
if not base_items or not compare_items:
return False
# match if first artist matches in both lists
if compare_artist(base_items[0], compare_items[0]):
return True
# compare the artist lists
matches = 0
for base_item in base_items:
for compare_item in compare_items:
if compare_artist(base_item, compare_item):
if any_match:
return True
matches += 1
return len(base_items) == matches
return len(base_items) == len(compare_items) == matches


def compare_albums(
Expand Down Expand Up @@ -399,7 +430,7 @@ def compare_strings(str1: str, str2: str, strict: bool = True) -> bool:
if create_safe_string(str1) == create_safe_string(str2):
return True
# last resort: use difflib to compare strings
required_accuracy = 0.91 if len(str1) > 8 else 0.85
required_accuracy = 0.9 if (len(str1) + len(str2)) > 18 else 0.8
return SequenceMatcher(a=str1_lower, b=str2).ratio() > required_accuracy


Expand All @@ -415,11 +446,18 @@ def compare_version(base_version: str, compare_version: str) -> bool:
return False
if base_version and not compare_version:
return False
if " " not in base_version:
return compare_strings(base_version, compare_version)

if " " not in base_version and " " not in compare_version:
return compare_strings(base_version, compare_version, False)

# do this the hard way as sometimes the version string is in the wrong order
base_versions = base_version.lower().split(" ").sort()
compare_versions = compare_version.lower().split(" ").sort()
base_versions = sorted(base_version.lower().split(" "))
compare_versions = sorted(compare_version.lower().split(" "))
# filter out words we can ignore (such as 'version')
ignore_words = [*IGNORE_VERSIONS, "version", "edition", "variant", "versie", "versione"]
base_versions = [x for x in base_versions if x not in ignore_words]
compare_versions = [x for x in compare_versions if x not in ignore_words]

return base_versions == compare_versions


Expand Down
6 changes: 4 additions & 2 deletions music_assistant/server/models/music_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,10 @@ async def sync_library(self, media_types: tuple[MediaType, ...]) -> None:
library_item = await controller.add_item_to_library(
prov_item, metadata_lookup=False
)
elif library_item.metadata.cache_checksum != prov_item.metadata.cache_checksum:
# existing dbitem checksum changed
elif getattr(library_item, "cache_checksum", None) != getattr(
prov_item, "cache_checksum", None
):
# existing dbitem checksum changed (playlists only)
library_item = await controller.update_item_in_library(
library_item.item_id, prov_item
)
Expand Down
2 changes: 1 addition & 1 deletion music_assistant/server/providers/apple_music/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ def _parse_playlist(self, playlist_obj) -> Playlist:
playlist.metadata.description = description.get("standard")
playlist.is_editable = attributes.get("canEdit", False)
if checksum := attributes.get("lastModifiedDate"):
playlist.metadata.cache_checksum = checksum
playlist.cache_checksum = checksum
return playlist

async def _get_all_items(self, endpoint, key="data", **kwargs) -> list[dict]:
Expand Down
Loading

0 comments on commit 95a3060

Please sign in to comment.