Skip to content

Commit

Permalink
artist: parse singleton artist a bit more reliably
Browse files Browse the repository at this point in the history
  • Loading branch information
snejus committed Dec 9, 2024
1 parent e1057e2 commit 27a7b66
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 36 deletions.
17 changes: 8 additions & 9 deletions beetsplug/bandcamp/catalognum.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,15 @@ class Catalognum:
anywhere = cached_patternprop(rf"({MATCH}(?:\ [-/]\ {MATCH})?)", re.VERBOSE)
in_album_pat = cached_patternprop(
r"""
(^\d*[A-Z]+\d+)(?::|\s*[|-])\s# '^ABC123: ' or '^ABC123 - ' or '^ABC123 | '
(^\d*[A-Z$]+\d+) # ^ABC123, ^A$C123
(?:
:\s+ # ': '
| \s+[|-]\s+ # ' | ', ' - '
| -\s+ # '- '
| \s+- # ' -'
)
# or
| \s[|-]\s([A-Z]+\d+$) # ' - ABC123$' or ' | ABC123$'
| \s[|-]\s([A-Z]{2,}\d+$) # ' - ABC123$' or ' | ABC123$'
# or
| [([] # just about anything within parens or brackets
(?!Part|VA\b|LP\b) # does not start with 'Part', 'VA', 'LP'
Expand All @@ -134,13 +140,6 @@ class Catalognum:
label: str
artists_and_titles: Iterable[str]

# Search the album name with ``in_album_pat`` and return the first truthy
# captured group of the match; return None when the pattern does not match.
@classmethod
def from_album(cls, album: str) -> str | None:
if m := cls.in_album_pat.search(album):
return next(filter(None, m.groups()))

return None

@cached_property
def label_variations(self) -> set[str]:
"""Return variations of the label name.
Expand Down
58 changes: 36 additions & 22 deletions beetsplug/bandcamp/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Names:
TITLE_IN_QUOTES = cached_patternprop(r'^(.+[^ -])[ -]+"([^"]+)"$')
NUMBER_PREFIX = cached_patternprop(r"((?<=^)|(?<=- ))\d{1,2}\W+(?=\D)")

meta: JSONDict
meta: JSONDict = field(repr=False)
album_artist: str
album_in_titles: str | None = None
catalognum_in_titles: str | None = None
Expand All @@ -43,29 +43,43 @@ def label(self) -> str:
def original_album(self) -> str:
return str(self.meta["name"])

# True when the release metadata has no "track" key; ``json_tracks`` then
# builds its single entry from the release metadata itself.
@cached_property
def singleton(self) -> bool:
return "track" not in self.meta

@cached_property
def json_tracks(self) -> list[JSONDict]:
try:
return [{**t, **t["item"]} for t in self.meta["track"]["itemListElement"]]
except KeyError as e:
if "track" in str(e):
# a single track release
return [{**self.meta}]
if self.singleton:
return [{**self.meta, "byArtist": {"name": self.album_artist}}]

# no tracks (sold out release or defunct label, potentially)
return []
if tracks := self.meta["track"].get("itemListElement"):
return [{**t, **t["item"]} for t in tracks]

# no tracks (sold out release or defunct label, potentially)
return []

# The "name" value of every entry in ``json_tracks``, in the same order.
@cached_property
def original_titles(self) -> list[str]:
return [i["name"] for i in self.json_tracks]

# Cached result of searching the original album name with
# ``Catalognum.in_album_pat``; None when the pattern finds nothing.
@cached_property
def _catalognum_in_album_match(self) -> re.Match[str] | None:
return Catalognum.in_album_pat.search(self.original_album)

@cached_property
def catalognum_in_album(self) -> str | None:
if cat := Catalognum.from_album(self.original_album):
return cat
if m := self._catalognum_in_album_match:
return next(filter(None, m.groups()))

return None

# Album name with the text matched by the catalogue number pattern removed.
# ``str.replace`` drops every occurrence of the whole matched text (m[0]),
# including any delimiter the pattern consumed; nothing is trimmed afterwards.
# Without a match the original album name is returned unchanged.
@cached_property
def album(self) -> str:
if m := self._catalognum_in_album_match:
return self.original_album.replace(m[0], "")

return self.original_album

@cached_property
def catalognum(self) -> str | None:
for cat in (self.catalognum_in_album, self.catalognum_in_titles):
Expand Down Expand Up @@ -101,9 +115,6 @@ def remove_number_prefix(cls, names: list[str]) -> list[str]:
If there is more than one track and more than half of the track names have
a number prefix, remove it from the names.
"""
if len(names) == 1:
return names

prefix_matches = [cls.NUMBER_PREFIX.search(n) for n in names]
if len([p for p in prefix_matches if p]) > len(names) / 2:
return [
Expand Down Expand Up @@ -217,12 +228,15 @@ def resolve(self) -> None:
if not self.original_titles:
return

self.catalognum_in_titles, titles = self.eject_common_catalognum(
self.remove_album_catalognum(self.split_quoted_titles(self.original_titles))
)
self.album_in_titles, titles = self.eject_album_name(
self.remove_label(
self.normalize_delimiter(self.remove_number_prefix(titles))
)
)
titles = self.split_quoted_titles(self.original_titles)
if self.singleton:
titles = [self.album]
else:
titles = self.remove_album_catalognum(titles)
self.catalognum_in_titles, titles = self.eject_common_catalognum(titles)
titles = self.remove_number_prefix(titles)

titles = self.normalize_delimiter(titles)
titles = self.remove_label(titles)
self.album_in_titles, titles = self.eject_album_name(titles)
self.titles = self.ensure_artist_first(titles)
8 changes: 8 additions & 0 deletions beetsplug/bandcamp/track.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,11 @@ def lyrics(self) -> str:
@cached_property
def name_split(self) -> list[str]:
name = self.name
if (a := self.json_artist) and name.lower().startswith(
artist_start := f"{a.lower()} - "
):
return [name[len(artist_start) :]]

split = self.DELIM_NOT_INSIDE_PARENS.split(name.strip())
if self.json_artist and " - " not in name:
return [self.json_artist.strip(), *split]
Expand Down Expand Up @@ -268,6 +273,9 @@ def artist(self) -> str:
if not self.title_without_remix:
return ""

if self.json_artist and len(self.name_split) == 1:
return self.json_artist

artist = " - ".join(self.name_split[:-1])
initial_artist = artist
artist = Remix.PATTERN.sub("", artist.strip(", -"))
Expand Down
16 changes: 14 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,20 @@ def track_name() -> str:


@pytest.fixture
def json_track(track_name) -> JSONDict:
return {"item": {"@id": "track_url", "name": track_name}, "position": 1}
def track_artist() -> str | None:
return None


# Build one track entry at position 1. Its "item" dict holds a fixed "@id",
# the ``track_name`` value and, only when ``track_artist`` is truthy, a
# "byArtist" dict carrying that artist name.
@pytest.fixture
def json_track(track_name, track_artist) -> JSONDict:
return {
"item": {
"@id": "track_url",
"name": track_name,
**({"byArtist": {"name": track_artist}} if track_artist else {}),
},
"position": 1,
}


@pytest.fixture
Expand Down
1 change: 1 addition & 0 deletions tests/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def test_remove_label(json_meta, expected_titles):
("Album [CAT001]", "", "CAT001"),
("CAT001 - Album", "", "CAT001"),
("CAT001- Album", "", "CAT001"),
("CAT001 -Album", "", "CAT001"),
("Album - CAT001", "", "CAT001"),
("Album | CAT001", "", "CAT001"),
("Album [CAT001]", "CAT001", None),
Expand Down
22 changes: 19 additions & 3 deletions tests/test_track.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


@pytest.mark.parametrize(
("name", "expected"),
("track_name", "expected"),
[
("Title", ("", "", "", "Title", "Title")),
("Artist - Title", ("", "Artist", "", "Title", "Title")),
Expand Down Expand Up @@ -102,17 +102,33 @@
("Live - 2020", ("", "", "", "Live - 2020", "Live - 2020")),
],
)
def test_parse_track_name(name, expected, json_track):
def test_parse_track_name(expected, json_track):
fields = "track_alt", "artist", "ft", "title", "title_without_remix"
expected = dict(zip(fields, expected))
if not expected["track_alt"]:
expected["track_alt"] = None

result_track = Track.make({**json_track["item"], "name": name})
result_track = Track.make(json_track["item"])
result = dict(zip(fields, attrgetter(*fields)(result_track)))
assert result == expected


# Check the artist/title split for track names with and without an explicit
# track artist. Per the cases below, when the artist is given and the name
# starts with it, the rest of the name is the title; when it is None, the
# last " - " separated part is the title and everything before it the artist.
# NOTE(review): ``track_name`` and ``track_artist`` presumably reach the test
# through the ``json_track`` fixture - confirm against conftest.
@pytest.mark.parametrize(
"track_name, track_artist, expected_artist, expected_title",
[
("Artist - Title", None, "Artist", "Title"),
("Artist - Title", "Artist", "Artist", "Title"),
("Artist - Title - Something", None, "Artist - Title", "Something"),
("Artist - Title - Something", "Artist", "Artist", "Title - Something"),
],
)
def test_track_artist(json_track, expected_artist, expected_title):
result_track = Track.make(json_track["item"])

assert result_track.artist == expected_artist
assert result_track.title == expected_title


@pytest.mark.parametrize(
("name", "expected_title", "expected_catalognum"),
[
Expand Down

0 comments on commit 27a7b66

Please sign in to comment.