Skip to content

Commit 5ea12d2

Browse files
committed
lastgenre: Separator func, defaults, word boundaries
- Move album artist splitting into a dedicated separator helper function. - Store the default separators without surrounding whitespace; whitespace is handled inside the helper. - Use word boundaries for alphanumeric separators and no boundaries for symbol separators.
1 parent 2f4b679 commit 5ea12d2

File tree

1 file changed

+51
-22
lines changed

1 file changed

+51
-22
lines changed

beetsplug/lastgenre/__init__.py

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,20 @@
4343
pylast.NetworkError,
4444
)
4545

46+
# Default value of the plugin's "artist_separators" config option: the
# strings on which a multi-artist album artist is split. They are stored
# without surrounding whitespace.
DEFAULT_ARTIST_SEPARATORS = [
47+
"feat.",
48+
"featuring",
49+
"&",
50+
"vs.",
51+
"x", # Match "x" only as whole word
52+
"/",
53+
"+",
54+
"and",
55+
"|",
56+
]
4657

47-
# Canonicalization tree processing.
58+
59+
# Canonicalization tree processing and other helpers.
4860

4961

5062
def flatten_tree(elem, path, branches):
@@ -77,6 +89,36 @@ def find_parents(candidate, branches):
7789
return [candidate]
7890

7991

92+
def split_on_separators(text, separators):
    """Split text on configured separators; returns [text] if none.

    ``separators`` may be a single string or an iterable of strings.
    Entries that are not strings, or that are empty or whitespace-only,
    are ignored. Matching is case-insensitive.

    A word boundary is required on each edge of a separator that starts
    or ends with a word character, so "x" or "and" only match as whole
    words and "feat." does not match inside "Defeat.". Edges that are
    symbols or whitespace (e.g. "/", "&", " / ") match without any
    boundary requirement.

    Returns a list of the stripped, non-empty parts. When there are no
    usable separators, or none of them occurs in ``text``, the result is
    ``[text]`` with the text left untouched.
    """
    # Normalize and drop empty/whitespace-only separators
    if isinstance(separators, str):
        seps = [separators]
    else:
        seps = list(separators or [])
    seps = [s for s in seps if isinstance(s, str) and s.strip()]
    if not seps:
        return [text]

    # Build patterns. Longest separators go first so that a separator
    # which is a prefix of another one (e.g. "feat" and "feat.") cannot
    # win the alternation and leave a stray fragment behind.
    patterns = []
    for s in sorted(seps, key=len, reverse=True):
        # Decide the boundary per edge rather than for the separator as a
        # whole: "\b" only makes sense next to a word character.
        prefix = r"\b" if re.match(r"\w", s[0]) else ""
        suffix = r"\b" if re.match(r"\w", s[-1]) else ""
        patterns.append(f"{prefix}{re.escape(s)}{suffix}")

    regex = re.compile("|".join(patterns), flags=re.IGNORECASE)

    # Keep the input exactly as given when nothing matches.
    if not regex.search(text):
        return [text]
    parts = regex.split(text)
    return [p.strip() for p in parts if p.strip()]
120+
121+
80122
# Main plugin logic.
81123

82124
WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt")
@@ -102,6 +144,7 @@ def __init__(self):
102144
"prefer_specific": False,
103145
"title_case": True,
104146
"extended_debug": False,
147+
"artist_separators": DEFAULT_ARTIST_SEPARATORS,
105148
}
106149
)
107150
self.setup()
@@ -424,31 +467,17 @@ def _try_resolve_stage(stage_label: str, keep_genres, new_genres):
424467
'No album artist genre found for "{0.albumartist}"',
425468
obj,
426469
)
427-
separators = [
428-
re.escape(self.config["separator"].get()),
429-
" feat\\. ",
430-
" featuring ",
431-
" & ",
432-
" vs\\. ",
433-
" x ",
434-
" / ",
435-
" + ",
436-
" and ",
437-
" \\| ",
438-
]
439-
if any(
440-
re.sub(r"\\", "", sep) in obj.albumartist
441-
for sep in separators
442-
):
470+
# Try splitting on separators for multi-artist albums
471+
albumartists = split_on_separators(
472+
obj.albumartist,
473+
separators=self.config["artist_separators"].as_str_seq()
474+
)
475+
if len(albumartists) > 1:
443476
if self.config["extended_debug"]:
444477
self._log.debug(
445478
"Found separators in album artist - splitting..."
446479
)
447-
# Split on all separators using regex
448-
pattern = "|".join(separators)
449-
albumartists = re.split(pattern, obj.albumartist)
450480
for albumartist in albumartists:
451-
albumartist = albumartist.strip()
452481
if self.config["extended_debug"]:
453482
self._log.debug(
454483
'Fetching multi-artist album genre for "{0}"',
@@ -458,7 +487,7 @@ def _try_resolve_stage(stage_label: str, keep_genres, new_genres):
458487
albumartist
459488
)
460489
if new_genres:
461-
label = "album artist (split)"
490+
stage_label = "album artist (split)"
462491
else:
463492
# For "Various Artists", pick the most popular track genre.
464493
item_genres = []

0 commit comments

Comments
 (0)