From ca16d856826e49d681fa6d4891974c129543b6ab Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 27 Mar 2026 07:22:08 +0100 Subject: [PATCH 01/22] lastgenre: Test aliases and ship default config --- beetsplug/lastgenre/aliases.yaml | 113 ++++++++++++ test/plugins/test_lastgenre.py | 283 ++++++++++++++++++++++++++++++- 2 files changed, 395 insertions(+), 1 deletion(-) create mode 100644 beetsplug/lastgenre/aliases.yaml diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml new file mode 100644 index 0000000000..1ae20f47e9 --- /dev/null +++ b/beetsplug/lastgenre/aliases.yaml @@ -0,0 +1,113 @@ +# Default genre aliases for the lastgenre plugin. +# +# Keys are canonical names and support \g back-references to pattern groups. +# Patterns are case-insensitive full-matches. Order matters: first match wins. + +# --------------------------------------------------------------------------- +# Ampersands / Delimiters +# --------------------------------------------------------------------------- + +# drum and bass (d&b, dnb, drum n bass, ...) +drum and bass: + - d(rum)?[ &n/]*b(ass)? + +# rhythm and blues (r&b, rnb, ...) +rhythm and blues: + - r(hythm)?[ &n/]*b(lues)? + +# rock and roll (rock & roll, rock'n'roll, rock-n-roll, ...) +rock and roll: + - rock[ '‐&n/ \-]*roll + +# --------------------------------------------------------------------------- +# Consistent Delimiters (Hyphenation) +# --------------------------------------------------------------------------- + + +# Hyphenate j-pop, k-pop, c-pop, post-rock, etc. +# Matches: kpop, k pop -> k-pop; j rock -> j-rock; post rock -> post-rock +# +# Hyphenate neo-soul, euro-house, tech-house, etc. +# (Negative lookaheads exclude 'european' and 'techno'/'technic*'.) +\g<1>-\g<2>: + - (c|k|j)[ /-]*(folk|goth|hip hop|pop|rock|ska|trance) + - (euro(?!p[ae]+n?)|neo|post|tech(?!n[io]))[ /-]*(\w+) + +# lo-fi, glo-fi (lofi, lo fi -> lo-fi) +lo-fi: + - (g?lo)[ /-]*fi + +# p-funk, g-funk, etc. 
(p funk -> p-funk) +\g<1>-funk: + - (p|g)[ /-]*funk + +# synthpop, synthwave, etc. (synth pop -> synthpop) +synth\g<1>: + - synth[ /-]+(\w+) + +# avant-garde (avantgarde, avant gard, avant-gard) +avant-garde: + - avant[ /-]*(gard(e)?)? + - avant-gard + - avant + +# --------------------------------------------------------------------------- +# Nu- Genre Spelling (nu jazz, nu-disco, etc.) +# --------------------------------------------------------------------------- + +# Matches: nu-jazz -> nu jazz; nu disco -> nu disco +# Note: 'nu-disco' is hyphenated in the tree but 'nu jazz' isn't in genres.txt +nu \g<1>: + - nu[ /-]*(disco|jazz|metal|soul) + +# --------------------------------------------------------------------------- +# Terminology / Synonym / Translation fixes +# --------------------------------------------------------------------------- + +# electronic (electronic music, elektronika) +electronic: + - electronic[ /]music + - elektronika + +# downtempo (downbeat) +downtempo: + - down[ /-]*beat + +# shoegaze (shoegazer, shoegazing) +shoegaze: + - shoegaze?r? + - shoegazing + +# --------------------------------------------------------------------------- +# Form Fixes (Hip Hop, Trip Hop, etc.) +# --------------------------------------------------------------------------- + +# Normalized spacing: hip-hop, hiphop -> hip hop +\g<1> hop: + - (glitch|hip|jazz|trip)y?([ /-]*hip)?[ /-]*hop + +# --------------------------------------------------------------------------- +# Abbreviations & International Spellings +# --------------------------------------------------------------------------- + +# alternative rock (alt, alternative, alt rock, ...) +alternative rock: + - alt([ /-]*rock)? + - alternative([ /-]*rock)? + - alternatv[ /-]*rock + +# gothic rock (goth, goth rock) - doesn't catch gothic metal +gothic rock: + - goth(?!ic)([ /-]*rock)? 
+ - gothic[ /-]*rock + +# progressive rock (prog, prog rock, progressive rock) +# Note: mapping standalone 'progressive' is avoided to prevent catching 'progressive metal', etc. +progressive rock: + - prog([ /-]*rock)? + - progressive[ /-]*rock + +# traditional folk (trad, traditional) +# Note: avoids matching 'trad jazz' or 'traditional country'. +traditional folk: + - trad(/|ition(/|al)?)?-? diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 87d8f44158..ec7c521b31 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -25,7 +25,7 @@ from beets.test import _common from beets.test.helper import IOMixin, PluginTestCase from beetsplug import lastgenre -from beetsplug.lastgenre.utils import is_ignored +from beetsplug.lastgenre.utils import is_ignored, normalize_genre class LastGenrePluginTest(IOMixin, PluginTestCase): @@ -931,3 +931,284 @@ def fake_fetch(_, kind, obj, *args): assert "multi-valued album artist" in label assert "Metal" in genres + + +# Aliases: normalize_genre() unit tests + + +@pytest.mark.parametrize( + "aliases_dict, genre, expected", + [ + # Static (no back-references) + ({"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, "dnb", "drum and bass"), + ( + {"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, + "drum n bass", + "drum and bass", + ), + ({"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, "d&b", "drum and bass"), + # Template with \g<1> back-reference + ({r"\g<1> hop": [r"(hip)[ /-]*hop"]}, "hip-hop", "hip hop"), + ({r"\g<1> hop": [r"(trip)[ /-]*hop"]}, "trip hop", "trip hop"), + ({r"\g<1>-\g<2>": [r"(post)[ /-]*(\w+)"]}, "post rock", "post-rock"), + ({r"\g<1>-\g<2>": [r"(post)[ /-]*(\w+)"]}, "post/rock", "post-rock"), + # Case-insensitive matching, result is lowercased template + ({"hip hop": ["hiphop"]}, "HipHop", "hip hop"), + # No match — genre returned as-is (lowercased) + ({"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, "jazz", "jazz"), + # Empty alias list → no-op + ({}, "hip-hop", 
"hip-hop"), + ], +) +def test_normalize_genre( + aliases_dict: dict, + genre: str, + expected: str, +) -> None: + """Test normalize_genre() with static and template canonical names.""" + aliases = [ + (re.compile(pat, re.IGNORECASE), template.lower()) + for template, patterns in aliases_dict.items() + for pat in patterns + ] + assert normalize_genre(Mock(), aliases, genre) == expected + + +# Aliases: _load_aliases() config parsing tests + + +@pytest.mark.parametrize( + "aliases_config, input_genre, expected_genre", + [ + # Inline static alias + ({"hip hop": ["hip-hop", "hiphop"]}, "hip-hop", "hip hop"), + ({"hip hop": ["hip-hop", "hiphop"]}, "hiphop", "hip hop"), + # Inline template alias + ({r"\g<1> hop": [r"(trip)[ /-]*hop"]}, "trip-hop", "trip hop"), + # Pattern that does not match — genre unchanged + ({"drum and bass": [r"d(rum)?[ &n/]*b(ass)?"]}, "jazz", "jazz"), + # False → aliases disabled, genre not normalised + (False, "hip-hop", "hip-hop"), + ], +) +def test_aliases_config_format( + config, aliases_config, input_genre, expected_genre +): + """Test _load_aliases() loading from inline config dict (and False).""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = aliases_config + plugin = lastgenre.LastGenrePlugin() + result = normalize_genre(plugin._log, plugin.aliases, input_genre) + assert result == expected_genre + + +@pytest.mark.parametrize( + "invalid_config, expected_error", + [ + # Plain string instead of mapping + ("/path/to/aliases.txt", "must be a dict"), + # Integer + (42, "must be a dict"), + # Mapping with non-list value + ({"hip hop": "hip-hop"}, "must be a list"), + ], +) +def test_aliases_config_format_errors(config, invalid_config, expected_error): + """Test that invalid aliases config values raise confuse.ConfigTypeError.""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = invalid_config 
+ with pytest.raises(confuse.ConfigTypeError) as exc_info: + lastgenre.LastGenrePlugin() + assert expected_error in str(exc_info.value) + + +# Aliases: integration with _resolve_genres() + + +def test_aliases_normalize_before_whitelist(config): + """Aliases normalize BEFORE whitelist filtering. + + 'hip-hop' is not on the whitelist but 'hip hop' is. With aliases + enabled the tag must survive whitelist filtering. + """ + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop", "hiphop"]} + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + # Inject only 'hip hop' into the whitelist to prove the alias fired. + plugin.whitelist = {"hip hop"} + + result = plugin._resolve_genres(["hip-hop"]) + assert result == ["hip hop"], ( + "alias must normalize 'hip-hop' → 'hip hop' before whitelist check" + ) + + +def test_aliases_normalize_before_ignorelist(config): + """Aliases normalize BEFORE ignorelist filtering. + + If 'hip hop' is ignored but 'hip-hop' is fed in, the alias fires first + so the result is empty (correctly ignored). + """ + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop"]} + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + plugin.ignorelist = { + "*": [re.compile("hip hop", re.IGNORECASE)], + } + + result = plugin._resolve_genres(["hip-hop"]) + assert result == [], ( + "alias must normalize 'hip-hop' before ignorelist check drops it" + ) + + +def test_aliases_normalize_existing_tags(config): + """Aliases also normalize genres already in the file tag. + + Existing genres passed as *old* in keep_genres are lowercased and then + flow through _resolve_genres, so aliases must fire for them too. 
+ """ + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = { + "drum and bass": [r"d(rum)?[ &n/]*b(ass)?"] + } + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + plugin.whitelist = {"drum and bass"} + + # Simulate existing tag variant 'dnb' passing through resolve. + result = plugin._resolve_genres(["dnb"]) + assert result == ["drum and bass"], ( + "alias must normalize existing tag variant 'dnb' before whitelist check" + ) + + +def test_aliases_default_bundled_loads(config): + """With aliases: true (default), the bundled aliases.yaml is loaded.""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = True + plugin = lastgenre.LastGenrePlugin() + # Bundled file should have at least one entry. + assert len(plugin.aliases) > 0, "bundled aliases.yaml must contain entries" + + +def test_aliases_disabled(config): + """With aliases: false, no normalization is performed.""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = False + plugin = lastgenre.LastGenrePlugin() + assert plugin.aliases == [] + # normalize_genre with an empty list must return the genre unchanged. 
+ assert normalize_genre(plugin._log, plugin.aliases, "hip-hop") == "hip-hop" + assert normalize_genre(plugin._log, plugin.aliases, "hiphop") == "hiphop" + + +# Aliases: LastFmClient normalization tests + + +def test_client_normalization(config): + """LastFmClient must normalize tags using aliases before filtering.""" + # Setup aliases: 'hip-hop' -> 'hip hop' + aliases_config = {r"hip hop": [r"hip-hop"]} + # Setup ignorelist: ignore 'hip hop' + ignorelist_config = {"*": ["hip hop"]} + + # Mock pylast objects + mock_tag = Mock() + mock_tag.item.get_name.return_value = "hip-hop" + mock_tag.weight = 100 + mock_lastfm_obj = Mock() + mock_lastfm_obj.get_top_tags.return_value = [mock_tag] + + # Initialize client manually + aliases = [ + (re.compile(pat, re.IGNORECASE), template.lower()) + for template, patterns in aliases_config.items() + for pat in patterns + ] + ignorelist = { + artist: [re.compile(pattern, re.IGNORECASE) for pattern in patterns] + for artist, patterns in ignorelist_config.items() + } + client = lastgenre.client.LastFmClient(Mock(), 10, ignorelist, aliases) + + # 1. Test _tags_for directly: returns raw (un-normalized) tags from pylast + tags = client._tags_for(mock_lastfm_obj) + assert tags == ["hip-hop"], "client._tags_for must return raw tags" + + # 2. Test _last_lookup with ignorelist and aliases: + # 'hip-hop' normalized to 'hip hop', which is then ignored. + # Result should be empty. 
+ def mock_method(*args): + return mock_lastfm_obj + + result = client._last_lookup("track", mock_method, "artist", "title") + assert result == [], ( + "normalized 'hip hop' must be caught and filtered by ignorelist in _last_lookup" + ) + + +@pytest.mark.parametrize( + "input_genre, expected_genre", + [ + ("dnb", "drum and bass"), + ("drum n bass", "drum and bass"), + ("r&b", "rhythm and blues"), + ("rnb", "rhythm and blues"), + ("rock & roll", "rock and roll"), + ("rock'n'roll", "rock and roll"), + ("kpop", "k-pop"), + ("j rock", "j-rock"), + ("post rock", "post-rock"), + ("lofi", "lo-fi"), + ("lo fi", "lo-fi"), + ("p funk", "p-funk"), + ("synth pop", "synthpop"), + ("avantgarde", "avant-garde"), + ("avant gard", "avant-garde"), + ("nu-jazz", "nu jazz"), + ("nu-metal", "nu metal"), + ("nu-soul", "nu soul"), + ("nu disco", "nu disco"), + ("elektronika", "electronic"), + ("electronic music", "electronic"), + ("downbeat", "downtempo"), + ("shoegazer", "shoegaze"), + ("shoegazing", "shoegaze"), + ("hip-hop", "hip hop"), + ("triphop", "trip hop"), + ("alt", "alternative rock"), + ("alternative", "alternative rock"), + ("goth", "gothic rock"), + ("goth rock", "gothic rock"), + ("gothic rock", "gothic rock"), + ("prog", "progressive rock"), + ("prog rock", "progressive rock"), + ("progressive rock", "progressive rock"), + ("trad", "traditional folk"), + ("traditional", "traditional folk"), + ], +) +def test_default_aliases_logic(config, input_genre, expected_genre): + """Verify that bundled aliases.yaml correctly handles common variants.""" + config["lastgenre"]["ignorelist"] = False + config["lastgenre"]["aliases"] = True + plugin = lastgenre.LastGenrePlugin() + result = normalize_genre(plugin._log, plugin.aliases, input_genre) + assert result == expected_genre From 9ed89de878e66b099e21680946b9047cb121007c Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sun, 29 Mar 2026 11:45:31 +0200 Subject: [PATCH 02/22] lastgenre: Docs for genre normalization (aliases) --- 
docs/plugins/lastgenre.rst | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index db8118ee23..717d2bfd04 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -212,6 +212,55 @@ plain ``metal`` will not match ``heavy metal`` unless you write a regex like double-escape backslashes in unquoted or single-quoted strings (e.g., use ``\w``, not ``\\w``). +Genre Normalization (Aliases) +----------------------------- + +Last.fm tags often contain variant spellings, abbreviations, or inconsistent +formatting (e.g., "hip-hop", "hiphop", and "hip hop"). The normalization feature +uses an ordered list of regular expression aliases to map these variants to a +single canonical name *before* any other filtering or canonicalization takes +place. + +This feature is enabled by default (``aliases: yes``) and uses a bundled +``aliases.yaml`` file which covers many common cases, such as mapping "dnb" to +"drum and bass" or "r&b" to "rhythm and blues". + +You can extend or override these aliases in your configuration. The keys are the +canonical genre names (which support ``\g<1>`` back-references to regex capture +groups) and the values are lists of regex patterns: + +:: + + lastgenre: + aliases: + drum and bass: + - d(rum)?[ &n/]*b(ass)? + \g<1> hop: + - (glitch|hip|jazz|trip)y?[ /-]*hop + +.. note:: + + The same formatting and quoting rules regarding YAML special characters and + backslashes apply here as well. See the **Attention** box in the **Genre + Ignorelist** section above for details. + +Choosing the Right Tool +----------------------- + +With multiple ways to filter and map genres, here is a quick guide on when to +use what: + +- **Aliases**: Use these first to fix spelling variants and abbreviations (e.g., + ``dnb`` → ``drum and bass``). 
+- **Ignorelist**: Use this for error correction when Last.fm results are not + accurate, or for precise per-artist or global exclusions (e.g., rejecting + ``Metal`` for specific electronic artists). +- **Canonicalization**: Use this to automatically map specific sub-genres to + broader categories (e.g., ``Grindcore`` → ``Metal``). +- **Whitelist**: Use this to finally limit your library to a predefined set of + genres. When combined with canonicalization, the plugin will try to map a + sub-genre to its closest whitelisted parent. Anything else is dropped. + Configuration ------------- From 7b2d4b059cf0dc95d1b8a838820c055452f6a0e6 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sun, 29 Mar 2026 12:08:23 +0200 Subject: [PATCH 03/22] lastgenre: Docs hint on canonical without whitelist --- docs/plugins/lastgenre.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/plugins/lastgenre.rst b/docs/plugins/lastgenre.rst index 717d2bfd04..943f1ff0f2 100644 --- a/docs/plugins/lastgenre.rst +++ b/docs/plugins/lastgenre.rst @@ -70,6 +70,11 @@ contains about any genre contained in the tree) with canonicalization because nothing would ever be matched to a more generic node since all the specific subgenres are in the whitelist to begin with. +If you use canonicalization *without* a whitelist, the plugin will simply map +every genre to its top-most root category in the tree (e.g., ``Viking Metal`` → +``Rock``). This is a great way to keep your library broad without needing to +maintain a manual list of allowed genres. + .. _tree of nested genre names: https://raw.githubusercontent.com/beetbox/beets/master/beetsplug/lastgenre/genres-tree.yaml .. 
_yaml: https://yaml.org/ From b1cfe11f1b96ec4c6fa6c71adc5aefe768154c17 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 27 Mar 2026 07:19:47 +0100 Subject: [PATCH 04/22] lastgenre: Implement genre alias normalization Add comment on alias and log in client Refactor aliases load using confuse MappingValues Get rid of drop/norm helper again --- beetsplug/lastgenre/__init__.py | 89 ++++++++++++++++++++++++++++++--- beetsplug/lastgenre/client.py | 19 +++++-- beetsplug/lastgenre/utils.py | 32 ++++++++---- 3 files changed, 119 insertions(+), 21 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 0a32394c6a..28ef1f2ecc 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -37,7 +37,7 @@ from beets import config, library, plugins, ui from beets.library import Album, Item from beets.util import plurality, unique_list -from beetsplug.lastgenre.utils import drop_ignored_genres, is_ignored +from beetsplug.lastgenre.utils import is_ignored, normalize_genre from .client import LastFmClient @@ -48,7 +48,7 @@ from beets.importer import ImportSession, ImportTask from beets.library import LibModel - from .utils import GenreIgnorePatterns + from .utils import Aliases, GenreIgnorePatterns Whitelist = set[str] """Set of valid genre names (lowercase). 
Empty set means all genres allowed.""" @@ -115,6 +115,7 @@ def sort_by_depth(tags: list[str], branches: CanonTree) -> list[str]: WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt") C14N_TREE = os.path.join(os.path.dirname(__file__), "genres-tree.yaml") +ALIASES = os.path.join(os.path.dirname(__file__), "aliases.yaml") class LastGenrePlugin(plugins.BeetsPlugin): @@ -137,6 +138,7 @@ def __init__(self) -> None: "title_case": True, "pretend": False, "ignorelist": {}, + "aliases": True, } ) self.setup() @@ -150,8 +152,12 @@ def setup(self) -> None: self.c14n_branches: CanonTree self.c14n_branches, self.canonicalize = self._load_c14n_tree() self.ignore_patterns: GenreIgnorePatterns = self._load_ignorelist() + self.aliases: Aliases = self._load_aliases() self.client = LastFmClient( - self._log, self.config["min_weight"].get(int), self.ignore_patterns + self._log, + self.config["min_weight"].get(int), + self.ignore_patterns, + self.aliases, ) def _load_whitelist(self) -> Whitelist: @@ -246,6 +252,67 @@ def _load_ignorelist(self) -> GenreIgnorePatterns: return compiled_ignorelist + def _load_aliases(self) -> Aliases: + """Load the genre alias table from the beets config. + + Reads ``lastgenre.aliases`` as a mapping of genre names to lists of + regex patterns:: + + lastgenre: + aliases: + drum and bass: + - d(rum)?[ &n/]*b(ass)? + \\g<1> hop: + - (glitch|hip|jazz|trip)y?[ /-]*hop + + The key (genre name) is used as a ``re.Match.expand()`` template, + so ``\\g`` back-references to capture groups are supported. + + Setting ``aliases: true`` (the default) loads the bundled + ``aliases.yaml`` file. Setting ``aliases: false`` disables + normalization entirely. + + Raises: + confuse.ConfigTypeError: when the config value is not a mapping + or a list entry is not a string. 
+ """ + aliases_raw = self.config["aliases"].get() + if aliases_raw is False: + return [] + if aliases_raw in (True, "", None): + self._log.debug("Loading default aliases from {}", ALIASES) + with Path(ALIASES).open(encoding="utf-8") as f: + aliases_dict = yaml.safe_load(f) + if not aliases_dict: + return [] + else: + # Validate only the effective aliases value to avoid stale lower- + # priority config layers affecting type checking. + aliases_cfg = confuse.Configuration("lastgenre_aliases", read=False) + aliases_cfg.set({"aliases": aliases_raw}) + aliases_dict = aliases_cfg["aliases"].get( + confuse.MappingValues(confuse.Sequence(str)) + ) + + entries: Aliases = [] + for canonical, patterns in aliases_dict.items(): + template = str(canonical).lower() + for raw_pat in patterns: + try: + entries.append( + (re.compile(str(raw_pat), re.IGNORECASE), template) + ) + except re.error: + entries.append( + ( + re.compile(re.escape(str(raw_pat)), re.IGNORECASE), + template, + ) + ) + + self._log.extra_debug("Loaded {} alias entries", len(entries)) + return entries + @property def sources(self) -> tuple[str, ...]: """A tuple of allowed genre sources. May contain 'track', @@ -267,6 +334,8 @@ def _resolve_genres( """Canonicalize, sort and filter a list of genres. - Returns an empty list if the input tags list is empty. + - If aliases are configured, variant spellings are normalised first + (e.g. 'hip-hop' → 'hip hop', 'dnb' → 'drum and bass'). - If canonicalization is enabled, it extends the list by incorporating parent genres from the canonicalization tree. When a whitelist is set, only parent tags that pass the whitelist filter are included; @@ -286,6 +355,12 @@ def _resolve_genres( if not tags: return [] + # Normalize variant spellings before any other processing. 
+ if self.aliases: + tags = [ + normalize_genre(self._log, self.aliases, tag) for tag in tags + ] + count = self.config["count"].get(int) # Canonicalization (if enabled) @@ -358,9 +433,11 @@ def _filter_valid( for g in cleaned if not self.whitelist or g.lower() in self.whitelist ] - return drop_ignored_genres( - self._log, self.ignore_patterns, whitelisted, artist - ) + return [ + g + for g in whitelisted + if not is_ignored(self._log, self.ignore_patterns, g, artist) + ] # Genre resolution pipeline. diff --git a/beetsplug/lastgenre/client.py b/beetsplug/lastgenre/client.py index d7118ee089..2a531c1beb 100644 --- a/beetsplug/lastgenre/client.py +++ b/beetsplug/lastgenre/client.py @@ -25,7 +25,7 @@ from beets import plugins -from .utils import drop_ignored_genres +from .utils import is_ignored, normalize_genre if TYPE_CHECKING: from collections.abc import Callable @@ -33,7 +33,7 @@ from beets.library import LibModel from beets.logging import BeetsLogger - from .utils import GenreIgnorePatterns + from .utils import Aliases, GenreIgnorePatterns GenreCache = dict[str, list[str]] """Cache mapping entity keys to their genre lists. @@ -69,6 +69,7 @@ def __init__( log: BeetsLogger, min_weight: int, ignore_patterns: GenreIgnorePatterns, + aliases: Aliases, ): """Initialize the client. @@ -78,6 +79,7 @@ def __init__( self._log = log self._min_weight = min_weight self._ignore_patterns: GenreIgnorePatterns = ignore_patterns + self._aliases: Aliases = aliases self._genre_cache: GenreCache = {} def fetch_genres( @@ -127,11 +129,18 @@ def _last_lookup( "last.fm (unfiltered) {} tags: {}", entity, genres ) + # Apply aliases and log each change. # Filter forbidden genres on every call so ignorelist hits are logged. # Artist is always the first element in args (album, artist, track lookups). 
- return drop_ignored_genres( - self._log, self._ignore_patterns, genres, args[0] - ) + result = [] + for genre in genres: + if self._aliases: + genre = normalize_genre(self._log, self._aliases, genre) + + if not is_ignored(self._log, self._ignore_patterns, genre, args[0]): + result.append(genre) + + return result def fetch(self, kind: str, obj: LibModel, *args: str) -> list[str]: """Fetch Last.fm genres for the specified kind and entity. diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py index bd73f6aca6..44e95b1e4d 100644 --- a/beetsplug/lastgenre/utils.py +++ b/beetsplug/lastgenre/utils.py @@ -27,17 +27,11 @@ GenreIgnorePatterns = dict[str, list[re.Pattern[str]]] """Mapping of artist name to list of compiled case-insensitive patterns.""" + AliasEntry = tuple[re.Pattern[str], str] + """A compiled full-match pattern paired with its replacement template.""" -def drop_ignored_genres( - logger: BeetsLogger, - ignore_patterns: GenreIgnorePatterns, - genres: list[str], - artist: str | None = None, -) -> list[str]: - """Drop genres that match the ignorelist.""" - return [ - g for g in genres if not is_ignored(logger, ignore_patterns, g, artist) - ] + Aliases = list[AliasEntry] + """Ordered list of (pattern, replacement_template) alias entries.""" def is_ignored( @@ -57,3 +51,21 @@ def is_ignored( logger.extra_debug("ignored (artist: {}): {}", artist, genre) return True return False + + +def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str: + """Return the canonical form of *genre* using *aliases*. + + Tries each alias entry in order. The first full-match wins; the + replacement template is expanded via ``re.Match.expand()`` so + ``\\g`` back-references work. Returns *genre* unchanged when + no alias matches. 
+ """ + genre_lower = genre.lower() + for pattern, template in aliases: + if m := pattern.fullmatch(genre_lower): + expanded = m.expand(template) + if expanded != genre: + logger.extra_debug("aliased: {} -> {}", genre, expanded) + return expanded + return genre_lower From 5630b17fce6d2bfda13f7016d74ffa3f9cf2dc1b Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 10 Apr 2026 08:36:03 +0200 Subject: [PATCH 05/22] lastgenre: Slightly optimize filter_valid performance which also slightly improves readability after removing the redundant drop_ignored_genres helper. --- beetsplug/lastgenre/__init__.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 28ef1f2ecc..51f96ead31 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -428,16 +428,19 @@ def _filter_valid( if not self.whitelist and not self.ignore_patterns: return cleaned - whitelisted = [ - g - for g in cleaned - if not self.whitelist or g.lower() in self.whitelist - ] - return [ - g - for g in whitelisted - if not is_ignored(self._log, self.ignore_patterns, g, artist) - ] + result = [] + for genre in cleaned: + if self.whitelist and genre.lower() not in self.whitelist: + continue + + if self.ignore_patterns and is_ignored( + self._log, self.ignore_patterns, genre, artist + ): + continue + + result.append(genre) + + return result # Genre resolution pipeline. 
From 04fe3e9105f02234f4776160b33592c3705c4351 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sat, 11 Apr 2026 03:12:19 +0200 Subject: [PATCH 06/22] Fixes/additions to default aliases --- beetsplug/lastgenre/aliases.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml index 1ae20f47e9..2825d4050b 100644 --- a/beetsplug/lastgenre/aliases.yaml +++ b/beetsplug/lastgenre/aliases.yaml @@ -91,10 +91,9 @@ shoegaze: # --------------------------------------------------------------------------- # alternative rock (alt, alternative, alt rock, ...) -alternative rock: - - alt([ /-]*rock)? - - alternative([ /-]*rock)? - - alternatv[ /-]*rock +# indie rock (indie, indie rock) +\g<1> rock: + - (alt|alternative|indie)([ /-]*rock)? # gothic rock (goth, goth rock) - doesn't catch gothic metal gothic rock: From 2efb8bc4a1139db02cb2b5cdc5c731c0f4ef6037 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sat, 11 Apr 2026 03:18:51 +0200 Subject: [PATCH 07/22] Fix normalize_genre docstring wording --- beetsplug/lastgenre/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py index 44e95b1e4d..dd0bb60ddd 100644 --- a/beetsplug/lastgenre/utils.py +++ b/beetsplug/lastgenre/utils.py @@ -56,10 +56,10 @@ def is_ignored( def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str: """Return the canonical form of *genre* using *aliases*. - Tries each alias entry in order. The first full-match wins; the - replacement template is expanded via ``re.Match.expand()`` so - ``\\g`` back-references work. Returns *genre* unchanged when - no alias matches. + Tries each alias entry in order. The first full-match wins; the replacement + template is expanded via ``re.Match.expand()`` so ``\\g`` + back-references work. Returns original (lowercased) *genre* when no alias + matches. 
""" genre_lower = genre.lower() for pattern, template in aliases: From 56251ecc67570a81833cb7c345cd39264f10c512 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sat, 11 Apr 2026 03:26:54 +0200 Subject: [PATCH 08/22] Test invalid alias template catch --- test/plugins/test_lastgenre.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index ec7c521b31..d3a081c0da 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -974,6 +974,15 @@ def test_normalize_genre( assert normalize_genre(Mock(), aliases, genre) == expected +def test_normalize_genre_invalid_template_does_not_crash() -> None: + """Invalid replacement templates are skipped instead of crashing.""" + logger = Mock() + aliases = [(re.compile(r"(hip)[ /-]*hop", re.IGNORECASE), r"\g<2> hop")] + + assert normalize_genre(logger, aliases, "hip-hop") == "hip-hop" + logger.warning.assert_called_once() + + # Aliases: _load_aliases() config parsing tests From 95fdc90b37ea6456d8be0b9736680e7c4b0c3d7c Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sat, 11 Apr 2026 03:27:18 +0200 Subject: [PATCH 09/22] Catch exception on invalid alias template --- beetsplug/lastgenre/utils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py index dd0bb60ddd..4a5dab6d53 100644 --- a/beetsplug/lastgenre/utils.py +++ b/beetsplug/lastgenre/utils.py @@ -17,11 +17,10 @@ from __future__ import annotations +import re from typing import TYPE_CHECKING if TYPE_CHECKING: - import re - from beets.logging import BeetsLogger GenreIgnorePatterns = dict[str, list[re.Pattern[str]]] @@ -64,7 +63,16 @@ def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str: genre_lower = genre.lower() for pattern, template in aliases: if m := pattern.fullmatch(genre_lower): - expanded = m.expand(template) + try: + expanded = m.expand(template) + except 
(re.error, IndexError) as exc: + logger.warning( + "invalid alias template {}; skipping for genre {}: {}", + template, + genre, + exc, + ) + continue if expanded != genre: logger.extra_debug("aliased: {} -> {}", genre, expanded) return expanded From 0e979d427fddb7091d77b9bdc315122050c87e38 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 10:05:08 +0200 Subject: [PATCH 10/22] fix test aliases --- test/plugins/test_lastgenre.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index d3a081c0da..4646865fe2 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -812,8 +812,8 @@ def test_ignorelist_patterns( {"*": ["spoken word"], "metallica": ["metal"]}, {"*": ["spoken word"], "metallica": ["metal"]}, ), - # Artist names are preserved by the current loader implementation. - ({"METALLICA": ["METAL"]}, {"METALLICA": ["METAL"]}), + # Artist names are lowercased so lookup in is_ignored() matches. 
+ ({"METALLICA": ["METAL"]}, {"metallica": ["METAL"]}), # Invalid regex pattern that gets escaped (full-match literal fallback) ( {"artist": ["[invalid(regex"]}, @@ -852,7 +852,7 @@ def test_ignorelist_config_format( re.escape(pattern), re.IGNORECASE ).pattern ) - string_ignorelist[artist] = compiled_patterns + string_ignorelist[artist.lower()] = compiled_patterns assert string_ignorelist == expected_ignorelist @@ -1071,7 +1071,7 @@ def test_aliases_normalize_before_ignorelist(config): config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop"]} plugin = lastgenre.LastGenrePlugin() plugin.setup() - plugin.ignorelist = { + plugin.ignore_patterns = { "*": [re.compile("hip hop", re.IGNORECASE)], } From d60ec41c0000f606dcc100f69d10aa6dbe999922 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 10:07:36 +0200 Subject: [PATCH 11/22] lastgenre: Dedup ignore/alias regex compilation --- beetsplug/lastgenre/__init__.py | 30 ++++++++---------------------- beetsplug/lastgenre/utils.py | 12 ++++++++++++ 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 51f96ead31..828e8ada52 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -25,7 +25,6 @@ from __future__ import annotations import os -import re from collections import defaultdict from functools import singledispatchmethod from pathlib import Path @@ -37,7 +36,11 @@ from beets import config, library, plugins, ui from beets.library import Album, Item from beets.util import plurality, unique_list -from beetsplug.lastgenre.utils import is_ignored, normalize_genre +from beetsplug.lastgenre.utils import ( + compile_pattern, + is_ignored, + normalize_genre, +) from .client import LastFmClient @@ -234,21 +237,14 @@ def _load_ignorelist(self) -> GenreIgnorePatterns: compiled_ignorelist: GenreIgnorePatterns = defaultdict(list) for artist, patterns in raw_ignorelist.items(): - artist_patterns = [] - for 
pattern in patterns: - try: - artist_patterns.append(re.compile(pattern, re.IGNORECASE)) - except re.error: - artist_patterns.append( - re.compile(re.escape(pattern), re.IGNORECASE) - ) + artist_patterns = [compile_pattern(p) for p in patterns] self._log.extra_debug( "ignore for {}: {}", artist, [p.pattern for p in artist_patterns], ) - compiled_ignorelist[artist] = artist_patterns + compiled_ignorelist[artist.lower()] = artist_patterns return compiled_ignorelist @@ -298,17 +294,7 @@ def _load_aliases(self) -> Aliases: for canonical, patterns in aliases_dict.items(): template = str(canonical).lower() for raw_pat in patterns: - try: - entries.append( - (re.compile(str(raw_pat), re.IGNORECASE), template) - ) - except re.error: - entries.append( - ( - re.compile(re.escape(str(raw_pat)), re.IGNORECASE), - template, - ) - ) + entries.append((compile_pattern(str(raw_pat)), template)) self._log.extra_debug("Loaded {} alias entries", len(entries)) return entries diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py index 4a5dab6d53..2aa33d195a 100644 --- a/beetsplug/lastgenre/utils.py +++ b/beetsplug/lastgenre/utils.py @@ -33,6 +33,18 @@ """Ordered list of (pattern, replacement_template) alias entries.""" +def compile_pattern(pattern: str) -> re.Pattern[str]: + """Compile *pattern* as a case-insensitive regex. + + Falls back to a literal (``re.escape``'d) pattern when *pattern* is not + valid regex syntax. 
+ """ + try: + return re.compile(pattern, re.IGNORECASE) + except re.error: + return re.compile(re.escape(pattern), re.IGNORECASE) + + def is_ignored( logger: BeetsLogger, ignore_patterns: GenreIgnorePatterns, From a65d0b30a10fe398c21e9e7ac1ae90522cac18a5 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 10:46:07 +0200 Subject: [PATCH 12/22] Streamline in test alias type with ignore naming streamline vars naming in tests --- test/plugins/test_lastgenre.py | 44 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 4646865fe2..f046ce91e7 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -966,20 +966,22 @@ def test_normalize_genre( expected: str, ) -> None: """Test normalize_genre() with static and template canonical names.""" - aliases = [ + alias_patterns = [ (re.compile(pat, re.IGNORECASE), template.lower()) for template, patterns in aliases_dict.items() for pat in patterns ] - assert normalize_genre(Mock(), aliases, genre) == expected + assert normalize_genre(Mock(), alias_patterns, genre) == expected def test_normalize_genre_invalid_template_does_not_crash() -> None: """Invalid replacement templates are skipped instead of crashing.""" logger = Mock() - aliases = [(re.compile(r"(hip)[ /-]*hop", re.IGNORECASE), r"\g<2> hop")] + alias_patterns = [ + (re.compile(r"(hip)[ /-]*hop", re.IGNORECASE), r"\g<2> hop") + ] - assert normalize_genre(logger, aliases, "hip-hop") == "hip-hop" + assert normalize_genre(logger, alias_patterns, "hip-hop") == "hip-hop" logger.warning.assert_called_once() @@ -1009,7 +1011,7 @@ def test_aliases_config_format( ) config["lastgenre"]["aliases"] = aliases_config plugin = lastgenre.LastGenrePlugin() - result = normalize_genre(plugin._log, plugin.aliases, input_genre) + result = normalize_genre(plugin._log, plugin.alias_patterns, input_genre) assert result == expected_genre @@ -1112,7 
+1114,9 @@ def test_aliases_default_bundled_loads(config): config["lastgenre"]["aliases"] = True plugin = lastgenre.LastGenrePlugin() # Bundled file should have at least one entry. - assert len(plugin.aliases) > 0, "bundled aliases.yaml must contain entries" + assert len(plugin.alias_patterns) > 0, ( + "bundled aliases.yaml must contain entries" + ) def test_aliases_disabled(config): @@ -1122,10 +1126,16 @@ def test_aliases_disabled(config): ) config["lastgenre"]["aliases"] = False plugin = lastgenre.LastGenrePlugin() - assert plugin.aliases == [] + assert plugin.alias_patterns == [] # normalize_genre with an empty list must return the genre unchanged. - assert normalize_genre(plugin._log, plugin.aliases, "hip-hop") == "hip-hop" - assert normalize_genre(plugin._log, plugin.aliases, "hiphop") == "hiphop" + assert ( + normalize_genre(plugin._log, plugin.alias_patterns, "hip-hop") + == "hip-hop" + ) + assert ( + normalize_genre(plugin._log, plugin.alias_patterns, "hiphop") + == "hiphop" + ) # Aliases: LastFmClient normalization tests @@ -1146,20 +1156,22 @@ def test_client_normalization(config): mock_lastfm_obj.get_top_tags.return_value = [mock_tag] # Initialize client manually - aliases = [ + alias_patterns = [ (re.compile(pat, re.IGNORECASE), template.lower()) for template, patterns in aliases_config.items() for pat in patterns ] - ignorelist = { + ignore_patterns = { artist: [re.compile(pattern, re.IGNORECASE) for pattern in patterns] for artist, patterns in ignorelist_config.items() } - client = lastgenre.client.LastFmClient(Mock(), 10, ignorelist, aliases) + client = lastgenre.client.LastFmClient( + Mock(), 10, ignore_patterns, alias_patterns + ) - # 1. Test _tags_for directly: returns raw (un-normalized) tags from pylast - tags = client._tags_for(mock_lastfm_obj) - assert tags == ["hip-hop"], "client._tags_for must return raw tags" + # 1. 
Test fetch directly: returns raw (un-normalized) tags from pylast + tags = client.fetch("track", mock_lastfm_obj) + assert tags == ["hip-hop"], "client.fetch must return raw tags" # 2. Test _last_lookup with ignorelist and aliases: # 'hip-hop' normalized to 'hip hop', which is then ignored. @@ -1219,5 +1231,5 @@ def test_default_aliases_logic(config, input_genre, expected_genre): config["lastgenre"]["ignorelist"] = False config["lastgenre"]["aliases"] = True plugin = lastgenre.LastGenrePlugin() - result = normalize_genre(plugin._log, plugin.aliases, input_genre) + result = normalize_genre(plugin._log, plugin.alias_patterns, input_genre) assert result == expected_genre From f56dc28c103a5130c31a54b00f2b9d968ad4d4bc Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 10:46:33 +0200 Subject: [PATCH 13/22] Streamline alias type with ignore naming --- beetsplug/lastgenre/__init__.py | 15 ++++++++------- beetsplug/lastgenre/client.py | 10 +++++----- beetsplug/lastgenre/utils.py | 13 ++++++------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 828e8ada52..597de30795 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -51,7 +51,7 @@ from beets.importer import ImportSession, ImportTask from beets.library import LibModel - from .utils import Aliases, GenreIgnorePatterns + from .utils import GenreAliasPatterns, GenreIgnorePatterns Whitelist = set[str] """Set of valid genre names (lowercase). 
Empty set means all genres allowed.""" @@ -155,12 +155,12 @@ def setup(self) -> None: self.c14n_branches: CanonTree self.c14n_branches, self.canonicalize = self._load_c14n_tree() self.ignore_patterns: GenreIgnorePatterns = self._load_ignorelist() - self.aliases: Aliases = self._load_aliases() + self.alias_patterns: GenreAliasPatterns = self._load_aliases() self.client = LastFmClient( self._log, self.config["min_weight"].get(int), self.ignore_patterns, - self.aliases, + self.alias_patterns, ) def _load_whitelist(self) -> Whitelist: @@ -248,7 +248,7 @@ def _load_ignorelist(self) -> GenreIgnorePatterns: return compiled_ignorelist - def _load_aliases(self) -> Aliases: + def _load_aliases(self) -> GenreAliasPatterns: """Load the genre alias table from the beets config. Reads ``lastgenre.aliases`` as a mapping of genre names to lists of @@ -290,7 +290,7 @@ def _load_aliases(self) -> Aliases: confuse.MappingValues(confuse.Sequence(str)) ) - entries: Aliases = [] + entries: GenreAliasPatterns = [] for canonical, patterns in aliases_dict.items(): template = str(canonical).lower() for raw_pat in patterns: @@ -342,9 +342,10 @@ def _resolve_genres( return [] # Normalize variant spellings before any other processing. - if self.aliases: + if self.alias_patterns: tags = [ - normalize_genre(self._log, self.aliases, tag) for tag in tags + normalize_genre(self._log, self.alias_patterns, tag) + for tag in tags ] count = self.config["count"].get(int) diff --git a/beetsplug/lastgenre/client.py b/beetsplug/lastgenre/client.py index 2a531c1beb..f770b1d134 100644 --- a/beetsplug/lastgenre/client.py +++ b/beetsplug/lastgenre/client.py @@ -33,7 +33,7 @@ from beets.library import LibModel from beets.logging import BeetsLogger - from .utils import Aliases, GenreIgnorePatterns + from .utils import GenreAliasPatterns, GenreIgnorePatterns GenreCache = dict[str, list[str]] """Cache mapping entity keys to their genre lists. 
@@ -69,7 +69,7 @@ def __init__( log: BeetsLogger, min_weight: int, ignore_patterns: GenreIgnorePatterns, - aliases: Aliases, + alias_patterns: GenreAliasPatterns, ): """Initialize the client. @@ -79,7 +79,7 @@ def __init__( self._log = log self._min_weight = min_weight self._ignore_patterns: GenreIgnorePatterns = ignore_patterns - self._aliases: Aliases = aliases + self._alias_patterns: GenreAliasPatterns = alias_patterns self._genre_cache: GenreCache = {} def fetch_genres( @@ -134,8 +134,8 @@ def _last_lookup( # Artist is always the first element in args (album, artist, track lookups). result = [] for genre in genres: - if self._aliases: - genre = normalize_genre(self._log, self._aliases, genre) + if self._alias_patterns: + genre = normalize_genre(self._log, self._alias_patterns, genre) if not is_ignored(self._log, self._ignore_patterns, genre, args[0]): result.append(genre) diff --git a/beetsplug/lastgenre/utils.py b/beetsplug/lastgenre/utils.py index 2aa33d195a..d637918648 100644 --- a/beetsplug/lastgenre/utils.py +++ b/beetsplug/lastgenre/utils.py @@ -26,10 +26,7 @@ GenreIgnorePatterns = dict[str, list[re.Pattern[str]]] """Mapping of artist name to list of compiled case-insensitive patterns.""" - AliasEntry = tuple[re.Pattern[str], str] - """A compiled full-match pattern paired with its replacement template.""" - - Aliases = list[AliasEntry] + GenreAliasPatterns = list[tuple[re.Pattern[str], str]] """Ordered list of (pattern, replacement_template) alias entries.""" @@ -64,8 +61,10 @@ def is_ignored( return False -def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str: - """Return the canonical form of *genre* using *aliases*. +def normalize_genre( + logger: BeetsLogger, alias_patterns: GenreAliasPatterns, genre: str +) -> str: + """Return the canonical form of *genre* using *alias_patterns*. Tries each alias entry in order. 
The first full-match wins; the replacement template is expanded via ``re.Match.expand()`` so ``\\g`` @@ -73,7 +72,7 @@ def normalize_genre(logger: BeetsLogger, aliases: Aliases, genre: str) -> str: matches. """ genre_lower = genre.lower() - for pattern, template in aliases: + for pattern, template in alias_patterns: if m := pattern.fullmatch(genre_lower): try: expanded = m.expand(template) From 271a23f84ddef428df05b52ec11e52d432fc4d17 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 11:04:42 +0200 Subject: [PATCH 14/22] lastgenre: Fix ignorelist tests naming inconsistencies --- test/plugins/test_lastgenre.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index f046ce91e7..416e48b5dc 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -785,14 +785,14 @@ def test_ignorelist_patterns( logger = Mock() - # Set up compiled ignorelist directly (skipping file parsing) - compiled_ignorelist = defaultdict(list) + # Set up compiled ignore_patterns directly (skipping file parsing) + ignore_patterns = defaultdict(list) for artist_name, patterns in ignorelist_dict.items(): - compiled_ignorelist[artist_name.lower()] = [ + ignore_patterns[artist_name.lower()] = [ re.compile(pattern, re.IGNORECASE) for pattern in patterns ] - result = is_ignored(logger, compiled_ignorelist, genre, artist) + result = is_ignored(logger, ignore_patterns, genre, artist) assert result == expected_forbidden @pytest.mark.parametrize( @@ -833,12 +833,12 @@ def test_ignorelist_config_format( # Mimic the plugin loader behavior in isolation to avoid global config bleed. 
if not cfg["lastgenre"]["ignorelist"].get(): - string_ignorelist = {} + ignore_patterns = {} else: raw_strs = cfg["lastgenre"]["ignorelist"].get( confuse.MappingValues(confuse.Sequence(str)) ) - string_ignorelist = {} + ignore_patterns = {} for artist, patterns in raw_strs.items(): compiled_patterns = [] for pattern in patterns: @@ -852,9 +852,9 @@ def test_ignorelist_config_format( re.escape(pattern), re.IGNORECASE ).pattern ) - string_ignorelist[artist.lower()] = compiled_patterns + ignore_patterns[artist.lower()] = compiled_patterns - assert string_ignorelist == expected_ignorelist + assert ignore_patterns == expected_ignorelist @pytest.mark.parametrize( "invalid_config, expected_error_message", From 1f48c9211aafb04e9e75b5655dfa9619fac61d38 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 11:34:03 +0200 Subject: [PATCH 15/22] Add alt rock to default aliases test --- test/plugins/test_lastgenre.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 416e48b5dc..55908e1c4c 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -1215,6 +1215,7 @@ def mock_method(*args): ("hip-hop", "hip hop"), ("triphop", "trip hop"), ("alt", "alternative rock"), + ("alt rock", "alternative rock"), ("alternative", "alternative rock"), ("goth", "gothic rock"), ("goth rock", "gothic rock"), From 942221a18150a2cdb4c357786c837c83eac07da6 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 11:34:56 +0200 Subject: [PATCH 16/22] Finalize default aliases and fix genres,genres-tree --- beetsplug/lastgenre/aliases.yaml | 31 ++++++++++++++++++++++++---- beetsplug/lastgenre/genres-tree.yaml | 6 +++--- beetsplug/lastgenre/genres.txt | 2 +- test/plugins/test_lastgenre.py | 14 ++++++++++--- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml index 2825d4050b..013cf52dc0 100644 --- 
a/beetsplug/lastgenre/aliases.yaml +++ b/beetsplug/lastgenre/aliases.yaml @@ -67,7 +67,18 @@ nu \g<1>: # electronic (electronic music, elektronika) electronic: - electronic[ /]music - - elektronika + +# world music (world) +world music: + - world + +# chillout (chill, chill out, chill-out) +chillout: + - chill([ /-]*out)? + +# darkwave (dark wave) +darkwave: + - dark[ /-]*wave # downtempo (downbeat) downtempo: @@ -90,10 +101,22 @@ shoegaze: # Abbreviations & International Spellings # --------------------------------------------------------------------------- -# alternative rock (alt, alternative, alt rock, ...) +# blues rock (blues-rock) +blues rock: + - blues[ /-]*rock + +# folk rock (folk-rock) +folk rock: + - folk[ /-]*rock + +# alternative rock (alt, alternative, alt rock, alternative rock, ...) +alternative rock: + - alt([ /-]*rock)? + - alternative([ /-]*rock)? + # indie rock (indie, indie rock) -\g<1> rock: - - (alt|alternative|indie)([ /-]*rock)? +indie rock: + - indie([ /-]*rock)? 
# gothic rock (goth, goth rock) - doesn't catch gothic metal gothic rock: diff --git a/beetsplug/lastgenre/genres-tree.yaml b/beetsplug/lastgenre/genres-tree.yaml index d7acfbc1f4..2ebd145fca 100644 --- a/beetsplug/lastgenre/genres-tree.yaml +++ b/beetsplug/lastgenre/genres-tree.yaml @@ -273,7 +273,7 @@ - downtempo: - acid jazz - balearic beat - - chill out + - chillout - dub music - dubtronica - ethnic electronica @@ -319,7 +319,7 @@ - madchester - dance-punk - dance-rock - - dark wave + - darkwave - electroclash - electronicore - electropunk @@ -786,7 +786,7 @@ - chanson - canción de autor - nueva canción -- world: +- world music: - world dub - world fusion - worldbeat diff --git a/beetsplug/lastgenre/genres.txt b/beetsplug/lastgenre/genres.txt index 571b6f3500..7b8716faa1 100644 --- a/beetsplug/lastgenre/genres.txt +++ b/beetsplug/lastgenre/genres.txt @@ -112,7 +112,7 @@ blue-eyed soul bluegrass blues blues ballad -blues-rock +blues rock boogie boogie woogie boogie-woogie diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 55908e1c4c..8bf90481d6 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -204,8 +204,8 @@ def test_sort_by_depth(self): tags = ("electronic", "ambient", "post-rock", "downtempo") res = lastgenre.sort_by_depth(tags, self.plugin.c14n_branches) assert res == ["post-rock", "downtempo", "ambient", "electronic"] - # Non-canonical tag ('chillout') present. - tags = ("electronic", "ambient", "chillout") + # Non-canonical tag ('chill out') present. 
+ tags = ("electronic", "ambient", "chill out") res = lastgenre.sort_by_depth(tags, self.plugin.c14n_branches) assert res == ["ambient", "electronic"] @@ -1207,8 +1207,16 @@ def mock_method(*args): ("nu-metal", "nu metal"), ("nu-soul", "nu soul"), ("nu disco", "nu disco"), - ("elektronika", "electronic"), ("electronic music", "electronic"), + ("world", "world music"), + ("chill", "chillout"), + ("chill out", "chillout"), + ("chill-out", "chillout"), + ("dark wave", "darkwave"), + ("dark-wave", "darkwave"), + ("blues rock", "blues rock"), + ("blues-rock", "blues rock"), + ("folk-rock", "folk rock"), ("downbeat", "downtempo"), ("shoegazer", "shoegaze"), ("shoegazing", "shoegaze"), From f813e95f39f456e68ef414247abe6deeb77d6b07 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 12:58:31 +0200 Subject: [PATCH 17/22] Restructure and reduce alias tests --- test/plugins/test_lastgenre.py | 493 ++++++++++++++------------------- 1 file changed, 202 insertions(+), 291 deletions(-) diff --git a/test/plugins/test_lastgenre.py b/test/plugins/test_lastgenre.py index 8bf90481d6..3a3448ef48 100644 --- a/test/plugins/test_lastgenre.py +++ b/test/plugins/test_lastgenre.py @@ -933,312 +933,223 @@ def fake_fetch(_, kind, obj, *args): assert "Metal" in genres -# Aliases: normalize_genre() unit tests +class TestAliases: + """Alias pattern matching and loading tests.""" - -@pytest.mark.parametrize( - "aliases_dict, genre, expected", - [ - # Static (no back-references) - ({"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, "dnb", "drum and bass"), - ( - {"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, - "drum n bass", - "drum and bass", - ), - ({"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, "d&b", "drum and bass"), - # Template with \g<1> back-reference - ({r"\g<1> hop": [r"(hip)[ /-]*hop"]}, "hip-hop", "hip hop"), - ({r"\g<1> hop": [r"(trip)[ /-]*hop"]}, "trip hop", "trip hop"), - ({r"\g<1>-\g<2>": [r"(post)[ /-]*(\w+)"]}, "post rock", "post-rock"), - ({r"\g<1>-\g<2>": [r"(post)[ 
/-]*(\w+)"]}, "post/rock", "post-rock"), - # Case-insensitive matching, result is lowercased template - ({"hip hop": ["hiphop"]}, "HipHop", "hip hop"), - # No match — genre returned as-is (lowercased) - ({"drum and bass": ["d(rum)?[ &n/]*b(ass)?"]}, "jazz", "jazz"), - # Empty alias list → no-op - ({}, "hip-hop", "hip-hop"), - ], -) -def test_normalize_genre( - aliases_dict: dict, - genre: str, - expected: str, -) -> None: - """Test normalize_genre() with static and template canonical names.""" - alias_patterns = [ - (re.compile(pat, re.IGNORECASE), template.lower()) - for template, patterns in aliases_dict.items() - for pat in patterns - ] - assert normalize_genre(Mock(), alias_patterns, genre) == expected - - -def test_normalize_genre_invalid_template_does_not_crash() -> None: - """Invalid replacement templates are skipped instead of crashing.""" - logger = Mock() - alias_patterns = [ - (re.compile(r"(hip)[ /-]*hop", re.IGNORECASE), r"\g<2> hop") - ] - - assert normalize_genre(logger, alias_patterns, "hip-hop") == "hip-hop" - logger.warning.assert_called_once() - - -# Aliases: _load_aliases() config parsing tests - - -@pytest.mark.parametrize( - "aliases_config, input_genre, expected_genre", - [ - # Inline static alias - ({"hip hop": ["hip-hop", "hiphop"]}, "hip-hop", "hip hop"), - ({"hip hop": ["hip-hop", "hiphop"]}, "hiphop", "hip hop"), - # Inline template alias - ({r"\g<1> hop": [r"(trip)[ /-]*hop"]}, "trip-hop", "trip hop"), - # Pattern that does not match — genre unchanged - ({"drum and bass": [r"d(rum)?[ &n/]*b(ass)?"]}, "jazz", "jazz"), - # False → aliases disabled, genre not normalised - (False, "hip-hop", "hip-hop"), - ], -) -def test_aliases_config_format( - config, aliases_config, input_genre, expected_genre -): - """Test _load_aliases() loading from inline config dict (and False).""" - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = aliases_config - plugin = 
lastgenre.LastGenrePlugin() - result = normalize_genre(plugin._log, plugin.alias_patterns, input_genre) - assert result == expected_genre - - -@pytest.mark.parametrize( - "invalid_config, expected_error", - [ - # Plain string instead of mapping - ("/path/to/aliases.txt", "must be a dict"), - # Integer - (42, "must be a dict"), - # Mapping with non-list value - ({"hip hop": "hip-hop"}, "must be a list"), - ], -) -def test_aliases_config_format_errors(config, invalid_config, expected_error): - """Test that invalid aliases config values raise confuse.ConfigTypeError.""" - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = invalid_config - with pytest.raises(confuse.ConfigTypeError) as exc_info: - lastgenre.LastGenrePlugin() - assert expected_error in str(exc_info.value) - - -# Aliases: integration with _resolve_genres() - - -def test_aliases_normalize_before_whitelist(config): - """Aliases normalize BEFORE whitelist filtering. - - 'hip-hop' is not on the whitelist but 'hip hop' is. With aliases - enabled the tag must survive whitelist filtering. - """ - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop", "hiphop"]} - plugin = lastgenre.LastGenrePlugin() - plugin.setup() - # Inject only 'hip hop' into the whitelist to prove the alias fired. - plugin.whitelist = {"hip hop"} - - result = plugin._resolve_genres(["hip-hop"]) - assert result == ["hip hop"], ( - "alias must normalize 'hip-hop' → 'hip hop' before whitelist check" - ) - - -def test_aliases_normalize_before_ignorelist(config): - """Aliases normalize BEFORE ignorelist filtering. - - If 'hip hop' is ignored but 'hip-hop' is fed in, the alias fires first - so the result is empty (correctly ignored). 
- """ - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop"]} - plugin = lastgenre.LastGenrePlugin() - plugin.setup() - plugin.ignore_patterns = { - "*": [re.compile("hip hop", re.IGNORECASE)], - } - - result = plugin._resolve_genres(["hip-hop"]) - assert result == [], ( - "alias must normalize 'hip-hop' before ignorelist check drops it" + @pytest.mark.parametrize( + "aliases_dict, genre, expected", + [ + # Static replacement + ({"foo bar": ["foobar"]}, "foobar", "foo bar"), + # Template with \g<1> back-reference + ( + {r"\g<1> music": [r"(fake)[ /-]*music"]}, + "fake-music", + "fake music", + ), + # Template with \g<1> and \g<2> back-references + ({r"\g<1>-\g<2>": [r"(x)[ /-]*(y)"]}, "x y", "x-y"), + # Case-insensitive matching + ({"foo bar": ["foobar"]}, "FOOBAR", "foo bar"), + # No match — genre returned as-is (lowercased) + ({"foo bar": ["foobar"]}, "jazz", "jazz"), + # Empty alias list → no-op + ({}, "something", "something"), + ], ) + def test_normalize_genre( + self, + aliases_dict: dict[str, list[str]], + genre: str, + expected: str, + ) -> None: + """Test normalize_genre() with static and template canonical names.""" + alias_patterns = [ + (re.compile(pat, re.IGNORECASE), template.lower()) + for template, patterns in aliases_dict.items() + for pat in patterns + ] + assert normalize_genre(Mock(), alias_patterns, genre) == expected + def test_normalize_genre_invalid_template_does_not_crash(self) -> None: + """Invalid replacement templates are skipped instead of crashing.""" + logger = Mock() + alias_patterns = [ + (re.compile(r"(hip)[ /-]*hop", re.IGNORECASE), r"\g<2> hop") + ] -def test_aliases_normalize_existing_tags(config): - """Aliases also normalize genres already in the file tag. 
+ assert normalize_genre(logger, alias_patterns, "hip-hop") == "hip-hop" + logger.warning.assert_called_once() - Existing genres passed as *old* in keep_genres are lowercased and then - flow through _resolve_genres, so aliases must fire for them too. - """ - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = { - "drum and bass": [r"d(rum)?[ &n/]*b(ass)?"] - } - plugin = lastgenre.LastGenrePlugin() - plugin.setup() - plugin.whitelist = {"drum and bass"} + def test_aliases_config_format(self, config): + """Test _load_aliases() loading from inline config dict.""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + # Multi-pattern list: proves all patterns are loaded, not just the first + config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop", "hiphop"]} + plugin = lastgenre.LastGenrePlugin() + assert ( + normalize_genre(plugin._log, plugin.alias_patterns, "hip-hop") + == "hip hop" + ) + assert ( + normalize_genre(plugin._log, plugin.alias_patterns, "hiphop") + == "hip hop" + ) - # Simulate existing tag variant 'dnb' passing through resolve. 
- result = plugin._resolve_genres(["dnb"]) - assert result == ["drum and bass"], ( - "alias must normalize existing tag variant 'dnb' before whitelist check" + @pytest.mark.parametrize( + "invalid_config, expected_error", + [ + # Plain string instead of mapping + ("/path/to/aliases.txt", "must be a dict"), + # Integer + (42, "must be a dict"), + # Mapping with non-list value + ({"hip hop": "hip-hop"}, "must be a list"), + ], ) + def test_aliases_config_format_errors( + self, config, invalid_config, expected_error + ): + """Test that invalid aliases config values raise confuse.ConfigTypeError.""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = invalid_config + with pytest.raises(confuse.ConfigTypeError) as exc_info: + lastgenre.LastGenrePlugin() + assert expected_error in str(exc_info.value) + def test_normalize_before_whitelist(self, config): + """Aliases normalize BEFORE whitelist filtering. -def test_aliases_default_bundled_loads(config): - """With aliases: true (default), the bundled aliases.yaml is loaded.""" - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = True - plugin = lastgenre.LastGenrePlugin() - # Bundled file should have at least one entry. - assert len(plugin.alias_patterns) > 0, ( - "bundled aliases.yaml must contain entries" - ) - + 'hip-hop' is not on the whitelist but 'hip hop' is. With aliases + enabled the tag must survive whitelist filtering. + """ + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop", "hiphop"]} + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + # Inject only 'hip hop' into the whitelist to prove the alias fired. 
+ plugin.whitelist = {"hip hop"} -def test_aliases_disabled(config): - """With aliases: false, no normalization is performed.""" - config["lastgenre"]["ignorelist"] = ( - False # prevent state leak from earlier tests - ) - config["lastgenre"]["aliases"] = False - plugin = lastgenre.LastGenrePlugin() - assert plugin.alias_patterns == [] - # normalize_genre with an empty list must return the genre unchanged. - assert ( - normalize_genre(plugin._log, plugin.alias_patterns, "hip-hop") - == "hip-hop" - ) - assert ( - normalize_genre(plugin._log, plugin.alias_patterns, "hiphop") - == "hiphop" - ) + result = plugin._resolve_genres(["hip-hop"]) + assert result == ["hip hop"], ( + "alias must normalize 'hip-hop' → 'hip hop' before whitelist check" + ) + def test_normalize_before_ignorelist(self, config): + """Aliases normalize BEFORE ignorelist filtering. -# Aliases: LastFmClient normalization tests - - -def test_client_normalization(config): - """LastFmClient must normalize tags using aliases before filtering.""" - # Setup aliases: 'hip-hop' -> 'hip hop' - aliases_config = {r"hip hop": [r"hip-hop"]} - # Setup ignorelist: ignore 'hip hop' - ignorelist_config = {"*": ["hip hop"]} - - # Mock pylast objects - mock_tag = Mock() - mock_tag.item.get_name.return_value = "hip-hop" - mock_tag.weight = 100 - mock_lastfm_obj = Mock() - mock_lastfm_obj.get_top_tags.return_value = [mock_tag] - - # Initialize client manually - alias_patterns = [ - (re.compile(pat, re.IGNORECASE), template.lower()) - for template, patterns in aliases_config.items() - for pat in patterns - ] - ignore_patterns = { - artist: [re.compile(pattern, re.IGNORECASE) for pattern in patterns] - for artist, patterns in ignorelist_config.items() - } - client = lastgenre.client.LastFmClient( - Mock(), 10, ignore_patterns, alias_patterns - ) + If 'hip hop' is ignored but 'hip-hop' is fed in, the alias fires first + so the result is empty (correctly ignored). 
+ """ + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = {"hip hop": ["hip-hop"]} + plugin = lastgenre.LastGenrePlugin() + plugin.setup() + plugin.ignore_patterns = { + "*": [re.compile("hip hop", re.IGNORECASE)], + } - # 1. Test fetch directly: returns raw (un-normalized) tags from pylast - tags = client.fetch("track", mock_lastfm_obj) - assert tags == ["hip-hop"], "client.fetch must return raw tags" + result = plugin._resolve_genres(["hip-hop"]) + assert result == [], ( + "alias must normalize 'hip-hop' before ignorelist check drops it" + ) - # 2. Test _last_lookup with ignorelist and aliases: - # 'hip-hop' normalized to 'hip hop', which is then ignored. - # Result should be empty. - def mock_method(*args): - return mock_lastfm_obj + def test_disabled(self, config): + """With aliases: false, no normalization is performed.""" + config["lastgenre"]["ignorelist"] = ( + False # prevent state leak from earlier tests + ) + config["lastgenre"]["aliases"] = False + plugin = lastgenre.LastGenrePlugin() + assert plugin.alias_patterns == [] + # normalize_genre with an empty list must return the genre unchanged. 
+ assert ( + normalize_genre(plugin._log, plugin.alias_patterns, "hip-hop") + == "hip-hop" + ) - result = client._last_lookup("track", mock_method, "artist", "title") - assert result == [], ( - "normalized 'hip hop' must be caught and filtered by ignorelist in _last_lookup" + @pytest.mark.parametrize( + "input_genre, expected_genre", + [ + ("dnb", "drum and bass"), + ("drum n bass", "drum and bass"), + ("r&b", "rhythm and blues"), + ("rnb", "rhythm and blues"), + ("rock & roll", "rock and roll"), + ("rock'n'roll", "rock and roll"), + ("kpop", "k-pop"), + ("j rock", "j-rock"), + ("post rock", "post-rock"), + ("lofi", "lo-fi"), + ("lo fi", "lo-fi"), + ("p funk", "p-funk"), + ("synth pop", "synthpop"), + ("avantgarde", "avant-garde"), + ("avant gard", "avant-garde"), + ("nu-jazz", "nu jazz"), + ("nu-metal", "nu metal"), + ("nu-soul", "nu soul"), + ("nu disco", "nu disco"), + ("electronic music", "electronic"), + ("world", "world music"), + ("chill", "chillout"), + ("chill out", "chillout"), + ("chill-out", "chillout"), + ("dark wave", "darkwave"), + ("dark-wave", "darkwave"), + ("blues rock", "blues rock"), + ("blues-rock", "blues rock"), + ("folk-rock", "folk rock"), + ("downbeat", "downtempo"), + ("shoegazer", "shoegaze"), + ("shoegazing", "shoegaze"), + ("hip-hop", "hip hop"), + ("triphop", "trip hop"), + ("alt", "alternative rock"), + ("alt rock", "alternative rock"), + ("alternative", "alternative rock"), + ("goth", "gothic rock"), + ("goth rock", "gothic rock"), + ("gothic rock", "gothic rock"), + ("prog", "progressive rock"), + ("prog rock", "progressive rock"), + ("progressive rock", "progressive rock"), + ("trad", "traditional folk"), + ("traditional", "traditional folk"), + ], ) + def test_default_aliases_logic(self, config, input_genre, expected_genre): + """Verify that bundled aliases.yaml correctly handles common variants.""" + config["lastgenre"]["ignorelist"] = False + config["lastgenre"]["aliases"] = True + plugin = lastgenre.LastGenrePlugin() + result 
= normalize_genre( + plugin._log, plugin.alias_patterns, input_genre + ) + assert result == expected_genre + + def test_client_normalizes_in_last_lookup(self): + """LastFmClient._last_lookup applies alias normalization then ignorelist.""" + alias_patterns = [(re.compile(r"hip-hop", re.IGNORECASE), "hip hop")] + ignore_patterns = {"*": [re.compile("hip hop", re.IGNORECASE)]} + client = lastgenre.client.LastFmClient( + Mock(), 0, ignore_patterns, alias_patterns + ) + mock_lastfm_obj = Mock() + mock_lastfm_obj.get_top_tags.return_value = [] + # Seed the cache directly to avoid a real network call. + client._genre_cache["track.artist-title"] = ["hip-hop"] -@pytest.mark.parametrize( - "input_genre, expected_genre", - [ - ("dnb", "drum and bass"), - ("drum n bass", "drum and bass"), - ("r&b", "rhythm and blues"), - ("rnb", "rhythm and blues"), - ("rock & roll", "rock and roll"), - ("rock'n'roll", "rock and roll"), - ("kpop", "k-pop"), - ("j rock", "j-rock"), - ("post rock", "post-rock"), - ("lofi", "lo-fi"), - ("lo fi", "lo-fi"), - ("p funk", "p-funk"), - ("synth pop", "synthpop"), - ("avantgarde", "avant-garde"), - ("avant gard", "avant-garde"), - ("nu-jazz", "nu jazz"), - ("nu-metal", "nu metal"), - ("nu-soul", "nu soul"), - ("nu disco", "nu disco"), - ("electronic music", "electronic"), - ("world", "world music"), - ("chill", "chillout"), - ("chill out", "chillout"), - ("chill-out", "chillout"), - ("dark wave", "darkwave"), - ("dark-wave", "darkwave"), - ("blues rock", "blues rock"), - ("blues-rock", "blues rock"), - ("folk-rock", "folk rock"), - ("downbeat", "downtempo"), - ("shoegazer", "shoegaze"), - ("shoegazing", "shoegaze"), - ("hip-hop", "hip hop"), - ("triphop", "trip hop"), - ("alt", "alternative rock"), - ("alt rock", "alternative rock"), - ("alternative", "alternative rock"), - ("goth", "gothic rock"), - ("goth rock", "gothic rock"), - ("gothic rock", "gothic rock"), - ("prog", "progressive rock"), - ("prog rock", "progressive rock"), - ("progressive rock", 
"progressive rock"), - ("trad", "traditional folk"), - ("traditional", "traditional folk"), - ], -) -def test_default_aliases_logic(config, input_genre, expected_genre): - """Verify that bundled aliases.yaml correctly handles common variants.""" - config["lastgenre"]["ignorelist"] = False - config["lastgenre"]["aliases"] = True - plugin = lastgenre.LastGenrePlugin() - result = normalize_genre(plugin._log, plugin.alias_patterns, input_genre) - assert result == expected_genre + result = client._last_lookup("track", Mock(), "artist", "title") + assert result == [], ( + "'hip-hop' must be normalized to 'hip hop' then filtered by ignorelist" + ) From 9359a7dd97322da73c032c6bbd9630b03927b77b Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Fri, 1 May 2026 19:02:06 +0200 Subject: [PATCH 18/22] Reword comment on config bool/mapping handling --- beetsplug/lastgenre/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beetsplug/lastgenre/__init__.py b/beetsplug/lastgenre/__init__.py index 597de30795..321593ae64 100644 --- a/beetsplug/lastgenre/__init__.py +++ b/beetsplug/lastgenre/__init__.py @@ -282,8 +282,8 @@ def _load_aliases(self) -> GenreAliasPatterns: if not aliases_dict: return [] else: - # Validate only the effective aliases value to avoid stale lower- - # priority config layers affecting type checking. + # aliases defaults to True (unlike ignorelist), so MappingValues + # would raise on the boolean default layer. 
aliases_cfg = confuse.Configuration("lastgenre_aliases", read=False) aliases_cfg.set({"aliases": aliases_raw}) aliases_dict = aliases_cfg["aliases"].get( From 0e27872609c2baae8a721dcde4fb0669d7c8ac1d Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Sun, 19 Apr 2026 20:32:58 +0200 Subject: [PATCH 19/22] Fixes to default aliases.yaml --- beetsplug/lastgenre/aliases.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml index 013cf52dc0..2fcd856b9a 100644 --- a/beetsplug/lastgenre/aliases.yaml +++ b/beetsplug/lastgenre/aliases.yaml @@ -27,11 +27,8 @@ rock and roll: # Hyphenate j-pop, k-pop, c-pop, post-rock, etc. # Matches: kpop, k pop -> k-pop; j rock -> j-rock; post rock -> post-rock # -# Hyphenate neo-soul, euro-house, tech-house, etc. -# (Negative lookaheads exclude 'european' and 'techno'/'technic*'.) \g<1>-\g<2>: - - (c|k|j)[ /-]*(folk|goth|hip hop|pop|rock|ska|trance) - - (euro(?!p[ae]+n?)|neo|post|tech(?!n[io]))[ /-]*(\w+) + - (c|k|j)[ /-]*(folk|goth|pop|rock|ska|trance) # lo-fi, glo-fi (lofi, lo fi -> lo-fi) lo-fi: From dfa54b5613530368bdd5c49a90b7ccdf8742e43d Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Mon, 4 May 2026 17:40:18 +0200 Subject: [PATCH 20/22] Hyphenate post rock in default aliases --- beetsplug/lastgenre/aliases.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml index 2fcd856b9a..ef314ddeef 100644 --- a/beetsplug/lastgenre/aliases.yaml +++ b/beetsplug/lastgenre/aliases.yaml @@ -24,12 +24,16 @@ rock and roll: # --------------------------------------------------------------------------- -# Hyphenate j-pop, k-pop, c-pop, post-rock, etc. -# Matches: kpop, k pop -> k-pop; j rock -> j-rock; post rock -> post-rock +# Hyphenate j-pop, k-pop, c-pop, etc.
+# Matches: kpop, k pop -> k-pop; j rock -> j-rock; # \g<1>-\g<2>: - (c|k|j)[ /-]*(folk|goth|pop|rock|ska|trance) +# post-rock, post-punk, post-metal, etc. (post rock -> post-rock) +post-\g<1>: + - post[ /]+(\w+) + # lo-fi, glo-fi (lofi, lo fi -> lo-fi) lo-fi: - (g?lo)[ /-]*fi From 112340045b55910e9750033a149ca3d083b9c65a Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Mon, 4 May 2026 17:45:09 +0200 Subject: [PATCH 21/22] Remove redundant hyphens for some default aliases --- beetsplug/lastgenre/aliases.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml index ef314ddeef..2fd563837f 100644 --- a/beetsplug/lastgenre/aliases.yaml +++ b/beetsplug/lastgenre/aliases.yaml @@ -28,7 +28,7 @@ rock and roll: # Matches: kpop, k pop -> k-pop; j rock -> j-rock; # \g<1>-\g<2>: - - (c|k|j)[ /-]*(folk|goth|pop|rock|ska|trance) + - (c|k|j)[ /]*(folk|goth|pop|rock|ska|trance) # post-rock, post-punk, post-metal, etc. (post rock -> post-rock) post-\g<1>: @@ -36,11 +36,11 @@ post-\g<1>: # lo-fi, glo-fi (lofi, lo fi -> lo-fi) lo-fi: - - (g?lo)[ /-]*fi + - (g?lo)[ /]*fi # p-funk, g-funk, etc. (p funk -> p-funk) \g<1>-funk: - - (p|g)[ /-]*funk + - (p|g)[ /]*funk # synthpop, synthwave, etc. (synth pop -> synthpop) synth\g<1>: @@ -48,7 +48,7 @@ synth\g<1>: # avant-garde (avantgarde, avant gard, avant-gard) avant-garde: - - avant[ /-]*(gard(e)?)? + - avant[ /]*(gard(e)?)?
- avant-gard - avant From 0eaca9dd6bd9b968a9849af18689e9dc49bf0523 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Mon, 4 May 2026 17:50:40 +0200 Subject: [PATCH 22/22] Remove redundant slashes for some default aliases --- beetsplug/lastgenre/aliases.yaml | 44 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/beetsplug/lastgenre/aliases.yaml b/beetsplug/lastgenre/aliases.yaml index 2fd563837f..602b889acd 100644 --- a/beetsplug/lastgenre/aliases.yaml +++ b/beetsplug/lastgenre/aliases.yaml @@ -11,7 +11,7 @@ drum and bass: - d(rum)?[ &n/]*b(ass)? -# rhythm and blues (r&b, rnb, ...) +# rhythm and blues (r&b, rnb, rhythm/blues, ...) rhythm and blues: - r(hythm)?[ &n/]*b(lues)? @@ -28,27 +28,27 @@ rock and roll: # Matches: kpop, k pop -> k-pop; j rock -> j-rock; # \g<1>-\g<2>: - - (c|k|j)[ /]*(folk|goth|pop|rock|ska|trance) + - (c|k|j) *(folk|goth|pop|rock|ska|trance) # post-rock, post-punk, post-metal, etc. (post rock -> post-rock) post-\g<1>: - - post[ /]+(\w+) + - post +(\w+) # lo-fi, glo-fi (lofi, lo fi -> lo-fi) lo-fi: - - (g?lo)[ /]*fi + - (g?lo) *fi # p-funk, g-funk, etc. (p funk -> p-funk) \g<1>-funk: - - (p|g)[ /]*funk + - (p|g) *funk # synthpop, synthwave, etc. (synth pop -> synthpop) synth\g<1>: - - synth[ /-]+(\w+) + - synth[ -]+(\w+) # avant-garde (avantgarde, avant gard, avant-gard) avant-garde: - - avant[ /]*(gard(e)?)? + - avant *(gard(e)?)? 
- avant-gard - avant @@ -59,7 +59,7 @@ avant-garde: # Matches: nu-jazz -> nu jazz; nu disco -> nu disco # Note: 'nu-disco' is hyphenated in the tree but 'nu jazz' isn't in genres.txt nu \g<1>: - - nu[ /-]*(disco|jazz|metal|soul) + - nu[ -]*(disco|jazz|metal|soul) # --------------------------------------------------------------------------- # Terminology / Synonym / Translation fixes @@ -67,7 +67,7 @@ nu \g<1>: # electronic (electronic music, elektronika) electronic: - - electronic[ /]music + - electronic music # world music (world) world music: @@ -75,15 +75,15 @@ world music: # chillout (chill, chill out, chill-out) chillout: - - chill([ /-]*out)? + - chill([ -]*out)? # darkwave (dark wave) darkwave: - - dark[ /-]*wave + - dark[ -]*wave # downtempo (downbeat) downtempo: - - down[ /-]*beat + - down[ -]*beat # shoegaze (shoegazer, shoegazing) shoegaze: @@ -96,7 +96,7 @@ shoegaze: # Normalized spacing: hip-hop, hiphop -> hip hop \g<1> hop: - - (glitch|hip|jazz|trip)y?([ /-]*hip)?[ /-]*hop + - (glitch|hip|jazz|trip)y?([ -]*hip)?[ -]*hop # --------------------------------------------------------------------------- # Abbreviations & International Spellings @@ -104,31 +104,31 @@ shoegaze: # blues rock (blues-rock) blues rock: - - blues[ /-]*rock + - blues[ -]*rock # folk rock (folk-rock) folk rock: - - folk[ /-]*rock + - folk[ -]*rock # alternative rock (alt, alternative, alt rock, alternative rock, ...) alternative rock: - - alt([ /-]*rock)? - - alternative([ /-]*rock)? + - alt([ -]*rock)? + - alternative([ -]*rock)? # indie rock (indie, indie rock) indie rock: - - indie([ /-]*rock)? + - indie([ -]*rock)? # gothic rock (goth, goth rock) - doesn't catch gothic metal gothic rock: - - goth(?!ic)([ /-]*rock)? - - gothic[ /-]*rock + - goth(?!ic)([ -]*rock)? + - gothic[ -]*rock # progressive rock (prog, prog rock, progressive rock) # Note: mapping standalone 'progressive' is avoided to prevent catching 'progressive metal', etc. progressive rock: - - prog([ /-]*rock)? 
- - progressive[ /-]*rock + - prog([ -]*rock)? + - progressive[ -]*rock # traditional folk (trad, traditional) # Note: avoids matching 'trad jazz' or 'traditional country'.