Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ca16d85
lastgenre: Test aliases and ship default config
JOJ0 Mar 27, 2026
9ed89de
lastgenre: Docs for genre normalization (aliases)
JOJ0 Mar 29, 2026
7b2d4b0
lastgenre: Docs hint on canonical without whitelist
JOJ0 Mar 29, 2026
b1cfe11
lastgenre: Implement genre alias normalization
JOJ0 Mar 27, 2026
5630b17
lastgenre: Slightly optimize filter_valid performance
JOJ0 Apr 10, 2026
04fe3e9
Fixes/additions to default aliases
JOJ0 Apr 11, 2026
2efb8bc
Fix normalize_genre docstring wording
JOJ0 Apr 11, 2026
56251ec
Test invalid alias template catch
JOJ0 Apr 11, 2026
95fdc90
Catch exception on invalid alias template
JOJ0 Apr 11, 2026
0e979d4
fix test aliases
JOJ0 May 1, 2026
d60ec41
lastgenre: Dedup ignore/alias regex compilation
JOJ0 May 1, 2026
a65d0b3
Streamline in test alias type with ignore naming
JOJ0 May 1, 2026
f56dc28
Streamline alias type with ignore naming
JOJ0 May 1, 2026
271a23f
lastgenre: Fix ignorelist tests naming inconsistencies
JOJ0 May 1, 2026
1f48c92
Add alt rock to default aliases test
JOJ0 May 1, 2026
942221a
Finalize default aliases and fix genres,genres-tree
JOJ0 May 1, 2026
f813e95
Restructure and reduce alias tests
JOJ0 May 1, 2026
9359a7d
Reword comment on config bool/mapping handling
JOJ0 May 1, 2026
0e27872
Fixes to default aliases.yaml
JOJ0 Apr 19, 2026
dfa54b5
Hypenate post rock in default aliases
JOJ0 May 4, 2026
1123400
Remove redundant hypens for some default aliases
JOJ0 May 4, 2026
0eaca9d
Remove redundant slashes for some default aliases
JOJ0 May 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 88 additions & 21 deletions beetsplug/lastgenre/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from __future__ import annotations

import os
import re
from collections import defaultdict
from functools import singledispatchmethod
from pathlib import Path
Expand All @@ -37,7 +36,11 @@
from beets import config, library, plugins, ui
from beets.library import Album, Item
from beets.util import plurality, unique_list
from beetsplug.lastgenre.utils import drop_ignored_genres, is_ignored
from beetsplug.lastgenre.utils import (
compile_pattern,
is_ignored,
normalize_genre,
)

from .client import LastFmClient

Expand All @@ -48,7 +51,7 @@
from beets.importer import ImportSession, ImportTask
from beets.library import LibModel

from .utils import GenreIgnorePatterns
from .utils import GenreAliasPatterns, GenreIgnorePatterns

Whitelist = set[str]
"""Set of valid genre names (lowercase). Empty set means all genres allowed."""
Expand Down Expand Up @@ -115,6 +118,7 @@ def sort_by_depth(tags: list[str], branches: CanonTree) -> list[str]:

WHITELIST = os.path.join(os.path.dirname(__file__), "genres.txt")
C14N_TREE = os.path.join(os.path.dirname(__file__), "genres-tree.yaml")
ALIASES = os.path.join(os.path.dirname(__file__), "aliases.yaml")


class LastGenrePlugin(plugins.BeetsPlugin):
Expand All @@ -137,6 +141,7 @@ def __init__(self) -> None:
"title_case": True,
"pretend": False,
"ignorelist": {},
"aliases": True,
}
)
self.setup()
Expand All @@ -150,8 +155,12 @@ def setup(self) -> None:
self.c14n_branches: CanonTree
self.c14n_branches, self.canonicalize = self._load_c14n_tree()
self.ignore_patterns: GenreIgnorePatterns = self._load_ignorelist()
self.alias_patterns: GenreAliasPatterns = self._load_aliases()
self.client = LastFmClient(
self._log, self.config["min_weight"].get(int), self.ignore_patterns
self._log,
self.config["min_weight"].get(int),
self.ignore_patterns,
self.alias_patterns,
)

def _load_whitelist(self) -> Whitelist:
Expand Down Expand Up @@ -228,24 +237,68 @@ def _load_ignorelist(self) -> GenreIgnorePatterns:

compiled_ignorelist: GenreIgnorePatterns = defaultdict(list)
for artist, patterns in raw_ignorelist.items():
artist_patterns = []
for pattern in patterns:
try:
artist_patterns.append(re.compile(pattern, re.IGNORECASE))
except re.error:
artist_patterns.append(
re.compile(re.escape(pattern), re.IGNORECASE)
)
artist_patterns = [compile_pattern(p) for p in patterns]
self._log.extra_debug(
"ignore for {}: {}",
artist,
[p.pattern for p in artist_patterns],
)

compiled_ignorelist[artist] = artist_patterns
compiled_ignorelist[artist.lower()] = artist_patterns

return compiled_ignorelist

def _load_aliases(self) -> GenreAliasPatterns:
    """Build the list of compiled genre alias entries from the config.

    The ``lastgenre.aliases`` option is expected to map a genre name to a
    list of regex patterns, for example::

        lastgenre:
            aliases:
                drum and bass:
                    - d(rum)?[ &n/]*b(ass)?
                \\g<1> hop:
                    - (glitch|hip|jazz|trip)y?[ /-]*hop

    Each key is lowercased and stored as the replacement template next to
    every one of its compiled patterns. The template is meant to be passed
    to ``re.Match.expand()``, which is why ``\\g<N>`` back-references to
    capture groups may appear in a key.

    Option values:

    - ``false``: normalization is disabled, an empty list is returned.
    - ``true`` (the default), an empty string or no value: the bundled
      ``aliases.yaml`` file is loaded.
    - anything else: validated as a mapping of string lists.

    Returns:
        A list of ``(compiled pattern, template)`` tuples in the order
        they appear in the mapping.

    Raises:
        confuse.ConfigTypeError: when a custom config value is not a
            mapping or a list entry is not a string.
    """
    configured = self.config["aliases"].get()
    if configured is False:
        return []

    if configured in (True, "", None):
        # NOTE(review): a reviewer asked why these defaults are not set in
        # the default plugin configuration instead of a bundled file.
        self._log.debug("Loading default aliases from {}", ALIASES)
        with Path(ALIASES).open(encoding="utf-8") as stream:
            mapping = yaml.safe_load(stream)
        if not mapping:
            return []
    else:
        # The option defaults to a boolean (unlike ignorelist), so applying
        # MappingValues to the plugin's own view would raise on that
        # default layer. Validate the user's value in a scratch
        # configuration that holds nothing else.
        scratch = confuse.Configuration("lastgenre_aliases", read=False)
        scratch.set({"aliases": configured})
        mapping_template = confuse.MappingValues(confuse.Sequence(str))
        mapping = scratch["aliases"].get(mapping_template)

    # Flatten {name: [pattern, ...]} into one (pattern, template) pair per
    # pattern, preserving mapping order.
    compiled: GenreAliasPatterns = [
        (compile_pattern(str(pattern)), str(name).lower())
        for name, patterns in mapping.items()
        for pattern in patterns
    ]

    self._log.extra_debug("Loaded {} alias entries", len(compiled))
    return compiled

@property
def sources(self) -> tuple[str, ...]:
"""A tuple of allowed genre sources. May contain 'track',
Expand All @@ -267,6 +320,8 @@ def _resolve_genres(
"""Canonicalize, sort and filter a list of genres.

- Returns an empty list if the input tags list is empty.
- If aliases are configured, variant spellings are normalised first
(e.g. 'hip-hop' → 'hip hop', 'dnb' → 'drum and bass').
- If canonicalization is enabled, it extends the list by incorporating
parent genres from the canonicalization tree. When a whitelist is set,
only parent tags that pass the whitelist filter are included;
Expand All @@ -286,6 +341,13 @@ def _resolve_genres(
if not tags:
return []

# Normalize variant spellings before any other processing.
if self.alias_patterns:
tags = [
normalize_genre(self._log, self.alias_patterns, tag)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we normalizing genres twice: once in the client and once here?

for tag in tags
]

count = self.config["count"].get(int)

# Canonicalization (if enabled)
Expand Down Expand Up @@ -353,14 +415,19 @@ def _filter_valid(
if not self.whitelist and not self.ignore_patterns:
return cleaned

whitelisted = [
g
for g in cleaned
if not self.whitelist or g.lower() in self.whitelist
]
return drop_ignored_genres(
self._log, self.ignore_patterns, whitelisted, artist
)
result = []
for genre in cleaned:
if self.whitelist and genre.lower() not in self.whitelist:
continue

if self.ignore_patterns and is_ignored(
self._log, self.ignore_patterns, genre, artist
):
continue

result.append(genre)

return result

# Genre resolution pipeline.

Expand Down
136 changes: 136 additions & 0 deletions beetsplug/lastgenre/aliases.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Default genre aliases for the lastgenre plugin.
#
# Keys are canonical names and support \g<N> back-references to pattern groups.
# Patterns are case-insensitive full-matches. Order matters: first match wins.

# ---------------------------------------------------------------------------
# Ampersands / Delimiters
# ---------------------------------------------------------------------------

# drum and bass (d&b, dnb, drum n bass, ...)
drum and bass:
- d(rum)?[ &n/]*b(ass)?

# rhythm and blues (r&b, rnb, rhythm/blues, ...)
rhythm and blues:
- r(hythm)?[ &n/]*b(lues)?

# rock and roll (rock & roll, rock'n'roll, rock-n-roll, ...)
rock and roll:
- rock[ '‐&n/ \-]*roll

# ---------------------------------------------------------------------------
# Consistent Delimiters (Hyphenation)
# ---------------------------------------------------------------------------


# Hyphenate j-pop, k-pop, c-pop, etc.
# Matches: kpop, k pop -> k-pop; j rock -> j-rock
\g<1>-\g<2>:
- (c|k|j) *(folk|goth|pop|rock|ska|trance)

# post-rock, post-punk, post-metal, etc. (post rock -> post-rock)
post-\g<1>:
- post +(\w+)

# lo-fi, glo-fi (lofi, lo fi -> lo-fi; glofi, glo fi -> glo-fi)
# The captured prefix is reused in the key so that glo-fi is not collapsed
# into lo-fi.
\g<1>-fi:
- (g?lo) *fi

# p-funk, g-funk, etc. (p funk -> p-funk)
\g<1>-funk:
- (p|g) *funk

# synthpop, synthwave, etc. (synth pop -> synthpop)
synth\g<1>:
- synth[ -]+(\w+)

# avant-garde (avant, avantgarde, avant gard, avant-gard)
avant-garde:
- avant *(gard(e)?)?
- avant-gard
- avant

# ---------------------------------------------------------------------------
# Nu- Genre Spelling (nu jazz, nu-disco, etc.)
# ---------------------------------------------------------------------------

# Matches: nu-jazz, nujazz -> nu jazz; nu-disco -> nu disco
# Note: 'nu-disco' is hyphenated in the tree but 'nu jazz' isn't in genres.txt
nu \g<1>:
- nu[ -]*(disco|jazz|metal|soul)

# ---------------------------------------------------------------------------
# Terminology / Synonym / Translation fixes
# ---------------------------------------------------------------------------

# electronic (electronic music); note that 'elektronika' is not matched by the pattern below
electronic:
- electronic music

# world music (world)
world music:
- world

# chillout (chill, chill out, chill-out)
chillout:
- chill([ -]*out)?

# darkwave (dark wave)
darkwave:
- dark[ -]*wave

# downtempo (downbeat)
downtempo:
- down[ -]*beat

# shoegaze (shoegazer, shoegazing)
shoegaze:
- shoegaze?r?
- shoegazing

# ---------------------------------------------------------------------------
# Form Fixes (Hip Hop, Trip Hop, etc.)
# ---------------------------------------------------------------------------

# Normalized spacing: hip-hop, hiphop -> hip hop
\g<1> hop:
- (glitch|hip|jazz|trip)y?([ -]*hip)?[ -]*hop

# ---------------------------------------------------------------------------
# Abbreviations & International Spellings
# ---------------------------------------------------------------------------

# blues rock (blues-rock)
blues rock:
- blues[ -]*rock

# folk rock (folk-rock)
folk rock:
- folk[ -]*rock

# alternative rock (alt, alternative, alt rock, alternative rock, ...)
alternative rock:
- alt([ -]*rock)?
- alternative([ -]*rock)?

# indie rock (indie, indie rock)
indie rock:
- indie([ -]*rock)?

# gothic rock (goth, goth rock) - doesn't catch gothic metal
gothic rock:
- goth(?!ic)([ -]*rock)?
- gothic[ -]*rock

# progressive rock (prog, prog rock, progressive rock)
# Note: mapping standalone 'progressive' is avoided to prevent catching 'progressive metal', etc.
progressive rock:
- prog([ -]*rock)?
- progressive[ -]*rock

# traditional folk (trad, traditional)
# Note: avoids matching 'trad jazz' or 'traditional country'.
traditional folk:
- trad(/|ition(/|al)?)?-?
19 changes: 14 additions & 5 deletions beetsplug/lastgenre/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@

from beets import plugins

from .utils import drop_ignored_genres
from .utils import is_ignored, normalize_genre

if TYPE_CHECKING:
from collections.abc import Callable

from beets.library import LibModel
from beets.logging import BeetsLogger

from .utils import GenreIgnorePatterns
from .utils import GenreAliasPatterns, GenreIgnorePatterns

GenreCache = dict[str, list[str]]
"""Cache mapping entity keys to their genre lists.
Expand Down Expand Up @@ -69,6 +69,7 @@ def __init__(
log: BeetsLogger,
min_weight: int,
ignore_patterns: GenreIgnorePatterns,
alias_patterns: GenreAliasPatterns,
):
"""Initialize the client.

Expand All @@ -78,6 +79,7 @@ def __init__(
self._log = log
self._min_weight = min_weight
self._ignore_patterns: GenreIgnorePatterns = ignore_patterns
self._alias_patterns: GenreAliasPatterns = alias_patterns
self._genre_cache: GenreCache = {}

def fetch_genres(
Expand Down Expand Up @@ -127,11 +129,18 @@ def _last_lookup(
"last.fm (unfiltered) {} tags: {}", entity, genres
)

# Apply aliases and log each change.
# Filter forbidden genres on every call so ignorelist hits are logged.
# Artist is always the first element in args (album, artist, track lookups).
return drop_ignored_genres(
self._log, self._ignore_patterns, genres, args[0]
)
result = []
for genre in genres:
if self._alias_patterns:
genre = normalize_genre(self._log, self._alias_patterns, genre)

if not is_ignored(self._log, self._ignore_patterns, genre, args[0]):
result.append(genre)

return result

def fetch(self, kind: str, obj: LibModel, *args: str) -> list[str]:
"""Fetch Last.fm genres for the specified kind and entity.
Expand Down
Loading
Loading