df
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-27 14:50:59 -08:00
parent 22af776ee2
commit fcdd507d00
12 changed files with 1004 additions and 66 deletions

View File

@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
s = s.replace("\u2011", "-") # non-breaking hyphen
s = s.replace("\u2012", "-") # figure dash
s = s.replace("\u2015", "-") # horizontal bar
# Collapse any whitespace runs (including newlines/tabs) to a single space.
# Some sources wrap the artist name or title across lines.
try:
s = re.sub(r"\s+", " ", s).strip()
except Exception:
s = " ".join(s.split())
return s
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
if ch.isspace():
while i < len(literal) and literal[i].isspace():
i += 1
out.append(r"\\s*")
out.append(r"\s*")
continue
out.append(re.escape(ch))
i += 1
@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
raise ValueError("extract template must contain at least one (field)")
field_names: List[str] = []
parts: List[str] = [r"^\\s*"]
parts: List[str] = [r"^\s*"]
last_end = 0
for idx, m in enumerate(matches):
@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
field_names.append(raw_name)
name_lower = raw_name.lower()
is_last = idx == (len(matches) - 1)
if is_last:
parts.append(fr"(?P<{raw_name}>.+)")
else:
parts.append(fr"(?P<{raw_name}>.+?)")
# Heuristic: common numeric fields should capture full digit runs.
# This avoids ambiguous splits like track='2', title='3 ...'.
if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
parts.append(fr"(?P<{raw_name}>\d+)")
else:
parts.append(fr"(?P<{raw_name}>.+?)")
last_end = m.end()
tail = tpl[last_end:]
if tail:
parts.append(_literal_to_title_pattern_regex(tail))
parts.append(r"\\s*$")
parts.append(r"\s*$")
rx = "".join(parts)
return re.compile(rx, flags=re.IGNORECASE), field_names