df
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
This commit is contained in:
@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
|
||||
s = s.replace("\u2011", "-") # non-breaking hyphen
|
||||
s = s.replace("\u2012", "-") # figure dash
|
||||
s = s.replace("\u2015", "-") # horizontal bar
|
||||
|
||||
# Collapse any whitespace runs (including newlines/tabs) to a single space.
|
||||
# Some sources wrap the artist name or title across lines.
|
||||
try:
|
||||
s = re.sub(r"\s+", " ", s).strip()
|
||||
except Exception:
|
||||
s = " ".join(s.split())
|
||||
return s
|
||||
|
||||
|
||||
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
|
||||
if ch.isspace():
|
||||
while i < len(literal) and literal[i].isspace():
|
||||
i += 1
|
||||
out.append(r"\\s*")
|
||||
out.append(r"\s*")
|
||||
continue
|
||||
out.append(re.escape(ch))
|
||||
i += 1
|
||||
@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
|
||||
raise ValueError("extract template must contain at least one (field)")
|
||||
|
||||
field_names: List[str] = []
|
||||
parts: List[str] = [r"^\\s*"]
|
||||
parts: List[str] = [r"^\s*"]
|
||||
last_end = 0
|
||||
|
||||
for idx, m in enumerate(matches):
|
||||
@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
|
||||
raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
|
||||
field_names.append(raw_name)
|
||||
|
||||
name_lower = raw_name.lower()
|
||||
is_last = idx == (len(matches) - 1)
|
||||
if is_last:
|
||||
parts.append(fr"(?P<{raw_name}>.+)")
|
||||
else:
|
||||
parts.append(fr"(?P<{raw_name}>.+?)")
|
||||
# Heuristic: common numeric fields should capture full digit runs.
|
||||
# This avoids ambiguous splits like track='2', title='3 ...'.
|
||||
if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
|
||||
parts.append(fr"(?P<{raw_name}>\d+)")
|
||||
else:
|
||||
parts.append(fr"(?P<{raw_name}>.+?)")
|
||||
|
||||
last_end = m.end()
|
||||
|
||||
tail = tpl[last_end:]
|
||||
if tail:
|
||||
parts.append(_literal_to_title_pattern_regex(tail))
|
||||
parts.append(r"\\s*$")
|
||||
parts.append(r"\s*$")
|
||||
|
||||
rx = "".join(parts)
|
||||
return re.compile(rx, flags=re.IGNORECASE), field_names
|
||||
|
||||
Reference in New Issue
Block a user