dfdfdd
This commit is contained in:
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
from typing import Any, Dict, List, Sequence, Optional
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import re
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
@@ -26,6 +27,184 @@ from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
_FIELD_NAME_RE = re.compile(r"^[A-Za-z0-9_]+$")
|
||||
|
||||
|
||||
def _normalize_title_for_extract(text: str) -> str:
|
||||
"""Normalize common separators in titles for matching.
|
||||
|
||||
Helps when sources use unicode dashes or odd whitespace.
|
||||
"""
|
||||
|
||||
s = str(text or "").strip()
|
||||
if not s:
|
||||
return s
|
||||
# Common unicode dash variants -> '-'
|
||||
s = s.replace("\u2013", "-") # en dash
|
||||
s = s.replace("\u2014", "-") # em dash
|
||||
s = s.replace("\u2212", "-") # minus sign
|
||||
s = s.replace("\u2010", "-") # hyphen
|
||||
s = s.replace("\u2011", "-") # non-breaking hyphen
|
||||
s = s.replace("\u2012", "-") # figure dash
|
||||
s = s.replace("\u2015", "-") # horizontal bar
|
||||
return s
|
||||
|
||||
|
||||
def _strip_title_prefix(text: str) -> str:
|
||||
s = str(text or "").strip()
|
||||
if s.lower().startswith("title:"):
|
||||
s = s.split(":", 1)[1].strip()
|
||||
return s
|
||||
|
||||
|
||||
def _literal_to_title_pattern_regex(literal: str) -> str:
|
||||
"""Convert a literal chunk of a template into a regex fragment.
|
||||
|
||||
Keeps punctuation literal, but treats any whitespace run as \\s*.
|
||||
"""
|
||||
|
||||
out: List[str] = []
|
||||
i = 0
|
||||
while i < len(literal):
|
||||
ch = literal[i]
|
||||
if ch.isspace():
|
||||
while i < len(literal) and literal[i].isspace():
|
||||
i += 1
|
||||
out.append(r"\\s*")
|
||||
continue
|
||||
out.append(re.escape(ch))
|
||||
i += 1
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
|
||||
"""Compile a simple (field) template into a regex.
|
||||
|
||||
Example template:
|
||||
(artist) - (album) - (disk)-(track) (title)
|
||||
|
||||
This is *not* user-facing regex: we only support named fields in parentheses.
|
||||
"""
|
||||
|
||||
tpl = str(template or "").strip()
|
||||
if not tpl:
|
||||
raise ValueError("empty extract template")
|
||||
|
||||
matches = list(re.finditer(r"\(([^)]+)\)", tpl))
|
||||
if not matches:
|
||||
raise ValueError("extract template must contain at least one (field)")
|
||||
|
||||
field_names: List[str] = []
|
||||
parts: List[str] = [r"^\\s*"]
|
||||
last_end = 0
|
||||
|
||||
for idx, m in enumerate(matches):
|
||||
literal = tpl[last_end : m.start()]
|
||||
if literal:
|
||||
parts.append(_literal_to_title_pattern_regex(literal))
|
||||
|
||||
raw_name = (m.group(1) or "").strip()
|
||||
if not raw_name or not _FIELD_NAME_RE.fullmatch(raw_name):
|
||||
raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
|
||||
field_names.append(raw_name)
|
||||
|
||||
is_last = idx == (len(matches) - 1)
|
||||
if is_last:
|
||||
parts.append(fr"(?P<{raw_name}>.+)")
|
||||
else:
|
||||
parts.append(fr"(?P<{raw_name}>.+?)")
|
||||
|
||||
last_end = m.end()
|
||||
|
||||
tail = tpl[last_end:]
|
||||
if tail:
|
||||
parts.append(_literal_to_title_pattern_regex(tail))
|
||||
parts.append(r"\\s*$")
|
||||
|
||||
rx = "".join(parts)
|
||||
return re.compile(rx, flags=re.IGNORECASE), field_names
|
||||
|
||||
|
||||
def _extract_tags_from_title(title_text: str, template: str) -> List[str]:
    """Match a title against *template* and emit one ``field:value`` tag per captured field.

    The title is first stripped of any ``title:`` prefix and dash-normalized.
    An empty title or a failed match produces ``[]``; fields whose captured
    text is blank after trimming are left out. Errors raised while compiling
    *template* propagate to the caller.
    """
    cleaned_title = _normalize_title_for_extract(_strip_title_prefix(title_text))
    if not cleaned_title:
        return []

    regex, names = _compile_extract_template(template)
    found = regex.match(cleaned_title)
    if found is None:
        return []

    # Pair each field with its trimmed capture, then keep only non-blank ones.
    captured = ((field, (found.group(field) or "").strip()) for field in names)
    return [f"{field}:{value}" for field, value in captured if value]
|
||||
|
||||
|
||||
def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[str]] = None) -> List[str]:
    """Collect the strings that may hold an item's title, best source first.

    Sources, in order: the item's ``title`` field, the value returned by
    ``_extract_title_tag`` for the tag list (*existing_tags* when it is a
    list, otherwise ``_extract_item_tags(res)``), and the stem of the item's
    ``path``. Each value is prefix-stripped and dash-normalized; blanks and
    repeats are dropped.
    """
    ordered: List[str] = []

    def _offer(raw: Any) -> None:
        # Normalize before comparing so de-duplication works on the cleaned form.
        if raw is None:
            return
        cleaned = _normalize_title_for_extract(_strip_title_prefix(str(raw)))
        if cleaned and cleaned not in ordered:
            ordered.append(cleaned)

    # 1) Item's title field (may be a display title, not the title: tag)
    try:
        _offer(get_field(res, "title"))
    except Exception:
        # Best effort: a failing lookup must not block the remaining sources.
        pass
    if isinstance(res, dict):
        _offer(res.get("title"))

    # 2) title: tag from either store tags or piped tags
    if isinstance(existing_tags, list):
        tag_pool = existing_tags
    else:
        tag_pool = _extract_item_tags(res)
    _offer(_extract_title_tag(tag_pool) or "")

    # 3) Filename stem
    try:
        location = get_field(res, "path")
        if location:
            _offer((Path(str(location)).stem or "").strip())
    except Exception:
        # Best effort, same as the title lookup above.
        pass

    return ordered
|
||||
|
||||
|
||||
def _extract_tags_from_title_candidates(candidates: List[str], template: str) -> tuple[List[str], Optional[str]]:
|
||||
"""Try candidates in order; return (tags, matched_candidate)."""
|
||||
|
||||
for c in candidates:
|
||||
extracted = _extract_tags_from_title(c, template)
|
||||
if extracted:
|
||||
return extracted, c
|
||||
return [], None
|
||||
|
||||
|
||||
def _try_compile_extract_template(template: Optional[str]) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
|
||||
"""Compile template for debug; return (pattern, error_message)."""
|
||||
if template is None:
|
||||
return None, None
|
||||
try:
|
||||
pattern, _fields = _compile_extract_template(str(template))
|
||||
return pattern, None
|
||||
except Exception as exc:
|
||||
return None, str(exc)
|
||||
|
||||
|
||||
def _extract_title_tag(tags: List[str]) -> Optional[str]:
|
||||
"""Return the value of the first title: tag if present."""
|
||||
for t in tags:
|
||||
@@ -242,6 +421,8 @@ class Add_Tag(Cmdlet):
|
||||
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
|
||||
SharedArgs.QUERY,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("-extract", type="string", description="Extract tags from the item's title using a simple template with (field) placeholders. Example: -extract \"(artist) - (album) - (disk)-(track) (title)\" will add artist:, album:, disk:, track:, title: tags."),
|
||||
CmdletArg("--extract-debug", type="flag", description="Print debug info for -extract matching (matched title source and extracted tags)."),
|
||||
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
|
||||
@@ -258,6 +439,7 @@ class Add_Tag(Cmdlet):
|
||||
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
|
||||
"- The source namespace must already exist in the file being tagged.",
|
||||
"- Target namespaces that already have a value are skipped (not overwritten).",
|
||||
"- Use -extract to derive namespaced tags from the current title (title field or title: tag) using a simple template.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
@@ -272,6 +454,13 @@ class Add_Tag(Cmdlet):
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
extract_template = parsed.get("extract")
|
||||
if extract_template is not None:
|
||||
extract_template = str(extract_template)
|
||||
|
||||
extract_debug = bool(parsed.get("extract-debug", False))
|
||||
extract_debug_rx, extract_debug_err = _try_compile_extract_template(extract_template)
|
||||
|
||||
query_hash = sh.parse_single_hash_query(parsed.get("query"))
|
||||
if parsed.get("query") and not query_hash:
|
||||
log("[add_tag] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
|
||||
@@ -304,8 +493,10 @@ class Add_Tag(Cmdlet):
|
||||
if isinstance(raw_tag, str):
|
||||
raw_tag = [raw_tag]
|
||||
|
||||
# Fallback: if no tag provided explicitly, try to pull from first result payload
|
||||
if not raw_tag and results:
|
||||
# Fallback: if no tag provided explicitly, try to pull from first result payload.
|
||||
# IMPORTANT: when -extract is used, users typically want *only* extracted tags,
|
||||
# not "re-add whatever tags are already in the payload".
|
||||
if not raw_tag and results and not extract_template:
|
||||
first = results[0]
|
||||
payload_tag = None
|
||||
|
||||
@@ -341,8 +532,12 @@ class Add_Tag(Cmdlet):
|
||||
tag_to_add = parse_tag_arguments(raw_tag)
|
||||
tag_to_add = expand_tag_groups(tag_to_add)
|
||||
|
||||
if not tag_to_add:
|
||||
log("No tag provided to add", file=sys.stderr)
|
||||
if not tag_to_add and not extract_template:
|
||||
log("No tag provided to add (and no -extract template provided)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if extract_template and extract_debug and extract_debug_err:
|
||||
log(f"[add_tag] extract template error: {extract_debug_err}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get other flags
|
||||
@@ -355,6 +550,9 @@ class Add_Tag(Cmdlet):
|
||||
|
||||
store_registry = Store(config)
|
||||
|
||||
extract_matched_items = 0
|
||||
extract_no_match_items = 0
|
||||
|
||||
for res in results:
|
||||
store_name: Optional[str]
|
||||
raw_hash: Optional[str]
|
||||
@@ -389,6 +587,24 @@ class Add_Tag(Cmdlet):
|
||||
existing_lower = {t.lower() for t in existing_tag_list if isinstance(t, str)}
|
||||
|
||||
item_tag_to_add = list(tag_to_add)
|
||||
|
||||
if extract_template:
|
||||
candidates = _get_title_candidates_for_extraction(res, existing_tag_list)
|
||||
extracted, matched = _extract_tags_from_title_candidates(candidates, extract_template)
|
||||
if extracted:
|
||||
extract_matched_items += 1
|
||||
if extract_debug:
|
||||
log(f"[add_tag] extract matched: {matched!r} -> {extracted}", file=sys.stderr)
|
||||
for new_tag in extracted:
|
||||
if new_tag.lower() not in existing_lower:
|
||||
item_tag_to_add.append(new_tag)
|
||||
else:
|
||||
extract_no_match_items += 1
|
||||
if extract_debug:
|
||||
rx_preview = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
|
||||
cand_preview = "; ".join([repr(c) for c in candidates[:3]])
|
||||
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}", file=sys.stderr)
|
||||
|
||||
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
|
||||
|
||||
if duplicate_arg:
|
||||
@@ -492,6 +708,24 @@ class Add_Tag(Cmdlet):
|
||||
|
||||
# Per-item tag list (do not mutate shared list)
|
||||
item_tag_to_add = list(tag_to_add)
|
||||
|
||||
if extract_template:
|
||||
candidates2 = _get_title_candidates_for_extraction(res, existing_tag_list)
|
||||
extracted2, matched2 = _extract_tags_from_title_candidates(candidates2, extract_template)
|
||||
if extracted2:
|
||||
extract_matched_items += 1
|
||||
if extract_debug:
|
||||
log(f"[add_tag] extract matched: {matched2!r} -> {extracted2}", file=sys.stderr)
|
||||
for new_tag in extracted2:
|
||||
if new_tag.lower() not in existing_lower:
|
||||
item_tag_to_add.append(new_tag)
|
||||
else:
|
||||
extract_no_match_items += 1
|
||||
if extract_debug:
|
||||
rx_preview2 = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
|
||||
cand_preview2 = "; ".join([repr(c) for c in candidates2[:3]])
|
||||
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}", file=sys.stderr)
|
||||
|
||||
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
|
||||
|
||||
# Handle -duplicate logic (copy existing tag to new namespaces)
|
||||
@@ -563,6 +797,12 @@ class Add_Tag(Cmdlet):
|
||||
f"[add_tag] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
if extract_template and extract_matched_items == 0:
|
||||
log(f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)", file=sys.stderr)
|
||||
elif extract_template and extract_no_match_items > 0 and extract_debug:
|
||||
log(f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}", file=sys.stderr)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user