df

2025-12-27 14:50:59 -08:00
parent 22af776ee2
commit fcdd507d00
12 changed files with 1004 additions and 66 deletions


@@ -373,6 +373,29 @@ class HydrusNetwork:
body = {"hashes": hash_list} body = {"hashes": hash_list}
return self._post("/add_files/undelete_files", data=body) return self._post("/add_files/undelete_files", data=body)
def delete_files(self, hashes: Union[str, Iterable[str]], *, reason: str | None = None) -> dict[str, Any]:
"""Delete files in Hydrus.
Hydrus Client API: POST /add_files/delete_files
Required JSON args: {"hashes": [<sha256 hex>, ...]}
Optional JSON args: {"reason": "..."}
"""
hash_list = self._ensure_hashes(hashes)
body: dict[str, Any] = {"hashes": hash_list}
if isinstance(reason, str) and reason.strip():
body["reason"] = reason.strip()
return self._post("/add_files/delete_files", data=body)
def clear_file_deletion_record(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]:
"""Clear Hydrus's file deletion record for the provided hashes.
Hydrus Client API: POST /add_files/clear_file_deletion_record
Required JSON args: {"hashes": [<sha256 hex>, ...]}
"""
hash_list = self._ensure_hashes(hashes)
body = {"hashes": hash_list}
return self._post("/add_files/clear_file_deletion_record", data=body)
def add_tag(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]: def add_tag(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]:
hash = self._ensure_hashes(hash) hash = self._ensure_hashes(hash)
body = {"hashes": hash, "service_names_to_tags": {service_name: list(tags)}} body = {"hashes": hash, "service_names_to_tags": {service_name: list(tags)}}

CLI.py

@@ -1012,6 +1012,14 @@ class CmdletExecutor:
         ensure_registry_loaded()

+        # REPL guard: stage-local selection tables should not leak across independent
+        # commands. @ selection can always re-seed from the last result table.
+        try:
+            if hasattr(ctx, "set_current_stage_table"):
+                ctx.set_current_stage_table(None)
+        except Exception:
+            pass
         cmd_fn = REGISTRY.get(cmd_name)
         if not cmd_fn:
             # Lazy-import module and register its CMDLET.
@@ -1451,6 +1459,13 @@ class CmdletExecutor:
             ctx.set_live_progress(None)
         except Exception:
             pass
+        # Do not keep stage tables around after a single command; it can cause
+        # later @ selections to bind to stale tables (e.g. old add-file scans).
+        try:
+            if hasattr(ctx, "set_current_stage_table"):
+                ctx.set_current_stage_table(None)
+        except Exception:
+            pass
         try:
             if hasattr(ctx, "clear_current_cmdlet_name"):
                 ctx.clear_current_cmdlet_name()
@@ -2027,6 +2042,9 @@ class PipelineExecutor:
elif table_type == "bandcamp": elif table_type == "bandcamp":
print("Auto-running Bandcamp selection via download-media") print("Auto-running Bandcamp selection via download-media")
stages.append(["download-media"]) stages.append(["download-media"])
elif table_type == "internetarchive":
print("Auto-loading Internet Archive item via download-data")
stages.append(["download-data"])
elif table_type in {"soulseek", "openlibrary", "libgen"}: elif table_type in {"soulseek", "openlibrary", "libgen"}:
print("Auto-piping selection to download-file") print("Auto-piping selection to download-file")
stages.append(["download-file"]) stages.append(["download-file"])
@@ -2056,6 +2074,16 @@ class PipelineExecutor:
             ):
                 print("Auto-inserting download-media after Bandcamp selection")
                 stages.insert(0, ["download-media"])
+            if table_type == "internetarchive" and first_cmd not in (
+                "download-data",
+                "download_data",
+                "download-file",
+                "download-media",
+                "download_media",
+                ".pipe",
+            ):
+                debug("Auto-inserting download-data after Internet Archive selection")
+                stages.insert(0, ["download-data"])
             if table_type == "libgen" and first_cmd not in (
                 "download-file",
                 "download-media",
@@ -2166,6 +2194,14 @@ class PipelineExecutor:
         try:
             self._try_clear_pipeline_stop(ctx)
+            # REPL guard: stage-local tables should not persist across independent
+            # commands. Selection stages can always seed from last/display tables.
+            try:
+                if hasattr(ctx, "set_current_stage_table"):
+                    ctx.set_current_stage_table(None)
+            except Exception:
+                pass
             # Preflight (URL-duplicate prompts, etc.) should be cached within a single
             # pipeline run, not across independent pipelines.
             try:
@@ -2615,11 +2651,13 @@ class PipelineExecutor:
             if (
                 (not stage_is_last)
                 and (not emits)
-                and cmd_name in {"download-media", "download_media"}
+                and cmd_name in {"download-media", "download_media", "download-data", "download_data"}
                 and stage_table is not None
                 and (
                     stage_table_type in {"ytdlp.formatlist", "download-media", "download_media", "bandcamp", "youtube"}
                     or stage_table_source in {"download-media", "download_media"}
+                    or stage_table_type in {"internetarchive.formats"}
+                    or stage_table_source in {"download-file"}
                 )
             ):
                 try:
@@ -2812,6 +2850,13 @@ class PipelineExecutor:
                 _pipeline_ctx.set_live_progress(None)
             except Exception:
                 pass
+            # End-of-command cleanup: avoid leaking current stage tables into
+            # the next REPL command (causes stale @ selection sources).
+            try:
+                if hasattr(ctx, "set_current_stage_table"):
+                    ctx.set_current_stage_table(None)
+            except Exception:
+                pass
             if pipeline_session:
                 pipeline_session.close(status=pipeline_status, error_msg=pipeline_error)
         except Exception as exc:


@@ -220,7 +220,6 @@ class InternetArchive(Provider):
"mediatype", "mediatype",
"creator", "creator",
"date", "date",
"downloads",
"collection", "collection",
] ]
@@ -243,7 +242,11 @@ class InternetArchive(Provider):
title = str(row.get("title") or identifier).strip() or identifier title = str(row.get("title") or identifier).strip() or identifier
mediatype = str(row.get("mediatype") or "").strip() mediatype = str(row.get("mediatype") or "").strip()
creator = str(row.get("creator") or "").strip() creator_raw = row.get("creator")
if isinstance(creator_raw, list):
creator = ", ".join(str(x) for x in creator_raw if x)
else:
creator = str(creator_raw or "").strip()
date = str(row.get("date") or "").strip() date = str(row.get("date") or "").strip()
annotations: List[str] = [] annotations: List[str] = []
@@ -272,9 +275,10 @@ class InternetArchive(Provider):
             size_bytes=None,
             tag=set(),
             columns=[
-                ("identifier", identifier),
+                ("title", title),
                 ("mediatype", mediatype),
                 ("date", date),
+                ("creator", creator),
             ],
             full_metadata=dict(row),
         )
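Internet Archive returns `creator` either as a single string or as a list; the normalization above flattens both (values here are illustrative):

    row = {"creator": ["Miles Davis", "Gil Evans"]}  # -> creator == "Miles Davis, Gil Evans"
    row = {"creator": "Miles Davis"}                 # -> creator == "Miles Davis"
    row = {"creator": None}                          # -> creator == ""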


@@ -15,6 +15,115 @@ from ProviderCore.base import Provider
 _MATRIX_INIT_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}

+def _sniff_mime_from_header(path: Path) -> Optional[str]:
+    """Best-effort MIME sniffing from file headers.
+
+    Used when the file has no/unknown extension (common for exported/temp files).
+    Keeps dependencies to stdlib only.
+    """
+    try:
+        if not path.exists() or not path.is_file():
+            return None
+        with open(path, "rb") as handle:
+            header = handle.read(512)
+        if not header:
+            return None
+        # Images
+        if header.startswith(b"\xFF\xD8\xFF"):
+            return "image/jpeg"
+        if header.startswith(b"\x89PNG\r\n\x1a\n"):
+            return "image/png"
+        if header.startswith(b"GIF87a") or header.startswith(b"GIF89a"):
+            return "image/gif"
+        if header.startswith(b"BM"):
+            return "image/bmp"
+        if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WEBP":
+            return "image/webp"
+        # Audio
+        if header.startswith(b"fLaC"):
+            return "audio/flac"
+        if header.startswith(b"OggS"):
+            # Could be audio or video; treat as audio unless extension suggests video.
+            return "audio/ogg"
+        if header.startswith(b"ID3"):
+            return "audio/mpeg"
+        if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
+            return "audio/mpeg"
+        if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WAVE":
+            return "audio/wav"
+        # Video
+        if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
+            return "video/x-msvideo"
+        if header.startswith(b"\x1A\x45\xDF\xA3"):
+            # EBML container: Matroska/WebM.
+            return "video/x-matroska"
+        if len(header) >= 12 and header[4:8] == b"ftyp":
+            # ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
+            return "video/mp4"
+        # MPEG-TS / M2TS (sync byte every 188 bytes)
+        try:
+            if path.stat().st_size >= 188 * 2 and header[0] == 0x47:
+                with open(path, "rb") as handle:
+                    handle.seek(188)
+                    b = handle.read(1)
+                if b == b"\x47":
+                    return "video/mp2t"
+        except Exception:
+            pass
+        return None
+    except Exception:
+        return None
+
+def _classify_matrix_upload(path: Path, *, explicit_mime_type: Optional[str] = None) -> Tuple[str, str]:
+    """Return (mime_type, msgtype) for Matrix uploads."""
+    mime_type = str(explicit_mime_type or "").strip() or None
+    if not mime_type:
+        # `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
+        mime_type, _ = mimetypes.guess_type(str(path))
+    if not mime_type:
+        mime_type = _sniff_mime_from_header(path)
+    # Refinements based on extension for ambiguous containers.
+    ext = path.suffix.lower()
+    if ext in {".m4a", ".aac"}:
+        mime_type = mime_type or "audio/mp4"
+    if ext in {".mkv", ".webm"}:
+        mime_type = mime_type or "video/x-matroska"
+    if ext in {".ogv"}:
+        mime_type = mime_type or "video/ogg"
+    msgtype = "m.file"
+    if mime_type:
+        mt = mime_type.casefold()
+        if mt.startswith("image/"):
+            msgtype = "m.image"
+        elif mt.startswith("audio/"):
+            msgtype = "m.audio"
+        elif mt.startswith("video/"):
+            msgtype = "m.video"
+    # Final fallback for unknown MIME types.
+    if msgtype == "m.file":
+        audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
+        video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv", ".m2ts", ".mts", ".3gp", ".ogv"}
+        image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
+        if ext in audio_exts:
+            msgtype = "m.audio"
+        elif ext in video_exts:
+            msgtype = "m.video"
+        elif ext in image_exts:
+            msgtype = "m.image"
+    return (mime_type or "application/octet-stream"), msgtype
+
 def _normalize_homeserver(value: str) -> str:
     text = str(value or "").strip()
     if not text:
@@ -189,9 +298,8 @@ class Matrix(Provider):
"Content-Type": "application/octet-stream", "Content-Type": "application/octet-stream",
} }
mime_type, _ = mimetypes.guess_type(path) mime_type, msgtype = _classify_matrix_upload(path, explicit_mime_type=kwargs.get("mime_type"))
if mime_type: headers["Content-Type"] = mime_type
headers["Content-Type"] = mime_type
filename = path.name filename = path.name
@@ -222,19 +330,6 @@ class Matrix(Provider):
         except Exception:
             download_url_for_store = ""

-        # Determine message type
-        msgtype = "m.file"
-        ext = path.suffix.lower()
-        audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
-        video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv"}
-        image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
-        if ext in audio_exts:
-            msgtype = "m.audio"
-        elif ext in video_exts:
-            msgtype = "m.video"
-        elif ext in image_exts:
-            msgtype = "m.image"
         info = {"mimetype": mime_type, "size": path.stat().st_size}
         payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}


@@ -955,6 +955,39 @@ class HydrusNetwork(Store):
debug(f"{self._log_prefix()} get_file: url={browser_url}") debug(f"{self._log_prefix()} get_file: url={browser_url}")
return browser_url return browser_url
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
"""Delete a file from Hydrus, then clear the deletion record.
This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} delete_file: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'")
return False
reason = kwargs.get("reason")
reason_text = str(reason).strip() if isinstance(reason, str) and reason.strip() else None
# 1) Delete file
client.delete_files([file_hash], reason=reason_text)
# 2) Clear deletion record (best-effort)
try:
client.clear_file_deletion_record([file_hash])
except Exception as exc:
debug(f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}")
return True
except Exception as exc:
debug(f"{self._log_prefix()} delete_file failed: {exc}")
return False
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
"""Get metadata for a file from Hydrus by hash. """Get metadata for a file from Hydrus by hash.


@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
s = s.replace("\u2011", "-") # non-breaking hyphen s = s.replace("\u2011", "-") # non-breaking hyphen
s = s.replace("\u2012", "-") # figure dash s = s.replace("\u2012", "-") # figure dash
s = s.replace("\u2015", "-") # horizontal bar s = s.replace("\u2015", "-") # horizontal bar
# Collapse any whitespace runs (including newlines/tabs) to a single space.
# Some sources wrap the artist name or title across lines.
try:
s = re.sub(r"\s+", " ", s).strip()
except Exception:
s = " ".join(s.split())
return s return s
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
         if ch.isspace():
             while i < len(literal) and literal[i].isspace():
                 i += 1
-            out.append(r"\\s*")
+            out.append(r"\s*")
             continue
         out.append(re.escape(ch))
         i += 1
@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
raise ValueError("extract template must contain at least one (field)") raise ValueError("extract template must contain at least one (field)")
field_names: List[str] = [] field_names: List[str] = []
parts: List[str] = [r"^\\s*"] parts: List[str] = [r"^\s*"]
last_end = 0 last_end = 0
for idx, m in enumerate(matches): for idx, m in enumerate(matches):
@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)") raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
field_names.append(raw_name) field_names.append(raw_name)
name_lower = raw_name.lower()
is_last = idx == (len(matches) - 1) is_last = idx == (len(matches) - 1)
if is_last: if is_last:
parts.append(fr"(?P<{raw_name}>.+)") parts.append(fr"(?P<{raw_name}>.+)")
else: else:
parts.append(fr"(?P<{raw_name}>.+?)") # Heuristic: common numeric fields should capture full digit runs.
# This avoids ambiguous splits like track='2', title='3 ...'.
if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
parts.append(fr"(?P<{raw_name}>\d+)")
else:
parts.append(fr"(?P<{raw_name}>.+?)")
last_end = m.end() last_end = m.end()
tail = tpl[last_end:] tail = tpl[last_end:]
if tail: if tail:
parts.append(_literal_to_title_pattern_regex(tail)) parts.append(_literal_to_title_pattern_regex(tail))
parts.append(r"\\s*$") parts.append(r"\s*$")
rx = "".join(parts) rx = "".join(parts)
return re.compile(rx, flags=re.IGNORECASE), field_names return re.compile(rx, flags=re.IGNORECASE), field_names
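A worked example of the new digit-run capture (template and input are illustrative):

    rx, fields = _compile_extract_template("(track) (title)")
    # old pattern: ^\s*(?P<track>.+?)\s*(?P<title>.+)\s*$
    #   "23 99 Problems" -> track='2', title='3 99 Problems'
    #   (the literal space compiles to \s*, which can match empty)
    # new pattern: ^\s*(?P<track>\d+)\s*(?P<title>.+)\s*$
    #   "23 99 Problems" -> track='23', title='99 Problems'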

cmdlet/archive_file.py (new file)

@@ -0,0 +1,426 @@
"""Create a single .tar.zst archive from piped file selections."""
from __future__ import annotations
import re
import sys
import tarfile
import tempfile
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Sequence, Set
from urllib.parse import parse_qs, urlparse
from SYS.logger import log
import pipeline as ctx
from config import resolve_output_dir
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
coerce_to_pipe_object = sh.coerce_to_pipe_object
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help
_SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")
def _extract_sha256_hex(item: Any) -> str:
try:
if isinstance(item, dict):
h = item.get("hash")
else:
h = getattr(item, "hash", None)
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
def _extract_store_name(item: Any) -> str:
try:
if isinstance(item, dict):
s = item.get("store")
else:
s = getattr(item, "store", None)
return str(s or "").strip()
except Exception:
return ""
def _extract_url(item: Any) -> str:
try:
u = sh.get_field(item, "url") or sh.get_field(item, "target")
if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
return u.strip()
except Exception:
pass
return ""
def _extract_hash_from_hydrus_file_url(url: str) -> str:
try:
parsed = urlparse(str(url))
if not (parsed.path or "").endswith("/get_files/file"):
return ""
qs = parse_qs(parsed.query or "")
h = (qs.get("hash") or [""])[0]
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
except Exception:
instances = set()
return instances
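The instance lookup assumes the config nests Hydrus instances under store.hydrusnetwork (the instance names here are illustrative):

    config = {"store": {"hydrusnetwork": {"home": {...}, "backup": {...}}}}
    _hydrus_instance_names(config)  # -> {"home", "backup"}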
def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
"""Download a Hydrus-backed item to a local temp path (best-effort).
This is intentionally side-effect free except for writing the local temp file.
"""
try:
from config import get_hydrus_access_key, get_hydrus_url
from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
except Exception:
return None
store_name = _extract_store_name(item)
store_lower = store_name.lower()
hydrus_instances = _hydrus_instance_names(config)
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
if not file_hash:
return None
# Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
is_hydrus_url = False
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
return None
# Prefer store name as instance key; fall back to "home".
access_key = None
hydrus_url = None
for inst in [s for s in [store_lower, "home"] if s]:
try:
access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
if access_key and hydrus_url:
break
except Exception:
access_key = None
hydrus_url = None
if not access_key or not hydrus_url:
return None
client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
# Best-effort extension from Hydrus metadata.
suffix = ".hydrus"
try:
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
if isinstance(entry, dict):
ext = entry.get("ext")
if isinstance(ext, str) and ext.strip():
cleaned = ext.strip()
if not cleaned.startswith("."):
cleaned = "." + cleaned.lstrip(".")
if len(cleaned) <= 12:
suffix = cleaned
except Exception:
pass
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
dest = output_dir / f"{file_hash}{suffix}"
if dest.exists():
dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
headers = {"Hydrus-Client-API-Access-Key": access_key}
download_hydrus_file(file_url, headers, dest, timeout=60.0)
try:
if dest.exists() and dest.is_file():
return dest
except Exception:
return None
return None
def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
"""Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
# 1) Direct local path
try:
po = coerce_to_pipe_object(item, None)
raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
if raw_path:
p = Path(str(raw_path)).expanduser()
if p.exists():
return p, None
except Exception:
pass
# 2) Store-backed path
file_hash = _extract_sha256_hex(item)
store_name = _extract_store_name(item)
if file_hash and store_name:
try:
from Store import Store
store = Store(config)
backend = store[store_name]
src = backend.get_file(file_hash)
if isinstance(src, Path):
if src.exists():
return src, None
elif isinstance(src, str) and src.strip():
cand = Path(src).expanduser()
if cand.exists():
return cand, None
# If the backend returns a URL (HydrusNetwork), download it.
if src.strip().lower().startswith(("http://", "https://")):
tmp_base = None
try:
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
if downloaded is not None:
return downloaded, downloaded
except Exception:
pass
# 3) Hydrus-backed items without backend.get_file path.
try:
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item(item, config, out_dir)
if downloaded is not None:
return downloaded, downloaded
return None, None
def _unique_arcname(name: str, seen: Set[str]) -> str:
base = str(name or "").replace("\\", "/")
base = base.lstrip("/")
if not base:
base = "file"
if base not in seen:
seen.add(base)
return base
stem = base
suffix = ""
if "/" not in base:
p = Path(base)
stem = p.stem
suffix = p.suffix
n = 2
while True:
candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
if candidate not in seen:
seen.add(candidate)
return candidate
n += 1
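The arcname helper deduplicates basenames inside the archive; an illustrative run:

    seen: Set[str] = set()
    _unique_arcname("song.mp3", seen)  # -> "song.mp3"
    _unique_arcname("song.mp3", seen)  # -> "song (2).mp3"
    _unique_arcname("song.mp3", seen)  # -> "song (3).mp3"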
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
parsed = parse_cmdlet_args(args, CMDLET)
level_raw = parsed.get("level")
try:
level = int(level_raw) if level_raw is not None else 11
except Exception:
level = 11
if level < 1:
level = 1
if level > 22:
level = 22
# Output destination is controlled by the shared -path behavior in the pipeline runner.
# This cmdlet always creates the archive in the configured output directory and emits it.
# Collect piped items; archive-file is a batch command (single output).
items: List[Any] = []
if isinstance(result, list):
items = list(result)
elif result is not None:
items = [result]
if not items:
log("No piped items provided to archive-file", file=sys.stderr)
return 1
temp_downloads: List[Path] = []
try:
paths: List[Path] = []
for it in items:
p, tmp = _resolve_existing_or_fetch_path(it, config)
if p is None:
continue
paths.append(p)
if tmp is not None:
temp_downloads.append(tmp)
# Keep stable order, remove duplicates.
uniq: List[Path] = []
seen_paths: Set[str] = set()
for p in paths:
key = str(p.resolve()) if p.exists() else str(p)
if key in seen_paths:
continue
seen_paths.add(key)
uniq.append(p)
paths = uniq
if not paths:
log("No existing file paths found in piped items", file=sys.stderr)
return 1
out_dir = resolve_output_dir(config)
try:
out_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
stamp = time.strftime("%Y%m%d_%H%M%S")
out_path = out_dir / f"archive_{stamp}.tar.zst"
try:
out_path = sh._unique_destination_path(out_path) # type: ignore[attr-defined]
except Exception:
pass
try:
out_path.parent.mkdir(parents=True, exist_ok=True)
except Exception as exc:
log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
return 1
# Import zstandard lazily so the rest of the CLI still runs without it.
try:
import zstandard as zstd # type: ignore
except Exception:
log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
return 1
# Write tar stream into zstd stream.
try:
with open(out_path, "wb") as out_handle:
cctx = zstd.ZstdCompressor(level=level)
with cctx.stream_writer(out_handle) as compressor:
with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
seen_names: Set[str] = set()
for p in paths:
arcname = _unique_arcname(p.name, seen_names)
# For directories, tarfile will include contents when recursive=True.
try:
tf.add(str(p), arcname=arcname, recursive=True)
except Exception as exc:
log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
except Exception as exc:
log(f"Archive creation failed: {exc}", file=sys.stderr)
return 1
# Emit a single artifact downstream.
hash_value = None
try:
from SYS.utils import sha256_file
hash_value = sha256_file(out_path)
except Exception:
hash_value = None
pipe_obj = create_pipe_object_result(
source="archive",
identifier=out_path.stem,
file_path=str(out_path),
cmdlet_name="archive-file",
title=out_path.name,
hash_value=hash_value,
is_temp=True,
store="PATH",
extra={
"target": str(out_path),
"archive_format": "tar.zst",
"compression": "zstd",
"level": level,
"source_count": len(paths),
"source_paths": [str(p) for p in paths],
},
)
ctx.emit(pipe_obj)
return 0
finally:
# Best-effort cleanup of any temp Hydrus downloads we created.
for tmp in temp_downloads:
try:
tmp.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
if tmp.exists():
tmp.unlink()
except Exception:
pass
except Exception:
pass
CMDLET = Cmdlet(
name="archive-file",
summary="Archive piped files into a single .tar.zst.",
usage="@N | archive-file [-level <1-22>] [-path <path>]",
arg=[
CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
SharedArgs.PATH,
],
detail=[
"- Example: @1-5 | archive-file",
"- Default zstd level is 11.",
"- Emits one output item (the archive) for downstream piping.",
],
)
CMDLET.exec = _run
CMDLET.register()
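Since the output is a standard zstd-compressed tar stream, it can be listed back with the same two libraries (a minimal sketch; the filename is illustrative):

    import tarfile
    import zstandard as zstd

    with open("archive_20251227_145059.tar.zst", "rb") as fh:
        with zstd.ZstdDecompressor().stream_reader(fh) as reader:
            with tarfile.open(fileobj=reader, mode="r|") as tf:
                for member in tf:
                    print(member.name, member.size)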


@@ -297,51 +297,81 @@ class Delete_File(sh.Cmdlet):
         should_try_hydrus = False
         if should_try_hydrus and hash_hex:
-            client = None
-            if store:
-                # Store specified: do not fall back to a global/default Hydrus client.
-                try:
-                    registry = Store(config)
-                    backend = registry[str(store)]
-                    candidate = getattr(backend, "_client", None)
-                    if candidate is not None and hasattr(candidate, "_post"):
-                        client = candidate
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
-                        return False
-            else:
-                # No store context; use default Hydrus client.
-                try:
-                    client = hydrus_wrapper.get_client(config)
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log("Hydrus client unavailable", file=sys.stderr)
-                        return False
-            payload: Dict[str, Any] = {"hashes": [hash_hex]}
-            if reason:
-                payload["reason"] = reason
-            try:
-                client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
-                hydrus_deleted = True
-                title_str = str(title_val).strip() if title_val else ""
-                if title_str:
-                    debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
-                else:
-                    debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
-            except Exception:
-                # If it's not in Hydrus (e.g. 404 or similar), that's fine
-                if not local_deleted:
-                    return []
+            # Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
+            # This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
+            did_backend_delete = False
+            try:
+                if backend is not None:
+                    deleter = getattr(backend, "delete_file", None)
+                    if callable(deleter):
+                        did_backend_delete = bool(deleter(hash_hex, reason=reason))
+            except Exception:
+                did_backend_delete = False
+            if did_backend_delete:
+                hydrus_deleted = True
+                title_str = str(title_val).strip() if title_val else ""
+                if title_str:
+                    debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
+                else:
+                    debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
+            else:
+                # Fallback to direct client calls.
+                client = None
+                if store:
+                    # Store specified: do not fall back to a global/default Hydrus client.
+                    try:
+                        registry = Store(config)
+                        backend = registry[str(store)]
+                        candidate = getattr(backend, "_client", None)
+                        if candidate is not None and hasattr(candidate, "_post"):
+                            client = candidate
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
+                            return False
+                else:
+                    # No store context; use default Hydrus client.
+                    try:
+                        client = hydrus_wrapper.get_client(config)
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log("Hydrus client unavailable", file=sys.stderr)
+                            return False
+                payload: Dict[str, Any] = {"hashes": [hash_hex]}
+                if reason:
+                    payload["reason"] = reason
+                try:
+                    client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                    # Best-effort clear deletion record if supported by this client.
+                    try:
+                        clearer = getattr(client, "clear_file_deletion_record", None)
+                        if callable(clearer):
+                            clearer([hash_hex])
+                        else:
+                            client._post("/add_files/clear_file_deletion_record", data={"hashes": [hash_hex]})  # type: ignore[attr-defined]
+                    except Exception:
+                        pass
+                    hydrus_deleted = True
+                    title_str = str(title_val).strip() if title_val else ""
+                    if title_str:
+                        debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
+                    else:
+                        debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
+                except Exception:
+                    # If it's not in Hydrus (e.g. 404 or similar), that's fine
+                    if not local_deleted:
+                        return []
         if hydrus_deleted and hash_hex:
             size_hint = None

cmdlet/download_data.py (new file)

@@ -0,0 +1,267 @@
"""Smart downloader front-door.
Currently focused on Internet Archive item pages:
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
- Selecting a row via @N expands to download-file <direct-url>
This enables:
search-provider -provider internetarchive "..."
@3 # shows formats table
@2 | add-file ... # downloads selected file then pipes to add-file
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Sequence, cast
from urllib.parse import quote
from SYS.logger import log, debug
import pipeline as pipeline_context
from result_table import ResultTable
from . import _shared as sh
Cmdlet = sh.Cmdlet
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
def _extract_ia_identifier(text: str) -> str:
s = str(text or "").strip()
if not s:
return ""
# https://archive.org/details/<identifier>
m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
if m:
return str(m.group(1) or "").strip()
# internetarchive:<identifier>
if s.lower().startswith("internetarchive:"):
return s.split(":", 1)[-1].strip()
return ""
class Download_Data(Cmdlet):
def __init__(self) -> None:
super().__init__(
name="download-data",
summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
alias=[],
arg=[SharedArgs.URL],
detail=[
"For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
"Select a file row with @N to run download-file on that direct URL.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
# parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
parsed = parse_cmdlet_args(args, cast(Any, self))
except Exception:
parsed = {}
raw_urls = parsed.get("url", [])
if isinstance(raw_urls, str):
raw_urls = [raw_urls]
url_arg = str(raw_urls[0]).strip() if raw_urls else ""
piped_items: List[Any] = []
if isinstance(result, list):
piped_items = list(result)
elif result is not None:
piped_items = [result]
# Prefer piped item target if present.
target = ""
if piped_items:
target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
if not target:
target = url_arg
table_name = ""
try:
table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
except Exception:
table_name = ""
identifier = ""
if piped_items:
md = get_field(piped_items[0], "full_metadata")
if isinstance(md, dict):
identifier = str(md.get("identifier") or "").strip()
if not identifier:
identifier = _extract_ia_identifier(target)
if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)
log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
return 1
@staticmethod
def _run_internetarchive(item: Any, *, identifier: str) -> int:
try:
from Provider.internetarchive import _ia as _ia_loader
except Exception as exc:
log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
return 1
def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
try:
source = str(f.get("source") or "").strip().lower()
fmt = str(f.get("format") or "").strip().lower()
except Exception:
source = ""
fmt = ""
if source == "metadata":
return True
if fmt in {"metadata", "archive bittorrent"}:
return True
if fmt.startswith("thumbnail"):
return True
return False
ia = None
try:
ia = _ia_loader()
except Exception as exc:
log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
return 1
try:
get_item = getattr(ia, "get_item", None)
if not callable(get_item):
raise Exception("internetarchive.get_item is not available")
ia_item = cast(Any, get_item(str(identifier)))
except Exception as exc:
log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
return 1
files: List[Dict[str, Any]] = []
try:
raw_files = getattr(ia_item, "files", None)
if isinstance(raw_files, list):
for f in raw_files:
if isinstance(f, dict):
files.append(f)
except Exception:
files = []
if not files:
try:
for f in ia_item.get_files():
name = getattr(f, "name", None)
if not name and isinstance(f, dict):
name = f.get("name")
if not name:
continue
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
}
)
except Exception:
files = []
if not files:
log("download-data: Internet Archive item has no files", file=sys.stderr)
return 1
# Prefer non-metadata files for the picker.
candidates = [f for f in files if not _is_ia_metadata_file(f)]
if not candidates:
candidates = list(files)
def _key(f: Dict[str, Any]) -> tuple[str, str]:
fmt = str(f.get("format") or "").strip().lower()
name = str(f.get("name") or "").strip().lower()
return (fmt, name)
candidates.sort(key=_key)
title = ""
try:
title = str(get_field(item, "title") or "").strip()
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":")
if not title:
table_title = f"Internet Archive: {identifier}".strip().rstrip(":")
table = ResultTable(table_title).set_preserve_order(True)
table.set_table("internetarchive.formats")
# Selecting a row should expand to `download-file <direct-url>`.
table.set_source_command("download-file", [])
rows: List[Dict[str, Any]] = []
for f in candidates:
name = str(f.get("name") or "").strip()
if not name:
continue
fmt = str(f.get("format") or "").strip()
src = str(f.get("source") or "").strip()
size_val: Any = f.get("size")
try:
size_val = int(size_val) if size_val not in (None, "") else ""
except Exception:
# Keep as-is; ResultTable will stringify.
pass
direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"
row_item: Dict[str, Any] = {
"table": "internetarchive",
"title": fmt or name,
"path": direct_url,
"url": direct_url,
"columns": [
("Format", fmt),
("Name", name),
("Size", size_val),
("Source", src),
],
# Used by @N expansion: download-file <direct-url>
"_selection_args": [direct_url],
"full_metadata": {
"identifier": identifier,
"name": name,
"format": fmt,
"source": src,
"size": f.get("size"),
},
}
rows.append(row_item)
table.add_result(row_item)
if not rows:
log("download-data: no downloadable files found for this item", file=sys.stderr)
return 1
try:
pipeline_context.set_last_result_table(table, rows, subject=item)
pipeline_context.set_current_stage_table(table)
except Exception as exc:
debug(f"[download-data] Failed to register result table: {exc}")
return 0
CMDLET = Download_Data()


@@ -16,6 +16,7 @@ from . import _shared as sh
 Cmdlet = sh.Cmdlet
 CmdletArg = sh.CmdletArg
+SharedArgs = sh.SharedArgs
 create_pipe_object_result = sh.create_pipe_object_result
 get_field = sh.get_field
 get_pipe_object_hash = sh.get_pipe_object_hash
@@ -37,7 +38,6 @@ except ImportError:
 try:
     from metadata import (
         read_tags_from_file,
-        dedup_tags_by_namespace,
         merge_multiple_tag_lists,
     )
     HAS_METADATA_API = True
@@ -87,7 +87,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     delete_after = parsed.get("delete", False)

     output_override: Optional[Path] = None
-    output_arg = parsed.get("output")
+    output_arg = parsed.get("path")
     if output_arg:
         try:
             output_override = Path(str(output_arg)).expanduser()
@@ -928,10 +928,10 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
 CMDLET = Cmdlet(
     name="merge-file",
     summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
-    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
+    usage="merge-file [-delete] [-path <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
     arg=[
         CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
-        CmdletArg("-output", description="Override output file path."),
+        SharedArgs.PATH,
         CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
     ],
     detail=[


@@ -44,6 +44,7 @@ dependencies = [
"pypdf>=3.0.0", "pypdf>=3.0.0",
"mutagen>=1.46.0", "mutagen>=1.46.0",
"cbor2>=4.0", "cbor2>=4.0",
"zstandard>=0.23.0",
# Image and media support # Image and media support
"Pillow>=10.0.0", "Pillow>=10.0.0",


@@ -15,6 +15,7 @@ internetarchive>=4.1.0
 pypdf>=3.0.0
 mutagen>=1.46.0
 cbor2>=4.0
+zstandard>=0.23.0

 # Image and media support
 Pillow>=10.0.0