df
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
     s = s.replace("\u2011", "-")  # non-breaking hyphen
     s = s.replace("\u2012", "-")  # figure dash
     s = s.replace("\u2015", "-")  # horizontal bar
+
+    # Collapse any whitespace runs (including newlines/tabs) to a single space.
+    # Some sources wrap the artist name or title across lines.
+    try:
+        s = re.sub(r"\s+", " ", s).strip()
+    except Exception:
+        s = " ".join(s.split())
     return s

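For reference, a small sketch of what the added block does to a wrapped title (illustrative only, not part of the diff; it applies just the two transformations shown above, assuming nothing else rewrites the string in between):

    import re
    s = "Artist \u2011 Name\n   Title"
    s = s.replace("\u2011", "-")        # non-breaking hyphen -> ASCII hyphen
    s = re.sub(r"\s+", " ", s).strip()  # collapse the line wrap to one space
    print(s)  # Artist - Name Title
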
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
         if ch.isspace():
             while i < len(literal) and literal[i].isspace():
                 i += 1
-            out.append(r"\\s*")
+            out.append(r"\s*")
             continue
         out.append(re.escape(ch))
         i += 1

@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
         raise ValueError("extract template must contain at least one (field)")

     field_names: List[str] = []
-    parts: List[str] = [r"^\\s*"]
+    parts: List[str] = [r"^\s*"]
     last_end = 0

     for idx, m in enumerate(matches):

@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
             raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
         field_names.append(raw_name)

+        name_lower = raw_name.lower()
         is_last = idx == (len(matches) - 1)
         if is_last:
             parts.append(fr"(?P<{raw_name}>.+)")
         else:
-            parts.append(fr"(?P<{raw_name}>.+?)")
+            # Heuristic: common numeric fields should capture full digit runs.
+            # This avoids ambiguous splits like track='2', title='3 ...'.
+            if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
+                parts.append(fr"(?P<{raw_name}>\d+)")
+            else:
+                parts.append(fr"(?P<{raw_name}>.+?)")

         last_end = m.end()

     tail = tpl[last_end:]
     if tail:
         parts.append(_literal_to_title_pattern_regex(tail))
-    parts.append(r"\\s*$")
+    parts.append(r"\s*$")

     rx = "".join(parts)
     return re.compile(rx, flags=re.IGNORECASE), field_names

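A quick sketch of the digit-run heuristic (illustrative only; assumes this module's helpers are importable and that lowercase field names like 'track' and 'title' are accepted):

    rx, fields = _compile_extract_template("(track) (title)")
    m = rx.match("23 Some Song")
    if m:
        print({name: m.group(name) for name in fields})
    # With the heuristic: {'track': '23', 'title': 'Some Song'}
    # With the old lazy .+? a split like track='2', title='3 Some Song' was possible.
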
cmdlet/archive_file.py (new file, 426 lines)
@@ -0,0 +1,426 @@
"""Create a single .tar.zst archive from piped file selections."""

from __future__ import annotations

import re
import sys
import tarfile
import tempfile
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Sequence, Set
from urllib.parse import parse_qs, urlparse

from SYS.logger import log

import pipeline as ctx
from config import resolve_output_dir
from . import _shared as sh

Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
coerce_to_pipe_object = sh.coerce_to_pipe_object
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help


_SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")

def _extract_sha256_hex(item: Any) -> str:
    try:
        if isinstance(item, dict):
            h = item.get("hash")
        else:
            h = getattr(item, "hash", None)
        if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
            return h.strip().lower()
    except Exception:
        pass
    return ""


def _extract_store_name(item: Any) -> str:
    try:
        if isinstance(item, dict):
            s = item.get("store")
        else:
            s = getattr(item, "store", None)
        return str(s or "").strip()
    except Exception:
        return ""


def _extract_url(item: Any) -> str:
    try:
        u = sh.get_field(item, "url") or sh.get_field(item, "target")
        if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
            return u.strip()
    except Exception:
        pass
    return ""


def _extract_hash_from_hydrus_file_url(url: str) -> str:
    try:
        parsed = urlparse(str(url))
        if not (parsed.path or "").endswith("/get_files/file"):
            return ""
        qs = parse_qs(parsed.query or "")
        h = (qs.get("hash") or [""])[0]
        if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
            return h.strip().lower()
    except Exception:
        pass
    return ""

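For context, the Hydrus client API serves file content from a /get_files/file endpoint with the SHA-256 in the query string, which is exactly what the helper above parses (illustrative values; 45869 is the default client API port):

    url = "http://127.0.0.1:45869/get_files/file?hash=" + "ab" * 32
    _extract_hash_from_hydrus_file_url(url)   # -> 'abab...ab' (64 hex chars, lowercased)
    _extract_hash_from_hydrus_file_url("https://example.com/file?hash=zz")  # -> ''
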
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
    instances: Set[str] = set()
    try:
        store_cfg = config.get("store") if isinstance(config, dict) else None
        if isinstance(store_cfg, dict):
            hydrus_cfg = store_cfg.get("hydrusnetwork")
            if isinstance(hydrus_cfg, dict):
                instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
    except Exception:
        instances = set()
    return instances


def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
    """Download a Hydrus-backed item to a local temp path (best-effort).

    This is intentionally side-effect free except for writing the local temp file.
    """
    try:
        from config import get_hydrus_access_key, get_hydrus_url
        from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
    except Exception:
        return None

    store_name = _extract_store_name(item)
    store_lower = store_name.lower()
    hydrus_instances = _hydrus_instance_names(config)
    store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)

    url = _extract_url(item)
    file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
    if not file_hash:
        return None

    # Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
    is_hydrus_url = False
    if url:
        try:
            parsed = urlparse(url)
            is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
        except Exception:
            is_hydrus_url = False
    if not (is_hydrus_url or store_hint):
        return None

    # Prefer store name as instance key; fall back to "home".
    access_key = None
    hydrus_url = None
    for inst in [s for s in [store_lower, "home"] if s]:
        try:
            access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
            hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
            if access_key and hydrus_url:
                break
        except Exception:
            access_key = None
            hydrus_url = None

    if not access_key or not hydrus_url:
        return None

    client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
    file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)

    # Best-effort extension from Hydrus metadata.
    suffix = ".hydrus"
    try:
        meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
        entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
        if isinstance(entries, list) and entries:
            entry = entries[0]
            if isinstance(entry, dict):
                ext = entry.get("ext")
                if isinstance(ext, str) and ext.strip():
                    cleaned = ext.strip()
                    if not cleaned.startswith("."):
                        cleaned = "." + cleaned.lstrip(".")
                    if len(cleaned) <= 12:
                        suffix = cleaned
    except Exception:
        pass

    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    dest = output_dir / f"{file_hash}{suffix}"
    if dest.exists():
        dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"

    headers = {"Hydrus-Client-API-Access-Key": access_key}
    download_hydrus_file(file_url, headers, dest, timeout=60.0)
    try:
        if dest.exists() and dest.is_file():
            return dest
    except Exception:
        return None
    return None


def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
    """Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
    # 1) Direct local path
    try:
        po = coerce_to_pipe_object(item, None)
        raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
        if raw_path:
            p = Path(str(raw_path)).expanduser()
            if p.exists():
                return p, None
    except Exception:
        pass

    # 2) Store-backed path
    file_hash = _extract_sha256_hex(item)
    store_name = _extract_store_name(item)
    if file_hash and store_name:
        try:
            from Store import Store

            store = Store(config)
            backend = store[store_name]
            src = backend.get_file(file_hash)
            if isinstance(src, Path):
                if src.exists():
                    return src, None
            elif isinstance(src, str) and src.strip():
                cand = Path(src).expanduser()
                if cand.exists():
                    return cand, None
                # If the backend returns a URL (HydrusNetwork), download it.
                if src.strip().lower().startswith(("http://", "https://")):
                    tmp_base = None
                    try:
                        tmp_base = config.get("temp") if isinstance(config, dict) else None
                    except Exception:
                        tmp_base = None
                    out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
                    out_dir = out_dir / "archive" / "hydrus"
                    downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
                    if downloaded is not None:
                        return downloaded, downloaded
        except Exception:
            pass

    # 3) Hydrus-backed items without backend.get_file path.
    try:
        tmp_base = config.get("temp") if isinstance(config, dict) else None
    except Exception:
        tmp_base = None
    out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
    out_dir = out_dir / "archive" / "hydrus"
    downloaded = _maybe_download_hydrus_item(item, config, out_dir)
    if downloaded is not None:
        return downloaded, downloaded

    return None, None


def _unique_arcname(name: str, seen: Set[str]) -> str:
    base = str(name or "").replace("\\", "/")
    base = base.lstrip("/")
    if not base:
        base = "file"
    if base not in seen:
        seen.add(base)
        return base

    stem = base
    suffix = ""
    if "/" not in base:
        p = Path(base)
        stem = p.stem
        suffix = p.suffix

    n = 2
    while True:
        candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
        if candidate not in seen:
            seen.add(candidate)
            return candidate
        n += 1

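A small illustration of the collision handling above (not part of the diff): duplicate basenames get a ' (n)' suffix inserted before the extension:

    seen: set[str] = set()
    _unique_arcname("report.pdf", seen)  # -> 'report.pdf'
    _unique_arcname("report.pdf", seen)  # -> 'report (2).pdf'
    _unique_arcname("report.pdf", seen)  # -> 'report (3).pdf'
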
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    parsed = parse_cmdlet_args(args, CMDLET)

    level_raw = parsed.get("level")
    try:
        level = int(level_raw) if level_raw is not None else 11
    except Exception:
        level = 11
    if level < 1:
        level = 1
    if level > 22:
        level = 22

    # Output destination is controlled by the shared -path behavior in the pipeline runner.
    # This cmdlet always creates the archive in the configured output directory and emits it.

    # Collect piped items; archive-file is a batch command (single output).
    items: List[Any] = []
    if isinstance(result, list):
        items = list(result)
    elif result is not None:
        items = [result]

    if not items:
        log("No piped items provided to archive-file", file=sys.stderr)
        return 1

    temp_downloads: List[Path] = []
    try:
        paths: List[Path] = []
        for it in items:
            p, tmp = _resolve_existing_or_fetch_path(it, config)
            if p is None:
                continue
            paths.append(p)
            if tmp is not None:
                temp_downloads.append(tmp)

        # Keep stable order, remove duplicates.
        uniq: List[Path] = []
        seen_paths: Set[str] = set()
        for p in paths:
            key = str(p.resolve()) if p.exists() else str(p)
            if key in seen_paths:
                continue
            seen_paths.add(key)
            uniq.append(p)
        paths = uniq

        if not paths:
            log("No existing file paths found in piped items", file=sys.stderr)
            return 1

        out_dir = resolve_output_dir(config)
        try:
            out_dir.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass
        stamp = time.strftime("%Y%m%d_%H%M%S")
        out_path = out_dir / f"archive_{stamp}.tar.zst"
        try:
            out_path = sh._unique_destination_path(out_path)  # type: ignore[attr-defined]
        except Exception:
            pass

        try:
            out_path.parent.mkdir(parents=True, exist_ok=True)
        except Exception as exc:
            log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
            return 1

        # Import zstandard lazily so the rest of the CLI still runs without it.
        try:
            import zstandard as zstd  # type: ignore
        except Exception:
            log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
            return 1

        # Write tar stream into zstd stream.
        try:
            with open(out_path, "wb") as out_handle:
                cctx = zstd.ZstdCompressor(level=level)
                with cctx.stream_writer(out_handle) as compressor:
                    with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
                        seen_names: Set[str] = set()
                        for p in paths:
                            arcname = _unique_arcname(p.name, seen_names)
                            # For directories, tarfile will include contents when recursive=True.
                            try:
                                tf.add(str(p), arcname=arcname, recursive=True)
                            except Exception as exc:
                                log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
        except Exception as exc:
            log(f"Archive creation failed: {exc}", file=sys.stderr)
            return 1

        # Emit a single artifact downstream.
        hash_value = None
        try:
            from SYS.utils import sha256_file

            hash_value = sha256_file(out_path)
        except Exception:
            hash_value = None

        pipe_obj = create_pipe_object_result(
            source="archive",
            identifier=out_path.stem,
            file_path=str(out_path),
            cmdlet_name="archive-file",
            title=out_path.name,
            hash_value=hash_value,
            is_temp=True,
            store="PATH",
            extra={
                "target": str(out_path),
                "archive_format": "tar.zst",
                "compression": "zstd",
                "level": level,
                "source_count": len(paths),
                "source_paths": [str(p) for p in paths],
            },
        )
        ctx.emit(pipe_obj)
        return 0
    finally:
        # Best-effort cleanup of any temp Hydrus downloads we created.
        for tmp in temp_downloads:
            try:
                tmp.unlink(missing_ok=True)  # type: ignore[arg-type]
            except TypeError:
                try:
                    if tmp.exists():
                        tmp.unlink()
                except Exception:
                    pass
            except Exception:
                pass


CMDLET = Cmdlet(
    name="archive-file",
    summary="Archive piped files into a single .tar.zst.",
    usage="@N | archive-file [-level <1-22>] [-path <path>]",
    arg=[
        CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
        SharedArgs.PATH,
    ],
    detail=[
        "- Example: @1-5 | archive-file",
        "- Default zstd level is 11.",
        "- Emits one output item (the archive) for downstream piping.",
    ],
)

CMDLET.exec = _run
CMDLET.register()

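For anyone verifying the output format: the archive is a plain tar stream inside a zstd frame, so it can be listed back with the same two libraries the cmdlet uses (a minimal sketch; the filename is made up to match the cmdlet's naming pattern):

    import tarfile
    import zstandard as zstd

    with open("archive_20250101_120000.tar.zst", "rb") as fh:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            # 'r|' reads the tar as a non-seekable stream, matching how it was written.
            with tarfile.open(fileobj=reader, mode="r|") as tf:
                for member in tf:
                    print(member.name, member.size)
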
@@ -297,51 +297,81 @@ class Delete_File(sh.Cmdlet):
             should_try_hydrus = False

         if should_try_hydrus and hash_hex:
-            client = None
-            if store:
-                # Store specified: do not fall back to a global/default Hydrus client.
-                try:
-                    registry = Store(config)
-                    backend = registry[str(store)]
-                    candidate = getattr(backend, "_client", None)
-                    if candidate is not None and hasattr(candidate, "_post"):
-                        client = candidate
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
-                        return False
-            else:
-                # No store context; use default Hydrus client.
-                try:
-                    client = hydrus_wrapper.get_client(config)
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log("Hydrus client unavailable", file=sys.stderr)
-                        return False
-
-            payload: Dict[str, Any] = {"hashes": [hash_hex]}
-            if reason:
-                payload["reason"] = reason
+            # Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
+            # This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
+            did_backend_delete = False
             try:
-                client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                if backend is not None:
+                    deleter = getattr(backend, "delete_file", None)
+                    if callable(deleter):
+                        did_backend_delete = bool(deleter(hash_hex, reason=reason))
+            except Exception:
+                did_backend_delete = False
+
+            if did_backend_delete:
                 hydrus_deleted = True
                 title_str = str(title_val).strip() if title_val else ""
                 if title_str:
                     debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
                 else:
                     debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
-            except Exception:
-                # If it's not in Hydrus (e.g. 404 or similar), that's fine
-                if not local_deleted:
-                    return []
+            else:
+                # Fallback to direct client calls.
+                client = None
+                if store:
+                    # Store specified: do not fall back to a global/default Hydrus client.
+                    try:
+                        registry = Store(config)
+                        backend = registry[str(store)]
+                        candidate = getattr(backend, "_client", None)
+                        if candidate is not None and hasattr(candidate, "_post"):
+                            client = candidate
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
+                            return False
+                else:
+                    # No store context; use default Hydrus client.
+                    try:
+                        client = hydrus_wrapper.get_client(config)
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log("Hydrus client unavailable", file=sys.stderr)
+                            return False
+
+                payload: Dict[str, Any] = {"hashes": [hash_hex]}
+                if reason:
+                    payload["reason"] = reason
+                try:
+                    client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                    # Best-effort clear deletion record if supported by this client.
+                    try:
+                        clearer = getattr(client, "clear_file_deletion_record", None)
+                        if callable(clearer):
+                            clearer([hash_hex])
+                        else:
+                            client._post("/add_files/clear_file_deletion_record", data={"hashes": [hash_hex]})  # type: ignore[attr-defined]
+                    except Exception:
+                        pass
+
+                    hydrus_deleted = True
+                    title_str = str(title_val).strip() if title_val else ""
+                    if title_str:
+                        debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
+                    else:
+                        debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
+                except Exception:
+                    # If it's not in Hydrus (e.g. 404 or similar), that's fine
+                    if not local_deleted:
+                        return []

         if hydrus_deleted and hash_hex:
             size_hint = None

cmdlet/download_data.py (new file, 267 lines)
@@ -0,0 +1,267 @@
"""Smart downloader front-door.
|
||||
|
||||
Currently focused on Internet Archive item pages:
|
||||
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
|
||||
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
|
||||
- Selecting a row via @N expands to download-file <direct-url>
|
||||
|
||||
This enables:
|
||||
search-provider -provider internetarchive "..."
|
||||
@3 # shows formats table
|
||||
@2 | add-file ... # downloads selected file then pipes to add-file
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Sequence, cast
|
||||
from urllib.parse import quote
|
||||
|
||||
from SYS.logger import log, debug
|
||||
import pipeline as pipeline_context
|
||||
from result_table import ResultTable
|
||||
|
||||
from . import _shared as sh
|
||||
|
||||
Cmdlet = sh.Cmdlet
|
||||
SharedArgs = sh.SharedArgs
|
||||
parse_cmdlet_args = sh.parse_cmdlet_args
|
||||
get_field = sh.get_field
|
||||
|
||||
|
||||
def _extract_ia_identifier(text: str) -> str:
|
||||
s = str(text or "").strip()
|
||||
if not s:
|
||||
return ""
|
||||
|
||||
# https://archive.org/details/<identifier>
|
||||
m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
|
||||
if m:
|
||||
return str(m.group(1) or "").strip()
|
||||
|
||||
# internetarchive:<identifier>
|
||||
if s.lower().startswith("internetarchive:"):
|
||||
return s.split(":", 1)[-1].strip()
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
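A quick illustration of the identifier extraction (hypothetical inputs):

    _extract_ia_identifier("https://archive.org/details/some-item-2021")  # -> 'some-item-2021'
    _extract_ia_identifier("internetarchive:some-item-2021")              # -> 'some-item-2021'
    _extract_ia_identifier("https://example.com/details/x")               # -> ''
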
class Download_Data(Cmdlet):
    def __init__(self) -> None:
        super().__init__(
            name="download-data",
            summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
            usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
            alias=[],
            arg=[SharedArgs.URL],
            detail=[
                "For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
                "Select a file row with @N to run download-file on that direct URL.",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        try:
            # parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
            parsed = parse_cmdlet_args(args, cast(Any, self))
        except Exception:
            parsed = {}

        raw_urls = parsed.get("url", [])
        if isinstance(raw_urls, str):
            raw_urls = [raw_urls]
        url_arg = str(raw_urls[0]).strip() if raw_urls else ""

        piped_items: List[Any] = []
        if isinstance(result, list):
            piped_items = list(result)
        elif result is not None:
            piped_items = [result]

        # Prefer piped item target if present.
        target = ""
        if piped_items:
            target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
        if not target:
            target = url_arg

        table_name = ""
        try:
            table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
        except Exception:
            table_name = ""

        identifier = ""
        if piped_items:
            md = get_field(piped_items[0], "full_metadata")
            if isinstance(md, dict):
                identifier = str(md.get("identifier") or "").strip()
        if not identifier:
            identifier = _extract_ia_identifier(target)

        if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
            return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)

        log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
        return 1

    @staticmethod
    def _run_internetarchive(item: Any, *, identifier: str) -> int:
        try:
            from Provider.internetarchive import _ia as _ia_loader
        except Exception as exc:
            log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
            return 1

        def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
            try:
                source = str(f.get("source") or "").strip().lower()
                fmt = str(f.get("format") or "").strip().lower()
            except Exception:
                source = ""
                fmt = ""

            if source == "metadata":
                return True
            if fmt in {"metadata", "archive bittorrent"}:
                return True
            if fmt.startswith("thumbnail"):
                return True
            return False

        ia = None
        try:
            ia = _ia_loader()
        except Exception as exc:
            log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
            return 1

        try:
            get_item = getattr(ia, "get_item", None)
            if not callable(get_item):
                raise Exception("internetarchive.get_item is not available")
            ia_item = cast(Any, get_item(str(identifier)))
        except Exception as exc:
            log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
            return 1

        files: List[Dict[str, Any]] = []
        try:
            raw_files = getattr(ia_item, "files", None)
            if isinstance(raw_files, list):
                for f in raw_files:
                    if isinstance(f, dict):
                        files.append(f)
        except Exception:
            files = []

        if not files:
            try:
                for f in ia_item.get_files():
                    name = getattr(f, "name", None)
                    if not name and isinstance(f, dict):
                        name = f.get("name")
                    if not name:
                        continue
                    files.append(
                        {
                            "name": str(name),
                            "size": getattr(f, "size", None),
                            "format": getattr(f, "format", None),
                            "source": getattr(f, "source", None),
                        }
                    )
            except Exception:
                files = []

        if not files:
            log("download-data: Internet Archive item has no files", file=sys.stderr)
            return 1

        # Prefer non-metadata files for the picker.
        candidates = [f for f in files if not _is_ia_metadata_file(f)]
        if not candidates:
            candidates = list(files)

        def _key(f: Dict[str, Any]) -> tuple[str, str]:
            fmt = str(f.get("format") or "").strip().lower()
            name = str(f.get("name") or "").strip().lower()
            return (fmt, name)

        candidates.sort(key=_key)

        title = ""
        try:
            title = str(get_field(item, "title") or "").strip()
        except Exception:
            title = ""

        table_title = f"Internet Archive: {title}".strip().rstrip(":")
        if not title:
            table_title = f"Internet Archive: {identifier}".strip().rstrip(":")

        table = ResultTable(table_title).set_preserve_order(True)
        table.set_table("internetarchive.formats")
        # Selecting a row should expand to `download-file <direct-url>`.
        table.set_source_command("download-file", [])

        rows: List[Dict[str, Any]] = []
        for f in candidates:
            name = str(f.get("name") or "").strip()
            if not name:
                continue

            fmt = str(f.get("format") or "").strip()
            src = str(f.get("source") or "").strip()

            size_val: Any = f.get("size")
            try:
                size_val = int(size_val) if size_val not in (None, "") else ""
            except Exception:
                # Keep as-is; ResultTable will stringify.
                pass

            direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"

            row_item: Dict[str, Any] = {
                "table": "internetarchive",
                "title": fmt or name,
                "path": direct_url,
                "url": direct_url,
                "columns": [
                    ("Format", fmt),
                    ("Name", name),
                    ("Size", size_val),
                    ("Source", src),
                ],
                # Used by @N expansion: download-file <direct-url>
                "_selection_args": [direct_url],
                "full_metadata": {
                    "identifier": identifier,
                    "name": name,
                    "format": fmt,
                    "source": src,
                    "size": f.get("size"),
                },
            }

            rows.append(row_item)
            table.add_result(row_item)

        if not rows:
            log("download-data: no downloadable files found for this item", file=sys.stderr)
            return 1

        try:
            pipeline_context.set_last_result_table(table, rows, subject=item)
            pipeline_context.set_current_stage_table(table)
        except Exception as exc:
            debug(f"[download-data] Failed to register result table: {exc}")

        return 0


CMDLET = Download_Data()

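Note on the direct-URL construction above: quote(name, safe='') percent-encodes every reserved character, including '/', so a file name with spaces or slashes still forms a single path segment (illustrative name):

    from urllib.parse import quote
    identifier = "some-item"
    name = "disc 1/track 01.flac"
    print(f"https://archive.org/download/{identifier}/{quote(name, safe='')}")
    # https://archive.org/download/some-item/disc%201%2Ftrack%2001.flac
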
@@ -16,6 +16,7 @@ from . import _shared as sh

 Cmdlet = sh.Cmdlet
 CmdletArg = sh.CmdletArg
+SharedArgs = sh.SharedArgs
 create_pipe_object_result = sh.create_pipe_object_result
 get_field = sh.get_field
 get_pipe_object_hash = sh.get_pipe_object_hash
@@ -37,7 +38,6 @@ except ImportError:
 try:
     from metadata import (
         read_tags_from_file,
         dedup_tags_by_namespace,
         merge_multiple_tag_lists,
     )
     HAS_METADATA_API = True
@@ -87,7 +87,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     delete_after = parsed.get("delete", False)

     output_override: Optional[Path] = None
-    output_arg = parsed.get("output")
+    output_arg = parsed.get("path")
     if output_arg:
         try:
             output_override = Path(str(output_arg)).expanduser()
@@ -928,10 +928,10 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
 CMDLET = Cmdlet(
     name="merge-file",
     summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
-    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
+    usage="merge-file [-delete] [-path <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
     arg=[
         CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
-        CmdletArg("-output", description="Override output file path."),
+        SharedArgs.PATH,
         CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
     ],
     detail=[
Block a user