2026-01-05 07:51:19 -08:00
parent 8545367e28
commit 1f765cffda
32 changed files with 3447 additions and 3250 deletions

View File

@@ -10,10 +10,24 @@ Provides synchronous and asynchronous HTTP operations with:
import httpx
import asyncio
import sys
import time
import traceback
import re
from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set
from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs
import logging
from SYS.logger import debug, log
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
from SYS.utils import ensure_directory, sha256_file
try: # Optional; used for metadata extraction when available
from SYS.metadata import extract_ytdlp_tags
except Exception: # pragma: no cover - optional dependency
extract_ytdlp_tags = None # type: ignore[assignment]
logger = logging.getLogger(__name__)
# Default configuration
@@ -366,6 +380,359 @@ class HTTPClient:
return self._client.stream(method, url, **kwargs)
def download_direct_file(
url: str,
output_dir: Path,
debug_logger: Optional[DebugLogger] = None,
quiet: bool = False,
suggested_filename: Optional[str] = None,
pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
"""Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks."""
ensure_directory(output_dir)
def _sanitize_filename(name: str) -> str:
# Windows-safe filename sanitization.
text = str(name or "").strip()
if not text:
return ""
text = text.replace("/", "\\")
text = text.split("\\")[-1]
invalid = set('<>:"/\\|?*')
cleaned_chars: List[str] = []
for ch in text:
o = ord(ch)
if o < 32 or ch in invalid:
cleaned_chars.append(" ")
continue
cleaned_chars.append(ch)
cleaned = " ".join("".join(cleaned_chars).split()).strip()
cleaned = cleaned.rstrip(" .")
return cleaned
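# Behavior sketch (illustrative, not in the original): path separators are
# stripped down to the final component and invalid characters collapse to
# single spaces, e.g. _sanitize_filename("dir/na*me.txt") -> "na me.txt".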
def _unique_path(path: Path) -> Path:
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
for i in range(1, 10_000):
candidate = parent / f"{stem} ({i}){suffix}"
if not candidate.exists():
return candidate
return parent / f"{stem} ({int(time.time())}){suffix}"
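# Naming sketch (illustrative): if "report.pdf" exists, later downloads become
# "report (1).pdf", "report (2).pdf", ...; after 9,999 collisions the current
# Unix timestamp is appended instead.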
parsed_url = urlparse(url)
url_path = parsed_url.path
filename: Optional[str] = None
if parsed_url.query:
query_params = parse_qs(parsed_url.query)
for param_name in ("filename", "download", "file", "name"):
if param_name in query_params and query_params[param_name]:
filename = query_params[param_name][0]
filename = unquote(filename)
break
if not filename or not filename.strip():
filename = url_path.split("/")[-1] if url_path else ""
filename = unquote(filename)
if "?" in filename:
filename = filename.split("?")[0]
content_type = ""
try:
with HTTPClient(timeout=10.0) as client:
response = client._request("HEAD", url, follow_redirects=True)
content_disposition = response.headers.get("content-disposition", "")
try:
content_type = str(response.headers.get("content-type", "") or "").strip().lower()
except Exception:
content_type = ""
if content_disposition:
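# Note: for RFC 5987 "filename*=" values the charset prefix (e.g. UTF-8'')
# survives this regex; only percent-encoding is undone by unquote() below.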
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
if match:
extracted_name = match.group(1) or match.group(2)
if extracted_name:
filename = unquote(extracted_name)
if not quiet:
debug(f"Filename from Content-Disposition: {filename}")
except Exception as exc:
if not quiet:
log(f"Could not get filename from headers: {exc}", file=sys.stderr)
try:
page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
ext = ""
try:
ext = Path(str(filename or "")).suffix.lower()
except Exception:
ext = ""
ct0 = (content_type or "").split(";", 1)[0].strip().lower()
must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)
if must_probe:
with HTTPClient(timeout=10.0) as client:
with client._request_stream("GET", url, follow_redirects=True) as resp:
resp.raise_for_status()
ct = (
str(resp.headers.get("content-type", "") or "")
.split(";", 1)[0]
.strip()
.lower()
)
if ct.startswith("text/html"):
raise DownloadError("URL appears to be an HTML page, not a direct file")
except DownloadError:
raise
except Exception:
pass
suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
if suggested:
suggested_path = Path(suggested)
if suggested_path.suffix:
filename = suggested
else:
detected_ext = ""
try:
detected_ext = Path(str(filename)).suffix
except Exception:
detected_ext = ""
filename = suggested + detected_ext if detected_ext else suggested
try:
has_ext = bool(filename and Path(str(filename)).suffix)
except Exception:
has_ext = False
if filename and (not has_ext):
ct = (content_type or "").split(";", 1)[0].strip().lower()
ext_by_ct = {
"application/pdf": ".pdf",
"application/epub+zip": ".epub",
"application/x-mobipocket-ebook": ".mobi",
"image/jpeg": ".jpg",
"image/png": ".png",
"image/webp": ".webp",
"image/gif": ".gif",
"text/plain": ".txt",
"application/zip": ".zip",
}
if ct in ext_by_ct:
filename = f"{filename}{ext_by_ct[ct]}"
elif ct.startswith("text/html"):
raise DownloadError("URL appears to be an HTML page, not a direct file")
if not filename or not str(filename).strip():
raise DownloadError(
"Could not determine filename for URL (no Content-Disposition and no path filename)"
)
file_path = _unique_path(output_dir / str(filename))
use_pipeline_transfer = False
try:
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
ui = None
if hasattr(pipeline_progress, "ui_and_pipe_index"):
ui, _ = pipeline_progress.ui_and_pipe_index() # type: ignore[attr-defined]
use_pipeline_transfer = ui is not None
except Exception:
use_pipeline_transfer = False
progress_bar: Optional[ProgressBar] = None
if (not quiet) and (not use_pipeline_transfer):
progress_bar = ProgressBar()
transfer_started = [False]
if not quiet:
debug(f"Direct download: {filename}")
try:
start_time = time.time()
downloaded_bytes = [0]
transfer_started[0] = False
def _maybe_begin_transfer(content_length: int) -> None:
if pipeline_progress is None or transfer_started[0]:
return
try:
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
except Exception:
total_val = None
try:
if hasattr(pipeline_progress, "begin_transfer"):
pipeline_progress.begin_transfer(
label=str(filename or "download"),
total=total_val,
)
transfer_started[0] = True
except Exception:
return
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
downloaded_bytes[0] = int(bytes_downloaded or 0)
try:
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
_maybe_begin_transfer(content_length)
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
pipeline_progress.update_transfer(
label=str(filename or "download"),
completed=int(bytes_downloaded or 0),
total=total_val,
)
except Exception:
pass
if progress_bar is not None:
progress_bar.update(
downloaded=int(bytes_downloaded or 0),
total=int(content_length) if content_length and content_length > 0 else None,
label=str(filename or "download"),
file=sys.stderr,
)
with HTTPClient(timeout=30.0) as client:
client.download(url, str(file_path), progress_callback=progress_callback)
elapsed = time.time() - start_time
try:
if progress_bar is not None:
progress_bar.finish()
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(
pipeline_progress, "finish_transfer"
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
if not quiet:
debug(f"✓ Downloaded in {elapsed:.1f}s")
ext_out = ""
try:
ext_out = Path(str(filename)).suffix.lstrip(".")
except Exception:
ext_out = ""
info: Dict[str, Any] = {
"id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
"ext": ext_out,
"webpage_url": url,
}
hash_value = None
try:
hash_value = sha256_file(file_path)
except Exception:
pass
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(info)
except Exception as exc:
log(f"Error extracting tags: {exc}", file=sys.stderr)
if not any(str(t).startswith("title:") for t in tags):
info["title"] = str(filename)
tags = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(info)
except Exception as exc:
log(f"Error extracting tags with filename: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"direct-file-downloaded",
{"url": url, "path": str(file_path), "hash": hash_value},
)
return DownloadMediaResult(
path=file_path,
info=info,
tag=tags,
source_url=url,
hash_value=hash_value,
)
except httpx.HTTPError as exc:  # RequestError and HTTPStatusError both derive from HTTPError
try:
if progress_bar is not None:
progress_bar.finish()
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(
pipeline_progress, "finish_transfer"
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
log(f"Download error: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{"phase": "direct-file", "url": url, "error": str(exc)},
)
raise DownloadError(f"Failed to download {url}: {exc}") from exc
except Exception as exc:
try:
if progress_bar is not None:
progress_bar.finish()
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(
pipeline_progress, "finish_transfer"
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
log(f"Error downloading file: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
"exception",
{
"phase": "direct-file",
"url": url,
"error": str(exc),
"traceback": traceback.format_exc(),
},
)
raise DownloadError(f"Error downloading file: {exc}") from exc
# Back-compat alias
_download_direct_file = download_direct_file
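# Usage sketch (illustrative, not part of this commit): the URL and output
# directory below are assumptions for the example.
def _example_direct_download() -> None:
    result = download_direct_file(
        "https://example.com/files/report.pdf",  # assumed direct-file URL
        Path("downloads"),
        quiet=True,
    )
    # DownloadMediaResult carries the saved path, an info dict, derived
    # tags, and the file's SHA-256.
    print(result.path, result.hash_value)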
class AsyncHTTPClient:
"""Unified async HTTP client with asyncio support."""

View File

@@ -11,6 +11,7 @@ import shutil
import subprocess
import sys
import time
from collections import deque
from SYS.logger import log
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
@@ -18,8 +19,8 @@ import tempfile
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, TypeVar, Union, cast
from urllib.parse import urlsplit, urlencode, quote, urlunsplit, unquote
import httpx
logger = logging.getLogger(__name__)
@@ -1828,3 +1829,742 @@ def download_hydrus_file(
print_final_progress(filename, file_size, elapsed)
return downloaded
# ============================================================================
# Hydrus metadata helpers (moved from SYS.metadata)
# ============================================================================
def _normalize_hash(value: Any) -> str:
candidate = str(value or "").strip().lower()
if not candidate:
raise ValueError("Hydrus hash is required")
if len(candidate) != 64 or any(ch not in "0123456789abcdef" for ch in candidate):
raise ValueError("Hydrus hash must be a 64-character hex string")
return candidate
def _normalize_tag(tag: Any) -> Optional[str]:
if tag is None:
return None
if isinstance(tag, str):
candidate = tag.strip()
else:
candidate = str(tag).strip()
return candidate or None
def _dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
if not tags:
return []
namespace_to_tags: Dict[Optional[str], List[Tuple[int, str]]] = {}
first_appearance: Dict[Optional[str], int] = {}
for idx, tag in enumerate(tags):
namespace: Optional[str] = tag.split(":", 1)[0] if ":" in tag else None
if namespace not in first_appearance:
first_appearance[namespace] = idx
if namespace not in namespace_to_tags:
namespace_to_tags[namespace] = []
namespace_to_tags[namespace].append((idx, tag))
result: List[Tuple[int, str]] = []
for namespace, tag_list in namespace_to_tags.items():
chosen_tag = tag_list[0][1] if keep_first else tag_list[-1][1]
result.append((first_appearance[namespace], chosen_tag))
result.sort(key=lambda x: x[0])
return [tag for _, tag in result]
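# Behavior sketch (illustrative): with keep_first=True the earliest tag per
# namespace wins and first-appearance order is preserved, e.g.
#   _dedup_tags_by_namespace(["title:a", "creator:x", "title:b"])
#   -> ["title:a", "creator:x"]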
def _extract_tag_services(entry: Dict[str, Any]) -> List[Dict[str, Any]]:
tags_section = entry.get("tags")
services: List[Dict[str, Any]] = []
if not isinstance(tags_section, dict):
return services
names_map = tags_section.get("service_keys_to_names")
if not isinstance(names_map, dict):
names_map = {}
def get_record(service_key: Optional[str], service_name: Optional[str]) -> Dict[str, Any]:
key_lower = service_key.lower() if isinstance(service_key, str) else None
name_lower = service_name.lower() if isinstance(service_name, str) else None
for record in services:
existing_key = record.get("service_key")
if key_lower and isinstance(existing_key, str) and existing_key.lower() == key_lower:
if service_name and not record.get("service_name"):
record["service_name"] = service_name
return record
existing_name = record.get("service_name")
if name_lower and isinstance(existing_name, str) and existing_name.lower() == name_lower:
if service_key and not record.get("service_key"):
record["service_key"] = service_key
return record
record = {
"service_key": service_key,
"service_name": service_name,
"tags": [],
}
services.append(record)
return record
def _iter_current_status_lists(container: Any) -> Iterable[List[Any]]:
if isinstance(container, dict):
for status_key, tags_list in container.items():
if str(status_key) != "0":
continue
if isinstance(tags_list, list):
yield tags_list
elif isinstance(container, list):
yield container
statuses_map = tags_section.get("service_keys_to_statuses_to_tags")
if isinstance(statuses_map, dict):
for service_key, status_map in statuses_map.items():
record = get_record(service_key if isinstance(service_key, str) else None, names_map.get(service_key))
for tags_list in _iter_current_status_lists(status_map):
for tag in tags_list:
normalized = _normalize_tag(tag)
if normalized:
record["tags"].append(normalized)
ignored_keys = {
"service_keys_to_statuses_to_tags",
"service_keys_to_statuses_to_display_tags",
"service_keys_to_display_friendly_tags",
"service_keys_to_names",
"tag_display_types_to_namespaces",
"namespace_display_string_lookup",
"tag_display_decoration_colour_lookup",
}
for key, service in tags_section.items():
if key in ignored_keys:
continue
if isinstance(service, dict):
service_key = service.get("service_key") or (key if isinstance(key, str) else None)
service_name = service.get("service_name") or service.get("name") or names_map.get(service_key)
record = get_record(service_key if isinstance(service_key, str) else None, service_name)
storage = service.get("storage_tags") or service.get("statuses_to_tags") or service.get("tags")
if isinstance(storage, dict):
for tags_list in _iter_current_status_lists(storage):
for tag in tags_list:
normalized = _normalize_tag(tag)
if normalized:
record["tags"].append(normalized)
elif isinstance(storage, list):
for tag in storage:
normalized = _normalize_tag(tag)
if normalized:
record["tags"].append(normalized)
for record in services:
record["tags"] = _dedup_tags_by_namespace(record["tags"], keep_first=True)
return services
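# Shape sketch (illustrative): a Hydrus metadata entry's tags section looks
# roughly like
#   {"service_keys_to_statuses_to_tags": {"<key>": {"0": ["title:x"], ...}},
#    "service_keys_to_names": {"<key>": "my tags"}}
# where status "0" means "current"; only current tags are collected above.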
def _select_primary_tags(
services: List[Dict[str, Any]],
aggregated: List[str],
prefer_service: Optional[str]
) -> Tuple[Optional[str], List[str]]:
prefer_lower = prefer_service.lower() if isinstance(prefer_service, str) else None
if prefer_lower:
for record in services:
name = record.get("service_name")
if isinstance(name, str) and name.lower() == prefer_lower and record["tags"]:
return record.get("service_key"), record["tags"]
for record in services:
if record["tags"]:
return record.get("service_key"), record["tags"]
return None, aggregated
def _derive_title(
tags_primary: List[str],
tags_aggregated: List[str],
entry: Dict[str, Any]
) -> Optional[str]:
for source in (tags_primary, tags_aggregated):
for tag in source:
namespace, sep, value = tag.partition(":")
if sep and namespace and namespace.lower() == "title":
cleaned = value.strip()
if cleaned:
return cleaned
for key in (
"title",
"display_name",
"pretty_name",
"original_display_filename",
"original_filename",
):
value = entry.get(key)
if isinstance(value, str):
cleaned = value.strip()
if cleaned:
return cleaned
return None
def _derive_clip_time(
tags_primary: List[str],
tags_aggregated: List[str],
entry: Dict[str, Any]
) -> Optional[str]:
namespaces = {"clip", "clip_time", "cliptime"}
for source in (tags_primary, tags_aggregated):
for tag in source:
namespace, sep, value = tag.partition(":")
if sep and namespace and namespace.lower() in namespaces:
cleaned = value.strip()
if cleaned:
return cleaned
clip_value = entry.get("clip_time")
if isinstance(clip_value, str):
cleaned_clip = clip_value.strip()
if cleaned_clip:
return cleaned_clip
return None
def _summarize_hydrus_entry(
entry: Dict[str, Any],
prefer_service: Optional[str]
) -> Tuple[Dict[str, Any], List[str], Optional[str], Optional[str], Optional[str]]:
services = _extract_tag_services(entry)
aggregated: List[str] = []
seen: Set[str] = set()
for record in services:
for tag in record["tags"]:
if tag not in seen:
seen.add(tag)
aggregated.append(tag)
service_key, primary_tags = _select_primary_tags(services, aggregated, prefer_service)
title = _derive_title(primary_tags, aggregated, entry)
clip_time = _derive_clip_time(primary_tags, aggregated, entry)
summary = dict(entry)
if title and not summary.get("title"):
summary["title"] = title
if clip_time and not summary.get("clip_time"):
summary["clip_time"] = clip_time
summary["tag_service_key"] = service_key
summary["has_current_file_service"] = _has_current_file_service(entry)
if "is_local" not in summary:
summary["is_local"] = bool(entry.get("is_local"))
return summary, primary_tags, service_key, title, clip_time
def _looks_like_hash(value: Any) -> bool:
if not isinstance(value, str):
return False
candidate = value.strip().lower()
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
def _collect_relationship_hashes(payload: Any, accumulator: Set[str]) -> None:
if isinstance(payload, dict):
for value in payload.values():
_collect_relationship_hashes(value, accumulator)
elif isinstance(payload, (list, tuple, set)):
for value in payload:
_collect_relationship_hashes(value, accumulator)
elif isinstance(payload, str) and _looks_like_hash(payload):
accumulator.add(payload)
def _generate_hydrus_url_variants(url: str) -> List[str]:
seen: Set[str] = set()
variants: List[str] = []
def push(candidate: Optional[str]) -> None:
if not candidate:
return
text = candidate.strip()
if not text or text in seen:
return
seen.add(text)
variants.append(text)
push(url)
try:
parsed = urlsplit(url)
except Exception:
return variants
if parsed.scheme in {"http", "https"}:
alternate_scheme = "https" if parsed.scheme == "http" else "http"
push(urlunsplit((alternate_scheme, parsed.netloc, parsed.path, parsed.query, parsed.fragment)))
normalised_netloc = parsed.netloc.lower()
if normalised_netloc and normalised_netloc != parsed.netloc:
push(urlunsplit((parsed.scheme, normalised_netloc, parsed.path, parsed.query, parsed.fragment)))
if parsed.path:
trimmed_path = parsed.path.rstrip("/")
if trimmed_path != parsed.path:
push(urlunsplit((parsed.scheme, parsed.netloc, trimmed_path, parsed.query, parsed.fragment)))
else:
push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path + "/", parsed.query, parsed.fragment)))
unquoted_path = unquote(parsed.path)
if unquoted_path != parsed.path:
push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, parsed.query, parsed.fragment)))
if parsed.query or parsed.fragment:
push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", "")))
if parsed.path:
unquoted_path = unquote(parsed.path)
push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, "", "")))
return variants
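# Variant sketch (illustrative): for "http://Example.com/files/a%20b/" the
# generator yields, in order: the original URL, its https:// counterpart,
# the lowercased-host form, the trailing-slash toggle, and the
# percent-decoded path form; the `seen` set suppresses duplicates.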
def _build_hydrus_query(
hashes: Optional[Sequence[str]],
file_ids: Optional[Sequence[int]],
include_relationships: bool,
minimal: bool,
) -> Dict[str, str]:
query: Dict[str, str] = {}
if hashes:
query["hashes"] = json.dumps([_normalize_hash(h) for h in hashes])
if file_ids:
query["file_ids"] = json.dumps([int(fid) for fid in file_ids])
if not query:
raise ValueError("hashes or file_ids must be provided")
query["include_service_keys_to_tags"] = json.dumps(True)
query["include_tag_services"] = json.dumps(True)
query["include_file_services"] = json.dumps(True)
if include_relationships:
query["include_file_relationships"] = json.dumps(True)
if not minimal:
extras = (
"include_url",
"include_size",
"include_width",
"include_height",
"include_duration",
"include_mime",
"include_has_audio",
"include_is_trashed",
)
for key in extras:
query[key] = json.dumps(True)
return query
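# Query sketch (illustrative): for a single hash with minimal=True this
# returns JSON-encoded string values, as the Client API expects:
#   {"hashes": '["<64-hex>"]',
#    "include_service_keys_to_tags": "true",
#    "include_tag_services": "true",
#    "include_file_services": "true"}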
def _fetch_hydrus_entries(
client: "HydrusNetwork",
hashes: Optional[Sequence[str]],
file_ids: Optional[Sequence[int]],
include_relationships: bool,
minimal: bool,
) -> List[Dict[str, Any]]:
if not hashes and not file_ids:
return []
spec = HydrusRequestSpec(
method="GET",
endpoint="/get_files/file_metadata",
query=_build_hydrus_query(hashes, file_ids, include_relationships, minimal),
)
response = client._perform_request(spec)
metadata = response.get("metadata") if isinstance(response, dict) else None
if isinstance(metadata, list):
return [entry for entry in metadata if isinstance(entry, dict)]
return []
def _has_current_file_service(entry: Dict[str, Any]) -> bool:
services = entry.get("file_services")
if not isinstance(services, dict):
return False
current = services.get("current")
if isinstance(current, dict):
for value in current.values():
if value:
return True
return False
if isinstance(current, list):
return len(current) > 0
return False
def _compute_file_flags(entry: Dict[str, Any]) -> Tuple[bool, bool, bool]:
mime = entry.get("mime")
mime_lower = mime.lower() if isinstance(mime, str) else ""
is_video = mime_lower.startswith("video/")
is_audio = mime_lower.startswith("audio/")
is_deleted = bool(entry.get("is_trashed"))
file_services = entry.get("file_services")
if not is_deleted and isinstance(file_services, dict):
deleted = file_services.get("deleted")
if isinstance(deleted, dict) and deleted:
is_deleted = True
return is_video, is_audio, is_deleted
def fetch_hydrus_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
hash_hex = None
raw_hash_value = payload.get("hash")
if raw_hash_value is not None:
hash_hex = _normalize_hash(raw_hash_value)
file_ids: List[int] = []
raw_file_ids = payload.get("file_ids")
if isinstance(raw_file_ids, (list, tuple, set)):
for value in raw_file_ids:
try:
file_ids.append(int(value))
except (TypeError, ValueError):
continue
elif raw_file_ids is not None:
try:
file_ids.append(int(raw_file_ids))
except (TypeError, ValueError):
file_ids = []
raw_file_id = payload.get("file_id")
if raw_file_id is not None:
try:
coerced = int(raw_file_id)
except (TypeError, ValueError):
coerced = None
if coerced is not None and coerced not in file_ids:
file_ids.append(coerced)
base_url = str(payload.get("api_url") or "").strip()
if not base_url:
raise ValueError("Hydrus api_url is required")
access_key = str(payload.get("access_key") or "").strip()
options_raw = payload.get("options")
options = options_raw if isinstance(options_raw, dict) else {}
prefer_service = options.get("prefer_service_name")
if isinstance(prefer_service, str):
prefer_service = prefer_service.strip()
else:
prefer_service = None
include_relationships = bool(options.get("include_relationships"))
minimal = bool(options.get("minimal"))
timeout = float(options.get("timeout") or 60.0)
client = HydrusNetwork(base_url, access_key, timeout)
hashes: Optional[List[str]] = None
if hash_hex:
hashes = [hash_hex]
if not hashes and not file_ids:
raise ValueError("Hydrus hash or file id is required")
try:
entries = _fetch_hydrus_entries(
client,
hashes,
file_ids or None,
include_relationships,
minimal
)
except HydrusRequestError as exc:
raise RuntimeError(str(exc)) from exc
if not entries:
response: Dict[str, Any] = {
"hash": hash_hex,
"metadata": {},
"tags": [],
"warnings": [f"No Hydrus metadata for {hash_hex or file_ids}"],
"error": "not_found",
}
if file_ids:
response["file_id"] = file_ids[0]
return response
entry = entries[0]
if not hash_hex:
entry_hash = entry.get("hash")
if isinstance(entry_hash, str) and entry_hash:
hash_hex = entry_hash
hashes = [hash_hex]
summary, primary_tags, service_key, title, clip_time = _summarize_hydrus_entry(entry, prefer_service)
is_video, is_audio, is_deleted = _compute_file_flags(entry)
has_current_file_service = _has_current_file_service(entry)
is_local = bool(entry.get("is_local"))
size_bytes = entry.get("size") or entry.get("file_size")
filesize_mb = None
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
filesize_mb = float(size_bytes) / (1024.0 * 1024.0)
duration = entry.get("duration")
if duration is None and isinstance(entry.get("duration_ms"), (int, float)):
duration = float(entry["duration_ms"]) / 1000.0
warnings_list: List[str] = []
if not primary_tags:
warnings_list.append("No tags returned for preferred service")
relationships = None
relationship_metadata: Dict[str, Dict[str, Any]] = {}
if include_relationships and hash_hex:
try:
rel_spec = HydrusRequestSpec(
method="GET",
endpoint="/manage_file_relationships/get_file_relationships",
query={"hash": hash_hex},
)
relationships = client._perform_request(rel_spec)
except HydrusRequestError as exc:
warnings_list.append(f"Relationship lookup failed: {exc}")
relationships = None
if isinstance(relationships, dict):
related_hashes: Set[str] = set()
_collect_relationship_hashes(relationships, related_hashes)
related_hashes.discard(hash_hex)
if related_hashes:
try:
related_entries = _fetch_hydrus_entries(
client,
sorted(related_hashes),
None,
False,
True
)
except HydrusRequestError as exc:
warnings_list.append(f"Relationship metadata fetch failed: {exc}")
else:
for rel_entry in related_entries:
rel_hash = rel_entry.get("hash")
if not isinstance(rel_hash, str):
continue
rel_summary, rel_tags, _, rel_title, rel_clip = _summarize_hydrus_entry(rel_entry, prefer_service)
rel_summary["tags"] = rel_tags
if rel_title:
rel_summary["title"] = rel_title
if rel_clip:
rel_summary["clip_time"] = rel_clip
relationship_metadata[rel_hash] = rel_summary
result: Dict[str, Any] = {
"hash": entry.get("hash") or hash_hex,
"metadata": summary,
"tags": primary_tags,
"tag_service_key": service_key,
"title": title,
"clip_time": clip_time,
"duration": duration,
"filesize_mb": filesize_mb,
"is_video": is_video,
"is_audio": is_audio,
"is_deleted": is_deleted,
"is_local": is_local,
"has_current_file_service": has_current_file_service,
"matched_hash": entry.get("hash") or hash_hex,
"swap_recommended": False,
}
file_id_value = entry.get("file_id")
if isinstance(file_id_value, (int, float)):
result["file_id"] = int(file_id_value)
if relationships is not None:
result["relationships"] = relationships
if relationship_metadata:
result["relationship_metadata"] = relationship_metadata
if warnings_list:
result["warnings"] = warnings_list
return result
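# Usage sketch (illustrative, not part of this commit): the api_url below is
# the Hydrus client API default port; the hash and access key are placeholders.
def _example_fetch_metadata() -> None:
    payload = {
        "hash": "0" * 64,  # placeholder 64-char hex hash
        "api_url": "http://127.0.0.1:45869",
        "access_key": "<access key>",
        "options": {"minimal": True, "prefer_service_name": "my tags"},
    }
    result = fetch_hydrus_metadata(payload)
    print(result.get("title"), result["tags"], result.get("warnings"))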
def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]:
raw_url = payload.get("url") or payload.get("source_url")
url = str(raw_url or "").strip()
if not url:
raise ValueError("URL is required to fetch Hydrus metadata by URL")
base_url = str(payload.get("api_url") or "").strip()
if not base_url:
raise ValueError("Hydrus api_url is required")
access_key = str(payload.get("access_key") or "").strip()
options_raw = payload.get("options")
options = options_raw if isinstance(options_raw, dict) else {}
timeout = float(options.get("timeout") or 60.0)
client = HydrusNetwork(base_url, access_key, timeout)
hashes: Optional[List[str]] = None
file_ids: Optional[List[int]] = None
matched_url = None
normalised_reported = None
seen: Set[str] = set()
queue = deque()
for variant in _generate_hydrus_url_variants(url):
queue.append(variant)
if not queue:
queue.append(url)
tried_variants: List[str] = []
while queue:
candidate = queue.popleft()
candidate = str(candidate or "").strip()
if not candidate or candidate in seen:
continue
seen.add(candidate)
tried_variants.append(candidate)
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={"url": candidate},
)
try:
response = client._perform_request(spec)
except HydrusRequestError as exc:
raise RuntimeError(str(exc)) from exc
response_hashes_list: List[str] = []
response_file_ids_list: List[int] = []
if isinstance(response, dict):
normalised_value = response.get("normalised_url")
if isinstance(normalised_value, str):
trimmed = normalised_value.strip()
if trimmed:
normalised_reported = normalised_reported or trimmed
if trimmed not in seen:
queue.append(trimmed)
for redirect_key in ("redirect_url", "url"):
redirect_value = response.get(redirect_key)
if isinstance(redirect_value, str):
redirect_trimmed = redirect_value.strip()
if redirect_trimmed and redirect_trimmed not in seen:
queue.append(redirect_trimmed)
raw_hashes = response.get("hashes") or response.get("file_hashes")
if isinstance(raw_hashes, list):
for item in raw_hashes:
try:
normalized = _normalize_hash(item)
except ValueError:
continue
if normalized:
response_hashes_list.append(normalized)
raw_ids = response.get("file_ids") or response.get("file_id")
if isinstance(raw_ids, list):
for item in raw_ids:
try:
response_file_ids_list.append(int(item))
except (TypeError, ValueError):
continue
elif raw_ids is not None:
try:
response_file_ids_list.append(int(raw_ids))
except (TypeError, ValueError):
pass
statuses = response.get("url_file_statuses")
if isinstance(statuses, list):
for entry in statuses:
if not isinstance(entry, dict):
continue
status_hash = entry.get("hash") or entry.get("file_hash")
if status_hash:
try:
normalized = _normalize_hash(status_hash)
except ValueError:
normalized = None
if normalized:
response_hashes_list.append(normalized)
status_id = entry.get("file_id") or entry.get("fileid")
if status_id is not None:
try:
response_file_ids_list.append(int(status_id))
except (TypeError, ValueError):
pass
if not hashes and response_hashes_list:
hashes = response_hashes_list
if not file_ids and response_file_ids_list:
file_ids = response_file_ids_list
if hashes or file_ids:
matched_url = candidate
break
if not hashes and not file_ids:
raise RuntimeError(
"No Hydrus matches for URL variants: "
+ ", ".join(tried_variants)
)
followup_payload = {
"api_url": base_url,
"access_key": access_key,
"hash": hashes[0] if hashes else None,
"file_ids": file_ids,
"options": {"timeout": timeout, "minimal": True},
}
result = fetch_hydrus_metadata(followup_payload)
result["matched_url"] = matched_url or url
result["normalised_url"] = normalised_reported or matched_url or url
result["tried_urls"] = tried_variants
return result
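# Flow sketch (illustrative): each URL variant is tried against
# /add_urls/get_url_files; any normalised_url or redirect_url Hydrus reports
# is queued as a further candidate, and the first hash or file-id match is
# delegated to fetch_hydrus_metadata, with matched_url, normalised_url and
# tried_urls annotated on the result.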
def _build_hydrus_context(payload: Dict[str, Any]) -> Tuple["HydrusNetwork", str, str, float, Optional[str]]:
base_url = str(payload.get("api_url") or "").strip()
if not base_url:
raise ValueError("Hydrus api_url is required")
access_key = str(payload.get("access_key") or "").strip()
options_raw = payload.get("options")
options = options_raw if isinstance(options_raw, dict) else {}
timeout = float(options.get("timeout") or payload.get("timeout") or 60.0)
prefer_service = payload.get("prefer_service_name") or options.get("prefer_service_name")
if isinstance(prefer_service, str):
prefer_service = prefer_service.strip() or None
else:
prefer_service = None
client = HydrusNetwork(base_url, access_key, timeout)
return client, base_url, access_key, timeout, prefer_service
def _refetch_hydrus_summary(
base_url: str,
access_key: str,
hash_hex: str,
timeout: float,
prefer_service: Optional[str]
) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"hash": hash_hex,
"api_url": base_url,
"access_key": access_key,
"options": {
"minimal": True,
"include_relationships": False,
"timeout": timeout,
},
}
if prefer_service:
payload["options"]["prefer_service_name"] = prefer_service
return fetch_hydrus_metadata(payload)
def apply_hydrus_tag_mutation(
payload: Dict[str, Any],
add: Iterable[Any],
remove: Iterable[Any]
) -> Dict[str, Any]:
client, base_url, access_key, timeout, prefer_service = _build_hydrus_context(payload)
hash_hex = _normalize_hash(payload.get("hash"))
add_list = [t for t in (_normalize_tag(tag) for tag in add) if t]
remove_list = [t for t in (_normalize_tag(tag) for tag in remove) if t]
if not add_list and not remove_list:
raise ValueError("No tag changes supplied")
service_key = payload.get("service_key") or payload.get("tag_service_key")
summary = None
if not service_key:
summary = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
service_key = summary.get("tag_service_key")
if not isinstance(service_key, str) or not service_key:
raise RuntimeError("Unable to determine Hydrus tag service key")
actions: Dict[str, List[str]] = {}
if add_list:
    actions["0"] = add_list  # Hydrus content action 0 = add tags
if remove_list:
    actions["1"] = remove_list  # Hydrus content action 1 = delete tags
if not actions:
raise ValueError("Tag mutation produced no actionable changes")
request_payload = {
"hashes": [hash_hex],
"service_keys_to_actions_to_tags": {
service_key: actions,
},
}
try:
tag_spec = HydrusRequestSpec(
method="POST",
endpoint="/add_tags/add_tags",
data=request_payload,
)
client._perform_request(tag_spec)
except HydrusRequestError as exc:
raise RuntimeError(str(exc)) from exc
summary_after = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
result = dict(summary_after)
result["added_tags"] = actions.get("0", [])
result["removed_tags"] = actions.get("1", [])
result["tag_service_key"] = summary_after.get("tag_service_key")
return result
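# Usage sketch (illustrative, not part of this commit): rename a title tag in
# one call; connection values are placeholders.
def _example_tag_mutation() -> None:
    payload = {
        "hash": "0" * 64,
        "api_url": "http://127.0.0.1:45869",
        "access_key": "<access key>",
    }
    result = apply_hydrus_tag_mutation(
        payload,
        add=["title:new name"],
        remove=["title:old name"],
    )
    print(result["added_tags"], result["removed_tags"])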

API/cmdlet.py (new file, 220 lines)

@@ -0,0 +1,220 @@
from __future__ import annotations
import contextlib
import io
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Sequence
from SYS import pipeline as ctx
from SYS.models import PipelineStageContext
from SYS.rich_display import capture_rich_output
CmdletCallable = Callable[[Any, Sequence[str], Dict[str, Any]], int]
@dataclass(slots=True)
class CmdletRunResult:
"""Programmatic result for a single cmdlet invocation."""
name: str
args: Sequence[str]
exit_code: int = 0
emitted: List[Any] = field(default_factory=list)
# Best-effort: cmdlets can publish tables/items via pipeline state even when
# they don't emit pipeline items.
result_table: Optional[Any] = None
result_items: List[Any] = field(default_factory=list)
result_subject: Optional[Any] = None
stdout: str = ""
stderr: str = ""
error: Optional[str] = None
def _normalize_cmd_name(name: str) -> str:
return str(name or "").replace("_", "-").strip().lower()
def resolve_cmdlet(cmd_name: str) -> Optional[CmdletCallable]:
"""Resolve a cmdlet callable by name from the registry (aliases supported)."""
try:
from SYS.cmdlet_catalog import ensure_registry_loaded
ensure_registry_loaded()
except Exception:
pass
try:
import cmdlet as cmdlet_pkg
return cmdlet_pkg.get(cmd_name)
except Exception:
return None
def run_cmdlet(
cmd: str | CmdletCallable,
args: Sequence[str] | None,
config: Dict[str, Any],
*,
piped: Any = None,
isolate: bool = True,
capture_output: bool = True,
stage_index: int = 0,
total_stages: int = 1,
pipe_index: Optional[int] = None,
worker_id: Optional[str] = None,
) -> CmdletRunResult:
"""Run a single cmdlet programmatically and return structured results.
This is intended for TUI/webapp consumers that want cmdlet behavior without
going through the interactive CLI loop.
Notes:
- When `isolate=True` (default) this runs inside `ctx.new_pipeline_state()` so
global CLI pipeline state is not mutated.
- Output capturing covers both normal `print()` and Rich output via
`capture_rich_output()`.
"""
normalized_args: Sequence[str] = list(args or [])
if isinstance(cmd, str):
name = _normalize_cmd_name(cmd)
cmd_fn = resolve_cmdlet(name)
else:
name = getattr(cmd, "__name__", "cmdlet")
cmd_fn = cmd
result = CmdletRunResult(name=name, args=normalized_args)
if not callable(cmd_fn):
result.exit_code = 1
result.error = f"Unknown command: {name}"
result.stderr = result.error
return result
stage_ctx = PipelineStageContext(
stage_index=int(stage_index),
total_stages=int(total_stages),
pipe_index=pipe_index,
worker_id=worker_id,
)
stdout_buffer = io.StringIO()
stderr_buffer = io.StringIO()
stage_text = " ".join([name, *list(normalized_args)]).strip()
state_cm = ctx.new_pipeline_state() if isolate else contextlib.nullcontext()
with state_cm:
# Keep behavior predictable: start from a clean slate.
try:
ctx.reset()
except Exception:
pass
try:
ctx.set_stage_context(stage_ctx)
except Exception:
pass
try:
ctx.set_current_cmdlet_name(name)
except Exception:
pass
try:
ctx.set_current_stage_text(stage_text)
except Exception:
pass
try:
ctx.set_current_command_text(stage_text)
except Exception:
pass
try:
run_cm = (
capture_rich_output(stdout=stdout_buffer, stderr=stderr_buffer)
if capture_output
else contextlib.nullcontext()
)
with run_cm:
with (
contextlib.redirect_stdout(stdout_buffer)
if capture_output
else contextlib.nullcontext()
):
with (
contextlib.redirect_stderr(stderr_buffer)
if capture_output
else contextlib.nullcontext()
):
result.exit_code = int(cmd_fn(piped, list(normalized_args), config))
except Exception as exc:
result.exit_code = 1
result.error = f"{type(exc).__name__}: {exc}"
finally:
result.stdout = stdout_buffer.getvalue()
result.stderr = stderr_buffer.getvalue()
# Prefer cmdlet emits (pipeline semantics).
try:
result.emitted = list(stage_ctx.emits or [])
except Exception:
result.emitted = []
# Mirror CLI behavior: if cmdlet emitted items and there is no overlay table,
# make emitted items the last result items for downstream consumers.
try:
has_overlay = bool(ctx.get_display_table())
except Exception:
has_overlay = False
if result.emitted and not has_overlay:
try:
ctx.set_last_result_items_only(list(result.emitted))
except Exception:
pass
# Best-effort snapshot of visible results.
try:
result.result_table = (
ctx.get_display_table() or ctx.get_current_stage_table() or ctx.get_last_result_table()
)
except Exception:
result.result_table = None
try:
result.result_items = list(ctx.get_last_result_items() or [])
except Exception:
result.result_items = []
try:
result.result_subject = ctx.get_last_result_subject()
except Exception:
result.result_subject = None
# Cleanup stage-local markers.
try:
ctx.clear_current_stage_text()
except Exception:
pass
try:
ctx.clear_current_cmdlet_name()
except Exception:
pass
try:
ctx.clear_current_command_text()
except Exception:
pass
try:
ctx.set_stage_context(None)
except Exception:
pass
return result
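# Usage sketch (illustrative, not part of this commit): the cmdlet name and
# arguments are assumptions for the example.
def _example_run_cmdlet() -> None:
    res = run_cmdlet("download-file", ["--help"], config={})
    if res.error:
        print(res.exit_code, res.stderr)
    else:
        print(res.exit_code, res.stdout)
        for item in res.emitted:
            print("emitted:", item)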