API/HTTP.py (369 changed lines)
@@ -10,10 +10,24 @@ Provides synchronous and asynchronous HTTP operations with:
import httpx
import asyncio
import sys
import time
import traceback
import re
from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set
from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs
import logging

from SYS.logger import debug, log
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
from SYS.utils import ensure_directory, sha256_file

try:  # Optional; used for metadata extraction when available
    from SYS.metadata import extract_ytdlp_tags
except Exception:  # pragma: no cover - optional dependency
    extract_ytdlp_tags = None  # type: ignore[assignment]

logger = logging.getLogger(__name__)

# Default configuration
@@ -366,6 +380,359 @@ class HTTPClient:
        return self._client.stream(method, url, **kwargs)


def download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
    suggested_filename: Optional[str] = None,
    pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks."""

    ensure_directory(output_dir)

    def _sanitize_filename(name: str) -> str:
        # Windows-safe filename sanitization.
        text = str(name or "").strip()
        if not text:
            return ""
        text = text.replace("/", "\\")
        text = text.split("\\")[-1]

        invalid = set('<>:"/\\|?*')
        cleaned_chars: List[str] = []
        for ch in text:
            o = ord(ch)
            if o < 32 or ch in invalid:
                cleaned_chars.append(" ")
                continue
            cleaned_chars.append(ch)
        cleaned = " ".join("".join(cleaned_chars).split()).strip()
        cleaned = cleaned.rstrip(" .")
        return cleaned

    def _unique_path(path: Path) -> Path:
        if not path.exists():
            return path
        stem = path.stem
        suffix = path.suffix
        parent = path.parent
        for i in range(1, 10_000):
            candidate = parent / f"{stem} ({i}){suffix}"
            if not candidate.exists():
                return candidate
        return parent / f"{stem} ({int(time.time())}){suffix}"

    parsed_url = urlparse(url)
    url_path = parsed_url.path

    filename: Optional[str] = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ("filename", "download", "file", "name"):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break

    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)

    if "?" in filename:
        filename = filename.split("?")[0]

    content_type = ""
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            try:
                content_type = str(response.headers.get("content-type", "") or "").strip().lower()
            except Exception:
                content_type = ""

            if content_disposition:
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as exc:
        if not quiet:
            log(f"Could not get filename from headers: {exc}", file=sys.stderr)

    try:
        page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
        ext = ""
        try:
            ext = Path(str(filename or "")).suffix.lower()
        except Exception:
            ext = ""

        ct0 = (content_type or "").split(";", 1)[0].strip().lower()
        must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)

        if must_probe:
            with HTTPClient(timeout=10.0) as client:
                with client._request_stream("GET", url, follow_redirects=True) as resp:
                    resp.raise_for_status()
                    ct = (
                        str(resp.headers.get("content-type", "") or "")
                        .split(";", 1)[0]
                        .strip()
                        .lower()
                    )
                    if ct.startswith("text/html"):
                        raise DownloadError("URL appears to be an HTML page, not a direct file")
    except DownloadError:
        raise
    except Exception:
        pass

    suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
    if suggested:
        suggested_path = Path(suggested)
        if suggested_path.suffix:
            filename = suggested
        else:
            detected_ext = ""
            try:
                detected_ext = Path(str(filename)).suffix
            except Exception:
                detected_ext = ""
            filename = suggested + detected_ext if detected_ext else suggested

    try:
        has_ext = bool(filename and Path(str(filename)).suffix)
    except Exception:
        has_ext = False

    if filename and (not has_ext):
        ct = (content_type or "").split(";", 1)[0].strip().lower()
        ext_by_ct = {
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/x-mobipocket-ebook": ".mobi",
            "image/jpeg": ".jpg",
            "image/png": ".png",
            "image/webp": ".webp",
            "image/gif": ".gif",
            "text/plain": ".txt",
            "application/zip": ".zip",
        }

        if ct in ext_by_ct:
            filename = f"{filename}{ext_by_ct[ct]}"
        elif ct.startswith("text/html"):
            raise DownloadError("URL appears to be an HTML page, not a direct file")

    if not filename or not str(filename).strip():
        raise DownloadError(
            "Could not determine filename for URL (no Content-Disposition and no path filename)"
        )

    file_path = _unique_path(output_dir / str(filename))

    use_pipeline_transfer = False
    try:
        if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
            ui = None
            if hasattr(pipeline_progress, "ui_and_pipe_index"):
                ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
            use_pipeline_transfer = ui is not None
    except Exception:
        use_pipeline_transfer = False

    progress_bar: Optional[ProgressBar] = None
    if (not quiet) and (not use_pipeline_transfer):
        progress_bar = ProgressBar()

    transfer_started = [False]

    if not quiet:
        debug(f"Direct download: {filename}")

    try:
        start_time = time.time()
        downloaded_bytes = [0]
        transfer_started[0] = False

        def _maybe_begin_transfer(content_length: int) -> None:
            if pipeline_progress is None or transfer_started[0]:
                return
            try:
                total_val: Optional[int] = (
                    int(content_length)
                    if isinstance(content_length, int) and content_length > 0
                    else None
                )
            except Exception:
                total_val = None
            try:
                if hasattr(pipeline_progress, "begin_transfer"):
                    pipeline_progress.begin_transfer(
                        label=str(filename or "download"),
                        total=total_val,
                    )
                    transfer_started[0] = True
            except Exception:
                return

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            downloaded_bytes[0] = int(bytes_downloaded or 0)

            try:
                if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
                    _maybe_begin_transfer(content_length)
                    total_val: Optional[int] = (
                        int(content_length)
                        if isinstance(content_length, int) and content_length > 0
                        else None
                    )
                    pipeline_progress.update_transfer(
                        label=str(filename or "download"),
                        completed=int(bytes_downloaded or 0),
                        total=total_val,
                    )
            except Exception:
                pass

            if progress_bar is not None:
                progress_bar.update(
                    downloaded=int(bytes_downloaded or 0),
                    total=int(content_length) if content_length and content_length > 0 else None,
                    label=str(filename or "download"),
                    file=sys.stderr,
                )

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)

        elapsed = time.time() - start_time

        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass

        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s")

        ext_out = ""
        try:
            ext_out = Path(str(filename)).suffix.lstrip(".")
        except Exception:
            ext_out = ""

        info: Dict[str, Any] = {
            "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
            "ext": ext_out,
            "webpage_url": url,
        }

        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            pass

        tags: List[str] = []
        if extract_ytdlp_tags:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as exc:
                log(f"Error extracting tags: {exc}", file=sys.stderr)

        if not any(str(t).startswith("title:") for t in tags):
            info["title"] = str(filename)
            tags = []
            if extract_ytdlp_tags:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as exc:
                    log(f"Error extracting tags with filename: {exc}", file=sys.stderr)

        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )

        return DownloadMediaResult(
            path=file_path,
            info=info,
            tag=tags,
            source_url=url,
            hash_value=hash_value,
        )

    except (httpx.HTTPError, httpx.RequestError) as exc:
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc

    except Exception as exc:
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            pass
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            pass

        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc


# Back-compat alias
_download_direct_file = download_direct_file
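
A minimal usage sketch for the new entry point (not part of the commit; the URL, output directory, and printed fields are illustrative assumptions based on the signature and the DownloadMediaResult constructed above):

    from pathlib import Path

    # Hypothetical caller: fetch one PDF into ./downloads and inspect the result.
    result = download_direct_file(
        "https://example.com/paper.pdf",  # placeholder URL
        Path("downloads"),
        quiet=True,
    )
    print(result.path, result.hash_value)  # fields set by the return statement above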


class AsyncHTTPClient:
    """Unified async HTTP client with asyncio support."""
@@ -11,6 +11,7 @@ import shutil
import subprocess
import sys
import time
from collections import deque

from SYS.logger import log
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
@@ -18,8 +19,8 @@ import tempfile
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, TypeVar, Union, cast
from urllib.parse import urlsplit, urlencode, quote, urlunsplit, unquote
import httpx

logger = logging.getLogger(__name__)
@@ -1828,3 +1829,742 @@ def download_hydrus_file(
    print_final_progress(filename, file_size, elapsed)

    return downloaded


# ============================================================================
# Hydrus metadata helpers (moved from SYS.metadata)
# ============================================================================


def _normalize_hash(value: Any) -> str:
    candidate = str(value or "").strip().lower()
    if not candidate:
        raise ValueError("Hydrus hash is required")
    if len(candidate) != 64 or any(ch not in "0123456789abcdef" for ch in candidate):
        raise ValueError("Hydrus hash must be a 64-character hex string")
    return candidate


def _normalize_tag(tag: Any) -> Optional[str]:
    if tag is None:
        return None
    if isinstance(tag, str):
        candidate = tag.strip()
    else:
        candidate = str(tag).strip()
    return candidate or None


def _dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
    if not tags:
        return []

    namespace_to_tags: Dict[Optional[str], List[Tuple[int, str]]] = {}
    first_appearance: Dict[Optional[str], int] = {}

    for idx, tag in enumerate(tags):
        namespace: Optional[str] = tag.split(":", 1)[0] if ":" in tag else None
        if namespace not in first_appearance:
            first_appearance[namespace] = idx
        if namespace not in namespace_to_tags:
            namespace_to_tags[namespace] = []
        namespace_to_tags[namespace].append((idx, tag))

    result: List[Tuple[int, str]] = []
    for namespace, tag_list in namespace_to_tags.items():
        chosen_tag = tag_list[0][1] if keep_first else tag_list[-1][1]
        result.append((first_appearance[namespace], chosen_tag))

    result.sort(key=lambda x: x[0])
    return [tag for _, tag in result]
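
For orientation, a worked example of the dedup rule (keep the first tag seen per namespace, preserve first-appearance order; the tags are made up):

    _dedup_tags_by_namespace(["title:foo", "title:bar", "creator:baz", "blue", "red"])
    # -> ["title:foo", "creator:baz", "blue"]
    # Unnamespaced tags all share the None bucket, so "red" is dropped;
    # with keep_first=False the result would be ["title:bar", "creator:baz", "red"].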


def _extract_tag_services(entry: Dict[str, Any]) -> List[Dict[str, Any]]:
    tags_section = entry.get("tags")
    services: List[Dict[str, Any]] = []
    if not isinstance(tags_section, dict):
        return services
    names_map = tags_section.get("service_keys_to_names")
    if not isinstance(names_map, dict):
        names_map = {}

    def get_record(service_key: Optional[str], service_name: Optional[str]) -> Dict[str, Any]:
        key_lower = service_key.lower() if isinstance(service_key, str) else None
        name_lower = service_name.lower() if isinstance(service_name, str) else None
        for record in services:
            existing_key = record.get("service_key")
            if key_lower and isinstance(existing_key, str) and existing_key.lower() == key_lower:
                if service_name and not record.get("service_name"):
                    record["service_name"] = service_name
                return record
            existing_name = record.get("service_name")
            if name_lower and isinstance(existing_name, str) and existing_name.lower() == name_lower:
                if service_key and not record.get("service_key"):
                    record["service_key"] = service_key
                return record
        record = {
            "service_key": service_key,
            "service_name": service_name,
            "tags": [],
        }
        services.append(record)
        return record

    def _iter_current_status_lists(container: Any) -> Iterable[List[Any]]:
        if isinstance(container, dict):
            for status_key, tags_list in container.items():
                if str(status_key) != "0":
                    continue
                if isinstance(tags_list, list):
                    yield tags_list
        elif isinstance(container, list):
            yield container

    statuses_map = tags_section.get("service_keys_to_statuses_to_tags")
    if isinstance(statuses_map, dict):
        for service_key, status_map in statuses_map.items():
            record = get_record(service_key if isinstance(service_key, str) else None, names_map.get(service_key))
            for tags_list in _iter_current_status_lists(status_map):
                for tag in tags_list:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)

    ignored_keys = {
        "service_keys_to_statuses_to_tags",
        "service_keys_to_statuses_to_display_tags",
        "service_keys_to_display_friendly_tags",
        "service_keys_to_names",
        "tag_display_types_to_namespaces",
        "namespace_display_string_lookup",
        "tag_display_decoration_colour_lookup",
    }

    for key, service in tags_section.items():
        if key in ignored_keys:
            continue
        if isinstance(service, dict):
            service_key = service.get("service_key") or (key if isinstance(key, str) else None)
            service_name = service.get("service_name") or service.get("name") or names_map.get(service_key)
            record = get_record(service_key if isinstance(service_key, str) else None, service_name)
            storage = service.get("storage_tags") or service.get("statuses_to_tags") or service.get("tags")
            if isinstance(storage, dict):
                for tags_list in _iter_current_status_lists(storage):
                    for tag in tags_list:
                        normalized = _normalize_tag(tag)
                        if normalized:
                            record["tags"].append(normalized)
            elif isinstance(storage, list):
                for tag in storage:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)

    for record in services:
        record["tags"] = _dedup_tags_by_namespace(record["tags"], keep_first=True)
    return services


def _select_primary_tags(
    services: List[Dict[str, Any]],
    aggregated: List[str],
    prefer_service: Optional[str]
) -> Tuple[Optional[str], List[str]]:
    prefer_lower = prefer_service.lower() if isinstance(prefer_service, str) else None
    if prefer_lower:
        for record in services:
            name = record.get("service_name")
            if isinstance(name, str) and name.lower() == prefer_lower and record["tags"]:
                return record.get("service_key"), record["tags"]
    for record in services:
        if record["tags"]:
            return record.get("service_key"), record["tags"]
    return None, aggregated


def _derive_title(
    tags_primary: List[str],
    tags_aggregated: List[str],
    entry: Dict[str, Any]
) -> Optional[str]:
    for source in (tags_primary, tags_aggregated):
        for tag in source:
            namespace, sep, value = tag.partition(":")
            if sep and namespace and namespace.lower() == "title":
                cleaned = value.strip()
                if cleaned:
                    return cleaned
    for key in (
        "title",
        "display_name",
        "pretty_name",
        "original_display_filename",
        "original_filename",
    ):
        value = entry.get(key)
        if isinstance(value, str):
            cleaned = value.strip()
            if cleaned:
                return cleaned
    return None


def _derive_clip_time(
    tags_primary: List[str],
    tags_aggregated: List[str],
    entry: Dict[str, Any]
) -> Optional[str]:
    namespaces = {"clip", "clip_time", "cliptime"}
    for source in (tags_primary, tags_aggregated):
        for tag in source:
            namespace, sep, value = tag.partition(":")
            if sep and namespace and namespace.lower() in namespaces:
                cleaned = value.strip()
                if cleaned:
                    return cleaned
    clip_value = entry.get("clip_time")
    if isinstance(clip_value, str):
        cleaned_clip = clip_value.strip()
        if cleaned_clip:
            return cleaned_clip
    return None


def _summarize_hydrus_entry(
    entry: Dict[str, Any],
    prefer_service: Optional[str]
) -> Tuple[Dict[str, Any], List[str], Optional[str], Optional[str], Optional[str]]:
    services = _extract_tag_services(entry)
    aggregated: List[str] = []
    seen: Set[str] = set()
    for record in services:
        for tag in record["tags"]:
            if tag not in seen:
                seen.add(tag)
                aggregated.append(tag)
    service_key, primary_tags = _select_primary_tags(services, aggregated, prefer_service)
    title = _derive_title(primary_tags, aggregated, entry)
    clip_time = _derive_clip_time(primary_tags, aggregated, entry)
    summary = dict(entry)
    if title and not summary.get("title"):
        summary["title"] = title
    if clip_time and not summary.get("clip_time"):
        summary["clip_time"] = clip_time
    summary["tag_service_key"] = service_key
    summary["has_current_file_service"] = _has_current_file_service(entry)
    if "is_local" not in summary:
        summary["is_local"] = bool(entry.get("is_local"))
    return summary, primary_tags, service_key, title, clip_time


def _looks_like_hash(value: Any) -> bool:
    if not isinstance(value, str):
        return False
    candidate = value.strip().lower()
    return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)


def _collect_relationship_hashes(payload: Any, accumulator: Set[str]) -> None:
    if isinstance(payload, dict):
        for value in payload.values():
            _collect_relationship_hashes(value, accumulator)
    elif isinstance(payload, (list, tuple, set)):
        for value in payload:
            _collect_relationship_hashes(value, accumulator)
    elif isinstance(payload, str) and _looks_like_hash(payload):
        accumulator.add(payload)


def _generate_hydrus_url_variants(url: str) -> List[str]:
    seen: Set[str] = set()
    variants: List[str] = []

    def push(candidate: Optional[str]) -> None:
        if not candidate:
            return
        text = candidate.strip()
        if not text or text in seen:
            return
        seen.add(text)
        variants.append(text)

    push(url)
    try:
        parsed = urlsplit(url)
    except Exception:
        return variants

    if parsed.scheme in {"http", "https"}:
        alternate_scheme = "https" if parsed.scheme == "http" else "http"
        push(urlunsplit((alternate_scheme, parsed.netloc, parsed.path, parsed.query, parsed.fragment)))

    normalised_netloc = parsed.netloc.lower()
    if normalised_netloc and normalised_netloc != parsed.netloc:
        push(urlunsplit((parsed.scheme, normalised_netloc, parsed.path, parsed.query, parsed.fragment)))

    if parsed.path:
        trimmed_path = parsed.path.rstrip("/")
        if trimmed_path != parsed.path:
            push(urlunsplit((parsed.scheme, parsed.netloc, trimmed_path, parsed.query, parsed.fragment)))
        else:
            push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path + "/", parsed.query, parsed.fragment)))
        unquoted_path = unquote(parsed.path)
        if unquoted_path != parsed.path:
            push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, parsed.query, parsed.fragment)))

    if parsed.query or parsed.fragment:
        push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", "")))
        if parsed.path:
            unquoted_path = unquote(parsed.path)
            push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, "", "")))

    return variants
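
For orientation, the variants this generates for one made-up URL (traced from the code above; the original is always tried first):

    _generate_hydrus_url_variants("http://Example.com/post/123/?page=2")
    # ["http://Example.com/post/123/?page=2",   # original
    #  "https://Example.com/post/123/?page=2",  # scheme flipped
    #  "http://example.com/post/123/?page=2",   # netloc lowercased
    #  "http://Example.com/post/123?page=2",    # trailing slash trimmed
    #  "http://Example.com/post/123/"]          # query dropped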


def _build_hydrus_query(
    hashes: Optional[Sequence[str]],
    file_ids: Optional[Sequence[int]],
    include_relationships: bool,
    minimal: bool,
) -> Dict[str, str]:
    query: Dict[str, str] = {}
    if hashes:
        query["hashes"] = json.dumps([_normalize_hash(h) for h in hashes])
    if file_ids:
        query["file_ids"] = json.dumps([int(fid) for fid in file_ids])
    if not query:
        raise ValueError("hashes or file_ids must be provided")
    query["include_service_keys_to_tags"] = json.dumps(True)
    query["include_tag_services"] = json.dumps(True)
    query["include_file_services"] = json.dumps(True)
    if include_relationships:
        query["include_file_relationships"] = json.dumps(True)
    if not minimal:
        extras = (
            "include_url",
            "include_size",
            "include_width",
            "include_height",
            "include_duration",
            "include_mime",
            "include_has_audio",
            "include_is_trashed",
        )
        for key in extras:
            query[key] = json.dumps(True)
    return query
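
Every value in the returned query is a JSON-encoded string, ready for urlencode. A sketch of the minimal shape, using a dummy 64-hex hash:

    _build_hydrus_query(["ab" * 32], None, False, True)
    # {"hashes": '["abab...ab"]',                  # JSON list of normalized hashes
    #  "include_service_keys_to_tags": "true",
    #  "include_tag_services": "true",
    #  "include_file_services": "true"}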


def _fetch_hydrus_entries(
    client: "HydrusNetwork",
    hashes: Optional[Sequence[str]],
    file_ids: Optional[Sequence[int]],
    include_relationships: bool,
    minimal: bool,
) -> List[Dict[str, Any]]:
    if not hashes and not file_ids:
        return []
    spec = HydrusRequestSpec(
        method="GET",
        endpoint="/get_files/file_metadata",
        query=_build_hydrus_query(hashes, file_ids, include_relationships, minimal),
    )
    response = client._perform_request(spec)
    metadata = response.get("metadata") if isinstance(response, dict) else None
    if isinstance(metadata, list):
        return [entry for entry in metadata if isinstance(entry, dict)]
    return []


def _has_current_file_service(entry: Dict[str, Any]) -> bool:
    services = entry.get("file_services")
    if not isinstance(services, dict):
        return False
    current = services.get("current")
    if isinstance(current, dict):
        for value in current.values():
            if value:
                return True
        return False
    if isinstance(current, list):
        return len(current) > 0
    return False


def _compute_file_flags(entry: Dict[str, Any]) -> Tuple[bool, bool, bool]:
    mime = entry.get("mime")
    mime_lower = mime.lower() if isinstance(mime, str) else ""
    is_video = mime_lower.startswith("video/")
    is_audio = mime_lower.startswith("audio/")
    is_deleted = bool(entry.get("is_trashed"))
    file_services = entry.get("file_services")
    if not is_deleted and isinstance(file_services, dict):
        deleted = file_services.get("deleted")
        if isinstance(deleted, dict) and deleted:
            is_deleted = True
    return is_video, is_audio, is_deleted


def fetch_hydrus_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
    hash_hex = None
    raw_hash_value = payload.get("hash")
    if raw_hash_value is not None:
        hash_hex = _normalize_hash(raw_hash_value)
    file_ids: List[int] = []
    raw_file_ids = payload.get("file_ids")
    if isinstance(raw_file_ids, (list, tuple, set)):
        for value in raw_file_ids:
            try:
                file_ids.append(int(value))
            except (TypeError, ValueError):
                continue
    elif raw_file_ids is not None:
        try:
            file_ids.append(int(raw_file_ids))
        except (TypeError, ValueError):
            file_ids = []
    raw_file_id = payload.get("file_id")
    if raw_file_id is not None:
        try:
            coerced = int(raw_file_id)
        except (TypeError, ValueError):
            coerced = None
        if coerced is not None and coerced not in file_ids:
            file_ids.append(coerced)
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    prefer_service = options.get("prefer_service_name")
    if isinstance(prefer_service, str):
        prefer_service = prefer_service.strip()
    else:
        prefer_service = None
    include_relationships = bool(options.get("include_relationships"))
    minimal = bool(options.get("minimal"))
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    if hash_hex:
        hashes = [hash_hex]
    if not hashes and not file_ids:
        raise ValueError("Hydrus hash or file id is required")
    try:
        entries = _fetch_hydrus_entries(
            client,
            hashes,
            file_ids or None,
            include_relationships,
            minimal
        )
    except HydrusRequestError as exc:
        raise RuntimeError(str(exc))
    if not entries:
        response: Dict[str, Any] = {
            "hash": hash_hex,
            "metadata": {},
            "tags": [],
            "warnings": [f"No Hydrus metadata for {hash_hex or file_ids}"],
            "error": "not_found",
        }
        if file_ids:
            response["file_id"] = file_ids[0]
        return response
    entry = entries[0]
    if not hash_hex:
        entry_hash = entry.get("hash")
        if isinstance(entry_hash, str) and entry_hash:
            hash_hex = entry_hash
            hashes = [hash_hex]
    summary, primary_tags, service_key, title, clip_time = _summarize_hydrus_entry(entry, prefer_service)
    is_video, is_audio, is_deleted = _compute_file_flags(entry)
    has_current_file_service = _has_current_file_service(entry)
    is_local = bool(entry.get("is_local"))
    size_bytes = entry.get("size") or entry.get("file_size")
    filesize_mb = None
    if isinstance(size_bytes, (int, float)) and size_bytes > 0:
        filesize_mb = float(size_bytes) / (1024.0 * 1024.0)
    duration = entry.get("duration")
    if duration is None and isinstance(entry.get("duration_ms"), (int, float)):
        duration = float(entry["duration_ms"]) / 1000.0
    warnings_list: List[str] = []
    if not primary_tags:
        warnings_list.append("No tags returned for preferred service")
    relationships = None
    relationship_metadata: Dict[str, Dict[str, Any]] = {}
    if include_relationships and hash_hex:
        try:
            rel_spec = HydrusRequestSpec(
                method="GET",
                endpoint="/manage_file_relationships/get_file_relationships",
                query={"hash": hash_hex},
            )
            relationships = client._perform_request(rel_spec)
        except HydrusRequestError as exc:
            warnings_list.append(f"Relationship lookup failed: {exc}")
            relationships = None
        if isinstance(relationships, dict):
            related_hashes: Set[str] = set()
            _collect_relationship_hashes(relationships, related_hashes)
            related_hashes.discard(hash_hex)
            if related_hashes:
                try:
                    related_entries = _fetch_hydrus_entries(
                        client,
                        sorted(related_hashes),
                        None,
                        False,
                        True
                    )
                except HydrusRequestError as exc:
                    warnings_list.append(f"Relationship metadata fetch failed: {exc}")
                else:
                    for rel_entry in related_entries:
                        rel_hash = rel_entry.get("hash")
                        if not isinstance(rel_hash, str):
                            continue
                        rel_summary, rel_tags, _, rel_title, rel_clip = _summarize_hydrus_entry(rel_entry, prefer_service)
                        rel_summary["tags"] = rel_tags
                        if rel_title:
                            rel_summary["title"] = rel_title
                        if rel_clip:
                            rel_summary["clip_time"] = rel_clip
                        relationship_metadata[rel_hash] = rel_summary
    result: Dict[str, Any] = {
        "hash": entry.get("hash") or hash_hex,
        "metadata": summary,
        "tags": primary_tags,
        "tag_service_key": service_key,
        "title": title,
        "clip_time": clip_time,
        "duration": duration,
        "filesize_mb": filesize_mb,
        "is_video": is_video,
        "is_audio": is_audio,
        "is_deleted": is_deleted,
        "is_local": is_local,
        "has_current_file_service": has_current_file_service,
        "matched_hash": entry.get("hash") or hash_hex,
        "swap_recommended": False,
    }
    file_id_value = entry.get("file_id")
    if isinstance(file_id_value, (int, float)):
        result["file_id"] = int(file_id_value)
    if relationships is not None:
        result["relationships"] = relationships
    if relationship_metadata:
        result["relationship_metadata"] = relationship_metadata
    if warnings_list:
        result["warnings"] = warnings_list
    return result
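
A usage sketch (not from the commit): the api_url and access_key are placeholders, and the hash is a dummy; option names come from the code above.

    payload = {
        "hash": "ab" * 32,                    # placeholder 64-char hex hash
        "api_url": "http://127.0.0.1:45869",  # placeholder Hydrus Client API URL
        "access_key": "<access key>",
        "options": {"prefer_service_name": "my tags", "include_relationships": True},
    }
    meta = fetch_hydrus_metadata(payload)
    print(meta["title"], meta["tags"], meta.get("warnings"))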


def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]:
    raw_url = payload.get("url") or payload.get("source_url")
    url = str(raw_url or "").strip()
    if not url:
        raise ValueError("URL is required to fetch Hydrus metadata by URL")
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    file_ids: Optional[List[int]] = None
    matched_url = None
    normalised_reported = None
    seen: Set[str] = set()
    queue = deque()
    for variant in _generate_hydrus_url_variants(url):
        queue.append(variant)
    if not queue:
        queue.append(url)
    tried_variants: List[str] = []
    while queue:
        candidate = queue.popleft()
        candidate = str(candidate or "").strip()
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)
        tried_variants.append(candidate)
        spec = HydrusRequestSpec(
            method="GET",
            endpoint="/add_urls/get_url_files",
            query={"url": candidate},
        )
        try:
            response = client._perform_request(spec)
        except HydrusRequestError as exc:
            raise RuntimeError(str(exc))
        response_hashes_list: List[str] = []
        response_file_ids_list: List[int] = []
        if isinstance(response, dict):
            normalised_value = response.get("normalised_url")
            if isinstance(normalised_value, str):
                trimmed = normalised_value.strip()
                if trimmed:
                    normalised_reported = normalised_reported or trimmed
                    if trimmed not in seen:
                        queue.append(trimmed)
            for redirect_key in ("redirect_url", "url"):
                redirect_value = response.get(redirect_key)
                if isinstance(redirect_value, str):
                    redirect_trimmed = redirect_value.strip()
                    if redirect_trimmed and redirect_trimmed not in seen:
                        queue.append(redirect_trimmed)
            raw_hashes = response.get("hashes") or response.get("file_hashes")
            if isinstance(raw_hashes, list):
                for item in raw_hashes:
                    try:
                        normalized = _normalize_hash(item)
                    except ValueError:
                        continue
                    if normalized:
                        response_hashes_list.append(normalized)
            raw_ids = response.get("file_ids") or response.get("file_id")
            if isinstance(raw_ids, list):
                for item in raw_ids:
                    try:
                        response_file_ids_list.append(int(item))
                    except (TypeError, ValueError):
                        continue
            elif raw_ids is not None:
                try:
                    response_file_ids_list.append(int(raw_ids))
                except (TypeError, ValueError):
                    pass
            statuses = response.get("url_file_statuses")
            if isinstance(statuses, list):
                for entry in statuses:
                    if not isinstance(entry, dict):
                        continue
                    status_hash = entry.get("hash") or entry.get("file_hash")
                    if status_hash:
                        try:
                            normalized = _normalize_hash(status_hash)
                        except ValueError:
                            normalized = None
                        if normalized:
                            response_hashes_list.append(normalized)
                    status_id = entry.get("file_id") or entry.get("fileid")
                    if status_id is not None:
                        try:
                            response_file_ids_list.append(int(status_id))
                        except (TypeError, ValueError):
                            pass
        if not hashes and response_hashes_list:
            hashes = response_hashes_list
        if not file_ids and response_file_ids_list:
            file_ids = response_file_ids_list
        if hashes or file_ids:
            matched_url = candidate
            break
    if not hashes and not file_ids:
        raise RuntimeError(
            "No Hydrus matches for URL variants: "
            + ", ".join(tried_variants)
        )
    followup_payload = {
        "api_url": base_url,
        "access_key": access_key,
        "hash": hashes[0] if hashes else None,
        "file_ids": file_ids,
        "options": {"timeout": timeout, "minimal": True},
    }
    result = fetch_hydrus_metadata(followup_payload)
    result["matched_url"] = matched_url or url
    result["normalised_url"] = normalised_reported or matched_url or url
    result["tried_urls"] = tried_variants
    return result
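
The URL flavour needs only a source URL plus the same connection fields, and reports which variant matched; a sketch with placeholder values:

    meta = fetch_hydrus_metadata_by_url({
        "url": "https://example.com/post/123",  # placeholder source URL
        "api_url": "http://127.0.0.1:45869",    # placeholder Hydrus Client API URL
        "access_key": "<access key>",
    })
    print(meta["matched_url"], meta["tried_urls"])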


def _build_hydrus_context(payload: Dict[str, Any]) -> Tuple["HydrusNetwork", str, str, float, Optional[str]]:
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    timeout = float(options.get("timeout") or payload.get("timeout") or 60.0)
    prefer_service = payload.get("prefer_service_name") or options.get("prefer_service_name")
    if isinstance(prefer_service, str):
        prefer_service = prefer_service.strip() or None
    else:
        prefer_service = None
    client = HydrusNetwork(base_url, access_key, timeout)
    return client, base_url, access_key, timeout, prefer_service


def _refetch_hydrus_summary(
    base_url: str,
    access_key: str,
    hash_hex: str,
    timeout: float,
    prefer_service: Optional[str]
) -> Dict[str, Any]:
    payload: Dict[str, Any] = {
        "hash": hash_hex,
        "api_url": base_url,
        "access_key": access_key,
        "options": {
            "minimal": True,
            "include_relationships": False,
            "timeout": timeout,
        },
    }
    if prefer_service:
        payload["options"]["prefer_service_name"] = prefer_service
    return fetch_hydrus_metadata(payload)


def apply_hydrus_tag_mutation(
    payload: Dict[str, Any],
    add: Iterable[Any],
    remove: Iterable[Any]
) -> Dict[str, Any]:
    client, base_url, access_key, timeout, prefer_service = _build_hydrus_context(payload)
    hash_hex = _normalize_hash(payload.get("hash"))
    add_list = [_normalize_tag(tag) for tag in add if _normalize_tag(tag)]
    remove_list = [_normalize_tag(tag) for tag in remove if _normalize_tag(tag)]
    if not add_list and not remove_list:
        raise ValueError("No tag changes supplied")
    service_key = payload.get("service_key") or payload.get("tag_service_key")
    summary = None
    if not service_key:
        summary = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
        service_key = summary.get("tag_service_key")
    if not isinstance(service_key, str) or not service_key:
        raise RuntimeError("Unable to determine Hydrus tag service key")
    actions: Dict[str, List[str]] = {}
    if add_list:
        actions["0"] = [tag for tag in add_list if tag]
    if remove_list:
        actions["1"] = [tag for tag in remove_list if tag]
    if not actions:
        raise ValueError("Tag mutation produced no actionable changes")
    request_payload = {
        "hashes": [hash_hex],
        "service_keys_to_actions_to_tags": {
            service_key: actions,
        },
    }
    try:
        tag_spec = HydrusRequestSpec(
            method="POST",
            endpoint="/add_tags/add_tags",
            data=request_payload,
        )
        client._perform_request(tag_spec)
    except HydrusRequestError as exc:
        raise RuntimeError(str(exc))
    summary_after = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
    result = dict(summary_after)
    result["added_tags"] = actions.get("0", [])
    result["removed_tags"] = actions.get("1", [])
    result["tag_service_key"] = summary_after.get("tag_service_key")
    return result
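
Mutations go through /add_tags/add_tags and the function re-fetches afterwards, so the return value reflects post-change state. A sketch with placeholder values:

    result = apply_hydrus_tag_mutation(
        {
            "hash": "ab" * 32,                    # placeholder hash
            "api_url": "http://127.0.0.1:45869",  # placeholder
            "access_key": "<access key>",
        },
        add=["title:new title"],
        remove=["title:old title"],
    )
    print(result["added_tags"], result["removed_tags"])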
API/cmdlet.py (new file, 220 lines)
@@ -0,0 +1,220 @@
from __future__ import annotations

import contextlib
import io
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Sequence

from SYS import pipeline as ctx
from SYS.models import PipelineStageContext
from SYS.rich_display import capture_rich_output


CmdletCallable = Callable[[Any, Sequence[str], Dict[str, Any]], int]


@dataclass(slots=True)
class CmdletRunResult:
    """Programmatic result for a single cmdlet invocation."""

    name: str
    args: Sequence[str]
    exit_code: int = 0
    emitted: List[Any] = field(default_factory=list)

    # Best-effort: cmdlets can publish tables/items via pipeline state even when
    # they don't emit pipeline items.
    result_table: Optional[Any] = None
    result_items: List[Any] = field(default_factory=list)
    result_subject: Optional[Any] = None

    stdout: str = ""
    stderr: str = ""
    error: Optional[str] = None


def _normalize_cmd_name(name: str) -> str:
    return str(name or "").replace("_", "-").strip().lower()


def resolve_cmdlet(cmd_name: str) -> Optional[CmdletCallable]:
    """Resolve a cmdlet callable by name from the registry (aliases supported)."""
    try:
        from SYS.cmdlet_catalog import ensure_registry_loaded

        ensure_registry_loaded()
    except Exception:
        pass

    try:
        import cmdlet as cmdlet_pkg

        return cmdlet_pkg.get(cmd_name)
    except Exception:
        return None


def run_cmdlet(
    cmd: str | CmdletCallable,
    args: Sequence[str] | None,
    config: Dict[str, Any],
    *,
    piped: Any = None,
    isolate: bool = True,
    capture_output: bool = True,
    stage_index: int = 0,
    total_stages: int = 1,
    pipe_index: Optional[int] = None,
    worker_id: Optional[str] = None,
) -> CmdletRunResult:
    """Run a single cmdlet programmatically and return structured results.

    This is intended for TUI/webapp consumers that want cmdlet behavior without
    going through the interactive CLI loop.

    Notes:
    - When `isolate=True` (default) this runs inside `ctx.new_pipeline_state()` so
      global CLI pipeline state is not mutated.
    - Output capturing covers both normal `print()` and Rich output via
      `capture_rich_output()`.
    """

    normalized_args: Sequence[str] = list(args or [])

    if isinstance(cmd, str):
        name = _normalize_cmd_name(cmd)
        cmd_fn = resolve_cmdlet(name)
    else:
        name = getattr(cmd, "__name__", "cmdlet")
        cmd_fn = cmd

    result = CmdletRunResult(name=name, args=normalized_args)

    if not callable(cmd_fn):
        result.exit_code = 1
        result.error = f"Unknown command: {name}"
        result.stderr = result.error
        return result

    stage_ctx = PipelineStageContext(
        stage_index=int(stage_index),
        total_stages=int(total_stages),
        pipe_index=pipe_index,
        worker_id=worker_id,
    )

    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()

    stage_text = " ".join([name, *list(normalized_args)]).strip()

    state_cm = ctx.new_pipeline_state() if isolate else contextlib.nullcontext()

    with state_cm:
        # Keep behavior predictable: start from a clean slate.
        try:
            ctx.reset()
        except Exception:
            pass

        try:
            ctx.set_stage_context(stage_ctx)
        except Exception:
            pass

        try:
            ctx.set_current_cmdlet_name(name)
        except Exception:
            pass

        try:
            ctx.set_current_stage_text(stage_text)
        except Exception:
            pass

        try:
            ctx.set_current_command_text(stage_text)
        except Exception:
            pass

        try:
            run_cm = (
                capture_rich_output(stdout=stdout_buffer, stderr=stderr_buffer)
                if capture_output
                else contextlib.nullcontext()
            )
            with run_cm:
                with (
                    contextlib.redirect_stdout(stdout_buffer)
                    if capture_output
                    else contextlib.nullcontext()
                ):
                    with (
                        contextlib.redirect_stderr(stderr_buffer)
                        if capture_output
                        else contextlib.nullcontext()
                    ):
                        result.exit_code = int(cmd_fn(piped, list(normalized_args), config))
        except Exception as exc:
            result.exit_code = 1
            result.error = f"{type(exc).__name__}: {exc}"
        finally:
            result.stdout = stdout_buffer.getvalue()
            result.stderr = stderr_buffer.getvalue()

        # Prefer cmdlet emits (pipeline semantics).
        try:
            result.emitted = list(stage_ctx.emits or [])
        except Exception:
            result.emitted = []

        # Mirror CLI behavior: if cmdlet emitted items and there is no overlay table,
        # make emitted items the last result items for downstream consumers.
        try:
            has_overlay = bool(ctx.get_display_table())
        except Exception:
            has_overlay = False

        if result.emitted and not has_overlay:
            try:
                ctx.set_last_result_items_only(list(result.emitted))
            except Exception:
                pass

        # Best-effort snapshot of visible results.
        try:
            result.result_table = (
                ctx.get_display_table() or ctx.get_current_stage_table() or ctx.get_last_result_table()
            )
        except Exception:
            result.result_table = None

        try:
            result.result_items = list(ctx.get_last_result_items() or [])
        except Exception:
            result.result_items = []

        try:
            result.result_subject = ctx.get_last_result_subject()
        except Exception:
            result.result_subject = None

        # Cleanup stage-local markers.
        try:
            ctx.clear_current_stage_text()
        except Exception:
            pass
        try:
            ctx.clear_current_cmdlet_name()
        except Exception:
            pass
        try:
            ctx.clear_current_command_text()
        except Exception:
            pass
        try:
            ctx.set_stage_context(None)
        except Exception:
            pass

    return result
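
A sketch of how a TUI or webapp consumer might drive this, assuming the API package is importable as shown; the cmdlet name and args are invented for illustration:

    from API.cmdlet import run_cmdlet  # assumed import path for this new module

    res = run_cmdlet("search-file", ["--tag", "creator:someone"], {})  # placeholder cmdlet
    if res.exit_code == 0:
        for item in res.emitted:  # pipeline emits, if any
            print(item)
    else:
        print(res.error or res.stderr)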