nose
2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions

View File

@@ -6,6 +6,7 @@ import sys
import shutil
import tempfile
import re
from urllib.parse import urlsplit, parse_qs
import models
import pipeline as ctx
@@ -13,12 +14,20 @@ from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
extract_tag_from_result, extract_title_from_result, extract_url_from_result,
merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object
)
from ._shared import collapse_namespace_tag
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata
@@ -181,7 +190,7 @@ class Add_File(Cmdlet):
downloaded_path = Path(downloaded)
if downloaded_path.exists() and downloaded_path.is_dir():
log(
"[add-file] OpenLibrary download produced a directory (missing img2pdf?). Cannot ingest.",
"[add-file] OpenLibrary download produced a directory (PDF conversion failed). Cannot ingest.",
file=sys.stderr,
)
failures += 1
@@ -195,12 +204,32 @@ class Add_File(Cmdlet):
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
if code == 0:
successes += 1
else:
failures += 1
continue
# Hydrus file URLs are direct file downloads and may require Hydrus auth headers.
# If the user provided a destination (-provider or -store), download now and continue.
if (provider_name or location) and isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://")):
downloaded = self._try_download_hydrus_file_url(
file_url=str(media_path_or_url),
pipe_obj=pipe_obj,
config=config,
)
if downloaded is not None:
downloaded_path, downloaded_temp_dir = downloaded
temp_dir_to_cleanup = downloaded_temp_dir
media_path_or_url = str(downloaded_path)
pipe_obj.path = str(downloaded_path)
pipe_obj.is_temp = True
delete_after_item = True
# If it's still a URL target, fall back to the legacy delegate.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
if code == 0:
successes += 1
else:
failures += 1
continue
media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url
@@ -767,6 +796,134 @@ class Add_File(Cmdlet):
return True
return False
@staticmethod
def _sanitize_filename(value: str) -> str:
# Minimal Windows-safe filename sanitization.
text = str(value or "").strip()
if not text:
return "file"
invalid = '<>:"/\\|?*'
text = "".join("_" if (ch in invalid or ord(ch) < 32) else ch for ch in text)
text = re.sub(r"\s+", " ", text).strip(" .")
return text or "file"
@staticmethod
def _parse_hydrus_file_url(file_url: str) -> Optional[str]:
"""Return the sha256 hash from a Hydrus /get_files/file URL, or None."""
try:
split = urlsplit(str(file_url))
if split.scheme.lower() not in {"http", "https"}:
return None
path_lower = (split.path or "").lower()
if "/get_files/file" not in path_lower:
return None
params = parse_qs(split.query or "")
raw = None
if "hash" in params and params["hash"]:
raw = params["hash"][0]
if not raw:
return None
hash_val = str(raw).strip().lower()
if not re.fullmatch(r"[0-9a-f]{64}", hash_val):
return None
return hash_val
except Exception:
return None
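# Usage sketch (hypothetical host/port): only Hydrus client-API file URLs yield a hash:
#   _parse_hydrus_file_url("http://127.0.0.1:45869/get_files/file?hash=" + "ab" * 32)
#   -> "abab...ab" (the 64-char hex hash)
#   _parse_hydrus_file_url("https://example.com/file.mp4") -> None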
def _try_download_hydrus_file_url(
self,
*,
file_url: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
) -> Optional[tuple[Path, Path]]:
"""If *file_url* is a Hydrus file URL, download it to temp and return (path, temp_dir)."""
file_hash = self._parse_hydrus_file_url(file_url)
if not file_hash:
return None
# Resolve Hydrus backend for auth.
store_name = str(getattr(pipe_obj, "store", "") or "").strip()
if ":" in store_name:
store_name = store_name.split(":", 1)[-1].strip()
backend = None
try:
store_registry = Store(config)
if store_name and store_registry.is_available(store_name):
candidate = store_registry[store_name]
if type(candidate).__name__.lower() == "hydrusnetwork":
backend = candidate
except Exception:
backend = None
if backend is None:
try:
store_registry = Store(config)
target_prefix = str(file_url).split("/get_files/file", 1)[0].rstrip("/")
for backend_name in store_registry.list_backends():
candidate = store_registry[backend_name]
if type(candidate).__name__.lower() != "hydrusnetwork":
continue
base_url = str(getattr(candidate, "URL", "") or "").rstrip("/")
if base_url and (target_prefix.lower() == base_url.lower() or target_prefix.lower().startswith(base_url.lower())):
backend = candidate
break
except Exception:
backend = None
if backend is None:
debug("[add-file] Hydrus file URL detected but no Hydrus backend matched for auth")
return None
api_key = str(getattr(backend, "API", "") or "").strip()
if not api_key:
debug(f"[add-file] Hydrus backend '{getattr(backend, 'NAME', '') or store_name}' missing API key")
return None
# Best-effort filename from title + ext.
ext = ""
try:
if isinstance(pipe_obj.extra, dict):
ext = str(pipe_obj.extra.get("ext") or "").strip().lstrip(".")
except Exception:
ext = ""
if not ext:
ext = "bin"
title_hint = str(getattr(pipe_obj, "title", "") or "").strip()
base_name = self._sanitize_filename(title_hint) if title_hint else f"hydrus_{file_hash[:12]}"
temp_dir = Path(tempfile.mkdtemp(prefix="medios_hydrus_"))
destination = unique_path(temp_dir / f"{base_name}.{ext}")
headers = {"Hydrus-Client-API-Access-Key": api_key}
timeout = 60.0
try:
client = getattr(backend, "_client", None)
timeout_val = getattr(client, "timeout", None)
if timeout_val is not None:
timeout = float(timeout_val)
except Exception:
timeout = 60.0
try:
log(
f"[add-file] Downloading Hydrus file via API ({getattr(backend, 'NAME', '') or store_name})",
file=sys.stderr,
)
downloaded_bytes = hydrus_wrapper.download_hydrus_file(str(file_url), headers, destination, timeout)
if downloaded_bytes <= 0 and not destination.exists():
shutil.rmtree(temp_dir, ignore_errors=True)
return None
return destination, temp_dir
except Exception as exc:
log(f"[add-file] Hydrus download failed: {exc}", file=sys.stderr)
try:
shutil.rmtree(temp_dir, ignore_errors=True)
except Exception:
pass
return None
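# Caller contract (as used by the ingest loop above): a (path, temp_dir) return
# means the file now lives in a fresh temp dir the caller must clean up later;
# None means "not a Hydrus URL / no matching backend / download failed" and the
# normal URL flow continues.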
def _delegate_to_download_data(
self,
result: Any,
@@ -883,6 +1040,61 @@ class Add_File(Cmdlet):
except Exception:
return None
@staticmethod
def _get_note_text(result: Any, pipe_obj: models.PipeObject, note_name: str) -> Optional[str]:
"""Extract a named note text from a piped item.
Supports:
- pipe_obj.extra["notes"][note_name]
- result["notes"][note_name] for dict results
- pipe_obj.extra[note_name] / result[note_name] as fallback
"""
def _normalize(val: Any) -> Optional[str]:
if val is None:
return None
if isinstance(val, bytes):
try:
val = val.decode("utf-8", errors="ignore")
except Exception:
val = str(val)
if isinstance(val, str):
text = val.strip()
return text if text else None
try:
text = str(val).strip()
return text if text else None
except Exception:
return None
note_key = str(note_name or "").strip()
if not note_key:
return None
# Prefer notes dict on PipeObject.extra (common for cmdlet-emitted dicts)
try:
if isinstance(pipe_obj.extra, dict):
notes_val = pipe_obj.extra.get("notes")
if isinstance(notes_val, dict) and note_key in notes_val:
return _normalize(notes_val.get(note_key))
if note_key in pipe_obj.extra:
return _normalize(pipe_obj.extra.get(note_key))
except Exception:
pass
# Fallback to raw result dict
if isinstance(result, dict):
try:
notes_val = result.get("notes")
if isinstance(notes_val, dict) and note_key in notes_val:
return _normalize(notes_val.get(note_key))
if note_key in result:
return _normalize(result.get(note_key))
except Exception:
pass
return None
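# Lookup-order sketch (made-up item): with pipe_obj.extra set to
#   {"notes": {"sub": "WEBVTT ..."}, "sub": "flat value"}
# _get_note_text(result, pipe_obj, "sub") returns the notes-dict entry first;
# flat keys and the raw result dict are fallbacks only.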
@staticmethod
def _update_pipe_object_destination(
pipe_obj: models.PipeObject,
@@ -1451,6 +1663,26 @@ class Add_File(Cmdlet):
except Exception:
pass
# If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
# persist it automatically like add-note would.
sub_note = Add_File._get_note_text(result, pipe_obj, "sub")
if sub_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "sub", sub_note)
except Exception:
pass
chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters")
if chapters_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "chapters", chapters_note)
except Exception:
pass
meta: Dict[str, Any] = {}
try:
meta = backend.get_metadata(resolved_hash) or {}

View File

@@ -7,15 +7,15 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
from Store import Store
from SYS.utils import sha256_file
@@ -84,9 +84,9 @@ class Add_Note(Cmdlet):
else:
note_text = str(text_parts or "").strip()
if not note_text:
log("[add_note] Error: Empty note text", file=sys.stderr)
return 1
# Note text can be omitted when upstream stages provide it (e.g. download-media --write-sub
# attaches notes.sub). In that case we resolve per-item below.
user_provided_text = bool(note_text)
results = normalize_result_input(result)
if not results:
@@ -99,11 +99,56 @@ class Add_Note(Cmdlet):
store_registry = Store(config)
updated = 0
# Optional global fallback for note text from pipeline values.
# Allows patterns like: ... | add-note sub
pipeline_default_text = None
if not user_provided_text:
try:
pipeline_default_text = ctx.load_value(note_name)
except Exception:
pipeline_default_text = None
if isinstance(pipeline_default_text, list):
pipeline_default_text = " ".join([str(x) for x in pipeline_default_text]).strip()
elif pipeline_default_text is not None:
pipeline_default_text = str(pipeline_default_text).strip()
for res in results:
if not isinstance(res, dict):
ctx.emit(res)
continue
# Resolve note text for this item when not provided explicitly.
item_note_text = note_text
if not user_provided_text:
# Prefer item-scoped notes dict.
candidate = None
try:
notes = res.get("notes")
if isinstance(notes, dict):
candidate = notes.get(note_name)
except Exception:
candidate = None
# Also allow direct field fallback: res["sub"], etc.
if candidate is None:
try:
candidate = res.get(note_name)
except Exception:
candidate = None
if candidate is None:
candidate = pipeline_default_text
if isinstance(candidate, list):
item_note_text = " ".join([str(x) for x in candidate]).strip()
else:
item_note_text = str(candidate or "").strip()
if not item_note_text:
log(f"[add_note] Warning: No note text found for '{note_name}'; skipping", file=sys.stderr)
ctx.emit(res)
continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
@@ -130,7 +175,7 @@ class Add_Note(Cmdlet):
ok = False
try:
ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config))
ok = bool(backend.set_note(resolved_hash, note_name, item_note_text, config=config))
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
ok = False

View File

@@ -11,7 +11,15 @@ from SYS.logger import log
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_result_input, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
get_field = sh.get_field
from API.folder import read_sidecar, find_sidecar, API_folder_store
from Store import Store

View File

@@ -8,19 +8,20 @@ from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tag,
should_show_help,
get_field,
)
from . import _shared as sh
normalize_result_input = sh.normalize_result_input
filter_results_by_temp = sh.filter_results_by_temp
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_tag_arguments = sh.parse_tag_arguments
expand_tag_groups = sh.expand_tag_groups
parse_cmdlet_args = sh.parse_cmdlet_args
collapse_namespace_tag = sh.collapse_namespace_tag
should_show_help = sh.should_show_help
get_field = sh.get_field
from Store import Store
from SYS.utils import sha256_file

View File

@@ -8,19 +8,20 @@ from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tags,
should_show_help,
get_field,
)
from . import _shared as sh
normalize_result_input = sh.normalize_result_input
filter_results_by_temp = sh.filter_results_by_temp
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_tag_arguments = sh.parse_tag_arguments
expand_tag_groups = sh.expand_tag_groups
parse_cmdlet_args = sh.parse_cmdlet_args
collapse_namespace_tags = sh.collapse_namespace_tags
should_show_help = sh.should_show_help
get_field = sh.get_field
from Store import Store
from SYS.utils import sha256_file

View File

@@ -4,12 +4,12 @@ from typing import Any, Dict, Sequence
import sys
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
from SYS.logger import log
from Store import Store
class Add_Url(Cmdlet):
class Add_Url(sh.Cmdlet):
"""Add URL associations to files via hash+store."""
def __init__(self) -> None:
@@ -18,9 +18,9 @@ class Add_Url(Cmdlet):
summary="Associate a URL with a file",
usage="@1 | add-url <url>",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to associate"),
sh.SharedArgs.HASH,
sh.SharedArgs.STORE,
sh.CmdletArg("url", required=True, description="URL to associate"),
],
detail=[
"- Associates URL with file identified by hash+store",
@@ -32,11 +32,11 @@ class Add_Url(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
parsed = sh.parse_cmdlet_args(args, self)
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
@@ -52,7 +52,7 @@ class Add_Url(Cmdlet):
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1

View File

@@ -1,215 +0,0 @@
from __future__ import annotations
from importlib import import_module
from typing import Any, Dict, List, Optional
try:
from cmdlet import REGISTRY
except Exception:
REGISTRY = {} # type: ignore
try:
from cmdnat import register_native_commands as _register_native_commands
except Exception:
_register_native_commands = None
def ensure_registry_loaded() -> None:
"""Ensure native commands are registered into REGISTRY (idempotent)."""
if _register_native_commands and REGISTRY is not None:
try:
_register_native_commands(REGISTRY)
except Exception:
pass
def _normalize_mod_name(mod_name: str) -> str:
"""Normalize a command/module name for import resolution."""
normalized = (mod_name or "").strip()
if normalized.startswith('.'):
normalized = normalized.lstrip('.')
normalized = normalized.replace('-', '_')
return normalized
def import_cmd_module(mod_name: str):
"""Import a cmdlet/native module from cmdnat or cmdlet packages."""
normalized = _normalize_mod_name(mod_name)
if not normalized:
return None
for package in ("cmdnat", "cmdlet", None):
try:
qualified = f"{package}.{normalized}" if package else normalized
return import_module(qualified)
except ModuleNotFoundError:
continue
except Exception:
continue
return None
def _normalize_arg(arg: Any) -> Dict[str, Any]:
"""Convert a CmdletArg/dict into a plain metadata dict."""
if isinstance(arg, dict):
name = arg.get("name", "")
return {
"name": str(name).lstrip("-"),
"type": arg.get("type", "string"),
"required": bool(arg.get("required", False)),
"description": arg.get("description", ""),
"choices": arg.get("choices", []) or [],
"alias": arg.get("alias", ""),
"variadic": arg.get("variadic", False),
}
name = getattr(arg, "name", "") or ""
return {
"name": str(name).lstrip("-"),
"type": getattr(arg, "type", "string"),
"required": bool(getattr(arg, "required", False)),
"description": getattr(arg, "description", ""),
"choices": getattr(arg, "choices", []) or [],
"alias": getattr(arg, "alias", ""),
"variadic": getattr(arg, "variadic", False),
}
def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
"""Return normalized metadata for a cmdlet, if available (aliases supported)."""
ensure_registry_loaded()
normalized = cmd_name.replace("-", "_")
mod = import_cmd_module(normalized)
data = getattr(mod, "CMDLET", None) if mod else None
if data is None:
try:
reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower())
if reg_fn:
owner_mod = getattr(reg_fn, "__module__", "")
if owner_mod:
owner = import_module(owner_mod)
data = getattr(owner, "CMDLET", None)
except Exception:
data = None
if not data:
return None
if hasattr(data, "to_dict"):
base = data.to_dict()
elif isinstance(data, dict):
base = data
else:
base = {}
name = getattr(data, "name", base.get("name", cmd_name)) or cmd_name
aliases = getattr(data, "alias", base.get("alias", [])) or []
usage = getattr(data, "usage", base.get("usage", ""))
summary = getattr(data, "summary", base.get("summary", ""))
details = getattr(data, "detail", base.get("detail", [])) or []
args_list = getattr(data, "arg", base.get("arg", [])) or []
args = [_normalize_arg(arg) for arg in args_list]
return {
"name": str(name).replace("_", "-").lower(),
"aliases": [str(a).replace("_", "-").lower() for a in aliases if a],
"usage": usage,
"summary": summary,
"details": details,
"args": args,
"raw": data,
}
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
"""Collect metadata for all registered cmdlet keyed by canonical name."""
ensure_registry_loaded()
entries: Dict[str, Dict[str, Any]] = {}
for reg_name in (REGISTRY or {}).keys():
meta = get_cmdlet_metadata(reg_name)
canonical = str(reg_name).replace("_", "-").lower()
if meta:
canonical = meta.get("name", canonical)
aliases = meta.get("aliases", [])
base = entries.get(
canonical,
{
"name": canonical,
"aliases": [],
"usage": "",
"summary": "",
"details": [],
"args": [],
"raw": meta.get("raw"),
},
)
merged_aliases = set(base.get("aliases", [])) | set(aliases)
if canonical != reg_name:
merged_aliases.add(reg_name)
base["aliases"] = sorted(a for a in merged_aliases if a and a != canonical)
if not base.get("usage") and meta.get("usage"):
base["usage"] = meta["usage"]
if not base.get("summary") and meta.get("summary"):
base["summary"] = meta["summary"]
if not base.get("details") and meta.get("details"):
base["details"] = meta["details"]
if not base.get("args") and meta.get("args"):
base["args"] = meta["args"]
if not base.get("raw"):
base["raw"] = meta.get("raw")
entries[canonical] = base
else:
entries.setdefault(
canonical,
{"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
)
return entries
def list_cmdlet_names(include_aliases: bool = True) -> List[str]:
"""Return sorted cmdlet names (optionally including aliases)."""
ensure_registry_loaded()
entries = list_cmdlet_metadata()
names = set()
for meta in entries.values():
names.add(meta.get("name", ""))
if include_aliases:
for alias in meta.get("aliases", []):
names.add(alias)
return sorted(n for n in names if n)
def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
"""Return flag variants for cmdlet arguments (e.g., -name/--name)."""
meta = get_cmdlet_metadata(cmd_name)
if not meta:
return []
# Preserve the order that arguments are defined on the cmdlet (arg=[...]) so
# completions feel stable and predictable.
flags: List[str] = []
seen: set[str] = set()
for arg in meta.get("args", []):
name = str(arg.get("name") or "").strip().lstrip("-")
if not name:
continue
for candidate in (f"-{name}", f"--{name}"):
if candidate not in seen:
flags.append(candidate)
seen.add(candidate)
return flags
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
"""Return declared choices for a cmdlet argument."""
meta = get_cmdlet_metadata(cmd_name)
if not meta:
return []
target = arg_name.lstrip("-")
for arg in meta.get("args", []):
if arg.get("name") == target:
return list(arg.get("choices", []) or [])
return []

View File

@@ -1,190 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from SYS.logger import log
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
from Store import Store
CMDLET = Cmdlet(
name="check-file-status",
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
usage="check-file-status [-hash <sha256>] [-store <name>]",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
],
detail=[
"- Shows whether file is active in Hydrus or marked as deleted",
"- Detects corrupted data (e.g., comma-separated url)",
"- Displays file metadata and service locations",
"- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
override_hash: str | None = None
override_store: str | None = None
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args):
override_store = str(args[i + 1]).strip()
i += 2
continue
i += 1
store_name: str | None = override_store
if not store_name:
if isinstance(result, dict):
store_name = str(result.get("store") or "").strip() or None
else:
store_name = str(getattr(result, "store", "") or "").strip() or None
if override_hash:
hash_hex = normalize_hash(override_hash)
else:
if isinstance(result, dict):
hash_hex = normalize_hash(result.get("hash") or result.get("hash_hex"))
else:
hash_hex = normalize_hash(getattr(result, "hash", None) or getattr(result, "hash_hex", None))
if not hash_hex:
log("No hash provided and no result selected", file=sys.stderr)
return 1
try:
client = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "fetch_file_metadata"):
client = candidate
except Exception:
client = None
if client is None:
log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return 1
else:
client = hydrus_wrapper.get_client(config)
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
try:
result_data = client.fetch_file_metadata(hashes=[hash_hex])
if not result_data.get("metadata"):
log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
return 1
file_info = result_data["metadata"][0]
# Status summary
is_deleted = file_info.get("is_deleted", False)
is_local = file_info.get("is_local", False)
is_trashed = file_info.get("is_trashed", False)
status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
log(f"File status: {status_str}", file=sys.stderr)
# File info
log(f"\n📄 File Information:", file=sys.stderr)
log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
log(f" MIME: {file_info['mime']}", file=sys.stderr)
log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)
# Service status
file_services = file_info.get("file_services", {})
current_services = file_services.get("current", {})
deleted_services = file_services.get("deleted", {})
if current_services:
log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
for service_key, service_info in current_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
log(f" - {sname} ({stype})", file=sys.stderr)
if deleted_services:
log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
for service_key, service_info in deleted_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
time_deleted = service_info.get("time_deleted", "?")
log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)
# URL check
url = file_info.get("url", [])
log(f"\n🔗 url ({len(url)}):", file=sys.stderr)
corrupted_count = 0
for i, url in enumerate(url, 1):
if "," in url:
corrupted_count += 1
log(f" [{i}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
else:
log(f" [{i}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
if corrupted_count > 0:
log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)
# Tags
tags_dict = file_info.get("tags", {})
total_tags = 0
for service_key, service_data in tags_dict.items():
service_name = service_data.get("name", "unknown")
display_tags = service_data.get("display_tags", {}).get("0", [])
total_tags += len(display_tags)
if total_tags > 0:
log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
for service_key, service_data in tags_dict.items():
display_tags = service_data.get("display_tags", {}).get("0", [])
if display_tags:
service_name = service_data.get("name", "unknown")
log(f" {service_name}:", file=sys.stderr)
for tag in display_tags[:5]: # Show first 5
log(f" - {tag}", file=sys.stderr)
if len(display_tags) > 5:
log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)
log("\n", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error checking file status: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.alias = ["check-status", "file-status", "status"]
CMDLET.register()

View File

@@ -1,105 +0,0 @@
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
then emits the remaining non-temporary results for further pipeline stages.
"""
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import sys
import json
from SYS.logger import log
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help
import models
import pipeline as pipeline_context
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Remove temporary files from pipeline results.
Accepts:
- Single result object with is_temp field
- List of result objects to clean up
Process:
- Filters results by is_temp=True
- Deletes those files from disk
- Emits only non-temporary results
Typical pipeline usage:
download-data url | screen-shot | add-tag -store local "tag" --all | cleanup
"""
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Normalize input to list
results = normalize_result_input(result)
if not results:
log("[cleanup] No results to process", file=sys.stderr)
return 1
# Separate temporary and permanent results
temp_results = pipeline_context.filter_results_by_temp(results, include_temp=True)
perm_results = pipeline_context.filter_results_by_temp(results, include_temp=False)
# Delete temporary files
deleted_count = 0
for temp_result in temp_results:
try:
file_path = get_pipe_object_path(temp_result)
if file_path:
path_obj = Path(file_path)
if path_obj.exists():
# Delete the file
path_obj.unlink()
log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
deleted_count += 1
# Clean up any associated sidecar files
for ext in ['.tag', '.metadata']:
sidecar = path_obj.parent / (path_obj.name + ext)
if sidecar.exists():
try:
sidecar.unlink()
log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
else:
log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
# Log summary
log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
# Emit permanent results for downstream processing
for perm_result in perm_results:
pipeline_context.emit(perm_result)
return 0
CMDLET = Cmdlet(
name="cleanup",
summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
usage="cleanup",
arg=[],
detail=[
"- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
"- Deletes files marked with is_temp=True from disk",
"- Also cleans up associated sidecar files (.tag, .metadata)",
"- Emits only non-temporary results for further processing",
"- Typical usage at end of pipeline: ... | add-tag -store local \"tag\" --all | cleanup",
"- Exit code 0 if cleanup successful, 1 if no results to process",
],
exec=_run,
).register()

View File

@@ -8,12 +8,12 @@ from pathlib import Path
from SYS.logger import debug, log
from Store.Folder import Folder
from Store import Store
from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help
from . import _shared as sh
from API import HydrusNetwork as hydrus_wrapper
import pipeline as ctx
class Delete_File(Cmdlet):
class Delete_File(sh.Cmdlet):
"""Class-based delete-file cmdlet with self-registration."""
def __init__(self) -> None:
@@ -23,10 +23,10 @@ class Delete_File(Cmdlet):
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
alias=["del-file"],
arg=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
CmdletArg("reason", description="Optional reason for deletion (free text)."),
sh.CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
detail=[
"Default removes both the local file and Hydrus file.",
@@ -45,24 +45,28 @@ class Delete_File(Cmdlet):
if isinstance(item, dict):
hash_hex_raw = item.get("hash_hex") or item.get("hash")
target = item.get("target") or item.get("file_path") or item.get("path")
title_val = item.get("title") or item.get("name")
else:
hash_hex_raw = get_field(item, "hash_hex") or get_field(item, "hash")
target = get_field(item, "target") or get_field(item, "file_path") or get_field(item, "path")
hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path")
title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
store = None
if isinstance(item, dict):
store = item.get("store")
else:
store = get_field(item, "store")
store = sh.get_field(item, "store")
store_lower = str(store).lower() if store else ""
is_hydrus_store = bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"})
store_label = str(store) if store else "default"
hydrus_prefix = f"[hydrusnetwork:{store_label}]"
# For Hydrus files, the target IS the hash
if is_hydrus_store and not hash_hex_raw:
hash_hex_raw = target
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
hash_hex = sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
@@ -156,19 +160,28 @@ class Delete_File(Cmdlet):
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
debug(f"Deleted from Hydrus: {preview}", file=sys.stderr)
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
except Exception:
# If it's not in Hydrus (e.g. 404 or similar), that's fine
if not local_deleted:
return False
if hydrus_deleted and hash_hex:
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
title_str = str(title_val).strip() if title_val else ""
if reason:
ctx.emit(f"Deleted {preview} (reason: {reason}).")
if title_str:
ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex} (reason: {reason}).")
else:
ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex} (reason: {reason}).")
else:
ctx.emit(f"Deleted {preview}.")
if title_str:
ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}.")
else:
ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex}.")
if hydrus_deleted or local_deleted:
return True
@@ -178,7 +191,7 @@ class Delete_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute delete-file command."""
if should_show_help(args):
if sh.should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0

View File

@@ -7,16 +7,16 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
get_field,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
get_field = sh.get_field
should_show_help = sh.should_show_help
from Store import Store
from SYS.utils import sha256_file

View File

@@ -10,7 +10,16 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_hash, normalize_result_input, get_field, should_show_help
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_hash = sh.normalize_hash
normalize_result_input = sh.normalize_result_input
get_field = sh.get_field
should_show_help = sh.should_show_help
from API.folder import API_folder_store
from Store import Store
from config import get_local_storage_path

View File

@@ -7,7 +7,15 @@ import sys
import models
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_tag_arguments = sh.parse_tag_arguments
should_show_help = sh.should_show_help
get_field = sh.get_field
from SYS.logger import debug, log
from Store import Store

View File

@@ -4,7 +4,16 @@ from typing import Any, Dict, Sequence
import sys
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
sh.parse_cmdlet_args,
sh.get_field,
sh.normalize_hash,
)
from SYS.logger import log
from Store import Store

View File

@@ -17,15 +17,15 @@ from SYS.download import DownloadError, _download_direct_file
from SYS.logger import log, debug
import pipeline as pipeline_context
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
parse_cmdlet_args,
register_url_with_local_library,
coerce_to_pipe_object,
get_field,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field
class Download_File(Cmdlet):
@@ -251,6 +251,13 @@ class Download_File(Cmdlet):
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
# Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
# Never download these as "files".
if str(table or "").lower() == "libgen":
low = target.lower()
if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" in low):
log("[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)", file=sys.stderr)
continue
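# Blocked shapes (illustrative): .../ads.php?md5=<hash> and .../file.php?id=<id>
# return HTML landing pages rather than the file payload.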
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
file_path = None

View File

@@ -38,7 +38,18 @@ from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLog
import pipeline as pipeline_context
from result_table import ResultTable
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object
from tool.ytdlp import YtDlpTool
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field
# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats)
@@ -62,6 +73,136 @@ _EXTRACTOR_CACHE: List[Any] | None = None
_YTDLP_PROGRESS_BAR = ProgressBar()
_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc")
def _format_chapters_note(info: Dict[str, Any]) -> Optional[str]:
"""Format yt-dlp chapter metadata into a stable, note-friendly text.
Output is one chapter per line, e.g.:
00:00 Intro
01:23-02:10 Topic name
"""
try:
chapters = info.get("chapters")
except Exception:
chapters = None
if not isinstance(chapters, list) or not chapters:
return None
rows: List[tuple[int, Optional[int], str]] = []
max_t = 0
for ch in chapters:
if not isinstance(ch, dict):
continue
start_raw = ch.get("start_time")
end_raw = ch.get("end_time")
title_raw = ch.get("title") or ch.get("name") or ch.get("chapter")
try:
start_s = int(float(start_raw))
except Exception:
continue
end_s: Optional[int] = None
try:
if end_raw is not None:
end_s = int(float(end_raw))
except Exception:
end_s = None
title = str(title_raw).strip() if title_raw is not None else ""
rows.append((start_s, end_s, title))
try:
max_t = max(max_t, start_s, end_s or 0)
except Exception:
max_t = max(max_t, start_s)
if not rows:
return None
force_hours = bool(max_t >= 3600)
def _tc(seconds: int) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
if force_hours:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
lines: List[str] = []
for start_s, end_s, title in sorted(rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2])):
if end_s is not None and end_s > start_s:
prefix = f"{_tc(start_s)}-{_tc(end_s)}"
else:
prefix = _tc(start_s)
line = f"{prefix} {title}".strip()
if line:
lines.append(line)
text = "\n".join(lines).strip()
return text or None
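# Worked example (made-up info dict):
#   _format_chapters_note({"chapters": [
#       {"start_time": 0, "title": "Intro"},
#       {"start_time": 83, "end_time": 130, "title": "Topic"},
#   ]})
#   -> "00:00 Intro\n01:23-02:10 Topic"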
def _best_subtitle_sidecar(media_path: Path) -> Optional[Path]:
"""Find the most likely subtitle sidecar file for a downloaded media file."""
try:
base_dir = media_path.parent
stem = media_path.stem
if not stem:
return None
candidates: List[Path] = []
for p in base_dir.glob(stem + ".*"):
try:
if not p.is_file():
continue
except Exception:
continue
if p.suffix.lower() in _SUBTITLE_EXTS:
candidates.append(p)
if not candidates:
return None
def _rank(path: Path) -> tuple[int, int, float, str]:
name = path.name.lower()
lang_rank = 0 if ".en." in name or name.endswith(".en" + path.suffix.lower()) else 1
ext = path.suffix.lower()
ext_rank_map = {".vtt": 0, ".srt": 1, ".ass": 2, ".ssa": 3, ".lrc": 4}
ext_rank = ext_rank_map.get(ext, 9)
try:
mtime = float(path.stat().st_mtime)
except Exception:
mtime = 0.0
return (lang_rank, ext_rank, -mtime, name)
candidates.sort(key=_rank)
return candidates[0]
except Exception:
return None
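# Ranking sketch (hypothetical files): for clip.mkv with clip.en.vtt, clip.vtt and
# clip.srt present, clip.en.vtt wins (English first, then .vtt before .srt, then newest).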
def _read_text_file(path: Path, *, max_bytes: int = 1_500_000) -> Optional[str]:
try:
data = path.read_bytes()
except Exception:
return None
if not data:
return None
if len(data) > max_bytes:
data = data[:max_bytes]
try:
return data.decode("utf-8", errors="replace")
except Exception:
try:
return data.decode(errors="replace")
except Exception:
return None
def _ensure_yt_dlp_ready() -> None:
if yt_dlp is not None:
return
@@ -100,16 +241,26 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
ydl_opts["noplaylist"] = True
if playlist_items:
ydl_opts["playlist_items"] = playlist_items
debug(f"Fetching format list for: {url}")
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
debug(f"Fetching format list for: {url}")
info = ydl.extract_info(url, download=False)
formats = info.get("formats", [])
if not formats:
log("No formats available", file=sys.stderr)
return None
result_formats = []
for fmt in formats:
result_formats.append({
if not isinstance(info, dict):
log("No formats available", file=sys.stderr)
return None
formats = info.get("formats") or []
if not isinstance(formats, list) or not formats:
log("No formats available", file=sys.stderr)
return None
result_formats: List[Dict[str, Any]] = []
for fmt in formats:
if not isinstance(fmt, dict):
continue
result_formats.append(
{
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
@@ -122,9 +273,11 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
"filesize": fmt.get("filesize"),
"abr": fmt.get("abr"),
"tbr": fmt.get("tbr"),
})
debug(f"Found {len(result_formats)} available formats")
return result_formats
}
)
debug(f"Found {len(result_formats)} available formats")
return result_formats or None
except Exception as e:
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
return None
@@ -215,6 +368,31 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
cmd = ["yt-dlp"]
if ytdl_options.get("format"):
cmd.extend(["-f", ytdl_options["format"]])
if ytdl_options.get("merge_output_format"):
cmd.extend(["--merge-output-format", str(ytdl_options["merge_output_format"])])
# For CLI downloads, infer chapter/metadata embedding from either legacy flags
# or explicit FFmpegMetadata postprocessor entries.
postprocessors = ytdl_options.get("postprocessors")
want_add_metadata = bool(ytdl_options.get("addmetadata"))
want_embed_chapters = bool(ytdl_options.get("embedchapters"))
if isinstance(postprocessors, list):
for pp in postprocessors:
if not isinstance(pp, dict):
continue
if str(pp.get("key") or "") == "FFmpegMetadata":
want_add_metadata = True
if bool(pp.get("add_chapters", True)):
want_embed_chapters = True
if want_add_metadata:
cmd.append("--add-metadata")
if want_embed_chapters:
cmd.append("--embed-chapters")
if ytdl_options.get("writesubtitles"):
cmd.append("--write-sub")
cmd.append("--write-auto-sub")
cmd.extend(["--sub-format", "vtt"])
if ytdl_options.get("force_keyframes_at_cuts"):
cmd.extend(["--force-keyframes-at-cuts"]) if ytdl_options.get("force_keyframes_at_cuts") else None
cmd.extend(["-o", section_outtmpl])
@@ -258,11 +436,6 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
else:
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
base_options["cookiefile"] = global_cookies
if opts.no_playlist:
base_options["noplaylist"] = True
@@ -274,6 +447,37 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]
# Optional yt-dlp features
if getattr(opts, "embed_chapters", False):
# Prefer explicit FFmpegMetadata PP so chapter embedding runs even when
# we already specified other postprocessors (e.g. FFmpegExtractAudio).
pps = base_options.get("postprocessors")
if not isinstance(pps, list):
pps = []
already_has_metadata = any(
isinstance(pp, dict) and str(pp.get("key") or "") == "FFmpegMetadata" for pp in pps
)
if not already_has_metadata:
pps.append(
{
"key": "FFmpegMetadata",
"add_metadata": True,
"add_chapters": True,
"add_infojson": "if_exists",
}
)
base_options["postprocessors"] = pps
# Chapter embedding is most reliable in mkv/mp4 containers.
# When merging separate video+audio streams, prefer mkv so mpv sees chapters.
if opts.mode != "audio":
base_options.setdefault("merge_output_format", "mkv")
if getattr(opts, "write_sub", False):
base_options["writesubtitles"] = True
base_options["writeautomaticsub"] = True
base_options["subtitlesformat"] = "vtt"
if opts.clip_sections:
sections: List[str] = []
@@ -410,13 +614,27 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
response = session.get(libgen_url, timeout=10, allow_redirects=True)
final_url = response.url
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
for link in soup.find_all('a'):
href = link.get('href')
if href and 'get.php' in href:
return urljoin(libgen_url, href)
except ImportError:
try:
from lxml import html as lxml_html
except ImportError:
lxml_html = None
if lxml_html is not None:
doc = lxml_html.fromstring(response.content)
for a in doc.xpath("//a[@href]"):
href = str(a.get("href") or "").strip()
if href and "get.php" in href.lower():
return urljoin(final_url, href)
else:
for m in re.finditer(
r"href=[\"\']([^\"\']+)[\"\']",
response.text or "",
flags=re.IGNORECASE,
):
href = str(m.group(1) or "").strip()
if href and "get.php" in href.lower():
return urljoin(final_url, href)
except Exception:
pass
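# Both the lxml and regex paths look for the mirror's download anchor, e.g. an
# href containing "get.php" (illustrative shape: get.php?md5=<hash>), resolved
# against the redirected final_url.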
if final_url != libgen_url:
debug(f"LibGen resolved to mirror: {final_url}")
@@ -648,7 +866,7 @@ def _download_direct_file(
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15, *, cookiefile: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
@@ -686,12 +904,8 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"noprogress": True, # No progress bars
}
# Add cookies if available (lazy import to avoid circular dependency)
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
ydl_opts["cookiefile"] = global_cookies
if cookiefile:
ydl_opts["cookiefile"] = str(cookiefile)
# Add no_playlist option if specified
if no_playlist:
@@ -807,7 +1021,14 @@ def download_media(
debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download")
probe_result = {"url": opts.url} # Minimal probe result
else:
probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15)
probe_cookiefile = None
try:
if opts.cookies_path and opts.cookies_path.is_file():
probe_cookiefile = str(opts.cookies_path)
except Exception:
probe_cookiefile = None
probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile)
if probe_result is None:
if not opts.quiet:
@@ -1182,6 +1403,8 @@ class Download_Media(Cmdlet):
try:
debug("Starting download-media")
ytdlp_tool = YtDlpTool(config)
# Parse arguments
parsed = parse_cmdlet_args(args, self)
@@ -1192,7 +1415,6 @@ class Download_Media(Cmdlet):
# If no url provided via args, try to extract from piped result
if not raw_url and result:
from ._shared import get_field
# Handle single result or list of results
results_to_check = result if isinstance(result, list) else [result]
for item in results_to_check:
@@ -1226,6 +1448,10 @@ class Download_Media(Cmdlet):
# Get other options
clip_spec = parsed.get("clip")
# Always enable chapters + subtitles so downstream pipes (e.g. mpv) can consume them.
embed_chapters = True
write_sub = True
mode = "audio" if parsed.get("audio") else "video"
# Parse clip range(s) if specified
@@ -1379,7 +1605,14 @@ class Download_Media(Cmdlet):
if playlist_items:
return str(requested_url)
try:
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15)
cf = None
try:
cookie_path = ytdlp_tool.resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
if isinstance(pr, dict):
for key in ("webpage_url", "original_url", "url", "requested_url"):
value = pr.get(key)
@@ -1458,7 +1691,14 @@ class Download_Media(Cmdlet):
- selected_urls: Optional[List[str]] (expanded per-entry urls when available)
"""
try:
pr = probe_url(url, no_playlist=False, timeout_seconds=15)
cf = None
try:
cookie_path = ytdlp_tool.resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
except Exception:
pr = None
if not isinstance(pr, dict):
@@ -1685,6 +1925,15 @@ class Download_Media(Cmdlet):
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
format_id = fmt.get("format_id", "")
# If the chosen format is video-only (no audio stream), automatically
# request best audio too so the resulting file has sound.
selection_format_id = format_id
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
except Exception:
selection_format_id = format_id
# Format size
size_str = ""
@@ -1729,9 +1978,9 @@ class Download_Media(Cmdlet):
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": format_id,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", format_id]
"_selection_args": ["-format", selection_format_id]
}
# Add to results list and table (don't emit - formats should wait for @N selection)
@@ -1778,23 +2027,57 @@ class Download_Media(Cmdlet):
actual_format = playlist_items
actual_playlist_items = None
# Auto-pick best audio format when -audio is used and no explicit format is given.
# For -audio, default to yt-dlp's built-in bestaudio selector.
# This should *not* require interactive format picking.
if mode == "audio" and not actual_format:
chosen = None
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
if formats:
chosen = _pick_best_audio_format_id(formats)
actual_format = chosen or "bestaudio/best"
actual_format = "bestaudio"
# If no explicit format is provided for video mode, allow a config override.
if mode == "video" and not actual_format:
configured = (ytdlp_tool.default_format("video") or "").strip()
if configured and configured != "bestvideo+bestaudio/best":
actual_format = configured
# If a single format id was chosen and it is video-only, auto-merge best audio.
if (
actual_format
and isinstance(actual_format, str)
and mode != "audio"
and "+" not in actual_format
and "/" not in actual_format
and "[" not in actual_format
and actual_format not in {"best", "bv", "ba", "b"}
):
try:
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
if formats:
fmt_match = next(
(f for f in formats if str(f.get("format_id", "")) == actual_format),
None,
)
if fmt_match:
vcodec = str(fmt_match.get("vcodec", "none"))
acodec = str(fmt_match.get("acodec", "none"))
if vcodec != "none" and acodec == "none":
debug(
f"Selected video-only format {actual_format}; using {actual_format}+ba for audio"
)
actual_format = f"{actual_format}+ba"
except Exception:
pass
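# Selector sketch (hypothetical ids): a bare video-only id such as "137" becomes
# "137+ba"; composite selectors ("137+140", "bv*[height<=1080]/b") and keywords
# like "best" pass through unchanged.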
opts = DownloadOptions(
url=url,
mode=mode,
output_dir=final_output_dir,
ytdl_format=actual_format,
cookies_path=ytdlp_tool.resolve_cookiefile(),
clip_sections=clip_sections_spec,
playlist_items=actual_playlist_items,
quiet=quiet_mode,
no_playlist=False,
embed_chapters=embed_chapters,
write_sub=write_sub,
)
# Use timeout wrapper to prevent hanging
@@ -1838,7 +2121,40 @@ class Download_Media(Cmdlet):
# Build PipeObjects first so we can attach cross-clip relationships.
pipe_objects: List[Dict[str, Any]] = []
for downloaded in results_to_emit:
pipe_objects.append(self._build_pipe_object(downloaded, url, opts))
po = self._build_pipe_object(downloaded, url, opts)
# Attach chapter timestamps for downstream consumers (e.g., mpv scripts)
# even if container embedding fails.
try:
info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
except Exception:
info = {}
chapters_text = _format_chapters_note(info) if embed_chapters else None
if chapters_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("chapters", chapters_text)
po["notes"] = notes
if write_sub:
try:
media_path = Path(str(po.get("path") or ""))
except Exception:
media_path = None
if media_path is not None and media_path.exists() and media_path.is_file():
sub_path = _best_subtitle_sidecar(media_path)
if sub_path is not None:
sub_text = _read_text_file(sub_path)
if sub_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
pipe_objects.append(po)
# If this is a clip download, decorate titles/tags so the title: tag is clip-based.
# Relationship tags are only added when multiple clips exist.
@@ -1868,6 +2184,95 @@ class Download_Media(Cmdlet):
debug("✓ Downloaded and emitted")
except DownloadError as e:
# Special-case yt-dlp format errors: show a selectable format list table so
# the user can pick a working format_id and continue the pipeline via @N.
cause = getattr(e, "__cause__", None)
detail = ""
try:
detail = str(cause or "")
except Exception:
detail = ""
if "requested format is not available" in (detail or "").lower() and mode != "audio":
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
if formats:
formats_to_show = formats
table = ResultTable()
table.title = f"Available formats for {url}"
table.set_source_command("download-media", [str(a) for a in (args or [])])
results_list: List[Dict[str, Any]] = []
for idx, fmt in enumerate(formats_to_show, 1):
resolution = fmt.get("resolution", "")
ext = fmt.get("ext", "")
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
format_id = fmt.get("format_id", "")
selection_format_id = format_id
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
except Exception:
selection_format_id = format_id
size_str = ""
if filesize:
try:
size_mb = float(filesize) / (1024 * 1024)
size_str = f"{size_mb:.1f}MB"
except Exception:
size_str = ""
desc_parts: List[str] = []
if resolution and resolution != "audio only":
desc_parts.append(str(resolution))
if ext:
desc_parts.append(str(ext).upper())
if vcodec != "none":
desc_parts.append(f"v:{vcodec}")
if acodec != "none":
desc_parts.append(f"a:{acodec}")
if size_str:
desc_parts.append(size_str)
format_desc = " | ".join(desc_parts)
format_dict: Dict[str, Any] = {
"table": "download-media",
"title": f"Format {format_id}",
"url": url,
"target": url,
"detail": format_desc,
"media_kind": "format",
"columns": [
("#", str(idx)),
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Video", vcodec),
("Audio", acodec),
("Size", size_str or "N/A"),
],
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", selection_format_id],
}
results_list.append(format_dict)
table.add_result(format_dict)
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
# Returning 0 with no emits lets the CLI pause the pipeline for @N selection.
log("Requested format is not available; select a working format with @N", file=sys.stderr)
return 0
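                    # Selection flow (illustrative; format ids are examples, real ids
                    # come from list_formats()): picking a row re-runs this cmdlet with
                    # that row's _selection_args, e.g.
                    #   download-media <url>   -> "Requested format is not available" table
                    #   @3                     -> download-media <url> -format 137+ba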
log(f"Download failed for {url}: {e}", file=sys.stderr)
except Exception as e:
log(f"Error processing {url}: {e}", file=sys.stderr)

View File

@@ -15,9 +15,9 @@ from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from SYS.logger import log
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from . import _shared as sh
class Download_Torrent(Cmdlet):
class Download_Torrent(sh.Cmdlet):
"""Class-based download-torrent cmdlet with self-registration."""
def __init__(self) -> None:
@@ -27,10 +27,10 @@ class Download_Torrent(Cmdlet):
usage="download-torrent <magnet|.torrent> [options]",
alias=["torrent", "magnet"],
arg=[
CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
sh.CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
sh.CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
sh.CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
sh.CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
],
detail=["Download torrents/magnets via AllDebrid API."],
exec=self.run,
@@ -38,7 +38,7 @@ class Download_Torrent(Cmdlet):
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(args, self)
parsed = sh.parse_cmdlet_args(args, self)
magnet_args = parsed.get("magnet", [])
output_dir = Path(parsed.get("output") or Path.home() / "Downloads")
wait_timeout = int(float(parsed.get("wait", 600)))

View File

@@ -9,13 +9,13 @@ import subprocess
import webbrowser
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
from SYS.logger import log, debug
from Store import Store
from config import resolve_output_dir
class Get_File(Cmdlet):
class Get_File(sh.Cmdlet):
"""Export files to local path via hash+store."""
def __init__(self) -> None:
@@ -25,10 +25,10 @@ class Get_File(Cmdlet):
summary="Export file to local path",
usage="@1 | get-file -path C:\\Downloads",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
SharedArgs.PATH,
CmdletArg("name", description="Output filename (default: from metadata title)"),
sh.SharedArgs.HASH,
sh.SharedArgs.STORE,
sh.SharedArgs.PATH,
sh.CmdletArg("name", description="Output filename (default: from metadata title)"),
],
detail=[
"- Exports file from storage backend to local path",
@@ -42,12 +42,12 @@ class Get_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Export file via hash+store backend."""
debug(f"[get-file] run() called with result type: {type(result)}")
parsed = parse_cmdlet_args(args, self)
parsed = sh.parse_cmdlet_args(args, self)
debug(f"[get-file] parsed args: {parsed}")
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
@@ -62,7 +62,7 @@ class Get_File(Cmdlet):
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
@@ -84,9 +84,9 @@ class Get_File(Cmdlet):
def resolve_display_title() -> str:
candidates = [
get_field(result, "title"),
get_field(result, "name"),
get_field(result, "filename"),
sh.get_field(result, "title"),
sh.get_field(result, "name"),
sh.get_field(result, "filename"),
(metadata.get("title") if isinstance(metadata, dict) else None),
(metadata.get("name") if isinstance(metadata, dict) else None),
(metadata.get("filename") if isinstance(metadata, dict) else None),

View File

@@ -7,7 +7,13 @@ import sys
from SYS.logger import log
from pathlib import Path
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
import pipeline as ctx
from result_table import ResultTable
@@ -74,9 +80,15 @@ class Get_Metadata(Cmdlet):
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
"""Build a table row dict with metadata fields."""
size_mb = None
if isinstance(size_bytes, int):
size_int: Optional[int] = None
if size_bytes is not None:
try:
size_mb = int(size_bytes / (1024 * 1024))
size_int = int(size_bytes)
except Exception:
size_int = None
if isinstance(size_int, int):
try:
size_mb = int(size_int / (1024 * 1024))
except Exception:
size_mb = None
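        # e.g. size_bytes = 5_242_880 -> size_int = 5242880 -> size_mb = 5 (integer MiB)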
@@ -105,7 +117,7 @@ class Get_Metadata(Cmdlet):
"path": path,
"store": store,
"mime": mime,
"size_bytes": size_bytes,
"size_bytes": size_int,
"duration_seconds": dur_int,
"pages": pages_int,
"imported_ts": imported_ts,
@@ -237,8 +249,8 @@ class Get_Metadata(Cmdlet):
pages=pages,
)
table_title = title
table = ResultTable(table_title).init_command("get-metadata", list(args))
table_title = f"get-metadata: {title}" if title else "get-metadata"
table = ResultTable(table_title).init_command(table_title, "get-metadata", list(args))
self._add_table_body_row(table, row)
ctx.set_last_result_table_overlay(table, [row], row)
ctx.emit(row)

View File

@@ -7,15 +7,15 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
from Store import Store
from SYS.utils import sha256_file

View File

@@ -10,7 +10,17 @@ from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
fmt_bytes = sh.fmt_bytes
get_hash_for_operation = sh.get_hash_for_operation
fetch_hydrus_metadata = sh.fetch_hydrus_metadata
should_show_help = sh.should_show_help
get_field = sh.get_field
from API.folder import API_folder_store
from config import get_local_storage_path
from result_table import ResultTable
@@ -224,13 +234,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
try:
client = None
store_label = "hydrus"
backend_obj = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
store_label = str(store_name)
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
backend_obj = store[str(store_name)]
candidate = getattr(backend_obj, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_relationships"):
client = candidate
except Exception:
@@ -241,6 +252,74 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
else:
client = hydrus_wrapper.get_client(config)
def _resolve_related_title(rel_hash: str) -> str:
"""Best-effort resolve a Hydrus hash to a human title.
Preference order:
- title: tag from the backend (fast path)
- Hydrus metadata tags via fetch_hydrus_metadata
- fallback to short hash
"""
h = normalize_hash(rel_hash)
if not h:
return str(rel_hash)
# Prefer backend tag extraction when available.
if backend_obj is not None and hasattr(backend_obj, "get_tag"):
try:
tag_result = backend_obj.get_tag(h)
tags = tag_result[0] if isinstance(tag_result, tuple) and tag_result else tag_result
if isinstance(tags, list):
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
val = t.split(":", 1)[1].strip()
if val:
return val
except Exception:
pass
# Fallback: fetch minimal metadata and scan for a title tag.
try:
meta, _ = fetch_hydrus_metadata(
config,
h,
store_name=store_label if store_name else None,
hydrus_client=client,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
)
if isinstance(meta, dict):
tags_payload = meta.get("tags")
tag_candidates: list[str] = []
if isinstance(tags_payload, dict):
for svc_data in tags_payload.values():
if not isinstance(svc_data, dict):
continue
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
for group in storage.values():
if isinstance(group, list):
tag_candidates.extend([str(x) for x in group if isinstance(x, str)])
display = svc_data.get("display_tags")
if isinstance(display, list):
tag_candidates.extend([str(x) for x in display if isinstance(x, str)])
flat = meta.get("tags_flat")
if isinstance(flat, list):
tag_candidates.extend([str(x) for x in flat if isinstance(x, str)])
for t in tag_candidates:
if isinstance(t, str) and t.lower().startswith("title:"):
val = t.split(":", 1)[1].strip()
if val:
return val
except Exception:
pass
return h[:16] + "..."
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
@@ -274,7 +353,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
found_relationships.append({
"hash": king_hash,
"type": "king",
"title": king_hash,
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
})
@@ -292,7 +371,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm, # Can't resolve title easily without another API call
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
@@ -304,7 +383,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})

View File

@@ -27,7 +27,15 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
import pipeline as ctx
from API import HydrusNetwork
from API.folder import read_sidecar, write_sidecar, find_sidecar, API_folder_store
from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
from . import _shared as sh
normalize_hash = sh.normalize_hash
looks_like_hash = sh.looks_like_hash
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
from config import get_local_storage_path

View File

@@ -5,7 +5,15 @@ from typing import Any, Dict, List, Sequence
import sys
import pipeline as ctx
from ._shared import Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from . import _shared as sh
Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash = (
sh.Cmdlet,
sh.SharedArgs,
sh.parse_cmdlet_args,
sh.get_field,
sh.normalize_hash,
)
from SYS.logger import log
from Store import Store

View File

@@ -12,17 +12,17 @@ import re as _re
from config import resolve_output_dir
from ._shared import (
Cmdlet,
CmdletArg,
create_pipe_object_result,
get_field,
get_pipe_object_hash,
get_pipe_object_path,
normalize_result_input,
parse_cmdlet_args,
should_show_help,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
create_pipe_object_result = sh.create_pipe_object_result
get_field = sh.get_field
get_pipe_object_hash = sh.get_pipe_object_hash
get_pipe_object_path = sh.get_pipe_object_path
normalize_result_input = sh.normalize_result_input
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help
import pipeline as ctx

View File

@@ -20,7 +20,16 @@ from urllib.parse import urlsplit, quote, urljoin
from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
create_pipe_object_result = sh.create_pipe_object_result
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
get_field = sh.get_field
parse_cmdlet_args = sh.parse_cmdlet_args
import pipeline as pipeline_context
# ============================================================================
@@ -33,20 +42,7 @@ import pipeline as pipeline_context
# Playwright & Screenshot Dependencies
# ============================================================================
try:
from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
sync_playwright,
)
HAS_PLAYWRIGHT = True
except Exception:
HAS_PLAYWRIGHT = False
PlaywrightTimeoutError = TimeoutError # type: ignore
def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore
raise RuntimeError(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
)
from tool.playwright import HAS_PLAYWRIGHT, PlaywrightTimeoutError, PlaywrightTool
try:
from config import resolve_output_dir
@@ -128,6 +124,7 @@ class ScreenshotOptions:
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
selector_timeout_ms: int = 10_000
playwright_tool: Optional[PlaywrightTool] = None
@dataclass(slots=True)
@@ -324,142 +321,119 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
playwright = None
browser = None
context = None
try:
debug("Starting Playwright...", flush=True)
playwright = sync_playwright().start()
log("Launching Chromium browser...", flush=True)
tool = options.playwright_tool or PlaywrightTool({})
tool.debug_dump()
log("Launching browser...", flush=True)
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
browser = playwright.chromium.launch(
headless=headless,
args=["--disable-blink-features=AutomationControlled"],
)
log("Creating browser context...", flush=True)
context = browser.new_context(
user_agent=USER_AGENT,
viewport=DEFAULT_VIEWPORT,
ignore_https_errors=True,
)
page = context.new_page()
log(f"Navigating to {options.url}...", flush=True)
try:
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
log("Page loaded successfully", flush=True)
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
with tool.open_page(headless=headless) as page:
log(f"Navigating to {options.url}...", flush=True)
try:
log("Waiting for article element...", flush=True)
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
tool.goto(page, options.url)
log("Page loaded successfully", flush=True)
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
if (v.poster) {
const img = document.createElement('img');
img.src = v.poster;
img.style.maxWidth = '100%';
img.style.borderRadius = '12px';
v.replaceWith(img);
}
});
"""
)
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
try:
_platform_preprocess(options.url, page, warnings)
except Exception as e:
debug(f"[_capture] Platform preprocess failed: {e}")
pass
selectors = list(options.target_selectors or [])
if not selectors:
selectors = _selectors_for_url(options.url)
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
debug(f"[_capture] Trying selectors: {selectors}")
for sel in selectors:
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
log(f"Trying selector: {sel}", flush=True)
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
log("Waiting for article element...", flush=True)
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
continue
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
if (v.poster) {
const img = document.createElement('img');
img.src = v.poster;
img.style.maxWidth = '100%';
img.style.borderRadius = '12px';
v.replaceWith(img);
}
});
"""
)
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
else:
log(f"Capturing full page to {destination}...", flush=True)
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
if options.full_page:
page.screenshot(full_page=True, **screenshot_kwargs)
_platform_preprocess(options.url, page, warnings)
except Exception as e:
debug(f"[_capture] Platform preprocess failed: {e}")
pass
selectors = list(options.target_selectors or [])
if not selectors:
selectors = _selectors_for_url(options.url)
debug(f"[_capture] Trying selectors: {selectors}")
for sel in selectors:
try:
log(f"Trying selector: {sel}", flush=True)
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
continue
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
else:
article = page.query_selector("article")
if article is not None:
article_kwargs = dict(screenshot_kwargs)
article_kwargs.pop("full_page", None)
article.screenshot(**article_kwargs)
log(f"Capturing full page to {destination}...", flush=True)
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
if options.full_page:
page.screenshot(full_page=True, **screenshot_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
article = page.query_selector("article")
if article is not None:
article_kwargs = dict(screenshot_kwargs)
article_kwargs.pop("full_page", None)
article.screenshot(**article_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
except Exception as exc:
debug(f"[_capture] Exception: {exc}")
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
finally:
log("Cleaning up browser resources...", flush=True)
with contextlib.suppress(Exception):
if context is not None:
context.close()
with contextlib.suppress(Exception):
if browser is not None:
browser.close()
with contextlib.suppress(Exception):
if playwright is not None:
playwright.stop()
log("Cleanup complete", flush=True)
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
@@ -511,8 +485,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
from ._shared import parse_cmdlet_args
debug(f"[_run] screen-shot invoked with args: {args}")
# Help check
@@ -534,6 +506,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
if not format_value:
# Default format can be set via config.conf tool block:
# [tool=playwright]
# format="pdf"
try:
tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {}
pw_cfg = tool_cfg.get("playwright") if isinstance(tool_cfg, dict) else None
if isinstance(pw_cfg, dict):
format_value = pw_cfg.get("format")
except Exception:
pass
if not format_value:
format_value = "png"
storage_value = parsed.get("storage")
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
@@ -669,6 +654,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
prefer_platform_target=False,
wait_for_article=False,
full_page=True,
playwright_tool=PlaywrightTool(config),
)
screenshot_result = _capture_screenshot(options)

View File

@@ -10,7 +10,13 @@ import importlib
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from ._shared import Cmdlet, CmdletArg, should_show_help
from . import _shared as sh
Cmdlet, CmdletArg, should_show_help = (
sh.Cmdlet,
sh.CmdletArg,
sh.should_show_help,
)
import pipeline as ctx
# Optional dependencies

View File

@@ -10,7 +10,17 @@ import sys
from SYS.logger import log, debug
from ._shared import Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
sh.get_field,
sh.should_show_help,
sh.normalize_hash,
sh.first_title_tag,
)
import pipeline as ctx
@@ -209,6 +219,10 @@ class Search_Store(Cmdlet):
table_title += f" [{storage_backend}]"
table = ResultTable(table_title)
try:
table.set_source_command("search-store", list(args_list))
except Exception:
pass
if hash_query:
try:
table.set_preserve_order(True)
@@ -309,6 +323,11 @@ class Search_Store(Cmdlet):
ext_val = Path(path_str).suffix
except Exception:
ext_val = None
if not ext_val and title:
try:
ext_val = Path(str(title)).suffix
except Exception:
ext_val = None
size_bytes = meta_obj.get("size")
if size_bytes is None:
@@ -333,6 +352,20 @@ class Search_Store(Cmdlet):
ctx.emit(payload)
if found_any:
            # Title should reflect the command, the query, and only the stores actually present in the table.
store_counts: "OrderedDict[str, int]" = OrderedDict()
for row_item in results_list:
store_val = str(row_item.get("store") or "").strip()
if not store_val:
continue
if store_val not in store_counts:
store_counts[store_val] = 0
store_counts[store_val] += 1
counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0)
base_title = f"search-store: {query}".strip()
table.title = f"{base_title} | {counts_part}" if counts_part else base_title
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
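            # Resulting title (illustrative): query "cats" with three hydrus rows and
            # one folder row renders as: "search-store: cats | hydrus:3 folder:1".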
@@ -377,28 +410,6 @@ class Search_Store(Cmdlet):
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
results = all_results[:limit]
def _format_storage_label(name: str) -> str:
clean = str(name or "").strip()
if not clean:
return "Unknown"
return clean.replace("_", " ").title()
storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
for item in results or []:
store = get_field(item, "store")
if not store:
continue
key = str(store).lower()
if key not in storage_counts:
storage_counts[key] = 0
storage_counts[key] += 1
if storage_counts or query:
display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
summary_line = table.set_storage_summary(display_counts, query, inline=True)
if summary_line:
table.title = summary_line
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
@@ -428,6 +439,20 @@ class Search_Store(Cmdlet):
results_list.append(normalized)
ctx.emit(normalized)
            # Title should reflect the command, the query, and only the stores actually present in the table.
store_counts: "OrderedDict[str, int]" = OrderedDict()
for row_item in results_list:
store_val = str(row_item.get("store") or "").strip()
if not store_val:
continue
if store_val not in store_counts:
store_counts[store_val] = 0
store_counts[store_val] += 1
counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0)
base_title = f"search-store: {query}".strip()
table.title = f"{base_title} | {counts_part}" if counts_part else base_title
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:

View File

@@ -11,14 +11,14 @@ import re
from SYS.logger import log, debug
from SYS.utils import sha256_file
from ._shared import (
Cmdlet,
CmdletArg,
parse_cmdlet_args,
normalize_result_input,
extract_tag_from_result,
extract_title_from_result
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
import pipeline as ctx
CMDLET = Cmdlet(