
commit c019c00aed (parent 226de9316a)
Date: 2025-12-29 17:05:03 -08:00
104 changed files with 19669 additions and 12954 deletions

View File

@@ -11,7 +11,7 @@ REGISTRY: Dict[str, Cmdlet] = {}
def _normalize_cmd_name(name: str) -> str:
return str(name or "").replace('_', '-').lower().strip()
return str(name or "").replace("_", "-").lower().strip()
def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
@@ -35,8 +35,10 @@ def register(names: Iterable[str]):
@register(["add-tags"])
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
return register_callable(names, fn)
return _wrap
@@ -47,12 +49,11 @@ def get(cmd_name: str) -> Cmdlet | None:
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
# cmdlet self-register when instantiated via their __init__ method
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if not (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"
):
continue
@@ -67,12 +68,14 @@ for filename in os.listdir(cmdlet_dir):
_import_module(f".{mod_name}", __name__)
except Exception as e:
import sys
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
continue
# Import and register native commands that are not considered cmdlet
try:
from cmdnat import register_native_commands as _register_native_commands
_register_native_commands(REGISTRY)
except Exception:
# Native commands are optional; ignore if unavailable
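Reader's note (not part of the commit): the hunks above only reformat the cmdlet registry in this package's __init__. A minimal sketch of the self-registration flow it implements, assuming the register decorator shown in the docstring above; the cmdlet name here is hypothetical:

from . import register  # assumed relative import inside the cmdlet package

@register(["my-cmdlet"])          # records the callable in REGISTRY under this name
def _run(result, args, config) -> int:
    # Invoked when the dispatcher looks up get("my-cmdlet") and executes it.
    return 0

Dropping a module like this into the cmdlet directory is enough: the loop above imports every non-underscore .py file, and importing the module runs the decorator, which records the callable in REGISTRY (names appear to be normalized to hyphenated lowercase via _normalize_cmd_name).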

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -27,11 +27,10 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note (-query \"title:<title>,text:<text>[,store:<store>][,hash:<sha256>]\") [ -store <store> | <piped> ]",
usage='add-note (-query "title:<title>,text:<text>[,store:<store>][,hash:<sha256>]") [ -store <store> | <piped> ]',
alias=[""],
arg=[
SharedArgs.STORE,
QueryArg(
"hash",
key="hash",
@@ -116,11 +115,13 @@ class Add_Note(Cmdlet):
# Fallback: best-effort regex.
name_match = re.search(r"\btitle\s*:\s*([^,\s]+)", normalized, flags=re.IGNORECASE)
text_match = re.search(r"\btext\s*:\s*(.+)$", normalized, flags=re.IGNORECASE)
note_name = (name_match.group(1).strip() if name_match else "")
note_text = (text_match.group(1).strip() if text_match else "")
note_name = name_match.group(1).strip() if name_match else ""
note_text = text_match.group(1).strip() if text_match else ""
return (note_name or None, note_text or None)
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -148,11 +149,17 @@ class Add_Note(Cmdlet):
hash_override = normalize_hash(parsed.get("hash"))
note_name, note_text = self._parse_note_query(str(parsed.get("query") or ""))
if not note_name or not note_text:
log("[add_note] Error: -query must include title:<title> and text:<text>", file=sys.stderr)
log(
"[add_note] Error: -query must include title:<title> and text:<text>",
file=sys.stderr,
)
return 1
if hash_override and not store_override:
log("[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>", file=sys.stderr)
log(
"[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>",
file=sys.stderr,
)
return 1
explicit_target = bool(hash_override and store_override)
@@ -166,7 +173,9 @@ class Add_Note(Cmdlet):
backend = store_registry[str(store_override)]
ok = bool(backend.set_note(str(hash_override), note_name, note_text, config=config))
if ok:
ctx.print_if_visible(f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr)
ctx.print_if_visible(
f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr
)
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
return 1
@@ -180,7 +189,10 @@ class Add_Note(Cmdlet):
# Allow standalone use (no piped input) and enable piping the target forward.
results = [{"store": str(store_override), "hash": hash_override}]
else:
log("[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query \"store:<store> hash:<sha256> ...\")", file=sys.stderr)
log(
'[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query "store:<store> hash:<sha256> ...")',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -242,10 +254,15 @@ class Add_Note(Cmdlet):
try:
ok = bool(bulk_fn(list(ops), config=config))
wrote_any = wrote_any or ok or True
ctx.print_if_visible(f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr)
ctx.print_if_visible(
f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr
)
continue
except Exception as exc:
log(f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back", file=sys.stderr)
log(
f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back",
file=sys.stderr,
)
# Fallback: per-item writes
for file_hash, name, text in ops:
@@ -260,4 +277,3 @@ class Add_Note(Cmdlet):
CMDLET = Add_Note()

View File

@@ -29,12 +29,28 @@ CMDLET = Cmdlet(
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg(
"path",
type="string",
description="Specify the local file path (if not piping a result).",
),
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
CmdletArg(
"-king",
type="string",
description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)",
),
CmdletArg(
"-alt",
type="string",
description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)",
),
CmdletArg(
"-type",
type="string",
description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')",
),
],
detail=[
"- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
@@ -54,7 +70,7 @@ def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
if not value or not isinstance(value, str):
return None
normalized = value.strip().lower()
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
if len(normalized) == 64 and all(c in "0123456789abcdef" for c in normalized):
return normalized
return None
@@ -71,9 +87,9 @@ def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
result: Dict[str, list[str]] = {}
if not isinstance(tag_value, str):
return result
# Match patterns like hash(king)HASH or hash(type)<HASH>
pattern = r'hash\((\w+)\)<?([a-fA-F0-9]{64})>?'
pattern = r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?"
matches = re.findall(pattern, tag_value)
if matches:
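For reference (not part of the commit): the quote-style change above does not alter what the pattern matches. A quick check with placeholder 64-hex hashes:

import re

pattern = r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?"
tag = "relationship: hash(king)<" + "a" * 64 + ">,hash(alt)<" + "b" * 64 + ">"
print(re.findall(pattern, tag))
# -> [('king', 'aaa...a'), ('alt', 'bbb...b')], one (type, hash) tuple per entry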
@@ -118,7 +134,11 @@ def _apply_relationships_from_tags(
- Treat the first hash (king) as the king.
- Store directional alt -> king relationships (no reverse edge).
"""
rel_tags = [t for t in relationship_tags if isinstance(t, str) and t.strip().lower().startswith("relationship:")]
rel_tags = [
t
for t in relationship_tags
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if not rel_tags:
return 0
@@ -166,7 +186,9 @@ def _apply_relationships_from_tags(
# For local DB we treat all non-king hashes as alts.
alt_hashes: list[str] = []
for bucket in ("alt", "related"):
alt_hashes.extend([h for h in (rels.get(bucket) or []) if isinstance(h, str)])
alt_hashes.extend(
[h for h in (rels.get(bucket) or []) if isinstance(h, str)]
)
for alt in alt_hashes:
alt_norm = _normalise_hash_hex(alt)
@@ -191,7 +213,7 @@ def _parse_at_selection(token: str) -> Optional[list[int]]:
if not isinstance(token, str):
return None
t = token.strip()
if not t.startswith('@'):
if not t.startswith("@"):
return None
if t == "@*":
return [] # special sentinel: caller interprets as "all"
@@ -293,27 +315,28 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
"""
if not king_arg:
return None
# Check if it's already a valid hash
normalized = _normalise_hash_hex(king_arg)
if normalized:
return normalized
# Try to resolve as @ selection from pipeline context
if king_arg.startswith('@'):
if king_arg.startswith("@"):
selected = _resolve_items_from_at(king_arg)
if not selected:
log(f"Cannot resolve {king_arg}: no selection context", file=sys.stderr)
return None
if len(selected) != 1:
log(f"{king_arg} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
log(
f"{king_arg} selects {len(selected)} items; -king requires exactly 1",
file=sys.stderr,
)
return None
item = selected[0]
item_hash = (
get_field(item, 'hash_hex')
or get_field(item, 'hash')
or get_field(item, 'file_hash')
get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash")
)
if item_hash:
@@ -323,11 +346,16 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
log(f"Item {king_arg} has no hash information", file=sys.stderr)
return None
return None
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_relationship_view_if_current(
target_hash: Optional[str],
target_path: Optional[str],
other: Optional[str],
config: Dict[str, Any],
) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -356,11 +384,32 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
subj_hashes: list[str] = []
subj_paths: list[str] = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v]
subj_hashes = [
norm(v)
for v in [
subject.get("hydrus_hash"),
subject.get("hash"),
subject.get("hash_hex"),
subject.get("file_hash"),
]
if v
]
subj_paths = [
norm(v)
for v in [subject.get("file_path"), subject.get("path"), subject.get("target")]
if v
]
else:
subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)]
subj_hashes = [
norm(getattr(subject, f, None))
for f in ("hydrus_hash", "hash", "hash_hex", "file_hash")
if getattr(subject, f, None)
]
subj_paths = [
norm(getattr(subject, f, None))
for f in ("file_path", "path", "target")
if getattr(subject, f, None)
]
is_match = False
if target_hashes and any(h in subj_hashes for h in target_hashes):
@@ -380,7 +429,7 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
Two modes of operation:
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
@@ -391,7 +440,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(_args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
@@ -420,7 +469,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if alt_arg:
alt_text = str(alt_arg).strip()
resolved_alt_items: list[Any] = []
if alt_text.startswith('@'):
if alt_text.startswith("@"):
selected = _resolve_items_from_at(alt_text)
if not selected:
log(f"Failed to resolve -alt {alt_text}: no selection context", file=sys.stderr)
@@ -431,7 +480,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
parts = [p.strip() for p in alt_text.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -alt value (expected @ selection or 64-hex sha256 hash list)", file=sys.stderr)
log(
"Invalid -alt value (expected @ selection or 64-hex sha256 hash list)",
file=sys.stderr,
)
return 1
if not override_store:
log("-store is required when using -alt with a raw hash list", file=sys.stderr)
@@ -445,11 +497,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
items_to_process = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
return 1
# If no items from pipeline, just process the -path arg
if not items_to_process and arg_path:
items_to_process = [{"file_path": arg_path}]
@@ -459,13 +511,16 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
king_store: Optional[str] = None
if king_arg:
king_text = str(king_arg).strip()
if king_text.startswith('@'):
if king_text.startswith("@"):
selected = _resolve_items_from_at(king_text)
if not selected:
log(f"Cannot resolve {king_text}: no selection context", file=sys.stderr)
return 1
if len(selected) != 1:
log(f"{king_text} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
log(
f"{king_text} selects {len(selected)} items; -king requires exactly 1",
file=sys.stderr,
)
return 1
king_hash, king_store = _extract_hash_and_store(selected[0])
if not king_hash:
@@ -490,18 +545,27 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store", file=sys.stderr)
log(
"Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store",
file=sys.stderr,
)
return 1
# Enforce same-store relationships when store context is available.
if king_store and store_name and str(king_store) != str(store_name):
log(f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'",
file=sys.stderr,
)
return 1
if store_name:
for item in items_to_process:
s = get_field(item, "store")
if s and str(s) != str(store_name):
log(f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'",
file=sys.stderr,
)
return 1
# Resolve backend for store/hash operations
@@ -564,7 +628,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
sidecar_path = find_sidecar(arg_path)
if sidecar_path is not None and sidecar_path.exists():
_, tags, _ = read_sidecar(sidecar_path)
relationship_tags = [t for t in (tags or []) if isinstance(t, str) and t.lower().startswith("relationship:")]
relationship_tags = [
t
for t in (tags or [])
if isinstance(t, str) and t.lower().startswith("relationship:")
]
if relationship_tags:
code = _apply_relationships_from_tags(
relationship_tags,
@@ -587,7 +655,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
else:
tags_val = getattr(item, "tag", None)
if isinstance(tags_val, list):
rel_tags_from_pipe.extend([t for t in tags_val if isinstance(t, str) and t.lower().startswith("relationship:")])
rel_tags_from_pipe.extend(
[
t
for t in tags_val
if isinstance(t, str) and t.lower().startswith("relationship:")
]
)
elif isinstance(tags_val, str) and tags_val.lower().startswith("relationship:"):
rel_tags_from_pipe.append(tags_val)
@@ -613,7 +687,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h:
continue
@@ -622,19 +699,26 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
# directional alt -> king by default for local DB
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, first_hash, str(rel_type), bidirectional=bidirectional)
db.set_relationship_by_hash(
h, first_hash, str(rel_type), bidirectional=bidirectional
)
return 0
# Mode 2: explicit king
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h or h == king_hash:
continue
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, king_hash, str(rel_type), bidirectional=bidirectional)
db.set_relationship_by_hash(
h, king_hash, str(rel_type), bidirectional=bidirectional
)
return 0
except Exception as exc:
log(f"Failed to set store relationships: {exc}", file=sys.stderr)
@@ -648,7 +732,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Verify hashes exist in this Hydrus backend to prevent cross-store edges.
if king_hash and (not _hydrus_hash_exists(hydrus_client, king_hash)):
log(f"Cross-store relationship blocked: king hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: king hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
# Mode 1: first is king
@@ -657,19 +744,28 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h:
continue
if not first_hash:
first_hash = h
if not _hydrus_hash_exists(hydrus_client, first_hash):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
continue
if h != first_hash:
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
hydrus_client.set_relationship(h, first_hash, str(rel_type))
return 0
@@ -678,12 +774,18 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
if not h or h == king_hash:
continue
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
log(
f"Cross-store relationship blocked: hash not found in store '{store_name}'",
file=sys.stderr,
)
return 1
hydrus_client.set_relationship(h, king_hash, str(rel_type))
return 0
@@ -693,18 +795,19 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Extract hash and path from current item
file_hash = None
file_path_from_result = None
if isinstance(item, dict):
file_hash = item.get("hash_hex") or item.get("hash")
file_path_from_result = item.get("file_path") or item.get("path") or item.get("target")
else:
file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)
# Legacy LOCAL STORAGE MODE: Handle relationships for local files
# (kept for -path sidecar workflows; store/hash mode above is preferred)
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
local_storage_path = get_local_storage_path(config) if config else None
use_local_storage = bool(local_storage_path)
local_storage_root: Optional[Path] = None
@@ -743,13 +846,22 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
king_file_path = opt.db.search_hash(normalized_king)
if not king_file_path:
log(f"King hash not found in local DB: {king_hash}", file=sys.stderr)
log(
f"King hash not found in local DB: {king_hash}", file=sys.stderr
)
return 1
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, king_file_path, rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
opt.db.set_relationship(
file_path_obj, king_file_path, rel_type, bidirectional=bidirectional
)
log(
f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}",
file=sys.stderr,
)
_refresh_relationship_view_if_current(
None, str(file_path_obj), str(king_file_path), config
)
else:
# Original behavior: first becomes king, rest become alts
try:
@@ -760,16 +872,29 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
log(
f"Established king file: {file_path_obj.name}",
file=sys.stderr,
)
continue
except Exception:
pass
if king_path and king_path != str(file_path_obj):
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, Path(king_path), rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
opt.db.set_relationship(
file_path_obj,
Path(king_path),
rel_type,
bidirectional=bidirectional,
)
log(
f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}",
file=sys.stderr,
)
_refresh_relationship_view_if_current(
None, str(file_path_obj), str(king_path), config
)
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
@@ -781,16 +906,21 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# If explicit -king provided, use it
if king_hash:
try:
hydrus_client.set_relationship(file_hash, king_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
file=sys.stderr,
)
_refresh_relationship_view_if_current(
file_hash,
str(file_path_from_result) if file_path_from_result is not None else None,
king_hash,
config,
)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, king_hash, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
@@ -800,7 +930,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
existing_king = ctx.load_value("relationship_king")
except Exception:
existing_king = None
# If this is the first item, make it the king
if not existing_king:
try:
@@ -809,34 +939,48 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue # Move to next item
except Exception:
pass
# If we already have a king and this is a different hash, link them
if existing_king and existing_king != file_hash:
try:
hydrus_client.set_relationship(file_hash, existing_king, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
file=sys.stderr,
)
_refresh_relationship_view_if_current(
file_hash,
(
str(file_path_from_result)
if file_path_from_result is not None
else None
),
existing_king,
config,
)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, existing_king, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
# If we get here, we didn't have a usable local path and Hydrus isn't available/usable.
return 0
# FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
log(
"Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship",
file=sys.stderr,
)
# Resolve media path from -path arg or result target
target = getattr(result, "target", None) or getattr(result, "path", None)
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
media_path = (
arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
)
if media_path is None:
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
return 1
# Validate local file
if str(media_path).lower().startswith(("http://", "https://")):
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
@@ -851,7 +995,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if hydrus_client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
@@ -861,7 +1005,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if sidecar_path is None:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
return 1
try:
_, tags, _ = read_sidecar(sidecar_path)
except Exception as exc:
@@ -869,63 +1013,68 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
relationship_tags = [
t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")
]
if not relationship_tags:
log(f"No relationship tags found in sidecar", file=sys.stderr)
return 0 # Not an error, just nothing to do
# Get the file hash from result (should have been set by add-file)
file_hash = getattr(result, "hash_hex", None)
if not file_hash:
log("File hash not available (run add-file first)", file=sys.stderr)
return 1
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Parse relationships from tags and apply them
success_count = 0
error_count = 0
for rel_tag in relationship_tags:
try:
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
# Parse relationships
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
# Set the relationships in Hydrus
for rel_type, related_hashes in rels.items():
if not related_hashes:
continue
for related_hash in related_hashes:
# Don't set relationship between hash and itself
if file_hash == related_hash:
continue
try:
hydrus_client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",
file=sys.stderr
file=sys.stderr,
)
success_count += 1
except Exception as exc:
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
error_count += 1
except Exception as exc:
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
error_count += 1
if success_count > 0:
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
log(
f"Successfully set {success_count} relationship(s) for {media_path.name}",
file=sys.stderr,
)
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
return 0
elif error_count == 0:
@@ -940,5 +1089,3 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET.exec = _run
CMDLET.alias = ["add-rel"]
CMDLET.register()

View File

@@ -118,14 +118,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]
name_lower = raw_name.lower()
is_last = idx == (len(matches) - 1)
if is_last:
parts.append(fr"(?P<{raw_name}>.+)")
parts.append(rf"(?P<{raw_name}>.+)")
else:
# Heuristic: common numeric fields should capture full digit runs.
# This avoids ambiguous splits like track='2', title='3 ...'.
if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
parts.append(fr"(?P<{raw_name}>\d+)")
if name_lower in {
"disk",
"disc",
"cd",
"track",
"trk",
"episode",
"ep",
"season",
"year",
}:
parts.append(rf"(?P<{raw_name}>\d+)")
else:
parts.append(fr"(?P<{raw_name}>.+?)")
parts.append(rf"(?P<{raw_name}>.+?)")
last_end = m.end()
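Aside (not part of the commit): the reformatted block above is the core of the -extract heuristic. A condensed, hypothetical sketch of the same idea, assuming placeholders look like (name) and literal text between them is escaped; the real helper may differ in detail:

import re

NUMERIC = {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}

def compile_template(template: str) -> re.Pattern[str]:
    out, last = [], 0
    matches = list(re.finditer(r"\(([A-Za-z_][A-Za-z0-9_]*)\)", template))
    for idx, m in enumerate(matches):
        out.append(re.escape(template[last:m.start()]))
        name = m.group(1)
        if idx == len(matches) - 1:
            out.append(rf"(?P<{name}>.+)")    # last field swallows the rest
        elif name.lower() in NUMERIC:
            out.append(rf"(?P<{name}>\d+)")   # numeric fields take full digit runs
        else:
            out.append(rf"(?P<{name}>.+?)")   # everything else is lazy
        last = m.end()
    out.append(re.escape(template[last:]))
    return re.compile("".join(out))

m = compile_template("(artist) - (album) - (disk)-(track) (title)").match("Artist - Album - 1-02 Song")
print(m.groupdict() if m else None)
# {'artist': 'Artist', 'album': 'Album', 'disk': '1', 'track': '02', 'title': 'Song'}

The digit-run rule is what prevents the ambiguous split mentioned in the comment above (track='2', title='3 ...').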
@@ -159,7 +169,9 @@ def _extract_tags_from_title(title_text: str, template: str) -> List[str]:
return out
def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[str]] = None) -> List[str]:
def _get_title_candidates_for_extraction(
res: Any, existing_tags: Optional[List[str]] = None
) -> List[str]:
"""Return a list of possible title strings in priority order."""
candidates: List[str] = []
@@ -197,7 +209,9 @@ def _get_title_candidates_for_extraction(res: Any, existing_tags: Optional[List[
return candidates
def _extract_tags_from_title_candidates(candidates: List[str], template: str) -> tuple[List[str], Optional[str]]:
def _extract_tags_from_title_candidates(
candidates: List[str], template: str
) -> tuple[List[str], Optional[str]]:
"""Try candidates in order; return (tags, matched_candidate)."""
for c in candidates:
@@ -207,7 +221,9 @@ def _extract_tags_from_title_candidates(candidates: List[str], template: str) ->
return [], None
def _try_compile_extract_template(template: Optional[str]) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
def _try_compile_extract_template(
template: Optional[str],
) -> tuple[Optional[re.Pattern[str]], Optional[str]]:
"""Compile template for debug; return (pattern, error_message)."""
if template is None:
return None, None
@@ -387,7 +403,13 @@ def _refresh_result_table_title(
pass
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_tag_view(
res: Any,
target_hash: Optional[str],
store_name: Optional[str],
target_path: Optional[str],
config: Dict[str, Any],
) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -421,7 +443,6 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
pass
class Add_Tag(Cmdlet):
"""Class-based add-tag cmdlet with Cmdlet metadata inheritance."""
@@ -429,16 +450,42 @@ class Add_Tag(Cmdlet):
super().__init__(
name="add-tag",
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-query \"hash:<sha256>\"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
usage='add-tag -store <store> [-query "hash:<sha256>"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]',
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
CmdletArg(
"tag",
type="string",
required=False,
description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.",
variadic=True,
),
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("-extract", type="string", description="Extract tags from the item's title using a simple template with (field) placeholders. Example: -extract \"(artist) - (album) - (disk)-(track) (title)\" will add artist:, album:, disk:, track:, title: tags."),
CmdletArg("--extract-debug", type="flag", description="Print debug info for -extract matching (matched title source and extracted tags)."),
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
CmdletArg(
"-extract",
type="string",
description='Extract tags from the item\'s title using a simple template with (field) placeholders. Example: -extract "(artist) - (album) - (disk)-(track) (title)" will add artist:, album:, disk:, track:, title: tags.',
),
CmdletArg(
"--extract-debug",
type="flag",
description="Print debug info for -extract matching (matched title source and extracted tags).",
),
CmdletArg(
"-duplicate",
type="string",
description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)",
),
CmdletArg(
"-list",
type="string",
description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult).",
),
CmdletArg(
"--all",
type="flag",
description="Include temporary files in tagging (by default, only tag non-temporary files).",
),
],
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
@@ -446,7 +493,7 @@ class Add_Tag(Cmdlet):
"- If -query is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tag can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
'- tag can also reference lists with curly braces: add-tag {philosophy} "other:tag"',
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
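Aside (not part of the commit): the two -duplicate spellings described above reduce to one (source, targets) pair. A small sketch mirroring the split-on-':' / split-on-',' logic that appears later in this diff; the helper name is hypothetical:

def parse_duplicate(arg: str) -> tuple[str, list[str]]:
    # Explicit: "title:album,artist" -> ("title", ["album", "artist"])
    # Inferred: "title,album,artist" -> ("title", ["album", "artist"])
    parts = arg.split(":")
    if len(parts) > 1:
        return parts[0], [t.strip() for t in parts[1].split(",") if t.strip()]
    parts2 = arg.split(",")
    if len(parts2) > 1:
        return parts2[0], [t.strip() for t in parts2[1:] if t.strip()]
    return arg, []

print(parse_duplicate("title:album,artist"))   # ('title', ['album', 'artist'])
print(parse_duplicate("title,album,artist"))   # ('title', ['album', 'artist'])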
@@ -484,7 +531,9 @@ class Add_Tag(Cmdlet):
# @N | download-media | add-tag ... | add-file ...
store_override = parsed.get("store")
stage_ctx = ctx.get_stage_context()
has_downstream = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
has_downstream = bool(
stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False)
)
include_temp = bool(parsed.get("all", False))
if has_downstream and not include_temp and not store_override:
@@ -498,7 +547,10 @@ class Add_Tag(Cmdlet):
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
log(
"No valid files to tag (all results were temporary; use --all to include temporary files)",
file=sys.stderr,
)
return 1
# Get tag from arguments (or fallback to pipeline payload)
@@ -512,13 +564,13 @@ class Add_Tag(Cmdlet):
if not raw_tag and results and not extract_template:
first = results[0]
payload_tag = None
# Try multiple tag lookup strategies in order
tag_lookups = [
lambda x: getattr(x, "tag", None),
lambda x: x.get("tag") if isinstance(x, dict) else None,
]
for lookup in tag_lookups:
try:
payload_tag = lookup(first)
@@ -526,7 +578,7 @@ class Add_Tag(Cmdlet):
break
except (AttributeError, TypeError, KeyError):
continue
if payload_tag:
if isinstance(payload_tag, str):
raw_tag = [payload_tag]
@@ -536,7 +588,7 @@ class Add_Tag(Cmdlet):
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
for l in list_arg.split(","):
l = l.strip()
if l:
raw_tag.append(f"{{{l}}}")
@@ -590,46 +642,70 @@ class Add_Tag(Cmdlet):
# treat add-tag as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
local_mode_requested = (
(not store_name_str)
or (store_name_str.upper() == "PATH")
or (store_name_str.lower() == "local")
)
is_known_backend = bool(store_name_str) and store_registry.is_available(
store_name_str
)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tag_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tag_list if isinstance(t, str)}
existing_lower = {
t.lower() for t in existing_tag_list if isinstance(t, str)
}
item_tag_to_add = list(tag_to_add)
if extract_template:
candidates = _get_title_candidates_for_extraction(res, existing_tag_list)
extracted, matched = _extract_tags_from_title_candidates(candidates, extract_template)
candidates = _get_title_candidates_for_extraction(
res, existing_tag_list
)
extracted, matched = _extract_tags_from_title_candidates(
candidates, extract_template
)
if extracted:
extract_matched_items += 1
if extract_debug:
log(f"[add_tag] extract matched: {matched!r} -> {extracted}", file=sys.stderr)
log(
f"[add_tag] extract matched: {matched!r} -> {extracted}",
file=sys.stderr,
)
for new_tag in extracted:
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
else:
extract_no_match_items += 1
if extract_debug:
rx_preview = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
rx_preview = (
extract_debug_rx.pattern
if extract_debug_rx
else "<uncompiled>"
)
cand_preview = "; ".join([repr(c) for c in candidates[:3]])
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}", file=sys.stderr)
log(
f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview!r}. candidates: {cand_preview}",
file=sys.stderr,
)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
item_tag_to_add = collapse_namespace_tag(
item_tag_to_add, "title", prefer="last"
)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
parts = str(duplicate_arg).split(":")
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
targets = [t.strip() for t in parts[1].split(",") if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
parts2 = str(duplicate_arg).split(",")
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
@@ -654,7 +730,10 @@ class Add_Tag(Cmdlet):
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
if (
t.lower().startswith(ns_prefix)
and t.lower() != new_tag.lower()
):
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
@@ -664,7 +743,9 @@ class Add_Tag(Cmdlet):
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tag_list = [t for t in existing_tag_list if t not in removed_namespace_tag]
updated_tag_list = [
t for t in existing_tag_list if t not in removed_namespace_tag
]
updated_tag_list.extend(actual_tag_to_add)
_set_item_tags(res, updated_tag_list)
@@ -672,7 +753,9 @@ class Add_Tag(Cmdlet):
_apply_title_to_result(res, final_title)
total_added += len(actual_tag_to_add)
total_modified += 1 if (removed_namespace_tag or actual_tag_to_add) else 0
total_modified += (
1 if (removed_namespace_tag or actual_tag_to_add) else 0
)
ctx.emit(res)
continue
@@ -680,14 +763,22 @@ class Add_Tag(Cmdlet):
pass
if local_mode_requested:
log("[add_tag] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
log(
"[add_tag] Error: Missing usable local path for tagging (or provide -store)",
file=sys.stderr,
)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
log(
f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}",
file=sys.stderr,
)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
resolved_hash = (
normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
)
if not resolved_hash and raw_path:
try:
p = Path(str(raw_path))
@@ -700,7 +791,10 @@ class Add_Tag(Cmdlet):
resolved_hash = None
if not resolved_hash:
log("[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
log(
"[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping",
file=sys.stderr,
)
ctx.emit(res)
continue
@@ -724,34 +818,44 @@ class Add_Tag(Cmdlet):
if extract_template:
candidates2 = _get_title_candidates_for_extraction(res, existing_tag_list)
extracted2, matched2 = _extract_tags_from_title_candidates(candidates2, extract_template)
extracted2, matched2 = _extract_tags_from_title_candidates(
candidates2, extract_template
)
if extracted2:
extract_matched_items += 1
if extract_debug:
log(f"[add_tag] extract matched: {matched2!r} -> {extracted2}", file=sys.stderr)
log(
f"[add_tag] extract matched: {matched2!r} -> {extracted2}",
file=sys.stderr,
)
for new_tag in extracted2:
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
else:
extract_no_match_items += 1
if extract_debug:
rx_preview2 = extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
rx_preview2 = (
extract_debug_rx.pattern if extract_debug_rx else "<uncompiled>"
)
cand_preview2 = "; ".join([repr(c) for c in candidates2[:3]])
log(f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}", file=sys.stderr)
log(
f"[add_tag] extract no match for template {extract_template!r}. regex: {rx_preview2!r}. candidates: {cand_preview2}",
file=sys.stderr,
)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
# Handle -duplicate logic (copy existing tag to new namespaces)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
parts = str(duplicate_arg).split(":")
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
targets = [t.strip() for t in parts[1].split(",") if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
parts2 = str(duplicate_arg).split(",")
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
@@ -812,11 +916,17 @@ class Add_Tag(Cmdlet):
)
if extract_template and extract_matched_items == 0:
log(f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)", file=sys.stderr)
log(
f"[add_tag] extract: no matches for template '{extract_template}' across {len(results)} item(s)",
file=sys.stderr,
)
elif extract_template and extract_no_match_items > 0 and extract_debug:
log(f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}", file=sys.stderr)
log(
f"[add_tag] extract: matched {extract_matched_items}, no-match {extract_no_match_items}",
file=sys.stderr,
)
return 0
CMDLET = Add_Tag()
CMDLET = Add_Tag()

View File

@@ -29,7 +29,7 @@ class Add_Url(sh.Cmdlet):
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = sh.parse_cmdlet_args(args, self)
@@ -51,9 +51,11 @@ class Add_Url(sh.Cmdlet):
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
results: List[Any] = (
result if isinstance(result, list) else ([result] if result is not None else [])
)
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
@@ -61,35 +63,37 @@ class Add_Url(sh.Cmdlet):
# Extract hash and store from result or args
file_hash = query_hash or (sh.get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (sh.get_field(result, "store") if result is not None else None)
store_name = parsed.get("store") or (
sh.get_field(result, "store") if result is not None else None
)
url_arg = parsed.get("url")
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
urls = [u.strip() for u in str(url_arg).split(",") if u.strip()]
if not urls:
log("Error: No valid url provided")
return 1
# Get backend and add url
try:
storage = Store(config)
@@ -137,23 +141,30 @@ class Add_Url(sh.Cmdlet):
raw_hash = query_hash or sh.get_field(item, "hash")
raw_store = store_override or sh.get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr
)
continue
normalized = sh.normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[add-url] Warning: Item has empty store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[add-url] Warning: Item has empty store; skipping", file=sys.stderr
)
continue
# Validate backend exists (skip PATH/unknown).
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[add-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
f"[add-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
@@ -170,7 +181,7 @@ class Add_Url(sh.Cmdlet):
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
@@ -206,7 +217,7 @@ class Add_Url(sh.Cmdlet):
_set_item_url(result, merged)
ctx.emit(result)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
@@ -216,6 +227,3 @@ class Add_Url(sh.Cmdlet):
CMDLET = Add_Url()

View File

@@ -31,395 +31,421 @@ _SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")
def _extract_sha256_hex(item: Any) -> str:
try:
if isinstance(item, dict):
h = item.get("hash")
else:
h = getattr(item, "hash", None)
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
try:
if isinstance(item, dict):
h = item.get("hash")
else:
h = getattr(item, "hash", None)
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
def _extract_store_name(item: Any) -> str:
try:
if isinstance(item, dict):
s = item.get("store")
else:
s = getattr(item, "store", None)
return str(s or "").strip()
except Exception:
return ""
try:
if isinstance(item, dict):
s = item.get("store")
else:
s = getattr(item, "store", None)
return str(s or "").strip()
except Exception:
return ""
def _extract_url(item: Any) -> str:
try:
u = sh.get_field(item, "url") or sh.get_field(item, "target")
if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
return u.strip()
except Exception:
pass
return ""
try:
u = sh.get_field(item, "url") or sh.get_field(item, "target")
if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
return u.strip()
except Exception:
pass
return ""
def _extract_hash_from_hydrus_file_url(url: str) -> str:
try:
parsed = urlparse(str(url))
if not (parsed.path or "").endswith("/get_files/file"):
return ""
qs = parse_qs(parsed.query or "")
h = (qs.get("hash") or [""])[0]
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
try:
parsed = urlparse(str(url))
if not (parsed.path or "").endswith("/get_files/file"):
return ""
qs = parse_qs(parsed.query or "")
h = (qs.get("hash") or [""])[0]
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
except Exception:
instances = set()
return instances
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
except Exception:
instances = set()
return instances
def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
"""Download a Hydrus-backed item to a local temp path (best-effort).
"""Download a Hydrus-backed item to a local temp path (best-effort).
This is intentionally side-effect free except for writing the local temp file.
"""
try:
from config import get_hydrus_access_key, get_hydrus_url
from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
except Exception:
return None
This is intentionally side-effect free except for writing the local temp file.
"""
try:
from config import get_hydrus_access_key, get_hydrus_url
from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
except Exception:
return None
store_name = _extract_store_name(item)
store_lower = store_name.lower()
hydrus_instances = _hydrus_instance_names(config)
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
store_name = _extract_store_name(item)
store_lower = store_name.lower()
hydrus_instances = _hydrus_instance_names(config)
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
if not file_hash:
return None
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (
_extract_hash_from_hydrus_file_url(url) if url else ""
)
if not file_hash:
return None
# Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
is_hydrus_url = False
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
return None
# Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
is_hydrus_url = False
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith(
"/get_files/file"
) and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
return None
# Prefer store name as instance key; fall back to "home".
access_key = None
hydrus_url = None
for inst in [s for s in [store_lower, "home"] if s]:
try:
access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
if access_key and hydrus_url:
break
except Exception:
access_key = None
hydrus_url = None
# Prefer store name as instance key; fall back to "home".
access_key = None
hydrus_url = None
for inst in [s for s in [store_lower, "home"] if s]:
try:
access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
if access_key and hydrus_url:
break
except Exception:
access_key = None
hydrus_url = None
if not access_key or not hydrus_url:
return None
if not access_key or not hydrus_url:
return None
client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
# Best-effort extension from Hydrus metadata.
suffix = ".hydrus"
try:
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
if isinstance(entry, dict):
ext = entry.get("ext")
if isinstance(ext, str) and ext.strip():
cleaned = ext.strip()
if not cleaned.startswith("."):
cleaned = "." + cleaned.lstrip(".")
if len(cleaned) <= 12:
suffix = cleaned
except Exception:
pass
# Best-effort extension from Hydrus metadata.
suffix = ".hydrus"
try:
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
if isinstance(entry, dict):
ext = entry.get("ext")
if isinstance(ext, str) and ext.strip():
cleaned = ext.strip()
if not cleaned.startswith("."):
cleaned = "." + cleaned.lstrip(".")
if len(cleaned) <= 12:
suffix = cleaned
except Exception:
pass
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
dest = output_dir / f"{file_hash}{suffix}"
if dest.exists():
dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
dest = output_dir / f"{file_hash}{suffix}"
if dest.exists():
dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
headers = {"Hydrus-Client-API-Access-Key": access_key}
download_hydrus_file(file_url, headers, dest, timeout=60.0)
try:
if dest.exists() and dest.is_file():
return dest
except Exception:
return None
return None
def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
def _resolve_existing_or_fetch_path(
item: Any, config: Dict[str, Any]
) -> tuple[Path | None, Path | None]:
"""Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
# 1) Direct local path
try:
po = coerce_to_pipe_object(item, None)
raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
raw_path = (
getattr(po, "path", None)
or getattr(po, "target", None)
or sh.get_pipe_object_path(item)
)
if raw_path:
p = Path(str(raw_path)).expanduser()
if p.exists():
return p, None
except Exception:
pass
# 2) Store-backed path
file_hash = _extract_sha256_hex(item)
store_name = _extract_store_name(item)
if file_hash and store_name:
try:
from Store import Store
store = Store(config)
backend = store[store_name]
src = backend.get_file(file_hash)
if isinstance(src, Path):
if src.exists():
return src, None
elif isinstance(src, str) and src.strip():
cand = Path(src).expanduser()
if cand.exists():
return cand, None
# If the backend returns a URL (HydrusNetwork), download it.
if src.strip().lower().startswith(("http://", "https://")):
tmp_base = None
try:
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = (
Path(str(tmp_base)).expanduser()
if tmp_base
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
downloaded = _maybe_download_hydrus_item(
{"hash": file_hash, "store": store_name, "url": src.strip()},
config,
out_dir,
)
if downloaded is not None:
return downloaded, downloaded
except Exception:
pass
# 3) Hydrus-backed items without backend.get_file path.
try:
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = (
Path(str(tmp_base)).expanduser()
if tmp_base
else (Path(tempfile.gettempdir()) / "Medios-Macina")
)
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item(item, config, out_dir)
if downloaded is not None:
return downloaded, downloaded
return None, None
def _unique_arcname(name: str, seen: Set[str]) -> str:
base = str(name or "").replace("\\", "/")
base = base.lstrip("/")
if not base:
base = "file"
if base not in seen:
seen.add(base)
return base
stem = base
suffix = ""
if "/" not in base:
p = Path(base)
stem = p.stem
suffix = p.suffix
n = 2
while True:
candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
if candidate not in seen:
seen.add(candidate)
return candidate
n += 1
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
parsed = parse_cmdlet_args(args, CMDLET)
level_raw = parsed.get("level")
try:
level = int(level_raw) if level_raw is not None else 11
except Exception:
level = 11
if level < 1:
level = 1
if level > 22:
level = 22
# Output destination is controlled by the shared -path behavior in the pipeline runner.
# This cmdlet always creates the archive in the configured output directory and emits it.
# Collect piped items; archive-file is a batch command (single output).
items: List[Any] = []
if isinstance(result, list):
items = list(result)
elif result is not None:
items = [result]
if not items:
log("No piped items provided to archive-file", file=sys.stderr)
return 1
temp_downloads: List[Path] = []
try:
paths: List[Path] = []
for it in items:
p, tmp = _resolve_existing_or_fetch_path(it, config)
if p is None:
continue
paths.append(p)
if tmp is not None:
temp_downloads.append(tmp)
# Keep stable order, remove duplicates.
uniq: List[Path] = []
seen_paths: Set[str] = set()
for p in paths:
key = str(p.resolve()) if p.exists() else str(p)
if key in seen_paths:
continue
seen_paths.add(key)
uniq.append(p)
paths = uniq
if not paths:
log("No existing file paths found in piped items", file=sys.stderr)
return 1
out_dir = resolve_output_dir(config)
try:
out_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
stamp = time.strftime("%Y%m%d_%H%M%S")
out_path = out_dir / f"archive_{stamp}.tar.zst"
try:
out_path = sh._unique_destination_path(out_path) # type: ignore[attr-defined]
except Exception:
pass
try:
out_path.parent.mkdir(parents=True, exist_ok=True)
except Exception as exc:
log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
return 1
# Import zstandard lazily so the rest of the CLI still runs without it.
try:
import zstandard as zstd # type: ignore
except Exception:
log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
return 1
# Write tar stream into zstd stream.
try:
with open(out_path, "wb") as out_handle:
cctx = zstd.ZstdCompressor(level=level)
with cctx.stream_writer(out_handle) as compressor:
with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
with tarfile.open(
fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT
) as tf:
seen_names: Set[str] = set()
for p in paths:
arcname = _unique_arcname(p.name, seen_names)
# For directories, tarfile will include contents when recursive=True.
try:
tf.add(str(p), arcname=arcname, recursive=True)
except Exception as exc:
log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
except Exception as exc:
log(f"Archive creation failed: {exc}", file=sys.stderr)
return 1
# Emit a single artifact downstream.
hash_value = None
try:
from SYS.utils import sha256_file
hash_value = sha256_file(out_path)
except Exception:
hash_value = None
pipe_obj = create_pipe_object_result(
source="archive",
identifier=out_path.stem,
file_path=str(out_path),
cmdlet_name="archive-file",
title=out_path.name,
hash_value=hash_value,
is_temp=True,
store="PATH",
extra={
"target": str(out_path),
"archive_format": "tar.zst",
"compression": "zstd",
"level": level,
"source_count": len(paths),
"source_paths": [str(p) for p in paths],
},
)
ctx.emit(pipe_obj)
return 0
finally:
# Best-effort cleanup of any temp Hydrus downloads we created.
for tmp in temp_downloads:
try:
tmp.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
if tmp.exists():
tmp.unlink()
except Exception:
pass
except Exception:
pass
CMDLET = Cmdlet(
name="archive-file",
summary="Archive piped files into a single .tar.zst.",
usage="@N | archive-file [-level <1-22>] [-path <path>]",
arg=[
CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
CmdletArg(
"-level", type="integer", description="Zstandard compression level (default: 11)."
),
SharedArgs.PATH,
],
detail=[
"- Example: @1-5 | archive-file",
"- Default zstd level is 11.",
"- Emits one output item (the archive) for downstream piping.",
],
)
CMDLET.exec = _run
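
For reference, the archiving step above streams a tar archive straight through a zstandard compressor, so no uncompressed temporary copy is written. A minimal standalone sketch of that pattern, assuming the zstandard package is installed (function name and paths are illustrative only):

import tarfile
from pathlib import Path

import zstandard as zstd


def write_tar_zst(paths: list[Path], out_path: Path, level: int = 11) -> None:
    # Compress the tar stream as it is produced; level mirrors the cmdlet's -level arg.
    cctx = zstd.ZstdCompressor(level=level)
    with open(out_path, "wb") as handle:
        with cctx.stream_writer(handle) as compressor:
            # "w|" writes the tar as a forward-only stream; PAX format keeps long names intact.
            with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
                for p in paths:
                    tf.add(str(p), arcname=p.name, recursive=True)


# Example (illustrative paths): write_tar_zst([Path("a.txt"), Path("docs")], Path("archive.tar.zst"))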

View File

@@ -1,4 +1,5 @@
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence
@@ -23,12 +24,16 @@ class Delete_File(sh.Cmdlet):
super().__init__(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-query \"hash:<sha256>\"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
usage='delete-file [-query "hash:<sha256>"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]',
alias=["del-file"],
arg=[
sh.SharedArgs.QUERY,
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg(
"conserve", description="Choose which copy to keep: 'local' or 'hydrus'."
),
sh.CmdletArg(
"lib-root", description="Path to local library root for database cleanup."
),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
detail=[
@@ -62,7 +67,11 @@ class Delete_File(sh.Cmdlet):
title_val = item.get("title") or item.get("name")
else:
hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path")
target = (
sh.get_field(item, "target")
or sh.get_field(item, "file_path")
or sh.get_field(item, "path")
)
title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
def _get_ext_from_item() -> str:
@@ -102,7 +111,7 @@ class Delete_File(sh.Cmdlet):
pass
return ""
store = None
if isinstance(item, dict):
store = item.get("store")
@@ -133,19 +142,29 @@ class Delete_File(sh.Cmdlet):
is_hydrus_store = False
# Backwards-compatible fallback heuristic (older items might only carry a name).
if (not is_hydrus_store) and bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"}):
if (
(not is_hydrus_store)
and bool(store_lower)
and ("hydrus" in store_lower or store_lower in {"home", "work"})
):
is_hydrus_store = True
store_label = str(store) if store else "default"
hydrus_prefix = f"[hydrusnetwork:{store_label}]"
# For Hydrus files, the target IS the hash
if is_hydrus_store and not hash_hex_raw:
hash_hex_raw = target
hash_hex = sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
hash_hex = (
sh.normalize_hash(override_hash) if override_hash else sh.normalize_hash(hash_hex_raw)
)
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
local_target = (
isinstance(target, str)
and target.strip()
and not str(target).lower().startswith(("http://", "https://"))
)
deleted_rows: List[Dict[str, Any]] = []
# If this item references a configured non-Hydrus store backend, prefer deleting
@@ -169,11 +188,15 @@ class Delete_File(sh.Cmdlet):
try:
if hash_candidate and hasattr(backend, "get_file"):
candidate_path = backend.get_file(hash_candidate)
resolved_path = candidate_path if isinstance(candidate_path, Path) else None
resolved_path = (
candidate_path if isinstance(candidate_path, Path) else None
)
except Exception:
resolved_path = None
identifier = hash_candidate or (str(target).strip() if isinstance(target, str) else "")
identifier = hash_candidate or (
str(target).strip() if isinstance(target, str) else ""
)
if identifier:
deleter = getattr(backend, "delete_file", None)
if callable(deleter) and bool(deleter(identifier)):
@@ -181,18 +204,27 @@ class Delete_File(sh.Cmdlet):
size_bytes: int | None = None
try:
if resolved_path is not None and isinstance(resolved_path, Path) and resolved_path.exists():
if (
resolved_path is not None
and isinstance(resolved_path, Path)
and resolved_path.exists()
):
size_bytes = int(resolved_path.stat().st_size)
except Exception:
size_bytes = None
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else (resolved_path.name if resolved_path else identifier),
"title": (
str(title_val).strip()
if title_val
else (resolved_path.name if resolved_path else identifier)
),
"store": store_label,
"hash": hash_candidate or (hash_hex or ""),
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
"ext": _get_ext_from_item()
or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
}
)
@@ -216,7 +248,7 @@ class Delete_File(sh.Cmdlet):
local_target = False
except Exception:
pass
if conserve != "local" and local_target:
path = Path(str(target))
size_bytes: int | None = None
@@ -225,7 +257,7 @@ class Delete_File(sh.Cmdlet):
size_bytes = int(path.stat().st_size)
except Exception:
size_bytes = None
# If lib_root is provided and this is from a folder store, use the Folder class
if lib_root:
try:
@@ -276,7 +308,7 @@ class Delete_File(sh.Cmdlet):
)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
# Remove common sidecars regardless of file removal success
for sidecar in (
path.with_suffix(".tag"),
@@ -291,11 +323,11 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = False
should_try_hydrus = is_hydrus_store
# If conserve is set to hydrus, definitely don't delete
if conserve == "hydrus":
should_try_hydrus = False
if should_try_hydrus and hash_hex:
# Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
# This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
@@ -312,7 +344,10 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = True
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
debug(
f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
file=sys.stderr,
)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
else:
@@ -328,7 +363,10 @@ class Delete_File(sh.Cmdlet):
client = candidate
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
log(
f"Hydrus client unavailable for store '{store}': {exc}",
file=sys.stderr,
)
return False
if client is None:
if not local_deleted:
@@ -365,7 +403,10 @@ class Delete_File(sh.Cmdlet):
hydrus_deleted = True
title_str = str(title_val).strip() if title_val else ""
if title_str:
debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
debug(
f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
file=sys.stderr,
)
else:
debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
except Exception:
@@ -411,7 +452,7 @@ class Delete_File(sh.Cmdlet):
lib_root: str | None = None
reason_tokens: list[str] = []
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
@@ -460,7 +501,7 @@ class Delete_File(sh.Cmdlet):
items = result
elif result:
items = [result]
if not items:
log("No items to delete", file=sys.stderr)
return 1
@@ -468,7 +509,9 @@ class Delete_File(sh.Cmdlet):
success_count = 0
deleted_rows: List[Dict[str, Any]] = []
for item in items:
rows = self._process_single_item(item, override_hash, conserve, lib_root, reason, config)
rows = self._process_single_item(
item, override_hash, conserve, lib_root, reason, config
)
if rows:
success_count += 1
deleted_rows.extend(rows)
@@ -481,7 +524,9 @@ class Delete_File(sh.Cmdlet):
result_row.add_column("Title", row.get("title", ""))
result_row.add_column("Store", row.get("store", ""))
result_row.add_column("Hash", row.get("hash", ""))
result_row.add_column("Size", _format_size(row.get("size_bytes"), integer_only=False))
result_row.add_column(
"Size", _format_size(row.get("size_bytes"), integer_only=False)
)
result_row.add_column("Ext", row.get("ext", ""))
# Display-only: print directly and do not affect selection/history.
@@ -504,5 +549,3 @@ class Delete_File(sh.Cmdlet):
# Instantiate and register the cmdlet
Delete_File()
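
A hedged usage sketch for the -conserve flag above (the hash value is a placeholder; row-selection syntax follows the other examples in these cmdlets):

# Keep the local copy; only the Hydrus copy is removed for Hydrus-backed items.
@1 | delete-file -conserve local
# Keep the Hydrus copy; the local file and its sidecars (.tag and similar) are removed.
delete-file -query "hash:<sha256>" -conserve hydrus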

View File

@@ -26,12 +26,14 @@ class Delete_Note(Cmdlet):
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-query \"hash:<sha256>\"] <name>",
usage='delete-note -store <store> [-query "hash:<sha256>"] <name>',
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
CmdletArg(
"name", type="string", required=True, description="The note name/key to delete."
),
],
detail=[
"- Deletes the named note from the selected store backend.",
@@ -44,7 +46,9 @@ class Delete_Note(Cmdlet):
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -76,7 +80,10 @@ class Delete_Note(Cmdlet):
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
if not note_name_override and not inferred_note_name:
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
log(
"[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)",
file=sys.stderr,
)
return 1
results = normalize_result_input(result)
@@ -84,7 +91,10 @@ class Delete_Note(Cmdlet):
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
log(
'[delete_note] Error: Requires piped item(s) or -store and -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -96,9 +106,14 @@ class Delete_Note(Cmdlet):
continue
# Resolve which note name to delete for this item.
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
note_name = (
note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
)
if not note_name:
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
log(
"[delete_note] Error: Missing note name (pass <name> or pipe a note row)",
file=sys.stderr,
)
return 1
store_name = str(store_override or res.get("store") or "").strip()
@@ -106,7 +121,10 @@ class Delete_Note(Cmdlet):
raw_path = res.get("path")
if not store_name:
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
log(
"[delete_note] Error: Missing -store and item has no store field",
file=sys.stderr,
)
return 1
resolved_hash = self._resolve_hash(

View File

@@ -30,7 +30,9 @@ def _extract_hash(item: Any) -> Optional[str]:
return normalize_hash(str(h)) if h else None
def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: Dict[str, Any]) -> None:
def _upsert_relationships(
db: API_folder_store, file_hash: str, relationships: Dict[str, Any]
) -> None:
conn = db.connection
if conn is None:
raise RuntimeError("Store DB connection is not initialized")
@@ -48,7 +50,9 @@ def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: D
)
def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str) -> None:
def _remove_reverse_link(
db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str
) -> None:
meta = db.get_metadata(dst_hash) or {}
rels = meta.get("relationships") if isinstance(meta, dict) else None
if not isinstance(rels, dict) or not rels:
@@ -78,7 +82,12 @@ def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str,
_upsert_relationships(db, dst_hash, rels)
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
def _refresh_relationship_view_if_current(
target_hash: Optional[str],
target_path: Optional[str],
other: Optional[str],
config: Dict[str, Any],
) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -129,12 +138,12 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete relationships from files.
Args:
result: Input result(s) from previous cmdlet
args: Command arguments
config: CLI configuration
Returns:
Exit code (0 = success)
"""
@@ -153,7 +162,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
raw_path = parsed_args.get("path")
# Normalize input
results = normalize_result_input(result)
@@ -163,7 +172,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
results = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not results:
# Legacy -path mode below may still apply
if raw_path:
@@ -179,9 +188,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected in pipeline; use -store to choose one", file=sys.stderr)
log(
"Multiple stores detected in pipeline; use -store to choose one",
file=sys.stderr,
)
return 1
deleted_count = 0
# STORE/HASH FIRST: folder-store DB deletion (preferred)
@@ -208,7 +220,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Enforce same-store when items carry store info
item_store = get_field(single_result, "store")
if item_store and str(item_store) != str(store_name):
log(f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
log(
f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'",
file=sys.stderr,
)
return 1
file_hash = _extract_hash(single_result)
@@ -225,7 +240,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
file_hash = None
if not file_hash:
log("Could not extract file hash for deletion (use -query \"hash:<sha256>\" or ensure pipeline includes hash)", file=sys.stderr)
log(
'Could not extract file hash for deletion (use -query "hash:<sha256>" or ensure pipeline includes hash)',
file=sys.stderr,
)
return 1
meta = db.get_metadata(file_hash) or {}
@@ -241,7 +259,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(rt),
)
rels = {}
elif rel_type_filter:
# delete one type (case-insensitive key match)
@@ -257,13 +280,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(key_to_delete),
)
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
log(
"Specify --all to delete all relationships or -type <type> to delete specific type",
file=sys.stderr,
)
return 1
_upsert_relationships(db, file_hash, rels)
@@ -271,12 +302,15 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
_refresh_relationship_view_if_current(file_hash, None, None, config)
deleted_count += 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
log(
f"Successfully deleted relationships from {deleted_count} file(s)",
file=sys.stderr,
)
return 0
except Exception as exc:
log(f"Error deleting store relationships: {exc}", file=sys.stderr)
return 1
# LEGACY PATH MODE (single local DB)
# Get storage path
local_storage_path = get_local_storage_path(config)
@@ -330,7 +364,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(rt),
)
rels = {}
elif rel_type_filter:
key_to_delete: Optional[str] = None
@@ -345,26 +384,36 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
_remove_reverse_link(
db,
src_hash=file_hash,
dst_hash=other_norm,
rel_type=str(key_to_delete),
)
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
log(
"Specify --all to delete all relationships or -type <type> to delete specific type",
file=sys.stderr,
)
return 1
_upsert_relationships(db, file_hash, rels)
conn.commit()
_refresh_relationship_view_if_current(file_hash, str(file_path_obj), None, config)
_refresh_relationship_view_if_current(
file_hash, str(file_path_obj), None, config
)
deleted_count += 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error in delete-relationship: {exc}", file=sys.stderr)
return 1
@@ -379,7 +428,11 @@ CMDLET = Cmdlet(
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
CmdletArg(
"type",
type="string",
description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types.",
),
],
detail=[
"- Delete all relationships: pipe files | delete-relationship --all",
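
The _remove_reverse_link helper above keeps relationship metadata symmetric: when a type is removed from the source file, the source hash is also pruned from the destination's list for that type. A rough sketch of that pruning on a plain dict, assuming a {rel_type: [hashes]} shape rather than the exact store schema:

def prune_reverse_link(dst_rels: dict[str, list[str]], src_hash: str, rel_type: str) -> dict[str, list[str]]:
    # Drop src_hash from the destination's list for this relationship type;
    # remove the key entirely once the list is empty (illustrative behaviour).
    remaining = [h for h in dst_rels.get(rel_type, []) if h != src_hash]
    if remaining:
        dst_rels[rel_type] = remaining
    else:
        dst_rels.pop(rel_type, None)
    return dst_rels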

View File

@@ -20,7 +20,9 @@ from SYS.logger import debug, log
from Store import Store
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
def _refresh_tag_view_if_current(
file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]
) -> None:
"""If the current subject matches the target, refresh tags via get-tag."""
try:
from cmdlet import get as get_cmdlet # type: ignore
@@ -53,7 +55,9 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
subj_paths = [norm(v) for v in [subject.get("path"), subject.get("target")] if v]
else:
subj_hashes = [norm(get_field(subject, f)) for f in ("hash",) if get_field(subject, f)]
subj_paths = [norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)]
subj_paths = [
norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)
]
is_match = False
if target_hash and target_hash in subj_hashes:
@@ -76,11 +80,15 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
CMDLET = Cmdlet(
name="delete-tag",
summary="Remove tags from a file in a store.",
usage="delete-tag -store <store> [-query \"hash:<sha256>\"] <tag>[,<tag>...]",
usage='delete-tag -store <store> [-query "hash:<sha256>"] <tag>[,<tag>...]',
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
CmdletArg(
"<tag>[,<tag>...]",
required=True,
description="One or more tags to remove. Comma- or space-separated.",
),
],
detail=[
"- Requires a Hydrus file (hash present) or explicit -query override.",
@@ -88,28 +96,35 @@ CMDLET = Cmdlet(
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
def _looks_like_tag_row(obj: Any) -> bool:
if obj is None:
return False
# TagItem (direct) or PipeObject/dict emitted from get-tag table rows.
try:
if hasattr(obj, '__class__') and obj.__class__.__name__ == 'TagItem' and hasattr(obj, 'tag_name'):
if (
hasattr(obj, "__class__")
and obj.__class__.__name__ == "TagItem"
and hasattr(obj, "tag_name")
):
return True
except Exception:
pass
try:
return bool(get_field(obj, 'tag_name'))
return bool(get_field(obj, "tag_name"))
except Exception:
return False
has_piped_tag = _looks_like_tag_row(result)
has_piped_tag_list = isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
has_piped_tag_list = (
isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
)
# Parse -query/-store overrides and collect remaining args.
override_query: str | None = None
@@ -151,8 +166,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
grouped_table = ""
grouped_tags = get_field(result, "tag") if result is not None else None
tags_arg = parse_tag_arguments(rest)
if grouped_table == "tag.selection" and isinstance(grouped_tags, list) and grouped_tags and not tags_arg:
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash"))
if (
grouped_table == "tag.selection"
and isinstance(grouped_tags, list)
and grouped_tags
and not tags_arg
):
file_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(result, "hash"))
)
store_name = override_store or get_field(result, "store")
path = get_field(result, "path") or get_field(result, "target")
tags = [str(t) for t in grouped_tags if t]
@@ -161,7 +185,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not tags_arg and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Normalize result to a list for processing
items_to_process = []
if isinstance(result, list):
@@ -171,13 +195,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Process each item
success_count = 0
# If we have TagItems and no args, we are deleting the tags themselves
# If we have Files (or other objects) and args, we are deleting tags FROM those files
# Check if we are in "delete selected tags" mode (tag rows)
is_tag_item_mode = bool(items_to_process) and _looks_like_tag_row(items_to_process[0])
if is_tag_item_mode:
# Collect all tags to delete from the TagItems and batch per file.
# This keeps delete-tag efficient (one backend call per file).
@@ -186,7 +210,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag_name = get_field(item, "tag_name")
if not tag_name:
continue
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(item, "hash"))
)
item_store = override_store or get_field(item, "store")
item_path = get_field(item, "path") or get_field(item, "target")
key = (str(item_hash or ""), str(item_store or ""), str(item_path or ""))
@@ -202,32 +230,33 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# "Delete tags from files" mode
# We need args (tags to delete)
if not tags_arg:
log("Requires at least one tag argument when deleting from files")
return 1
log("Requires at least one tag argument when deleting from files")
return 1
# Process each item
# If we have tags from @ syntax (e.g. delete-tag @{1,2}), we ignore the piped result for tag selection
# but we might need the piped result for the file context if @ selection was from a Tag table
# Actually, the @ selection logic above already extracted tags.
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
for item in items_to_process:
tags_to_delete: list[str] = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
item_hash = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_field(item, "hash"))
)
item_path = get_field(item, "path") or get_field(item, "target")
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
if tags_arg:
tags_to_delete = tags_arg
else:
tag_name = get_field(item, 'tag_name')
tag_name = get_field(item, "tag_name")
if tag_name:
tags_to_delete = [str(tag_name)]
else:
@@ -244,12 +273,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
return 1
def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, store_name: str | None, config: Dict[str, Any]) -> bool:
def _process_deletion(
tags: list[str],
file_hash: str | None,
path: str | None,
store_name: str | None,
config: Dict[str, Any],
) -> bool:
"""Helper to execute the deletion logic for a single target."""
if not tags:
return False
if not store_name:
log("Store is required (use -store or pipe a result with store)", file=sys.stderr)
return False
@@ -258,12 +294,16 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
if not resolved_hash and path:
try:
from SYS.utils import sha256_file
resolved_hash = sha256_file(Path(path))
except Exception:
resolved_hash = None
if not resolved_hash:
log("Item does not include a usable hash (and hash could not be derived from path)", file=sys.stderr)
log(
"Item does not include a usable hash (and hash could not be derived from path)",
file=sys.stderr,
)
return False
def _fetch_existing_tags() -> list[str]:
@@ -278,18 +318,23 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")]
if title_tags:
existing_tags = _fetch_existing_tags()
current_titles = [t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")]
current_titles = [
t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")
]
del_title_set = {t.lower() for t in title_tags}
remaining_titles = [t for t in current_titles if t.lower() not in del_title_set]
if current_titles and not remaining_titles:
log("Cannot delete the last title: tag. Add a replacement title first (add-tags \"title:new title\").", file=sys.stderr)
log(
'Cannot delete the last title: tag. Add a replacement title first (add-tags "title:new title").',
file=sys.stderr,
)
return False
try:
backend = Store(config)[store_name]
ok = backend.delete_tag(resolved_hash, list(tags), config=config)
if ok:
preview = resolved_hash[:12] + ('…' if len(resolved_hash) > 12 else '')
preview = resolved_hash[:12] + ("…" if len(resolved_hash) > 12 else "")
debug(f"Removed {len(tags)} tag(s) from {preview} via store '{store_name}'.")
_refresh_tag_view_if_current(resolved_hash, store_name, path, config)
return True
@@ -302,6 +347,3 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()
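
A condensed sketch of the last-title guard used above, which is why delete-tag refuses to strip the only remaining title: tag (tag lists are illustrative):

def would_remove_last_title(existing_tags: list[str], tags_to_delete: list[str]) -> bool:
    titles = [t for t in existing_tags if t.lower().startswith("title:")]
    delete_set = {t.lower() for t in tags_to_delete if t.lower().startswith("title:")}
    remaining = [t for t in titles if t.lower() not in delete_set]
    # True means every title: tag would be gone, so the deletion should be rejected.
    return bool(titles) and not remaining


# would_remove_last_title(["title:foo", "artist:bar"], ["title:foo"]) -> True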

View File

@@ -29,7 +29,11 @@ class Delete_Url(Cmdlet):
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
CmdletArg(
"url",
required=False,
description="URL to remove (optional when piping url rows)",
),
],
detail=[
"- Removes URL association from file identified by hash+store",
@@ -38,7 +42,7 @@ class Delete_Url(Cmdlet):
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete URL from file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
@@ -47,9 +51,11 @@ class Delete_Url(Cmdlet):
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
results: List[Any] = (
result if isinstance(result, list) else ([result] if result is not None else [])
)
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
@@ -57,18 +63,20 @@ class Delete_Url(Cmdlet):
# Extract hash and store from result or args
file_hash = query_hash or (get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (get_field(result, "store") if result is not None else None)
store_name = parsed.get("store") or (
get_field(result, "store") if result is not None else None
)
url_arg = parsed.get("url")
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = normalize_hash(file_hash)
@@ -87,7 +95,7 @@ class Delete_Url(Cmdlet):
return [u.strip() for u in normalize_urls(raw) if str(u).strip()]
urls_from_cli = _urls_from_arg(url_arg)
# Get backend and delete url
try:
storage = Store(config)
@@ -129,21 +137,29 @@ class Delete_Url(Cmdlet):
raw_hash = query_hash or get_field(item, "hash")
raw_store = store_override or get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[delete-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item missing hash/store; skipping",
file=sys.stderr,
)
continue
normalized = normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[delete-url] Warning: Item has empty store; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has empty store; skipping", file=sys.stderr
)
continue
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[delete-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
f"[delete-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
@@ -152,9 +168,17 @@ class Delete_Url(Cmdlet):
# - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
item_urls = list(urls_from_cli)
if not item_urls:
item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
item_urls = [
u.strip()
for u in normalize_urls(
get_field(item, "url") or get_field(item, "source_url")
)
if str(u).strip()
]
if not item_urls:
ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
ctx.print_if_visible(
"[delete-url] Warning: Item has no url field; skipping", file=sys.stderr
)
continue
batch.setdefault(store_text, []).append((normalized, item_urls))
@@ -168,7 +192,7 @@ class Delete_Url(Cmdlet):
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
bulk_pairs = [(h, merged[h]) for h in merged.keys()]
@@ -195,27 +219,41 @@ class Delete_Url(Cmdlet):
# remove the piped url row(s).
remove_set = urls_from_cli
if not remove_set:
remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
remove_set = [
u.strip()
for u in normalize_urls(
get_field(item, "url") or get_field(item, "source_url")
)
if str(u).strip()
]
_set_item_url(item, _remove_urls(existing, list(remove_set)))
ctx.emit(item)
return 0
# Single-item mode
if not urls_from_cli:
urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
urls_from_cli = [
u.strip()
for u in normalize_urls(
get_field(result, "url") or get_field(result, "source_url")
)
if str(u).strip()
]
if not urls_from_cli:
log("Error: No URL provided")
return 1
backend = storage[str(store_name)]
backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
ctx.print_if_visible(
f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr
)
if result is not None:
existing = get_field(result, "url")
_set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
ctx.emit(result)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
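
For reference, the bulk path above merges URL lists per file hash before handing one batch per store to the backend; a small sketch of that merge step (data shapes are illustrative):

def merge_url_batches(pairs: list[tuple[str, list[str]]]) -> list[tuple[str, list[str]]]:
    merged: dict[str, list[str]] = {}
    for file_hash, urls in pairs:
        bucket = merged.setdefault(file_hash, [])
        for url in urls or []:
            if url and url not in bucket:
                bucket.append(url)
    return [(h, merged[h]) for h in merged]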

View File

@@ -44,8 +44,12 @@ class Download_File(Cmdlet):
SharedArgs.URL,
SharedArgs.PATH,
# Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
CmdletArg(
name="-output",
type="string",
alias="o",
description="(deprecated) Output directory (use -path instead)",
),
],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
@@ -67,7 +71,7 @@ class Download_File(Cmdlet):
raw_url = [raw_url]
expanded_urls: List[str] = []
for u in (raw_url or []):
for u in raw_url or []:
if u is None:
continue
s = str(u).strip()
@@ -99,7 +103,9 @@ class Download_File(Cmdlet):
return 1
@staticmethod
def _build_preview(raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[Any]:
def _build_preview(
raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int
) -> List[Any]:
try:
preview: List[Any] = []
preview.extend(list(raw_urls or [])[: max(0, total_items)])
@@ -212,7 +218,11 @@ class Download_File(Cmdlet):
title = str(get_field(item, "title") or "").strip() if item is not None else ""
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
table_title = (
f"Internet Archive: {title}".strip().rstrip(":")
if title
else f"Internet Archive: {identifier}"
)
try:
from result_table import ResultTable
@@ -329,7 +339,9 @@ class Download_File(Cmdlet):
config: Dict[str, Any],
provider_hint: Optional[str] = None,
) -> None:
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
title_val = (
title_hint or downloaded_path.stem or "Unknown"
).strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
tag: List[str] = []
if tags_hint:
@@ -406,9 +418,13 @@ class Download_File(Cmdlet):
provider = _get_provider("telegram", config)
if provider is None:
raise DownloadError("Telegram provider not configured or not available (check telethon/app_id/api_hash)")
raise DownloadError(
"Telegram provider not configured or not available (check telethon/app_id/api_hash)"
)
sr = SearchResult(table="telegram", title=str(url), path=str(url), full_metadata={})
sr = SearchResult(
table="telegram", title=str(url), path=str(url), full_metadata={}
)
downloaded_path = None
telegram_info: Optional[Dict[str, Any]] = None
if hasattr(provider, "download_url"):
@@ -428,9 +444,15 @@ class Download_File(Cmdlet):
try:
chat_info_raw = telegram_info.get("chat")
msg_info_raw = telegram_info.get("message")
chat_info: Dict[str, Any] = chat_info_raw if isinstance(chat_info_raw, dict) else {}
msg_info: Dict[str, Any] = msg_info_raw if isinstance(msg_info_raw, dict) else {}
channel = str(chat_info.get("title") or chat_info.get("username") or "").strip()
chat_info: Dict[str, Any] = (
chat_info_raw if isinstance(chat_info_raw, dict) else {}
)
msg_info: Dict[str, Any] = (
msg_info_raw if isinstance(msg_info_raw, dict) else {}
)
channel = str(
chat_info.get("title") or chat_info.get("username") or ""
).strip()
post = msg_info.get("id")
except Exception:
channel = ""
@@ -479,7 +501,16 @@ class Download_File(Cmdlet):
p = urlparse(str(url))
h = (p.hostname or "").strip().lower()
path = (p.path or "").strip().lower()
if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")):
if "libgen" in h and any(
x in path
for x in (
"/edition.php",
"/file.php",
"/ads.php",
"/get.php",
"/series.php",
)
):
provider_name = "libgen"
except Exception:
pass
@@ -489,7 +520,9 @@ class Download_File(Cmdlet):
if provider_name == "openlibrary":
provider = get_provider("openlibrary", config)
if provider is None:
raise DownloadError("OpenLibrary provider not configured or not available")
raise DownloadError(
"OpenLibrary provider not configured or not available"
)
edition_id = self._openlibrary_edition_id_from_url(str(url))
title_hint = self._title_hint_from_url_slug(str(url))
@@ -512,7 +545,9 @@ class Download_File(Cmdlet):
# High-level steps for OpenLibrary borrow/download flow.
progress.begin_steps(5)
def _progress(kind: str, done: int, total: Optional[int], label: str) -> None:
def _progress(
kind: str, done: int, total: Optional[int], label: str
) -> None:
# kind:
# - "step": advance step text
# - "pages": update pipe percent/status
@@ -525,7 +560,9 @@ class Download_File(Cmdlet):
t = int(total) if isinstance(total, int) else 0
d = int(done) if isinstance(done, int) else 0
if t > 0:
pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0))
pct = int(
round((max(0, min(d, t)) / max(1, t)) * 100.0)
)
progress.set_percent(pct)
progress.set_status(f"downloading pages {d}/{t}")
else:
@@ -538,9 +575,15 @@ class Download_File(Cmdlet):
except Exception:
lbl = "download"
progress.begin_transfer(label=lbl, total=total)
progress.update_transfer(label=lbl, completed=done, total=total)
progress.update_transfer(
label=lbl, completed=done, total=total
)
try:
if isinstance(total, int) and total > 0 and int(done) >= int(total):
if (
isinstance(total, int)
and total > 0
and int(done) >= int(total)
):
progress.finish_transfer(label=lbl)
except Exception:
pass
@@ -590,12 +633,18 @@ class Download_File(Cmdlet):
exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
if callable(exec_fn):
ret = exec_fn(None, ["-provider", "libgen", "-query", fallback_query], config)
ret = exec_fn(
None,
["-provider", "libgen", "-query", fallback_query],
config,
)
try:
table = pipeline_context.get_last_result_table()
items = pipeline_context.get_last_result_items()
if table is not None:
pipeline_context.set_last_result_table_overlay(table, items)
pipeline_context.set_last_result_table_overlay(
table, items
)
except Exception:
pass
@@ -606,7 +655,10 @@ class Download_File(Cmdlet):
except Exception:
pass
log("[download-file] OpenLibrary URL could not be downloaded", file=sys.stderr)
log(
"[download-file] OpenLibrary URL could not be downloaded",
file=sys.stderr,
)
continue
# Generic provider URL handler (if a provider implements `download_url`).
@@ -734,11 +786,18 @@ class Download_File(Cmdlet):
full_metadata = get_field(item, "full_metadata")
target = get_field(item, "path") or get_field(item, "url")
if str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder":
if (
str(table or "").lower() == "alldebrid"
and str(media_kind or "").lower() == "folder"
):
magnet_id = None
if isinstance(full_metadata, dict):
magnet_id = full_metadata.get("magnet_id")
if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"):
if (
magnet_id is None
and isinstance(target, str)
and target.lower().startswith("alldebrid:magnet:")
):
try:
magnet_id = int(target.split(":")[-1])
except Exception:
@@ -748,12 +807,20 @@ class Download_File(Cmdlet):
provider = get_search_provider("alldebrid", config)
if provider is not None:
try:
files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)})
files = provider.search(
"*",
limit=10_000,
filters={"view": "files", "magnet_id": int(magnet_id)},
)
except Exception:
files = []
# If the magnet isn't ready, provider.search returns a single not-ready folder row.
if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder":
if (
files
and len(files) == 1
and getattr(files[0], "media_kind", "") == "folder"
):
detail = getattr(files[0], "detail", "")
log(
f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})",
@@ -761,7 +828,9 @@ class Download_File(Cmdlet):
)
else:
for sr in files:
expanded_items.append(sr.to_dict() if hasattr(sr, "to_dict") else sr)
expanded_items.append(
sr.to_dict() if hasattr(sr, "to_dict") else sr
)
continue
expanded_items.append(item)
@@ -784,7 +853,9 @@ class Download_File(Cmdlet):
get_search_provider = registry.get("get_search_provider")
SearchResult = registry.get("SearchResult")
expanded_items = self._expand_provider_items(piped_items=piped_items, registry=registry, config=config)
expanded_items = self._expand_provider_items(
piped_items=piped_items, registry=registry, config=config
)
for item in expanded_items:
try:
@@ -800,7 +871,11 @@ class Download_File(Cmdlet):
tags_list = None
full_metadata = get_field(item, "full_metadata")
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
if (
(not full_metadata)
and isinstance(item, dict)
and isinstance(item.get("extra"), dict)
):
extra_md = item["extra"].get("full_metadata")
if isinstance(extra_md, dict):
full_metadata = extra_md
@@ -832,7 +907,9 @@ class Download_File(Cmdlet):
if isinstance(md, dict):
magnet_name = md.get("magnet_name") or md.get("folder")
if not magnet_name:
magnet_name = str(get_field(item, "detail") or "").strip() or None
magnet_name = (
str(get_field(item, "detail") or "").strip() or None
)
magnet_dir_name = _sf(str(magnet_name)) if magnet_name else ""
@@ -845,7 +922,9 @@ class Download_File(Cmdlet):
base_tail_norm = _sf(base_tail).lower() if base_tail.strip() else ""
magnet_dir_norm = magnet_dir_name.lower() if magnet_dir_name else ""
if magnet_dir_name and (not base_tail_norm or base_tail_norm != magnet_dir_norm):
if magnet_dir_name and (
not base_tail_norm or base_tail_norm != magnet_dir_norm
):
output_dir = Path(output_dir) / magnet_dir_name
relpath = None
@@ -855,7 +934,11 @@ class Download_File(Cmdlet):
relpath = md["file"].get("_relpath")
if relpath:
parts = [p for p in str(relpath).replace("\\", "/").split("/") if p and p not in {".", ".."}]
parts = [
p
for p in str(relpath).replace("\\", "/").split("/")
if p and p not in {".", ".."}
]
# If the provider relpath already includes the magnet folder name as a
# root directory (common), strip it to prevent double nesting.
@@ -881,7 +964,11 @@ class Download_File(Cmdlet):
provider_sr = sr
# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
if (
downloaded_path is None
and attempted_provider_download
and str(table or "").lower() == "openlibrary"
):
availability = None
reason = None
if isinstance(full_metadata, dict):
@@ -898,7 +985,10 @@ class Download_File(Cmdlet):
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
if title_text:
log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr)
log(
f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
file=sys.stderr,
)
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
fallback_query = title_text
@@ -921,7 +1011,9 @@ class Download_File(Cmdlet):
table_obj = pipeline_context.get_last_result_table()
items_obj = pipeline_context.get_last_result_items()
if table_obj is not None:
pipeline_context.set_last_result_table_overlay(table_obj, items_obj)
pipeline_context.set_last_result_table_overlay(
table_obj, items_obj
)
except Exception:
pass
@@ -935,7 +1027,11 @@ class Download_File(Cmdlet):
continue
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
if (
downloaded_path is None
and isinstance(target, str)
and target.startswith("http")
):
# Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
# Never download these as "files".
if str(table or "").lower() == "libgen":
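As a rough illustration of the guard mentioned above (the actual check sits outside this hunk), a landing-page test could look like the following; the function name and the extension list are assumptions:

from urllib.parse import urlparse

def looks_like_landing_page(url: str) -> bool:
    # LibGen result links such as .../ads.php?md5=... serve HTML, not the file itself.
    path = urlparse(url).path.lower()
    return path.endswith((".php", ".html", ".htm"))

# looks_like_landing_page("http://libgen.rs/ads.php?md5=abc") -> True
# looks_like_landing_page("http://example.org/book.pdf")      -> False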
@@ -946,7 +1042,9 @@ class Download_File(Cmdlet):
file=sys.stderr,
)
continue
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
debug(
f"[download-file] Provider item looks like direct URL, downloading: {target}"
)
suggested_name = str(title).strip() if title is not None else None
result_obj = _download_direct_file(
target,
@@ -958,7 +1056,10 @@ class Download_File(Cmdlet):
downloaded_path = self._path_from_download_result(result_obj)
if downloaded_path is None:
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
log(
f"Cannot download item (no provider handler / unsupported target): {title or target}",
file=sys.stderr,
)
continue
# Allow providers to add/enrich tags and metadata during download.
@@ -1038,11 +1139,19 @@ class Download_File(Cmdlet):
# UX: In piped mode, allow a single positional arg to be the destination directory.
# Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo"
if had_piped_input and raw_url and len(raw_url) == 1 and (not parsed.get("path")) and (not parsed.get("output")):
if (
had_piped_input
and raw_url
and len(raw_url) == 1
and (not parsed.get("path"))
and (not parsed.get("output"))
):
candidate = str(raw_url[0] or "").strip()
low = candidate.lower()
looks_like_url = low.startswith(("http://", "https://", "ftp://"))
looks_like_provider = low.startswith(("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:"))
looks_like_provider = low.startswith(
("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:")
)
looks_like_windows_path = (
(len(candidate) >= 2 and candidate[1] == ":")
or candidate.startswith("\\\\")
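A condensed, self-contained sketch of this destination-vs-target heuristic; the prefix lists mirror the hunk above, while the label strings and the final fallback are assumptions for illustration:

def classify_positional(arg: str) -> str:
    low = arg.strip().lower()
    if low.startswith(("http://", "https://", "ftp://")):
        return "url"                      # download target
    if low.startswith(("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:")):
        return "provider"                 # provider URI, also a target
    # Drive-letter ("C:\...") and UNC ("\\server\share") arguments read as directories.
    if (len(arg) >= 2 and arg[1] == ":") or arg.startswith("\\\\"):
        return "directory"                # treat as destination for the piped items
    return "unknown"

# classify_positional(r"C:\Users\Me\Downloads\yoyo") -> "directory"
# classify_positional("https://example.org/file.bin") -> "url"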
@@ -1058,7 +1167,9 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
quiet_mode = (
bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
)
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
@@ -1082,7 +1193,9 @@ class Download_File(Cmdlet):
total_items = self._safe_total_items(raw_url, piped_items)
preview = self._build_preview(raw_url, piped_items, total_items)
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
progress.ensure_local_ui(
label="download-file", total_items=total_items, items_preview=preview
)
registry = self._load_provider_registry()
@@ -1155,6 +1268,7 @@ class Download_File(Cmdlet):
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Downloads"
@@ -1173,6 +1287,7 @@ class Download_File(Cmdlet):
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
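The hunk is cut off mid-loop; a complete version of the chunked-hash pattern it uses would presumably read like this (4096-byte blocks, as shown above):

import hashlib
from pathlib import Path

def compute_file_hash(filepath: Path) -> str:
    """Stream the file in 4096-byte blocks so large files never load fully into memory."""
    sha256_hash = hashlib.sha256()
    with open(filepath, "rb") as f:
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()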

File diff suppressed because it is too large

View File

@@ -17,6 +17,7 @@ from typing import Any, Dict, Optional, Sequence
from SYS.logger import log
from . import _shared as sh
class Download_Torrent(sh.Cmdlet):
"""Class-based download-torrent cmdlet with self-registration."""
@@ -27,10 +28,29 @@ class Download_Torrent(sh.Cmdlet):
usage="download-torrent <magnet|.torrent> [options]",
alias=["torrent", "magnet"],
arg=[
sh.CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
sh.CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
sh.CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
sh.CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
sh.CmdletArg(
name="magnet",
type="string",
required=False,
description="Magnet link or .torrent file/URL",
variadic=True,
),
sh.CmdletArg(
name="output",
type="string",
description="Output directory for downloaded files",
),
sh.CmdletArg(
name="wait",
type="float",
description="Wait time (seconds) for magnet processing timeout",
),
sh.CmdletArg(
name="background",
type="flag",
alias="bg",
description="Start download in background",
),
],
detail=["Download torrents/magnets via AllDebrid API."],
exec=self.run,
@@ -51,7 +71,10 @@ class Download_Torrent(sh.Cmdlet):
except Exception:
api_key = None
if not api_key:
log("AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)", file=sys.stderr)
log(
"AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)",
file=sys.stderr,
)
return 1
for magnet_url in magnet_args:
if background_mode:
@@ -64,7 +87,9 @@ class Download_Torrent(sh.Cmdlet):
magnet_id = self._submit_magnet(worker_id, magnet_url, api_key)
if magnet_id <= 0:
continue
self._start_background_magnet_worker(worker_id, magnet_id, output_dir, api_key, wait_timeout)
self._start_background_magnet_worker(
worker_id, magnet_id, output_dir, api_key, wait_timeout
)
log(f"⧗ Torrent processing started (ID: {magnet_id})")
return 0
@@ -80,7 +105,7 @@ class Download_Torrent(sh.Cmdlet):
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
magnet_id = int(magnet_info.get("id", 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return 0
@@ -90,7 +115,9 @@ class Download_Torrent(sh.Cmdlet):
log(f"[Worker {worker_id}] Magnet submit failed: {e}", file=sys.stderr)
return 0
def _start_background_magnet_worker(self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int) -> None:
def _start_background_magnet_worker(
self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int
) -> None:
thread = threading.Thread(
target=self._download_magnet_worker,
args=(worker_id, magnet_id, output_dir, api_key, wait_timeout),
@@ -119,7 +146,7 @@ class Download_Torrent(sh.Cmdlet):
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
if status.get("ready"):
break
time.sleep(5)
elapsed += 5
@@ -129,13 +156,13 @@ class Download_Torrent(sh.Cmdlet):
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
files_array = magnet_files.get("files", [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
file_url = file_info.get("link")
file_name = file_info.get("name")
if file_url and file_name:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
@@ -154,20 +181,22 @@ class Download_Torrent(sh.Cmdlet):
) -> None:
try:
from API.alldebrid import AllDebridClient
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
magnet_id = int(magnet_info.get("id", 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return
log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
# Poll for ready status (simplified)
import time
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
if status.get("ready"):
break
time.sleep(5)
elapsed += 5
@@ -176,13 +205,13 @@ class Download_Torrent(sh.Cmdlet):
return
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
files_array = magnet_files.get("files", [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
file_url = file_info.get("link")
file_name = file_info.get("name")
if file_url:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
@@ -193,8 +222,9 @@ class Download_Torrent(sh.Cmdlet):
def _download_file(url: str, dest: Path) -> None:
try:
import requests
resp = requests.get(url, stream=True)
with open(dest, 'wb') as f:
with open(dest, "wb") as f:
for chunk in resp.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
@@ -211,4 +241,5 @@ class Download_Torrent(sh.Cmdlet):
)
thread.start()
CMDLET = Download_Torrent()

View File

@@ -24,7 +24,7 @@ from config import resolve_output_dir
class Get_File(sh.Cmdlet):
"""Export files to local path via hash+store."""
def __init__(self) -> None:
"""Initialize get-file cmdlet."""
super().__init__(
@@ -39,13 +39,13 @@ class Get_File(sh.Cmdlet):
],
detail=[
"- Exports file from storage backend to local path",
"- Uses selected item's hash, or -query \"hash:<sha256>\"",
'- Uses selected item\'s hash, or -query "hash:<sha256>"',
"- Preserves file extension and metadata",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Export file via hash+store backend."""
debug(f"[get-file] run() called with result type: {type(result)}")
@@ -56,43 +56,45 @@ class Get_File(sh.Cmdlet):
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
debug(f"[get-file] file_hash={file_hash} store_name={store_name}")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
debug(f"[get-file] Getting storage backend: {store_name}")
# Get storage backend
store = Store(config)
backend = store[store_name]
debug(f"[get-file] Backend retrieved: {type(backend).__name__}")
# Get file metadata to determine name and extension
debug(f"[get-file] Getting metadata for hash...")
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"Error: File metadata not found for hash {file_hash}")
return 1
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
debug(
f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}"
)
def resolve_display_title() -> str:
candidates = [
@@ -110,16 +112,18 @@ class Get_File(sh.Cmdlet):
if text:
return text
return ""
debug(f"[get-file] Calling backend.get_file({file_hash})")
# Get file from backend (may return Path or URL string depending on backend)
source_path = backend.get_file(file_hash)
debug(f"[get-file] backend.get_file returned: {source_path}")
# Check if backend returned a URL (HydrusNetwork case)
if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")):
if isinstance(source_path, str) and (
source_path.startswith("http://") or source_path.startswith("https://")
):
# Hydrus backend returns a URL; open it only for this explicit user action.
try:
webbrowser.open(source_path)
@@ -127,20 +131,22 @@ class Get_File(sh.Cmdlet):
log(f"Error opening browser: {exc}", file=sys.stderr)
else:
debug(f"Opened in browser: {source_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": resolve_display_title() or "Opened",
})
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": resolve_display_title() or "Opened",
}
)
return 0
# Otherwise treat as file path (local/folder backends)
if isinstance(source_path, str):
source_path = Path(source_path)
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash}")
return 1
@@ -154,13 +160,15 @@ class Get_File(sh.Cmdlet):
ext_for_emit = metadata.get("ext") or source_path.suffix.lstrip(".")
self._open_file_default(source_path)
log(f"Opened: {source_path}", file=sys.stderr)
ctx.emit({
"hash": file_hash,
"store": store_name,
"path": str(source_path),
"title": str(display_title),
"ext": str(ext_for_emit or ""),
})
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"path": str(source_path),
"title": str(display_title),
"ext": str(ext_for_emit or ""),
}
)
debug("[get-file] Completed successfully")
return 0
@@ -177,32 +185,38 @@ class Get_File(sh.Cmdlet):
if output_name:
filename = output_name
else:
title = (metadata.get("title") if isinstance(metadata, dict) else None) or resolve_display_title() or "export"
title = (
(metadata.get("title") if isinstance(metadata, dict) else None)
or resolve_display_title()
or "export"
)
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith('.'):
ext = '.' + ext
if not ext.startswith("."):
ext = "." + ext
filename += ext
dest_path = self._unique_path(output_dir / filename)
# Copy file to destination
debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr)
shutil.copy2(source_path, dest_path)
log(f"Exported: {dest_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"path": str(dest_path),
"title": filename,
})
ctx.emit(
{
"hash": file_hash,
"store": store_name,
"path": str(dest_path),
"title": filename,
}
)
debug(f"[get-file] Completed successfully")
return 0
@@ -216,7 +230,17 @@ class Get_File(sh.Cmdlet):
if self._open_local_file_in_browser_via_http(path):
return
if suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tif", ".tiff", ".svg"}:
if suffix in {
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".tif",
".tiff",
".svg",
}:
# Use default web browser for images.
if self._open_image_in_default_browser(path):
return
@@ -225,9 +249,13 @@ class Get_File(sh.Cmdlet):
os.startfile(str(path)) # type: ignore[attr-defined]
return
if sys.platform == "darwin":
subprocess.Popen(["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.Popen(
["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
return
subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
subprocess.Popen(
["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)
except Exception as exc:
log(f"Error opening file: {exc}", file=sys.stderr)
@@ -282,7 +310,9 @@ class Get_File(sh.Cmdlet):
url = f"http://127.0.0.1:{port}/{quote(filename)}"
# Run server in the background.
server_thread = threading.Thread(target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True)
server_thread = threading.Thread(
target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True
)
server_thread.start()
# Auto-shutdown after a timeout to avoid lingering servers.
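The background-server-with-auto-shutdown arrangement sketched in these comments can be reproduced standalone roughly as follows; the function name, the 30-second default, and the use of ThreadingHTTPServer are assumptions:

import threading
from functools import partial
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer

def serve_briefly(directory: str, timeout_s: float = 30.0) -> str:
    handler = partial(SimpleHTTPRequestHandler, directory=directory)
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), handler)  # port 0 picks a free port
    threading.Thread(target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True).start()
    # Shut the server down after a timeout so nothing lingers once the browser has fetched the file.
    threading.Timer(timeout_s, httpd.shutdown).start()
    return f"http://127.0.0.1:{httpd.server_address[1]}/"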
@@ -326,10 +356,10 @@ class Get_File(sh.Cmdlet):
"\n".join(
[
"<!doctype html>",
"<meta charset=\"utf-8\">",
'<meta charset="utf-8">',
f"<title>{resolved.name}</title>",
"<style>html,body{margin:0;padding:0;background:#000}img{display:block;max-width:100vw;max-height:100vh;margin:auto}</style>",
f"<img src=\"{image_url}\" alt=\"{resolved.name}\">",
f'<img src="{image_url}" alt="{resolved.name}">',
]
),
encoding="utf-8",
@@ -346,29 +376,29 @@ class Get_File(sh.Cmdlet):
return bool(webbrowser.open(wrapper_url))
except Exception:
return False
def _sanitize_filename(self, name: str) -> str:
"""Sanitize filename by removing invalid characters."""
allowed_chars = []
for ch in str(name):
if ch.isalnum() or ch in {'-', '_', ' ', '.'}:
if ch.isalnum() or ch in {"-", "_", " ", "."}:
allowed_chars.append(ch)
else:
allowed_chars.append(' ')
allowed_chars.append(" ")
# Collapse multiple spaces
sanitized = ' '.join(''.join(allowed_chars).split())
sanitized = " ".join("".join(allowed_chars).split())
return sanitized or "export"
def _unique_path(self, path: Path) -> Path:
"""Generate unique path by adding (1), (2), etc. if file exists."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
new_path = parent / f"{stem} ({counter}){suffix}"

View File

@@ -26,7 +26,7 @@ class Get_Metadata(Cmdlet):
super().__init__(
name="get-metadata",
summary="Print metadata for files by hash and storage backend.",
usage="get-metadata [-query \"hash:<sha256>\"] [-store <backend>]",
usage='get-metadata [-query "hash:<sha256>"] [-store <backend>]',
alias=["meta"],
arg=[
SharedArgs.QUERY,
@@ -52,15 +52,16 @@ class Get_Metadata(Cmdlet):
explicit = meta.get("time_imported")
if isinstance(explicit, (int, float)):
return int(explicit)
# Try parsing string timestamps
if isinstance(explicit, str):
try:
import datetime as _dt
return int(_dt.datetime.fromisoformat(explicit).timestamp())
except Exception:
pass
return None
@staticmethod
@@ -70,14 +71,24 @@ class Get_Metadata(Cmdlet):
return ""
try:
import datetime as _dt
return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return ""
@staticmethod
def _build_table_row(title: str, store: str, path: str, mime: str, size_bytes: Optional[int],
dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str],
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
def _build_table_row(
title: str,
store: str,
path: str,
mime: str,
size_bytes: Optional[int],
dur_seconds: Optional[int],
imported_ts: Optional[int],
url: list[str],
hash_value: Optional[str],
pages: Optional[int] = None,
) -> Dict[str, Any]:
"""Build a table row dict with metadata fields."""
size_mb = None
size_int: Optional[int] = None
@@ -156,34 +167,38 @@ class Get_Metadata(Cmdlet):
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
return 1
# Get hash and store from parsed args or result
file_hash = query_hash or get_field(result, "hash")
storage_source = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
return 1
if not storage_source:
log("No storage backend specified - use -store to specify", file=sys.stderr)
return 1
# Use storage backend to get metadata
try:
from Store import Store
storage = Store(config)
backend = storage[storage_source]
# Get metadata from backend
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"No metadata found for hash {file_hash[:8]}... in {storage_source}", file=sys.stderr)
log(
f"No metadata found for hash {file_hash[:8]}... in {storage_source}",
file=sys.stderr,
)
return 1
# Extract title from tags if available
title = get_field(result, "title") or file_hash[:16]
if not get_field(result, "title"):
@@ -196,7 +211,7 @@ class Get_Metadata(Cmdlet):
break
except Exception:
pass
# Extract metadata fields
mime_type = metadata.get("mime") or metadata.get("ext", "")
file_size = metadata.get("size")
@@ -224,13 +239,15 @@ class Get_Metadata(Cmdlet):
if len(nums) == 2:
duration_seconds = float(nums[0] * 60 + nums[1])
else:
duration_seconds = float(nums[0] * 3600 + nums[1] * 60 + nums[2])
duration_seconds = float(
nums[0] * 3600 + nums[1] * 60 + nums[2]
)
else:
duration_seconds = None
pages = metadata.get("pages")
url = metadata.get("url") or []
imported_ts = self._extract_imported_ts(metadata)
# Normalize url
if isinstance(url, str):
try:
@@ -239,7 +256,7 @@ class Get_Metadata(Cmdlet):
url = []
if not isinstance(url, list):
url = []
# Build display row
row = self._build_table_row(
title=title,
@@ -253,14 +270,14 @@ class Get_Metadata(Cmdlet):
hash_value=file_hash,
pages=pages,
)
table_title = f"get-metadata: {title}" if title else "get-metadata"
table = ResultTable(table_title).init_command(table_title, "get-metadata", list(args))
self._add_table_body_row(table, row)
ctx.set_last_result_table_overlay(table, [row], row)
ctx.emit(row)
return 0
except KeyError:
log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
return 1

View File

@@ -25,7 +25,7 @@ class Get_Note(Cmdlet):
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-query \"hash:<sha256>\"]",
usage='get-note -store <store> [-query "hash:<sha256>"]',
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
@@ -43,7 +43,9 @@ class Get_Note(Cmdlet):
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
def _resolve_hash(
self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]
) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
@@ -76,7 +78,10 @@ class Get_Note(Cmdlet):
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
log(
'[get_note] Error: Requires piped item(s) or -store and -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
store_registry = Store(config)
@@ -144,5 +149,3 @@ class Get_Note(Cmdlet):
CMDLET = Get_Note()

View File

@@ -29,7 +29,7 @@ from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-query \"hash:<sha256>\"]",
usage='get-relationship [-query "hash:<sha256>"]',
alias=[],
arg=[
SharedArgs.QUERY,
@@ -40,6 +40,7 @@ CMDLET = Cmdlet(
],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(_args):
@@ -64,44 +65,63 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
i += 1
override_hash: str | None = sh.parse_single_hash_query(override_query) if override_query else None
override_hash: str | None = (
sh.parse_single_hash_query(override_query) if override_query else None
)
if override_query and not override_hash:
log("get-relationship requires -query \"hash:<sha256>\"", file=sys.stderr)
log('get-relationship requires -query "hash:<sha256>"', file=sys.stderr)
return 1
# Handle @N selection which creates a list
# This cmdlet is single-subject; require disambiguation when multiple items are provided.
if isinstance(result, list):
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -query \"hash:<sha256>\"", file=sys.stderr)
log(
'get-relationship expects a single item; select one row (e.g. @1) or pass -query "hash:<sha256>"',
file=sys.stderr,
)
return 1
else:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, store}
found_relationships = [] # List of dicts: {hash, type, title, path, store}
source_title = "Unknown"
def _add_relationship(entry: Dict[str, Any]) -> None:
"""Add relationship if not already present by hash or path."""
for existing in found_relationships:
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
if (
entry.get("hash")
and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower()
):
return
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
if (
entry.get("path")
and str(existing.get("path", "")).lower() == str(entry["path"]).lower()
):
return
found_relationships.append(entry)
# Store/hash-first subject resolution
store_name: Optional[str] = override_store
if not store_name:
store_name = get_field(result, "store")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
hash_hex = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_hash_for_operation(None, result))
)
if not source_title or source_title == "Unknown":
source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown")
source_title = (
get_field(result, "title")
or get_field(result, "name")
or (hash_hex[:16] + "..." if hash_hex else "Unknown")
)
local_db_checked = False
@@ -113,7 +133,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Folder store relationships
# IMPORTANT: only treat the Folder backend as a local DB store.
# Other backends may expose a location() method but are not SQLite folder stores.
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
if (
type(backend).__name__ == "Folder"
and hasattr(backend, "location")
and callable(getattr(backend, "location"))
):
storage_path = Path(str(backend.location()))
with API_folder_store(storage_path) as db:
local_db_checked = True
@@ -142,7 +166,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not related_hash or related_hash == hash_hex:
continue
entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type)
entry_type = (
"king" if str(rel_type).lower() == "alt" else str(rel_type)
)
if entry_type == "king":
king_hashes.append(related_hash)
@@ -156,13 +182,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
pass
_add_relationship({
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
})
_add_relationship(
{
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
}
)
# Reverse relationships (alts pointing to this hash)
try:
@@ -187,13 +215,15 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pass
entry_type = "alt" if rel_type == "alt" else (rel_type or "related")
_add_relationship({
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
})
_add_relationship(
{
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
}
)
# Siblings (alts that share the same king)
for king_hash in king_hashes:
@@ -218,24 +248,30 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pass
entry_type = "alt" if sib_type == "alt" else (sib_type or "related")
_add_relationship({
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
})
_add_relationship(
{
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
}
)
except Exception as e:
log(f"Error checking store relationships: {e}", file=sys.stderr)
# If we found local relationships, we can stop or merge with Hydrus?
# For now, if we found local ones, let's show them.
# For now, if we found local ones, let's show them.
# But if the file is also in Hydrus, we might want those too.
# Let's try Hydrus if we have a hash.
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
hash_hex = (
normalize_hash(override_hash)
if override_hash
else normalize_hash(get_hash_for_operation(None, result))
)
if hash_hex and not local_db_checked:
try:
client = None
@@ -274,7 +310,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if backend_obj is not None and hasattr(backend_obj, "get_tag"):
try:
tag_result = backend_obj.get_tag(h)
tags = tag_result[0] if isinstance(tag_result, tuple) and tag_result else tag_result
tags = (
tag_result[0]
if isinstance(tag_result, tuple) and tag_result
else tag_result
)
if isinstance(tags, list):
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
@@ -308,10 +348,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if isinstance(storage, dict):
for group in storage.values():
if isinstance(group, list):
tag_candidates.extend([str(x) for x in group if isinstance(x, str)])
tag_candidates.extend(
[str(x) for x in group if isinstance(x, str)]
)
display = svc_data.get("display_tags")
if isinstance(display, list):
tag_candidates.extend([str(x) for x in display if isinstance(x, str)])
tag_candidates.extend(
[str(x) for x in display if isinstance(x, str)]
)
flat = meta.get("tags_flat")
if isinstance(flat, list):
tag_candidates.extend([str(x) for x in flat if isinstance(x, str)])
@@ -331,7 +375,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if rel:
file_rels = rel.get("file_relationships", {})
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names.
# For /manage_file_relationships/get_file_relationships, the Hydrus docs define:
@@ -353,16 +397,25 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Some Hydrus responses provide a direct king hash under the 'king' key.
if key == "king":
king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None
king_hash = (
normalize_hash(rel_value)
if isinstance(rel_value, str)
else None
)
if king_hash and king_hash != hash_hex:
if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships):
found_relationships.append({
"hash": king_hash,
"type": "king",
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
})
if not any(
str(r.get("hash", "")).lower() == king_hash
for r in found_relationships
):
found_relationships.append(
{
"hash": king_hash,
"type": "king",
"title": _resolve_related_title(king_hash),
"path": None,
"store": store_label,
}
)
continue
rel_name = rel_map.get(key, f"type-{key}")
@@ -370,29 +423,43 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# The relationship value is typically a list of hashes.
if isinstance(rel_value, list):
for rel_hash in rel_value:
rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None
rel_hash_norm = (
normalize_hash(rel_hash)
if isinstance(rel_hash, str)
else None
)
if not rel_hash_norm or rel_hash_norm == hash_hex:
continue
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
if not any(
str(r.get("hash", "")).lower() == rel_hash_norm
for r in found_relationships
):
found_relationships.append(
{
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
}
)
# Defensive: sometimes the API may return a single hash string.
elif isinstance(rel_value, str):
rel_hash_norm = normalize_hash(rel_value)
if rel_hash_norm and rel_hash_norm != hash_hex:
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
})
if not any(
str(r.get("hash", "")).lower() == rel_hash_norm
for r in found_relationships
):
found_relationships.append(
{
"hash": rel_hash_norm,
"type": rel_name,
"title": _resolve_related_title(rel_hash_norm),
"path": None,
"store": store_label,
}
)
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
@@ -402,66 +469,67 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
try:
from rich.panel import Panel
from rich_display import stdout_console
title = source_title or (hash_hex[:16] + "..." if hash_hex else "Item")
stdout_console().print(Panel(f"{title} has no relationships", title="Relationships"))
except Exception:
log("No relationships found.")
return 0
# Display results
table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])
# Sort by type then title
# Custom sort order: King first, then Derivative, then others
def type_sort_key(item):
t = item['type'].lower()
if t == 'king':
t = item["type"].lower()
if t == "king":
return 0
elif t == 'derivative':
elif t == "derivative":
return 1
elif t in {'alternative', 'alternate', 'alt'}:
elif t in {"alternative", "alternate", "alt"}:
return 2
elif t == 'duplicate':
elif t == "duplicate":
return 3
else:
return 4
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
found_relationships.sort(key=lambda x: (type_sort_key(x), x["title"]))
pipeline_results = []
for i, item in enumerate(found_relationships):
row = table.add_row()
row.add_column("Type", item['type'].title())
row.add_column("Title", item['title'])
row.add_column("Type", item["type"].title())
row.add_column("Title", item["title"])
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
row.add_column("Store", item['store'])
row.add_column("Store", item["store"])
# Create result object for pipeline
res_obj = {
"title": item['title'],
"hash": item['hash'],
"file_hash": item['hash'],
"relationship_type": item['type'],
"store": item['store']
"title": item["title"],
"hash": item["hash"],
"file_hash": item["hash"],
"relationship_type": item["type"],
"store": item["store"],
}
# Target is always hash in store/hash-first mode
res_obj["target"] = item['hash']
res_obj["target"] = item["hash"]
pipeline_results.append(res_obj)
# Set selection args
table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"])
table.set_row_selection_args(
i, ["-store", str(item["store"]), "-query", f"hash:{item['hash']}"]
)
ctx.set_last_result_table(table, pipeline_results)
from rich_display import stdout_console
stdout_console().print(table)
return 0
CMDLET.exec = _run
CMDLET.register()

File diff suppressed because it is too large

View File

@@ -1,8 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Sequence
from typing import Any, Dict, List, Sequence, Optional, Set, Tuple
import sys
import re
from fnmatch import fnmatch
from urllib.parse import urlparse
import pipeline as ctx
from . import _shared as sh
@@ -26,51 +29,199 @@ class UrlItem:
class Get_Url(Cmdlet):
"""Get url associated with files via hash+store."""
"""Get url associated with files via hash+store, or search urls by pattern."""
def __init__(self) -> None:
super().__init__(
name="get-url",
summary="List url associated with a file",
usage="@1 | get-url",
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
],
summary="List url associated with a file, or search urls by pattern",
usage='@1 | get-url OR get-url -url "https://www.youtube.com/watch?v=xx"',
arg=[SharedArgs.QUERY, SharedArgs.STORE, SharedArgs.URL],
detail=[
"- Lists all url associated with file identified by hash+store",
"- Get url for file: @1 | get-url (requires hash+store from result)",
'- Search url across stores: get-url -url "www.google.com" (strips protocol & www prefix)',
'- Wildcard matching: get-url -url "youtube.com*" (matches all youtube.com urls)',
"- Pattern matching: domain matching ignores protocol (https://, http://, ftp://)",
],
exec=self.run,
)
self.register()
@staticmethod
def _normalize_url_for_search(url: str) -> str:
"""Strip protocol and www prefix from URL for searching.
Examples:
https://www.youtube.com/watch?v=xx -> youtube.com/watch?v=xx
http://www.google.com -> google.com
ftp://files.example.com -> files.example.com
"""
url = str(url or "").strip()
# Remove protocol (http://, https://, ftp://, etc.)
url = re.sub(r"^[a-z][a-z0-9+.-]*://", "", url, flags=re.IGNORECASE)
# Remove www. prefix (case-insensitive)
url = re.sub(r"^www\.", "", url, flags=re.IGNORECASE)
return url.lower()
@staticmethod
def _match_url_pattern(url: str, pattern: str) -> bool:
"""Match URL against pattern with wildcard support.
Strips protocol/www from both URL and pattern before matching.
Supports * and ? wildcards.
"""
normalized_url = Get_Url._normalize_url_for_search(url)
normalized_pattern = Get_Url._normalize_url_for_search(pattern)
# Use fnmatch for wildcard matching (* and ?)
return fnmatch(normalized_url, normalized_pattern)
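    # A tiny standalone check of the normalize-then-fnmatch behaviour these two helpers
    # describe (same regexes as above; the example URLs are assumptions):
    #
    #   import re
    #   from fnmatch import fnmatch
    #
    #   def normalize(url: str) -> str:
    #       url = re.sub(r"^[a-z][a-z0-9+.-]*://", "", url.strip(), flags=re.IGNORECASE)
    #       return re.sub(r"^www\.", "", url, flags=re.IGNORECASE).lower()
    #
    #   # Protocol and the "www." prefix are ignored; * and ? wildcards apply to the remainder.
    #   assert fnmatch(normalize("https://www.youtube.com/watch?v=xx"), normalize("youtube.com*"))
    #   assert not fnmatch(normalize("https://vimeo.com/123"), normalize("youtube.com*"))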
def _search_urls_across_stores(
self, pattern: str, config: Dict[str, Any]
) -> Tuple[List[UrlItem], List[str]]:
"""Search for URLs matching pattern across all stores.
Returns:
Tuple of (matching_items, found_stores)
"""
items: List[UrlItem] = []
found_stores: Set[str] = set()
try:
storage = Store(config)
store_names = storage.list_backends() if hasattr(storage, "list_backends") else []
if not store_names:
log("Error: No stores configured", file=sys.stderr)
return items, list(found_stores)
for store_name in store_names:
try:
backend = storage[store_name]
# Try to search files in this backend
# For now, we'll iterate through known files (this is a limitation)
# Each backend should ideally support get_all_files() or similar
# For now, we use search with a broad query to find candidates
try:
# Try to get files via search (backend-specific)
search_results = backend.search("*", limit=1000)
if search_results:
for result in search_results:
file_hash = result.get("hash") or result.get("file_hash")
if not file_hash:
continue
try:
urls = backend.get_url(file_hash)
if urls:
for url in urls:
if self._match_url_pattern(str(url), pattern):
items.append(
UrlItem(
url=str(url),
hash=file_hash,
store=store_name,
)
)
found_stores.add(store_name)
except Exception:
pass
except Exception:
# Backend might not support search; skip
pass
except KeyError:
continue
except Exception as exc:
debug(f"Error searching store '{store_name}': {exc}", file=sys.stderr)
continue
return items, list(found_stores)
except Exception as exc:
log(f"Error searching stores: {exc}", file=sys.stderr)
return items, []
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get url for file via hash+store backend."""
"""Get url for file via hash+store, or search urls by pattern."""
parsed = parse_cmdlet_args(args, self)
# Check if user provided a URL pattern to search for
search_pattern = parsed.get("url")
if search_pattern:
# URL search mode: find all files with matching URLs across stores
items, stores_searched = self._search_urls_across_stores(search_pattern, config)
if not items:
log(f"No urls matching pattern: {search_pattern}", file=sys.stderr)
return 1
# Create result table
from result_table import ResultTable
table = (
ResultTable("URL Search Results", max_columns=3)
.set_preserve_order(True)
.set_table("urls")
.set_value_case("preserve")
)
table.set_source_command("get-url", ["-url", search_pattern])
# Group by store for display
by_store: Dict[str, List[UrlItem]] = {}
for item in items:
if item.store not in by_store:
by_store[item.store] = []
by_store[item.store].append(item)
# Add rows grouped by store
for store_name in sorted(by_store.keys()):
store_items = by_store[store_name]
for idx, item in enumerate(store_items):
row = table.add_row()
if idx == 0:
row.add_column("Store", store_name)
else:
row.add_column("Store", "")
row.add_column("Url", item.url)
# Normalize for display
normalized = self._normalize_url_for_search(item.url)
row.add_column("Hash", item.hash[:16]) # Show first 16 chars
ctx.emit(item)
ctx.set_last_result_table(table if items else None, items, subject=result)
log(f"Found {len(items)} matching url(s) in {len(stores_searched)} store(s)")
return 0
# Original mode: Get URLs for a specific file by hash+store
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
log('Error: No file hash provided (pipe an item or use -query "hash:<sha256>")')
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Get backend and retrieve url
try:
storage = Store(config)
@@ -111,7 +262,7 @@ class Get_Url(Cmdlet):
log("No url found", file=sys.stderr)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
@@ -120,6 +271,13 @@ class Get_Url(Cmdlet):
return 1
# Import debug function from logger if available
try:
from SYS.logger import debug
except ImportError:
def debug(*args, **kwargs):
pass # Fallback no-op
CMDLET = Get_Url()

File diff suppressed because it is too large

View File

@@ -37,7 +37,6 @@ import pipeline as pipeline_context
# ============================================================================
# ============================================================================
# Playwright & Screenshot Dependencies
# ============================================================================
@@ -104,7 +103,6 @@ SITE_SELECTORS: Dict[str, List[str]] = {
}
class ScreenshotError(RuntimeError):
"""Raised when screenshot capture or upload fails."""
@@ -146,6 +144,7 @@ class ScreenshotResult:
# Helper Functions
# ============================================================================
def _slugify_url(url: str) -> str:
"""Convert URL to filesystem-safe slug."""
parsed = urlsplit(url)
@@ -172,7 +171,11 @@ def _tags_from_url(url: str) -> List[str]:
parsed = None
try:
parsed = urlsplit(u)
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
host = (
str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "")
.strip()
.lower()
)
except Exception:
parsed = None
host = ""
@@ -300,7 +303,12 @@ def _convert_to_webp(
except Exception:
w, h = 0, 0
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
if (
downscale_if_oversize
and isinstance(max_dim, int)
and max_dim > 0
and (w > max_dim or h > max_dim)
):
scale = 1.0
try:
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
@@ -320,7 +328,9 @@ def _convert_to_webp(
im = im.resize((new_w, new_h), resample=resample)
did_downscale = True
except Exception as exc:
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
debug(
f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}"
)
im.save(tmp_path, **save_kwargs)
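A worked example of the downscale arithmetic above, with assumed dimensions: for an 8192x6144 source and max_dim 4096, the scale factor is min(4096/8192, 4096/6144) = 0.5, so the resize lands at 4096x3072 before the WEBP save.

w, h, max_dim = 8192, 6144, 4096
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
new_w = max(1, int(w * scale))   # 4096
new_h = max(1, int(h * scale))   # 3072
assert (new_w, new_h) == (4096, 3072)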
@@ -332,6 +342,7 @@ def _convert_to_webp(
except Exception:
pass
def _matched_site_selectors(url: str) -> List[str]:
"""Return SITE_SELECTORS for a matched domain; empty if no match.
@@ -355,7 +366,9 @@ def _selectors_for_url(url: str) -> List[str]:
return _matched_site_selectors(url)
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
def _platform_preprocess(
url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000
) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
try:
u = str(url or "").lower()
@@ -373,14 +386,16 @@ def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: i
return clicks
# Dismiss common cookie / consent prompts.
_try_click_buttons([
"Accept all",
"Accept",
"I agree",
"Agree",
"Allow all",
"OK",
])
_try_click_buttons(
[
"Accept all",
"Accept",
"I agree",
"Agree",
"Allow all",
"OK",
]
)
# Some sites need small nudges (best-effort).
if "reddit.com" in u:
@@ -490,7 +505,9 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
return unique_path(path)
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
def _capture(
options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress
) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
try:
@@ -499,16 +516,24 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
try:
current_browser = getattr(tool.defaults, "browser", "").lower() if getattr(tool, "defaults", None) is not None else ""
current_browser = (
getattr(tool.defaults, "browser", "").lower()
if getattr(tool, "defaults", None) is not None
else ""
)
if current_browser != "chromium":
debug(f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet")
debug(
f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet"
)
base_cfg = {}
try:
base_cfg = dict(getattr(tool, "_config", {}) or {})
except Exception:
base_cfg = {}
tool_block = dict(base_cfg.get("tool") or {}) if isinstance(base_cfg, dict) else {}
pw_block = dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
pw_block = (
dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
)
pw_block["browser"] = "chromium"
tool_block["playwright"] = pw_block
if isinstance(base_cfg, dict):
@@ -523,7 +548,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
@@ -539,7 +564,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
warnings.append("navigation timeout; capturing current page state")
debug("Navigation timeout; proceeding with current state")
progress.step("loading navigation timeout")
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
@@ -549,7 +574,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
debug("Article element not found; using fallback")
if options.wait_after_load > 0:
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
@@ -591,7 +616,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
for sel in selectors:
try:
debug(f"Trying selector: {sel}")
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
el = page.wait_for_selector(
sel, timeout=max(0, int(options.selector_timeout_ms))
)
except PlaywrightTimeoutError:
debug(f"Selector not found: {sel}")
continue
@@ -604,7 +631,10 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
pass
progress.step("capturing output")
debug(f"Capturing element to {destination}...")
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
el.screenshot(
path=str(destination),
type=("jpeg" if format_name == "jpeg" else None),
)
element_captured = True
debug("Element captured successfully")
break
@@ -645,8 +675,13 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str],
except Exception as exc:
debug(f"[_capture] Exception launching browser/page: {exc}")
msg = str(exc).lower()
if any(k in msg for k in ["executable", "not found", "no such file", "cannot find", "install"]):
raise ScreenshotError("Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium") from exc
if any(
k in msg
for k in ["executable", "not found", "no such file", "cannot find", "install"]
):
raise ScreenshotError(
"Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium"
) from exc
raise
except ScreenshotError:
# Re-raise ScreenshotError raised intentionally (do not wrap)
@@ -666,7 +701,9 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
will_target = bool(options.prefer_platform_target) and requested_format != "pdf"
will_convert = requested_format == "webp"
will_archive = bool(options.archive and options.url)
total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
total_steps = (
9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
)
progress.begin_steps(total_steps)
progress.step("loading starting")
@@ -726,19 +763,20 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of url in the pipeline.
Accepts:
- Single result object (dict or PipeObject) with 'path' field
- List of result objects to screenshot each
- Direct URL as string
Emits PipeObject-formatted results for each screenshot with:
- action: 'cmdlet:screen-shot'
- is_temp: True (screenshots are temporary artifacts)
- parent_id: hash of the original file/URL
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
@@ -761,9 +799,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
if not format_value:
# Default format can be set via config.conf tool block:
@@ -782,7 +820,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
archive_enabled = parsed.get("archive", False)
# Positional URL argument (if provided)
url_arg = parsed.get("url")
positional_url = [str(url_arg)] if url_arg else []
@@ -801,15 +839,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Extract url from piped results
if piped_results:
for item in piped_results:
url = (
get_field(item, 'path')
or get_field(item, 'url')
or get_field(item, 'target')
)
url = get_field(item, "path") or get_field(item, "url") or get_field(item, "target")
if url:
url_to_process.append((str(url), item))
if not url_to_process:
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
return 1
@@ -819,9 +853,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
# ========================================================================
screenshot_dir: Optional[Path] = None
# Primary: Use --storage if provided (highest priority)
if storage_value:
try:
@@ -830,7 +864,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
# Secondary: Use config-based resolver ONLY if --storage not provided
if screenshot_dir is None and resolve_output_dir is not None:
try:
@@ -838,7 +872,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
debug(f"[screen_shot] Using config resolver: {screenshot_dir}")
except Exception:
pass
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
if screenshot_dir is None and config and config.get("outfile"):
try:
@@ -846,12 +880,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
debug(f"[screen_shot] Using config outfile: {screenshot_dir}")
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
debug(f"[screen_shot] Using default directory: {screenshot_dir}")
ensure_directory(screenshot_dir)
# If the caller isn't running the shared pipeline Live progress UI (e.g. direct
@@ -869,21 +903,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
# PREPARE SCREENSHOT OPTIONS
# ========================================================================
format_name = _normalise_format(format_value)
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
manual_target_selectors = filtered_selectors if filtered_selectors else None
all_emitted = []
exit_code = 0
# ========================================================================
# PROCESS url AND CAPTURE SCREENSHOTS
# ========================================================================
def _extract_item_tags(item: Any) -> List[str]:
if item is None:
return []
raw = get_field(item, 'tag')
raw = get_field(item, "tag")
if isinstance(raw, list):
return [str(t) for t in raw if t is not None and str(t).strip()]
if isinstance(raw, str) and raw.strip():
@@ -913,7 +947,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
continue
try:
# Create screenshot with provided options
# Force the Playwright engine to Chromium for the screen-shot cmdlet
@@ -966,28 +1000,32 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
options.prefer_platform_target = True
options.target_selectors = auto_selectors
debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")
screenshot_result = _capture_screenshot(options, progress)
# Log results and warnings
debug(f"Screenshot captured to {screenshot_result.path}")
if screenshot_result.archive_url:
debug(f"Archives: {', '.join(screenshot_result.archive_url)}")
for warning in screenshot_result.warnings:
debug(f"Warning: {warning}")
# Compute hash of screenshot file
screenshot_hash = None
try:
with open(screenshot_result.path, 'rb') as f:
with open(screenshot_result.path, "rb") as f:
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
except Exception:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
capture_date = ""
try:
capture_date = datetime.fromtimestamp(screenshot_result.path.stat().st_mtime).date().isoformat()
capture_date = (
datetime.fromtimestamp(screenshot_result.path.stat().st_mtime)
.date()
.isoformat()
)
except Exception:
capture_date = datetime.now().date().isoformat()
@@ -997,7 +1035,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
upstream_tags = _extract_item_tags(origin_item)
filtered_upstream_tags = [
t for t in upstream_tags
t
for t in upstream_tags
if not str(t).strip().lower().startswith(("type:", "date:"))
]
@@ -1007,40 +1046,41 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
pipe_obj = create_pipe_object_result(
source='screenshot',
store='PATH',
source="screenshot",
store="PATH",
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
cmdlet_name="screen-shot",
title=display_title,
hash_value=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
tag=merged_tags,
extra={
'source_url': url,
'archive_url': screenshot_result.archive_url,
'url': screenshot_result.url,
'target': str(screenshot_result.path), # Explicit target for add-file
}
"source_url": url,
"archive_url": screenshot_result.archive_url,
"url": screenshot_result.url,
"target": str(screenshot_result.path), # Explicit target for add-file
},
)
# Emit the result so downstream cmdlet (like add-file) can use it
pipeline_context.emit(pipe_obj)
all_emitted.append(pipe_obj)
# If we created a local progress UI, advance it per completed item.
progress.on_emit(pipe_obj)
except ScreenshotError as exc:
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
exit_code = 1
except Exception as exc:
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
progress.close_local_ui(force_complete=True)
if not all_emitted:
@@ -1051,6 +1091,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)")
return exit_code
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
@@ -1058,16 +1100,17 @@ CMDLET = Cmdlet(
alias=["screenshot", "ss"],
arg=[
SharedArgs.URL,
CmdletArg(name="format", type="string", description="Output format: webp, png, jpeg, or pdf"),
CmdletArg(
name="format", type="string", description="Output format: webp, png, jpeg, or pdf"
),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.PATH
SharedArgs.PATH,
],
detail=[
"Uses Playwright Chromium engine only. Install Chromium with: python ./scripts/bootstrap.py --playwright-only --browsers chromium",
"PDF output requires headless Chromium (the cmdlet will enforce headless mode for PDF).",
"Screenshots are temporary artifacts stored in the configured `temp` directory.",
]
],
)
CMDLET.exec = _run
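For orientation, a minimal sketch (not part of this commit) of the metadata derivation used in the hunk above: a sha256 digest of the captured file, a capture date taken from the file mtime with a fallback to today, and upstream tags filtered of type:/date: prefixes. The helper name describe_capture is hypothetical.

import hashlib
from datetime import datetime
from pathlib import Path
from typing import Dict, List


def describe_capture(path: Path, upstream_tags: List[str]) -> Dict[str, object]:
    # sha256 of the captured file, mirroring the screenshot_hash computation above
    digest = hashlib.sha256(path.read_bytes()).hexdigest()
    # capture date from mtime, falling back to today if stat() fails
    try:
        capture_date = datetime.fromtimestamp(path.stat().st_mtime).date().isoformat()
    except Exception:
        capture_date = datetime.now().date().isoformat()
    # drop inherited type:/date: tags so the screenshot artifact can carry its own
    kept = [t for t in upstream_tags if not str(t).strip().lower().startswith(("type:", "date:"))]
    return {"hash": digest, "date": capture_date, "tag": kept}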
View File
@@ -1,4 +1,5 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
@@ -25,37 +26,54 @@ try:
except Exception: # pragma: no cover
get_local_storage_path = None # type: ignore
class Search_Provider(Cmdlet):
"""Search external content providers."""
def __init__(self):
super().__init__(
name="search-provider",
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
CmdletArg(
"provider",
type="string",
required=True,
description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive",
),
CmdletArg(
"query",
type="string",
required=True,
description="Search query (supports provider-specific syntax)",
),
CmdletArg(
"limit", type="int", description="Maximum results to return (default: 50)"
),
CmdletArg(
"open",
type="int",
description="(alldebrid) Open folder/magnet by ID and list its files",
),
],
detail=[
"Search external content providers:",
"- alldebrid: List your AllDebrid account folders (magnets). Select @N to view files.",
" Example: search-provider -provider alldebrid \"*\"",
" Example: search-provider -provider alldebrid -open 123 \"*\"",
' Example: search-provider -provider alldebrid "*"',
' Example: search-provider -provider alldebrid -open 123 "*"',
"- bandcamp: Search for music albums/tracks",
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
' Example: search-provider -provider bandcamp "artist:altrusian grace"',
"- libgen: Search Library Genesis for books",
" Example: search-provider -provider libgen \"python programming\"",
' Example: search-provider -provider libgen "python programming"',
"- loc: Search Library of Congress (Chronicling America)",
" Example: search-provider -provider loc \"lincoln\"",
' Example: search-provider -provider loc "lincoln"',
"- soulseek: Search P2P network for music",
" Example: search-provider -provider soulseek \"pink floyd\"",
' Example: search-provider -provider soulseek "pink floyd"',
"- youtube: Search YouTube for videos",
" Example: search-provider -provider youtube \"tutorial\"",
' Example: search-provider -provider youtube "tutorial"',
"- internetarchive: Search archive.org items (advancedsearch syntax)",
" Example: search-provider -provider internetarchive \"title:(lincoln) AND mediatype:texts\"",
' Example: search-provider -provider internetarchive "title:(lincoln) AND mediatype:texts"',
"",
"Query syntax:",
"- bandcamp: Use 'artist:Name' to search by artist",
@@ -65,12 +83,12 @@ class Search_Provider(Cmdlet):
"- internetarchive: Archive.org advancedsearch query syntax",
"",
"Results can be piped to other cmdlet:",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-file",
' search-provider -provider bandcamp "artist:grace" | @1 | download-file',
],
exec=self.run
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute search-provider cmdlet."""
if should_show_help(args):
@@ -81,7 +99,9 @@ class Search_Provider(Cmdlet):
# Dynamic flag variants from cmdlet arg definitions.
flag_registry = self.build_flag_registry()
provider_flags = {f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})}
provider_flags = {
f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})
}
query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
open_flags = {f.lower() for f in (flag_registry.get("open") or {"-open", "--open"})}
@@ -106,14 +126,20 @@ class Search_Provider(Cmdlet):
try:
limit = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50", file=sys.stderr)
log(
f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50",
file=sys.stderr,
)
limit = 50
i += 2
elif low in open_flags and i + 1 < len(args_list):
try:
open_id = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid open value '{args_list[i + 1]}', ignoring", file=sys.stderr)
log(
f"Warning: Invalid open value '{args_list[i + 1]}', ignoring",
file=sys.stderr,
)
open_id = None
i += 2
elif not token.startswith("-"):
@@ -139,9 +165,9 @@ class Search_Provider(Cmdlet):
status = "" if available else ""
log(f" {status} {name}", file=sys.stderr)
return 1
debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}")
# Get provider
provider = get_search_provider(provider_name, config)
if not provider:
@@ -160,6 +186,7 @@ class Search_Provider(Cmdlet):
if library_root:
try:
from API.folder import API_folder_store
db = API_folder_store(library_root)
except Exception:
db = None
@@ -178,6 +205,7 @@ class Search_Provider(Cmdlet):
results_list = []
import result_table
importlib.reload(result_table)
from result_table import ResultTable
@@ -190,7 +218,9 @@ class Search_Provider(Cmdlet):
elif provider_lower == "loc":
provider_label = "LoC"
else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
provider_label = (
provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
)
if provider_lower == "alldebrid" and open_id is not None:
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
@@ -205,7 +235,9 @@ class Search_Provider(Cmdlet):
if provider_lower == "alldebrid":
if open_id is not None:
# Second-stage: show files for selected folder/magnet.
results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id})
results = provider.search(
query, limit=limit, filters={"view": "files", "magnet_id": open_id}
)
else:
# Default: show folders (magnets) so user can select @N.
results = provider.search(query, limit=limit, filters={"view": "folders"})
@@ -222,7 +254,11 @@ class Search_Provider(Cmdlet):
# Emit results for pipeline
for search_result in results:
item_dict = search_result.to_dict() if hasattr(search_result, "to_dict") else dict(search_result)
item_dict = (
search_result.to_dict()
if hasattr(search_result, "to_dict")
else dict(search_result)
)
# Ensure table field is set (should be by provider, but just in case)
if "table" not in item_dict:
@@ -233,13 +269,18 @@ class Search_Provider(Cmdlet):
# For AllDebrid folder rows, allow @N to open and show files.
try:
if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder":
if (
provider_lower == "alldebrid"
and getattr(search_result, "media_kind", "") == "folder"
):
magnet_id = None
meta = getattr(search_result, "full_metadata", None)
if isinstance(meta, dict):
magnet_id = meta.get("magnet_id")
if magnet_id is not None:
table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"])
table.set_row_selection_args(
row_index, ["-open", str(magnet_id), "-query", "*"]
)
except Exception:
pass
results_list.append(item_dict)
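A rough sketch of the two-stage AllDebrid flow wired up above, with hypothetical function names; the provider object is whatever get_search_provider returns, and the search signature is inferred from the calls visible in this hunk.

from typing import Any, Dict, List


def alldebrid_folders(provider: Any, query: str, limit: int = 50) -> List[Dict[str, Any]]:
    # Stage 1: list folders (magnets); each table row later gets selection args
    # ["-open", str(magnet_id), "-query", "*"] so @N drills into that folder.
    folders = provider.search(query, limit=limit, filters={"view": "folders"})
    return [f.to_dict() if hasattr(f, "to_dict") else dict(f) for f in folders]


def alldebrid_files(provider: Any, magnet_id: int, limit: int = 50) -> List[Dict[str, Any]]:
    # Stage 2: list the files inside the selected magnet/folder.
    files = provider.search("*", limit=limit, filters={"view": "files", "magnet_id": magnet_id})
    return [f.to_dict() if hasattr(f, "to_dict") else dict(f) for f in files]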
View File
@@ -1,4 +1,5 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
@@ -12,7 +13,16 @@ from SYS.logger import log, debug
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag, parse_hash_query = (
(
Cmdlet,
CmdletArg,
SharedArgs,
get_field,
should_show_help,
normalize_hash,
first_title_tag,
parse_hash_query,
) = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
@@ -37,9 +47,9 @@ class Search_Store(Cmdlet):
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [-query <query>] [-store BACKEND] [-limit N]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
SharedArgs.STORE,
SharedArgs.QUERY,
],
detail=[
"Search across storage backends: Folder stores and Hydrus instances",
@@ -84,20 +94,22 @@ class Search_Store(Cmdlet):
# Ensure we have title field
if "title" not in payload:
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
payload["title"] = (
payload.get("name") or payload.get("target") or payload.get("path") or "Result"
)
# Ensure we have ext field
if "ext" not in payload:
title = str(payload.get("title", ""))
path_obj = Path(title)
if path_obj.suffix:
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip("."))
else:
payload["ext"] = payload.get("ext", "")
# Ensure size_bytes is present for display (already set by search_file())
# result_table will handle formatting it
# Don't create manual columns - let result_table handle display
# This allows the table to respect max_columns and apply consistent formatting
return payload
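As a standalone sketch of the fallbacks above (the real _normalize_extension is not shown in this hunk, so a plain lstrip('.')/lower() stands in for it):

from pathlib import Path
from typing import Any, Dict


def ensure_display_fields(payload: Dict[str, Any]) -> Dict[str, Any]:
    # title falls back through name -> target -> path -> "Result"
    if "title" not in payload:
        payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
    # ext is derived from the title's suffix when missing
    if "ext" not in payload:
        suffix = Path(str(payload.get("title", ""))).suffix
        payload["ext"] = suffix.lstrip(".").lower() if suffix else payload.get("ext", "")
    return payload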
@@ -134,11 +146,15 @@ class Search_Store(Cmdlet):
raw_title = None
try:
raw_title = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
raw_title = (
ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
)
except Exception:
raw_title = None
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title("search-store", list(args_list))
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title(
"search-store", list(args_list)
)
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
@@ -184,7 +200,7 @@ class Search_Store(Cmdlet):
store_filter = match.group(1).strip() or None
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
query = re.sub(r"\s{2,}", " ", query)
query = query.strip().strip(',')
query = query.strip().strip(",")
if store_filter and not storage_backend:
storage_backend = store_filter
@@ -198,6 +214,7 @@ class Search_Store(Cmdlet):
from API.folder import API_folder_store
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
@@ -212,12 +229,13 @@ class Search_Store(Cmdlet):
"search-store",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
pipe=ctx.get_current_command_text(),
)
results_list = []
import result_table
import importlib
importlib.reload(result_table)
from result_table import ResultTable
@@ -233,6 +251,7 @@ class Search_Store(Cmdlet):
pass
from Store import Store
storage = Store(config=config or {})
from Store._base import Store as BaseStore
@@ -301,7 +320,11 @@ class Search_Store(Cmdlet):
else:
maybe_tags = tag_result
if isinstance(maybe_tags, list):
tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
tags_list = [
str(t).strip()
for t in maybe_tags
if isinstance(t, str) and str(t).strip()
]
except Exception:
tags_list = []
@@ -336,7 +359,9 @@ class Search_Store(Cmdlet):
if size_bytes is None:
size_bytes = meta_obj.get("size_bytes")
try:
size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
size_bytes_int: Optional[int] = (
int(size_bytes) if size_bytes is not None else None
)
except Exception:
size_bytes_int = None
@@ -362,7 +387,7 @@ class Search_Store(Cmdlet):
else:
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
log("No results found", file=sys.stderr)
@@ -373,15 +398,18 @@ class Search_Store(Cmdlet):
except Exception:
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
if backend_to_search:
searched_backends.append(backend_to_search)
target_backend = storage[backend_to_search]
if type(target_backend).search is BaseStore.search:
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
log(
f"Backend '{backend_to_search}' does not support searching",
file=sys.stderr,
)
db.update_worker_status(worker_id, "error")
return 1
debug(f"[search-store] Searching '{backend_to_search}'")
results = target_backend.search(query, limit=limit)
@@ -395,7 +423,9 @@ class Search_Store(Cmdlet):
debug(f"[search-store] Searching '{backend_name}'")
backend_results = backend.search(query, limit=limit - len(all_results))
debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
debug(
f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)"
)
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
@@ -406,6 +436,7 @@ class Search_Store(Cmdlet):
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
if isinstance(obj, dict):
return dict(obj)
@@ -450,15 +481,16 @@ class Search_Store(Cmdlet):
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
db.update_worker_status(worker_id, "completed")
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
try:
db.update_worker_status(worker_id, 'error')
db.update_worker_status(worker_id, "error")
except Exception:
pass
return 1
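The worker bookkeeping threaded through the hunks above follows a simple lifecycle. A condensed sketch using only the API_folder_store calls visible in this diff (append_worker_stdout, update_worker_status); the worker is registered earlier in _run with a str(uuid.uuid4()) id, and that registration call is truncated here, so it is only noted in a comment.

import json


def run_tracked_search(db, worker_id, do_search, query):
    # worker_id comes from the registration step earlier in the cmdlet; after
    # the search, results are persisted as JSON stdout and the worker is marked
    # completed, or error if the search raises.
    try:
        results = do_search(query) or []
        db.append_worker_stdout(worker_id, json.dumps(results, indent=2))
        db.update_worker_status(worker_id, "completed")
        return results
    except Exception:
        db.update_worker_status(worker_id, "error")
        raise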
View File
@@ -1,4 +1,5 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
@@ -30,9 +31,19 @@ CMDLET = Cmdlet(
usage="trim-file [-path <path>] [-input <path-or-url>] -range <start-end> [-outdir <dir>] [-delete]",
arg=[
CmdletArg("-path", description="Path to the file (optional if piped)."),
CmdletArg("-input", description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL."),
CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s')."),
CmdletArg("-outdir", description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos)."),
CmdletArg(
"-input",
description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL.",
),
CmdletArg(
"-range",
required=True,
description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s').",
),
CmdletArg(
"-outdir",
description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos).",
),
CmdletArg("-delete", type="flag", description="Delete the original file after trimming."),
],
detail=[
@@ -41,7 +52,7 @@ CMDLET = Cmdlet(
"Inherits tag values from the source file.",
"Adds a relationship to the source file (if hash is available).",
"Output can be piped to add-file.",
]
],
)
@@ -71,6 +82,7 @@ def _format_hms(total_seconds: float) -> str:
return "0s"
return "".join(parts)
def _is_url(value: str) -> bool:
try:
p = urlparse(str(value))
@@ -88,7 +100,7 @@ def _parse_time(time_str: str) -> float:
- SS(.sss)
- 1h3m53s (also 1h3m, 3m53s, 53s)
"""
raw = str(time_str or '').strip()
raw = str(time_str or "").strip()
if not raw:
raise ValueError("Empty time")
@@ -97,15 +109,15 @@ def _parse_time(time_str: str) -> float:
r"(?i)\s*(?:(?P<h>\d+(?:\.\d+)?)h)?(?:(?P<m>\d+(?:\.\d+)?)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
raw,
)
if hms and (hms.group('h') or hms.group('m') or hms.group('s')):
hours = float(hms.group('h') or 0)
minutes = float(hms.group('m') or 0)
seconds = float(hms.group('s') or 0)
if hms and (hms.group("h") or hms.group("m") or hms.group("s")):
hours = float(hms.group("h") or 0)
minutes = float(hms.group("m") or 0)
seconds = float(hms.group("s") or 0)
total = hours * 3600 + minutes * 60 + seconds
return float(total)
# Colon-separated
parts = [p.strip() for p in raw.split(':')]
parts = [p.strip() for p in raw.split(":")]
if len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
if len(parts) == 2:
@@ -117,15 +129,15 @@ def _parse_time(time_str: str) -> float:
def _sanitize_filename(name: str, *, max_len: int = 140) -> str:
name = str(name or '').strip()
name = str(name or "").strip()
if not name:
return 'clip'
return "clip"
# Windows-forbidden characters: <>:"/\\|?* plus control chars
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', '_', name)
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', "_", name)
name = re.sub(r"\s+", " ", name).strip()
name = name.rstrip('.')
name = name.rstrip(".")
if not name:
return 'clip'
return "clip"
if len(name) > max_len:
name = name[:max_len].rstrip()
return name
@@ -140,7 +152,9 @@ def _extract_store_name(item: Any) -> Optional[str]:
return None
def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str) -> None:
def _persist_alt_relationship(
*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str
) -> None:
"""Persist directional alt -> king relationship in the given backend."""
try:
store = Store(config)
@@ -155,7 +169,11 @@ def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_ha
# Folder-backed local DB
try:
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
if (
type(backend).__name__ == "Folder"
and hasattr(backend, "location")
and callable(getattr(backend, "location"))
):
from API.folder import API_folder_store
from pathlib import Path
@@ -174,12 +192,15 @@ def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_ha
except Exception:
return
def _trim_media(input_source: str, output_path: Path, start_seconds: float, duration_seconds: float) -> bool:
def _trim_media(
input_source: str, output_path: Path, start_seconds: float, duration_seconds: float
) -> bool:
"""Trim media using ffmpeg.
input_source may be a local path or a URL.
"""
ffmpeg_path = shutil.which('ffmpeg')
ffmpeg_path = shutil.which("ffmpeg")
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
@@ -190,38 +211,45 @@ def _trim_media(input_source: str, output_path: Path, start_seconds: float, dura
return False
cmd = [
ffmpeg_path, '-y',
'-ss', str(float(start_seconds)),
'-i', str(input_source),
'-t', str(float(duration_seconds)),
'-c', 'copy',
'-map_metadata', '0',
ffmpeg_path,
"-y",
"-ss",
str(float(start_seconds)),
"-i",
str(input_source),
"-t",
str(float(duration_seconds)),
"-c",
"copy",
"-map_metadata",
"0",
str(output_path),
]
debug(f"Running ffmpeg: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
log(f"ffmpeg error: {result.stderr}", file=sys.stderr)
return False
return True
except Exception as e:
log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Trim a media file."""
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
range_arg = parsed.get("range")
if not range_arg or '-' not in range_arg:
if not range_arg or "-" not in range_arg:
log("Error: -range argument required (format: start-end)", file=sys.stderr)
return 1
start_str, end_str = [s.strip() for s in range_arg.split('-', 1)]
start_str, end_str = [s.strip() for s in range_arg.split("-", 1)]
if not start_str or not end_str:
log("Error: -range must be start-end", file=sys.stderr)
return 1
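For reference, the stream-copy command assembled in _trim_media above expands to something like the following for a hypothetical 10-second clip starting at 225s; -c copy avoids re-encoding and -map_metadata 0 carries the source metadata into the clip. The input and output filenames are placeholders.

import shutil
import subprocess

ffmpeg = shutil.which("ffmpeg")
if ffmpeg:
    cmd = [
        ffmpeg, "-y",
        "-ss", "225.0",         # start offset in seconds
        "-i", "input.mkv",      # local path or stream URL (placeholder)
        "-t", "10.0",           # clip duration in seconds
        "-c", "copy",           # stream copy, no re-encode
        "-map_metadata", "0",   # keep source metadata
        "clip_input.mkv",       # output path (placeholder)
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        print(proc.stderr)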
@@ -237,25 +265,25 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if duration_seconds <= 0:
log(f"Invalid range: start {start_str} >= end {end_str}", file=sys.stderr)
return 1
delete_original = parsed.get("delete", False)
path_arg = parsed.get("path")
input_override = parsed.get("input")
outdir_arg = parsed.get("outdir")
# Collect inputs
inputs = normalize_result_input(result)
# If path arg provided, add it to inputs
if path_arg:
inputs.append({"path": path_arg})
if not inputs:
log("No input files provided.", file=sys.stderr)
return 1
success_count = 0
for item in inputs:
store_name = _extract_store_name(item)
@@ -267,7 +295,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
file_path = item.path
elif isinstance(item, str):
file_path = item
if not file_path and not input_override:
continue
@@ -283,18 +311,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not path_obj or not path_obj.exists():
log(f"File not found: {media_source}", file=sys.stderr)
continue
# Determine output directory
output_dir: Path
if outdir_arg:
output_dir = Path(str(outdir_arg)).expanduser()
elif store_name:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
elif path_obj is not None:
output_dir = path_obj.parent
else:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
try:
@@ -303,7 +333,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
# Determine output filename
output_ext = ''
output_ext = ""
if path_obj is not None:
output_ext = path_obj.suffix
base_name = path_obj.stem
@@ -313,21 +343,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if title:
base_name = _sanitize_filename(str(title))
else:
base_name = time.strftime('%Y%m%d-%H%M%S')
base_name = time.strftime("%Y%m%d-%H%M%S")
if base_name.lower().startswith('clip_'):
if base_name.lower().startswith("clip_"):
base_name = base_name[5:] or base_name
try:
p = urlparse(str(media_source))
last = (p.path or '').split('/')[-1]
if last and '.' in last:
output_ext = '.' + last.split('.')[-1]
last = (p.path or "").split("/")[-1]
if last and "." in last:
output_ext = "." + last.split(".")[-1]
except Exception:
pass
if not output_ext or len(output_ext) > 8:
output_ext = '.mkv'
output_ext = ".mkv"
new_filename = f"clip_{base_name}{output_ext}"
output_path = output_dir / new_filename
@@ -341,30 +371,30 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not candidate.exists():
output_path = candidate
break
# Trim
source_label = (path_obj.name if path_obj is not None else str(media_source))
source_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Trimming {source_label} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(str(media_source), output_path, start_seconds, duration_seconds):
log(f"Created clip: {output_path}", file=sys.stderr)
success_count += 1
# Prepare result for pipeline
# 1. Get source hash for relationship
source_hash = None
if isinstance(item, dict):
source_hash = item.get("hash")
elif hasattr(item, "hash"):
source_hash = item.hash
if not source_hash:
if path_obj is not None:
try:
source_hash = sha256_file(path_obj)
except Exception:
pass
# 2. Get tag values
# Do not inherit tags from the source (per UX request).
new_tags: list[str] = []
@@ -382,7 +412,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
urls.append(src_u.strip())
except Exception:
pass
# 3. Get title and modify it
title = extract_title_from_result(item)
if not title:
@@ -390,7 +420,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
range_hms = f"{_format_hms(start_seconds)}-{_format_hms(end_seconds)}"
new_title = f"[{range_hms}] - {title}"
# 4. Calculate clip hash
clip_hash = None
try:
@@ -449,20 +479,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"title": new_title,
"tag": new_tags,
"url": urls,
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"store": stored_store,
"relationships": {
# Clip is an ALT of the source; store semantics are directional alt -> king.
# Provide both keys so downstream (e.g. add-file) can persist relationships.
"king": [source_hash] if source_hash else [],
"alt": [clip_hash] if (source_hash and clip_hash) else [],
}
},
}
# Emit result
ctx.emit(result_dict)
# Delete original if requested
if delete_original:
try:
@@ -473,11 +503,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Maybe leave that to user or cleanup cmdlet
except Exception as e:
log(f"Failed to delete original: {e}", file=sys.stderr)
else:
failed_label = (path_obj.name if path_obj is not None else str(media_source))
failed_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Failed to trim {failed_label}", file=sys.stderr)
return 0 if success_count > 0 else 1
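For orientation, a sketch of the dict emitted for a successful clip so it can be piped to add-file; the values are illustrative, and the exact range prefix depends on _format_hms output not fully shown in this diff.

clip_result = {
    "title": "[3m45s-3m55s] - Original Title",  # range prefix + source title (illustrative)
    "tag": [],                                   # tags are not inherited from the source
    "url": ["https://example.com/source"],       # hypothetical source URL carried over
    "media_kind": "video",
    "hash": "<sha256 of the clip>",
    "store": "<originating store, if any>",
    "relationships": {
        # Clip is an ALT of the source; store semantics are directional alt -> king.
        "king": ["<sha256 of the source>"],
        "alt": ["<sha256 of the clip>"],
    },
}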