nose
2025-12-16 01:45:01 -08:00
parent a03eb0d1be
commit 9873280f0e
36 changed files with 4911 additions and 1225 deletions

View File

@@ -627,7 +627,14 @@ def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name
return normalize_hash(hash_value)
def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optional[Dict[str, Any]], Optional[int]]:
def fetch_hydrus_metadata(
config: Any,
hash_hex: str,
*,
store_name: Optional[str] = None,
hydrus_client: Any = None,
**kwargs,
) -> tuple[Optional[Dict[str, Any]], Optional[int]]:
"""Fetch metadata from Hydrus for a given hash, consolidating common fetch pattern.
Eliminates repeated boilerplate: client initialization, error handling, metadata extraction.
@@ -635,6 +642,8 @@ def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optiona
Args:
config: Configuration object (passed to hydrus_wrapper.get_client)
hash_hex: File hash to fetch metadata for
store_name: Optional Hydrus store name. When provided, do not fall back to a global/default Hydrus client.
hydrus_client: Optional explicit Hydrus client. When provided, takes precedence.
**kwargs: Additional arguments to pass to client.fetch_file_metadata()
Common: include_service_keys_to_tags, include_notes, include_file_url, include_duration, etc.
@@ -646,15 +655,33 @@ def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optiona
from API import HydrusNetwork
hydrus_wrapper = HydrusNetwork
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return None, 1
client = hydrus_client
if client is None:
log("Hydrus client unavailable")
return None, 1
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
try:
from Store import Store
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "fetch_file_metadata"):
client = candidate
except Exception as exc:
log(f"Hydrus client unavailable for store '{store_name}': {exc}")
client = None
if client is None:
log(f"Hydrus client unavailable for store '{store_name}'")
return None, 1
else:
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return None, 1
if client is None:
log("Hydrus client unavailable")
return None, 1
try:
payload = client.fetch_file_metadata(hashes=[hash_hex], **kwargs)
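# Editor's sketch (not part of the diff): the store-scoped lookup introduced above
# reduces to this pattern. Store, the "_client" attribute and hydrus_wrapper.get_client
# are taken from the diff itself; the helper name is hypothetical.
def _resolve_hydrus_client(config, store_name=None):
    """Return a client pinned to one named store, or the default client when no store is given."""
    if store_name:
        from Store import Store  # import path as used in this diff
        backend = Store(config)[str(store_name)]
        candidate = getattr(backend, "_client", None)
        # A named store must supply its own client; never fall back to the default one.
        if candidate is not None and hasattr(candidate, "fetch_file_metadata"):
            return candidate
        return None
    from API import HydrusNetwork as hydrus_wrapper  # default/global client path
    return hydrus_wrapper.get_client(config)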

View File

@@ -5,6 +5,7 @@ from pathlib import Path
import sys
import shutil
import tempfile
import re
import models
import pipeline as ctx
@@ -65,6 +66,15 @@ class Add_File(Cmdlet):
stage_ctx = ctx.get_stage_context()
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
# Determine if -store targets a registered backend (vs a filesystem export path).
is_storage_backend_location = False
if location:
try:
store_probe = Store(config)
is_storage_backend_location = location in (store_probe.list_backends() or [])
except Exception:
is_storage_backend_location = False
# Decide which items to process.
# - If user provided -path, treat this invocation as single-item.
# - Otherwise, if piped input is a list, ingest each item.
@@ -81,13 +91,15 @@ class Add_File(Cmdlet):
debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")
collected_payloads: List[Dict[str, Any]] = []
pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
successes = 0
failures = 0
# Only run the search-store refresh when add-file is the last stage.
# In the middle of a pipeline, downstream cmdlets should receive the emitted
# storage payload directly (no need to re-search and risk duplicate emits).
auto_search_store_after_add = bool(is_last_stage) and len(items_to_process) == 1
# When add-file -store is the last stage, always show a final search-store table.
# This is especially important for multi-item ingests (e.g., multi-clip downloads)
# so the user always gets a selectable ResultTable.
want_final_search_store = bool(is_last_stage) and bool(is_storage_backend_location) and bool(location)
auto_search_store_after_add = False
for item in items_to_process:
pipe_obj = coerce_to_pipe_object(item, path_arg)
@@ -217,7 +229,8 @@ class Add_File(Cmdlet):
config,
delete_after_item,
collect_payloads=collected_payloads,
suppress_last_stage_overlay=is_last_stage and len(items_to_process) > 1,
collect_relationship_pairs=pending_relationship_pairs,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
)
else:
@@ -243,16 +256,38 @@ class Add_File(Cmdlet):
except Exception:
pass
# If we processed multiple storage ingests, present a single consolidated overlay table.
if is_last_stage and len(items_to_process) > 1 and collected_payloads:
# Always end add-file -store (when last stage) by showing the canonical store table.
# This keeps output consistent and ensures @N selection works for multi-item ingests.
if want_final_search_store and collected_payloads:
try:
from result_table import ResultTable
table = ResultTable("Result")
hashes: List[str] = []
for payload in collected_payloads:
table.add_result(payload)
# Make this the active selectable table so @.. returns here (and playlist table is kept in history).
ctx.set_last_result_table(table, collected_payloads, subject=collected_payloads)
h = payload.get("hash") if isinstance(payload, dict) else None
if isinstance(h, str) and len(h) == 64:
hashes.append(h)
# Deduplicate while preserving order
seen: set[str] = set()
hashes = [h for h in hashes if not (h in seen or seen.add(h))]
refreshed_items = Add_File._try_emit_search_store_by_hashes(
store=str(location),
hash_values=hashes,
config=config,
)
if not refreshed_items:
# Fallback: at least show the add-file payloads as a display overlay
from result_table import ResultTable
table = ResultTable("Result")
for payload in collected_payloads:
table.add_result(payload)
ctx.set_last_result_table_overlay(table, collected_payloads, subject=collected_payloads)
except Exception:
pass
# Persist relationships into backend DB/API.
if pending_relationship_pairs:
try:
Add_File._apply_pending_relationships(pending_relationship_pairs, config)
except Exception:
pass
@@ -260,6 +295,259 @@ class Add_File(Cmdlet):
return 0
return 1
@staticmethod
def _try_emit_search_store_by_hashes(*, store: str, hash_values: List[str], config: Dict[str, Any]) -> Optional[List[Any]]:
"""Run search-store for a list of hashes and promote the table to a display overlay.
Returns the emitted search-store payload items on success, else None.
"""
hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64]
if not store or not hashes:
return None
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
query = "hash:" + ",".join(hashes)
args = ["-store", str(store), query]
log(f"[add-file] Refresh: search-store -store {store} \"{query}\"", file=sys.stderr)
# Run search-store under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1, worker_id=getattr(prev_ctx, "worker_id", None))
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return None
# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if is_last:
try:
table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if table is not None and items:
ctx.set_last_result_table_overlay(table, items, subject={"store": store, "hash": hashes})
except Exception:
pass
return emitted_items
except Exception as exc:
debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
return None
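# Editor's note (sketch, not part of the diff): the swap-run-restore sequence above is
# a plain save/restore pattern; a context manager expresses the same intent. The pipeline
# names (get_stage_context, set_stage_context, PipelineStageContext) are taken from this
# diff; the helper itself is hypothetical.
from contextlib import contextmanager

@contextmanager
def isolated_stage(ctx):
    prev = ctx.get_stage_context()
    temp = ctx.PipelineStageContext(
        stage_index=0,
        total_stages=1,
        worker_id=getattr(prev, "worker_id", None),
    )
    ctx.set_stage_context(temp)
    try:
        yield temp  # temp.emits collects whatever the nested cmdlet emits
    finally:
        ctx.set_stage_context(prev)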
@staticmethod
def _parse_relationship_tag_king_alts(tag_value: str) -> tuple[Optional[str], List[str]]:
"""Parse a relationship tag into (king_hash, alt_hashes).
Supported formats:
- New: relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>
- Old: relationship: hash(king)<KING_HASH>,hash(alt)<ALT_HASH>...
(angle brackets optional: relationship: hash(king)KING,hash(alt)ALT)
For the local DB we treat the first hash listed as the king.
"""
if not isinstance(tag_value, str):
return None, []
raw = tag_value.strip()
if not raw:
return None, []
# Normalize input: ensure we only look at the RHS after "relationship:"
rhs = raw
if ":" in raw:
prefix, rest = raw.split(":", 1)
if prefix.strip().lower() == "relationship":
rhs = rest.strip()
# Old typed format: hash(type)HEX
typed = re.findall(r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?", rhs)
if typed:
king: Optional[str] = None
alts: List[str] = []
for rel_type, h in typed:
h_norm = str(h).strip().lower()
if rel_type.strip().lower() == "king":
king = h_norm
elif rel_type.strip().lower() in {"alt", "related"}:
alts.append(h_norm)
# If the tag omitted a king but had hashes, fall back to the first hash.
if not king:
all_hashes = [str(h).strip().lower() for _, h in typed]
king = all_hashes[0] if all_hashes else None
alts = [h for h in all_hashes[1:] if h]
# Dedupe alts while preserving order
seen: set[str] = set()
alts = [h for h in alts if h and len(h) == 64 and not (h in seen or seen.add(h))]
if king and len(king) == 64:
return king, [h for h in alts if h != king]
return None, []
# New format: a simple list of hashes, first is king.
hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs)
hashes = [h.strip().lower() for h in hashes if isinstance(h, str)]
if not hashes:
return None, []
king = hashes[0]
alts = hashes[1:]
seen2: set[str] = set()
alts = [h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h))]
return king, [h for h in alts if h != king]
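# Editor's worked example (sketch, not part of the diff): expected behaviour of the
# parser above for the two documented tag formats, using placeholder 64-hex hashes
# and assuming the Add_File class from this file is importable.
K, A1, A2 = "a" * 64, "b" * 64, "c" * 64
assert Add_File._parse_relationship_tag_king_alts(f"relationship: {K},{A1},{A2}") == (K, [A1, A2])
# Legacy typed format: roles are explicit; angle brackets around hashes are optional.
assert Add_File._parse_relationship_tag_king_alts(f"relationship: hash(king){K},hash(alt){A1}") == (K, [A1])
# No 64-hex hash on the right-hand side parses to (None, []).
assert Add_File._parse_relationship_tag_king_alts("relationship:") == (None, [])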
@staticmethod
def _parse_relationships_king_alts(relationships: Dict[str, Any]) -> tuple[Optional[str], List[str]]:
"""Parse a PipeObject.relationships dict into (king_hash, alt_hashes).
Supported shapes:
- {"king": [KING], "alt": [ALT1, ALT2]}
- {"king": KING, "alt": ALT} (strings)
- Also treats "related" hashes as alts for persistence purposes.
"""
if not isinstance(relationships, dict) or not relationships:
return None, []
def _first_hash(val: Any) -> Optional[str]:
if isinstance(val, str):
h = val.strip().lower()
return h if len(h) == 64 else None
if isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
return h
return None
def _many_hashes(val: Any) -> List[str]:
out: List[str] = []
if isinstance(val, str):
h = val.strip().lower()
if len(h) == 64:
out.append(h)
elif isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
out.append(h)
return out
king = _first_hash(relationships.get("king"))
if not king:
return None, []
alts = _many_hashes(relationships.get("alt"))
alts.extend(_many_hashes(relationships.get("related")))
seen: set[str] = set()
alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))]
return king, alts
@staticmethod
def _apply_pending_relationships(pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any]) -> None:
"""Persist relationships into the appropriate backend DB/API.
- Folder stores: write to the per-store SQLite DB (directional alt->king).
- Hydrus stores: call Hydrus relationship API.
"""
if not pending:
return
try:
store = Store(config)
except Exception:
return
for backend_name, pairs in pending.items():
if not pairs:
continue
try:
backend = store[str(backend_name)]
except Exception:
continue
backend_type = type(backend).__name__.lower()
# Folder-backed local DB
location_fn = getattr(backend, "location", None)
is_folder = type(backend).__name__ == "Folder" and callable(location_fn)
if is_folder and location_fn is not None:
try:
root = Path(str(location_fn())).expanduser()
with API_folder_store(root) as db:
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
# Hash-first store DB write; skips if either hash isn't in this store DB.
try:
db.set_relationship_by_hash(str(alt_hash), str(king_hash), "alt", bidirectional=False)
except Exception:
continue
processed_pairs.add((alt_hash, king_hash))
except Exception:
pass
continue
# Hydrus
if "hydrus" in backend_type or hasattr(backend, "_client"):
client: Any = getattr(backend, "_client", None)
# Do not fall back to a global/default Hydrus client here; relationships must not be cross-store.
if client is None or not hasattr(client, "set_relationship"):
continue
def _hash_exists(hash_hex: str) -> bool:
try:
if not hasattr(client, "fetch_file_metadata"):
return False
payload = client.fetch_file_metadata(
hashes=[hash_hex],
include_service_keys_to_tags=False,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
include_notes=False,
)
meta = payload.get("metadata") if isinstance(payload, dict) else None
return bool(isinstance(meta, list) and meta)
except Exception:
return False
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
try:
alt_norm = str(alt_hash).strip().lower()
king_norm = str(king_hash).strip().lower()
if len(alt_norm) != 64 or len(king_norm) != 64:
continue
if not _hash_exists(alt_norm) or not _hash_exists(king_norm):
continue
client.set_relationship(alt_norm, king_norm, "alt")
processed_pairs.add((alt_hash, king_hash))
except Exception:
pass
continue
# Other backends: no-op
_ = backend_type
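# Editor's sketch (not part of the diff): shape of the "pending" mapping consumed above.
# Keys are backend names; values are sets of (alt_hash, king_hash) pairs, matching the
# directional alt -> king write used for folder stores. Store names and hashes here are
# placeholders.
pending_example: dict[str, set[tuple[str, str]]] = {
    "my-folder-store": {("b" * 64, "a" * 64)},
    "my-hydrus-store": {("d" * 64, "c" * 64)},
}
# Add_File._apply_pending_relationships(pending_example, config)  # config assumed in scope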
@staticmethod
def _resolve_source(
result: Any,
@@ -310,7 +598,10 @@ class Add_File(Cmdlet):
debug(f"Resolved pipe_path: {pipe_path_str}")
if pipe_path_str.startswith("hydrus:"):
file_hash = pipe_path_str.split(":", 1)[1]
media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
store_name = getattr(pipe_obj, "store", None)
if not store_name and isinstance(pipe_obj.extra, dict):
store_name = pipe_obj.extra.get("store")
media_path, success = Add_File._fetch_hydrus_path(file_hash, config, store_name=str(store_name).strip() if store_name else None)
return media_path, file_hash if success else None
if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
return pipe_path_str, None
@@ -331,7 +622,10 @@ class Add_File(Cmdlet):
hydrus_hash = hydrus_hash or pipe_obj.hash
if hydrus_hash and hydrus_hash != "unknown":
media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config)
store_name = getattr(pipe_obj, "store", None)
if not store_name and isinstance(pipe_obj.extra, dict):
store_name = pipe_obj.extra.get("store")
media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config, store_name=str(store_name).strip() if store_name else None)
return media_path, str(hydrus_hash) if success else None
# Try from result (if it's a string path or URL)
@@ -395,13 +689,32 @@ class Add_File(Cmdlet):
return None, None
@staticmethod
def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]:
def _fetch_hydrus_path(
file_hash: str,
config: Dict[str, Any],
store_name: Optional[str] = None,
) -> Tuple[Optional[Path], bool]:
"""Fetch the physical path of a file from Hydrus using its hash."""
if not file_hash:
return None, False
try:
client = hydrus_wrapper.get_client(config)
client = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_path"):
client = candidate
except Exception:
client = None
if client is None:
log(f"❌ Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return None, False
else:
client = hydrus_wrapper.get_client(config)
if not client:
log("❌ Hydrus client not available", file=sys.stderr)
return None, False
@@ -630,7 +943,7 @@ class Add_File(Cmdlet):
pass
@staticmethod
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> bool:
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> Optional[List[Any]]:
"""Run search-store for a single hash so the final table/payload is consistent.
Important: `add-file` is treated as an action command by the CLI, so the CLI only
@@ -638,7 +951,7 @@ class Add_File(Cmdlet):
this copies the resulting table into the display overlay (when this is the last
stage) so the canonical store table is what the user sees and can select from.
Returns True if search-store ran successfully, else False.
Returns the emitted search-store payload items on success, else None.
"""
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
@@ -653,10 +966,11 @@ class Add_File(Cmdlet):
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return False
return None
# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
@@ -671,10 +985,10 @@ class Add_File(Cmdlet):
except Exception:
pass
return True
return emitted_items
except Exception as exc:
debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
return False
return None
@staticmethod
def _prepare_metadata(
@@ -735,6 +1049,39 @@ class Add_File(Cmdlet):
file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)
# Relationships must not be stored as tags.
# If relationship tags exist (legacy sidecar format), capture them into PipeObject.relationships
# and strip them from the final tag list.
relationship_tags = [
t for t in merged_tags
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if relationship_tags:
try:
if not isinstance(getattr(pipe_obj, "relationships", None), dict) or not pipe_obj.relationships:
king: Optional[str] = None
alts: List[str] = []
for rel_tag in relationship_tags:
k, a = Add_File._parse_relationship_tag_king_alts(rel_tag)
if k and not king:
king = k
if a:
alts.extend(a)
if king:
seen_alt: set[str] = set()
alts = [h for h in alts if h and h != king and len(h) == 64 and not (h in seen_alt or seen_alt.add(h))]
payload: Dict[str, Any] = {"king": [king]}
if alts:
payload["alt"] = alts
pipe_obj.relationships = payload
except Exception:
pass
merged_tags = [
t for t in merged_tags
if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
]
# Persist back to PipeObject
pipe_obj.tag = merged_tags
if preferred_title and not pipe_obj.title:
@@ -880,6 +1227,19 @@ class Add_File(Cmdlet):
return None
debug(f"[add-file] Starting soulseek download: {username} -> {filename}")
# Read Soulseek login credentials from config (client credentials), separate from peer username.
try:
from config import get_soulseek_username, get_soulseek_password
client_user = get_soulseek_username(config) or ""
client_pass = get_soulseek_password(config) or ""
except Exception:
client_user = ""
client_pass = ""
if not client_user or not client_pass:
debug("[add-file] ERROR: Soulseek credentials missing (set [provider=soulseek] username/password in config.conf)")
return None
# Determine output directory (prefer downloads folder in config)
output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads")
@@ -900,6 +1260,8 @@ class Add_File(Cmdlet):
username=username,
filename=filename,
output_dir=output_dir,
client_username=client_user,
client_password=client_pass,
timeout=1200 # 20 minutes
)
)
@@ -937,9 +1299,15 @@ class Add_File(Cmdlet):
f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
if f_hash:
try:
client = hydrus_wrapper.get_client(config)
if client:
client.associate_url(f_hash, hoster_url)
# Only associate when we can target an explicit Hydrus store backend.
# Do not fall back to a global/default Hydrus client.
store_name = getattr(pipe_obj, "store", None)
if store_name:
store = Store(config)
backend = store[str(store_name)]
client = getattr(backend, "_client", None)
if client is not None and hasattr(client, "associate_url"):
client.associate_url(str(f_hash), hoster_url)
except Exception:
pass
@@ -984,6 +1352,7 @@ class Add_File(Cmdlet):
delete_after: bool,
*,
collect_payloads: Optional[List[Dict[str, Any]]] = None,
collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_store: bool = True,
) -> int:
@@ -1018,6 +1387,21 @@ class Add_File(Cmdlet):
# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
# Collect relationship pairs for post-ingest DB/API persistence.
if collect_relationship_pairs is not None:
rels = Add_File._get_relationships(result, pipe_obj)
if isinstance(rels, dict) and rels:
king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
if king_hash and alt_hashes:
bucket = collect_relationship_pairs.setdefault(str(backend_name), set())
for alt_hash in alt_hashes:
if alt_hash and alt_hash != king_hash:
bucket.add((alt_hash, king_hash))
# Relationships must never be stored as tags.
if isinstance(tags, list) and tags:
tags = [t for t in tags if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))]
# Call backend's add_file with full metadata
# Backend returns hash as identifier
@@ -1030,13 +1414,16 @@ class Add_File(Cmdlet):
log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
stored_path: Optional[str] = None
# IMPORTANT: avoid calling get_file() for remote backends.
# For Hydrus, get_file() returns a browser URL (often with an access key) and should
# only be invoked by explicit user commands (e.g. get-file).
try:
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
# Some backends may return a browser URL
stored_path = maybe_path
if type(backend).__name__ == "Folder":
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
stored_path = maybe_path
except Exception:
stored_path = None
@@ -1119,12 +1506,16 @@ class Add_File(Cmdlet):
# Show the add-file summary (overlay only) but let search-store provide the downstream payload.
Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=False)
ok = Add_File._try_emit_search_store_by_hash(
refreshed_items = Add_File._try_emit_search_store_by_hash(
store=backend_name,
hash_value=resolved_hash,
config=config,
)
if not ok:
if refreshed_items:
# Re-emit the canonical store rows so downstream stages receive them.
for emitted in refreshed_items:
ctx.emit(emitted)
else:
# Fall back to emitting the add-file payload so downstream stages still receive an item.
ctx.emit(payload)
else:

View File

@@ -3,18 +3,17 @@
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence
import json
import re
from pathlib import Path
import sys
from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, should_show_help, get_field
from API.folder import read_sidecar, find_sidecar
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_result_input, should_show_help, get_field
from API.folder import read_sidecar, find_sidecar, API_folder_store
from Store import Store
CMDLET = Cmdlet(
@@ -23,13 +22,19 @@ CMDLET = Cmdlet(
usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
],
detail=[
"- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
"- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4",
"- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>...')",
"- Mode 2b: Use -king and -alt to select both sides from the last table: add-relationship -king @1 -alt @3-5",
"- Mode 3: Read relationships from sidecar tags:",
" - New format: 'relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>' (first hash is king)",
" - Legacy: 'relationship: hash(king)<HASH>,hash(alt)<HASH>...'",
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
"- When using -king, all piped items become the specified relationship type to the king",
],
@@ -47,39 +52,236 @@ def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
"""Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.
"""Parse relationship tags.
Supported formats:
- New: relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>
- Old: relationship: hash(king)<HASH>,hash(alt)<HASH>...
Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
"""
result: Dict[str, list[str]] = {}
if not isinstance(tag_value, str):
return result
# Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
# Match patterns like hash(king)HASH or hash(type)<HASH>
pattern = r'hash\((\w+)\)<?([a-fA-F0-9]{64})>?'
matches = re.findall(pattern, tag_value)
for rel_type, hash_value in matches:
normalized = _normalise_hash_hex(hash_value)
if normalized:
if rel_type not in result:
result[rel_type] = []
result[rel_type].append(normalized)
if matches:
for rel_type, hash_value in matches:
normalized = _normalise_hash_hex(hash_value)
if normalized:
if rel_type not in result:
result[rel_type] = []
result[rel_type].append(normalized)
return result
# New format: extract hashes, first is king
hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", tag_value)
hashes = [h.strip().lower() for h in hashes if isinstance(h, str)]
if not hashes:
return result
king = _normalise_hash_hex(hashes[0])
if not king:
return result
result["king"] = [king]
alts: list[str] = []
for h in hashes[1:]:
normalized = _normalise_hash_hex(h)
if normalized and normalized != king:
alts.append(normalized)
if alts:
result["alt"] = alts
return result
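# Editor's worked example (sketch, not part of the diff): both tag formats normalise to
# the same dict shape; placeholder 64-hex hashes.
K, A = "a" * 64, "b" * 64
assert _extract_relationships_from_tag(f"relationship: {K},{A}") == {"king": [K], "alt": [A]}
assert _extract_relationships_from_tag(f"relationship: hash(king){K},hash(alt){A}") == {"king": [K], "alt": [A]}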
def _apply_relationships_from_tags(
relationship_tags: Sequence[str],
*,
hydrus_client: Any,
use_local_storage: bool,
local_storage_path: Optional[Path],
config: Dict[str, Any],
) -> int:
"""Persist relationship tags into Hydrus or local DB.
Local DB semantics:
- Treat the first hash (king) as the king.
- Store directional alt -> king relationships (no reverse edge).
"""
rel_tags = [t for t in relationship_tags if isinstance(t, str) and t.strip().lower().startswith("relationship:")]
if not rel_tags:
return 0
# Prefer Hydrus if available (hash-based relationships map naturally).
if hydrus_client is not None and hasattr(hydrus_client, "set_relationship"):
processed: set[tuple[str, str, str]] = set()
for tag in rel_tags:
rels = _extract_relationships_from_tag(tag)
king = (rels.get("king") or [None])[0]
if not king:
continue
king_norm = _normalise_hash_hex(king)
if not king_norm:
continue
for rel_type in ("alt", "related"):
for other in rels.get(rel_type, []) or []:
other_norm = _normalise_hash_hex(other)
if not other_norm or other_norm == king_norm:
continue
key = (other_norm, king_norm, rel_type)
if key in processed:
continue
try:
hydrus_client.set_relationship(other_norm, king_norm, rel_type)
processed.add(key)
except Exception:
pass
return 0
# Local DB fallback (store/hash-first)
if use_local_storage and local_storage_path is not None:
try:
with API_folder_store(local_storage_path) as db:
processed_pairs: set[tuple[str, str]] = set()
for tag in rel_tags:
rels = _extract_relationships_from_tag(tag)
king = (rels.get("king") or [None])[0]
if not king:
continue
king_norm = _normalise_hash_hex(king)
if not king_norm:
continue
# For local DB we treat all non-king hashes as alts.
alt_hashes: list[str] = []
for bucket in ("alt", "related"):
alt_hashes.extend([h for h in (rels.get(bucket) or []) if isinstance(h, str)])
for alt in alt_hashes:
alt_norm = _normalise_hash_hex(alt)
if not alt_norm or alt_norm == king_norm:
continue
if (alt_norm, king_norm) in processed_pairs:
continue
db.set_relationship_by_hash(alt_norm, king_norm, "alt", bidirectional=False)
processed_pairs.add((alt_norm, king_norm))
except Exception:
return 1
return 0
return 0
def _parse_at_selection(token: str) -> Optional[list[int]]:
"""Parse standard @ selection syntax into a list of 0-based indices.
Supports: @2, @2-5, @{1,3,5}, @3,5,7, @3-6,8, @*
"""
if not isinstance(token, str):
return None
t = token.strip()
if not t.startswith('@'):
return None
if t == "@*":
return [] # special sentinel: caller interprets as "all"
selector = t[1:].strip()
if not selector:
return None
if selector.startswith("{") and selector.endswith("}"):
selector = selector[1:-1].strip()
parts = [p.strip() for p in selector.split(",") if p.strip()]
if not parts:
return None
indices_1based: set[int] = set()
for part in parts:
try:
if "-" in part:
start_s, end_s = part.split("-", 1)
start = int(start_s.strip())
end = int(end_s.strip())
if start <= 0 or end <= 0 or start > end:
return None
for i in range(start, end + 1):
indices_1based.add(i)
else:
num = int(part)
if num <= 0:
return None
indices_1based.add(num)
except Exception:
return None
return sorted(i - 1 for i in indices_1based)
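# Editor's worked example (sketch, not part of the diff): the @ selector grammar above
# maps 1-based table rows to 0-based indices; "@*" is the "all rows" sentinel.
assert _parse_at_selection("@3-6,8") == [2, 3, 4, 5, 7]
assert _parse_at_selection("@{1,3,5}") == [0, 2, 4]
assert _parse_at_selection("@*") == []    # caller expands this to every item
assert _parse_at_selection("@0") is None  # rows are 1-based, so 0 is invalid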
def _resolve_items_from_at(token: str) -> Optional[list[Any]]:
"""Resolve @ selection token into actual items from the current result context."""
items = ctx.get_last_result_items()
if not items:
return None
parsed = _parse_at_selection(token)
if parsed is None:
return None
if token.strip() == "@*":
return list(items)
selected: list[Any] = []
for idx in parsed:
if 0 <= idx < len(items):
selected.append(items[idx])
return selected
def _extract_hash_and_store(item: Any) -> tuple[Optional[str], Optional[str]]:
"""Extract (hash_hex, store) from a result item (dict/object)."""
try:
h = get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash")
s = get_field(item, "store")
hash_norm = _normalise_hash_hex(str(h) if h is not None else None)
store_norm: Optional[str]
if s is None:
store_norm = None
else:
store_norm = str(s).strip()
if not store_norm:
store_norm = None
return hash_norm, store_norm
except Exception:
return None, None
def _hydrus_hash_exists(hydrus_client: Any, hash_hex: str) -> bool:
"""Best-effort check whether a hash exists in the connected Hydrus backend."""
try:
if hydrus_client is None or not hasattr(hydrus_client, "fetch_file_metadata"):
return False
payload = hydrus_client.fetch_file_metadata(
hashes=[hash_hex],
include_service_keys_to_tags=False,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
include_notes=False,
)
meta = payload.get("metadata") if isinstance(payload, dict) else None
return bool(isinstance(meta, list) and meta)
except Exception:
return False
def _resolve_king_reference(king_arg: str) -> Optional[str]:
"""Resolve a king reference like '@4' to its actual hash or path.
Supports:
- Direct hash: '0123456789abcdef...' (64 chars)
- Selection reference: '@4' (resolves from pipeline context)
Returns:
- For Hydrus items: normalized hash
- For local storage items: file path
- None if not found
"""Resolve a king reference like '@4' to its actual hash.
Store/hash mode intentionally avoids file-path dependency.
"""
if not king_arg:
return None
@@ -89,53 +291,30 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
if normalized:
return normalized
# Try to resolve as @N selection from pipeline context
# Try to resolve as @ selection from pipeline context
if king_arg.startswith('@'):
try:
# Get the result items from the pipeline context
from pipeline import get_last_result_items
items = get_last_result_items()
if not items:
log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr)
return None
# Parse @N to get the index (1-based)
index_str = king_arg[1:] # Remove '@'
index = int(index_str) - 1 # Convert to 0-based
if 0 <= index < len(items):
item = items[index]
# Try to extract hash from the item (could be dict or object)
item_hash = (
get_field(item, 'hash_hex')
or get_field(item, 'hash')
or get_field(item, 'file_hash')
)
if item_hash:
normalized = _normalise_hash_hex(item_hash)
if normalized:
return normalized
# If no hash, try to get file path (for local storage)
file_path = (
get_field(item, 'file_path')
or get_field(item, 'path')
or get_field(item, 'target')
)
if file_path:
return str(file_path)
log(f"Item {king_arg} has no hash or path information", file=sys.stderr)
return None
else:
log(f"Index {king_arg} out of range", file=sys.stderr)
return None
except (ValueError, IndexError) as e:
log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr)
selected = _resolve_items_from_at(king_arg)
if not selected:
log(f"Cannot resolve {king_arg}: no selection context", file=sys.stderr)
return None
if len(selected) != 1:
log(f"{king_arg} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
return None
item = selected[0]
item_hash = (
get_field(item, 'hash_hex')
or get_field(item, 'hash')
or get_field(item, 'file_hash')
)
if item_hash:
normalized = _normalise_hash_hex(str(item_hash))
if normalized:
return normalized
log(f"Item {king_arg} has no hash information", file=sys.stderr)
return None
return None
@@ -208,7 +387,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
override_store = parsed.get("store")
override_hash = parsed.get("hash")
king_arg = parsed.get("king")
alt_arg = parsed.get("alt")
rel_type = parsed.get("type", "alt")
raw_path = parsed.get("path")
@@ -221,6 +403,45 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Handle @N selection which creates a list
# Use normalize_result_input to handle both single items and lists
items_to_process = normalize_result_input(result)
# Allow selecting alt items directly from the last table via -alt @...
# This enables: add-relationship -king @1 -alt @3-5
if alt_arg:
alt_text = str(alt_arg).strip()
resolved_alt_items: list[Any] = []
if alt_text.startswith('@'):
selected = _resolve_items_from_at(alt_text)
if not selected:
log(f"Failed to resolve -alt {alt_text}: no selection context", file=sys.stderr)
return 1
resolved_alt_items = selected
else:
# Treat as comma/semicolon-separated list of hashes
parts = [p.strip() for p in alt_text.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -alt value (expected @ selection or 64-hex sha256 hash list)", file=sys.stderr)
return 1
if not override_store:
log("-store is required when using -alt with a raw hash list", file=sys.stderr)
return 1
resolved_alt_items = [{"hash": h, "store": str(override_store)} for h in hashes]
items_to_process = normalize_result_input(resolved_alt_items)
# Allow explicit -hash operation (store/hash-first)
if (not items_to_process) and override_hash:
# Support comma-separated hashes
raw = str(override_hash)
parts = [p.strip() for p in raw.replace(";", ",").split(",")]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr)
return 1
# Use the selected/override store; required in this mode
if not override_store:
log("-store is required when using -hash without piped items", file=sys.stderr)
return 1
items_to_process = [{"hash": h, "store": str(override_store)} for h in hashes]
if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
@@ -230,42 +451,242 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not items_to_process and arg_path:
items_to_process = [{"file_path": arg_path}]
# Import local storage utilities
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
local_storage_path = get_local_storage_path(config) if config else None
# Check if any items have Hydrus hashes (file_hash or hash_hex fields)
has_hydrus_hashes = any(
(isinstance(item, dict) and (item.get('hash_hex') or item.get('hash')))
or (hasattr(item, 'hash_hex') or hasattr(item, 'hash'))
for item in items_to_process
)
# Only try to initialize Hydrus if we actually have Hydrus hashes to work with
# Resolve the king reference once (if provided)
king_hash: Optional[str] = None
king_store: Optional[str] = None
if king_arg:
king_text = str(king_arg).strip()
if king_text.startswith('@'):
selected = _resolve_items_from_at(king_text)
if not selected:
log(f"Cannot resolve {king_text}: no selection context", file=sys.stderr)
return 1
if len(selected) != 1:
log(f"{king_text} selects {len(selected)} items; -king requires exactly 1", file=sys.stderr)
return 1
king_hash, king_store = _extract_hash_and_store(selected[0])
if not king_hash:
log(f"Item {king_text} has no hash information", file=sys.stderr)
return 1
else:
king_hash = _resolve_king_reference(king_text)
if not king_hash:
log(f"Failed to resolve king argument: {king_text}", file=sys.stderr)
return 1
# Decide target store: override_store > (king store + piped item stores) (must be consistent)
store_name: Optional[str] = str(override_store).strip() if override_store else None
if not store_name:
stores = set()
if king_store:
stores.add(str(king_store))
for item in items_to_process:
s = get_field(item, "store")
if s:
stores.add(str(s))
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected (king/alt across stores); use -store and ensure all selections are from the same store", file=sys.stderr)
return 1
# Enforce same-store relationships when store context is available.
if king_store and store_name and str(king_store) != str(store_name):
log(f"Cross-store relationship blocked: king is in store '{king_store}' but -store is '{store_name}'", file=sys.stderr)
return 1
if store_name:
for item in items_to_process:
s = get_field(item, "store")
if s and str(s) != str(store_name):
log(f"Cross-store relationship blocked: alt item store '{s}' != '{store_name}'", file=sys.stderr)
return 1
# Resolve backend for store/hash operations
backend = None
is_folder_store = False
store_root: Optional[Path] = None
if store_name:
try:
store = Store(config)
backend = store[str(store_name)]
loc = getattr(backend, "location", None)
if callable(loc):
is_folder_store = True
store_root = Path(str(loc()))
except Exception:
backend = None
is_folder_store = False
store_root = None
# Select Hydrus client:
# - If a store is specified and maps to a HydrusNetwork backend, use that backend's client.
# - If no store is specified, use the default Hydrus client.
# NOTE: When a store is specified, we do not fall back to a global/default Hydrus client.
hydrus_client = None
if has_hydrus_hashes:
if store_name and (not is_folder_store) and backend is not None:
try:
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "set_relationship"):
hydrus_client = candidate
except Exception:
hydrus_client = None
elif not store_name:
try:
hydrus_client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr)
# Use local storage if it's available and either Hydrus is not available or items are local files
use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists()))
except Exception:
hydrus_client = None
# Resolve the king reference once (if provided)
king_hash = None
if king_arg:
# Resolve the king reference (could be @4 or a direct hash)
king_hash = _resolve_king_reference(king_arg)
if not king_hash:
log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr)
return 1
# Sidecar/tag import fallback DB root (legacy): if a folder store is selected, use it;
# otherwise fall back to configured local storage path.
from config import get_local_storage_path
local_storage_root: Optional[Path] = None
if store_root is not None:
local_storage_root = store_root
else:
try:
p = get_local_storage_path(config) if config else None
local_storage_root = Path(p) if p else None
except Exception:
local_storage_root = None
use_local_storage = local_storage_root is not None
if king_hash:
log(f"Using king hash: {king_hash}", file=sys.stderr)
# Process each item in the list
for item_idx, item in enumerate(items_to_process):
# If -path is provided, try reading relationship tags from its sidecar and persisting them.
if arg_path is not None and arg_path.exists() and arg_path.is_file():
try:
sidecar_path = find_sidecar(arg_path)
if sidecar_path is not None and sidecar_path.exists():
_, tags, _ = read_sidecar(sidecar_path)
relationship_tags = [t for t in (tags or []) if isinstance(t, str) and t.lower().startswith("relationship:")]
if relationship_tags:
code = _apply_relationships_from_tags(
relationship_tags,
hydrus_client=hydrus_client,
use_local_storage=use_local_storage,
local_storage_path=local_storage_root,
config=config,
)
return 0 if code == 0 else 1
except Exception:
pass
# If piped items include relationship tags, persist them (one pass) then exit.
try:
rel_tags_from_pipe: list[str] = []
for item in items_to_process:
tags_val = None
if isinstance(item, dict):
tags_val = item.get("tag") or item.get("tags")
else:
tags_val = getattr(item, "tag", None)
if isinstance(tags_val, list):
rel_tags_from_pipe.extend([t for t in tags_val if isinstance(t, str) and t.lower().startswith("relationship:")])
elif isinstance(tags_val, str) and tags_val.lower().startswith("relationship:"):
rel_tags_from_pipe.append(tags_val)
if rel_tags_from_pipe:
code = _apply_relationships_from_tags(
rel_tags_from_pipe,
hydrus_client=hydrus_client,
use_local_storage=use_local_storage,
local_storage_path=local_storage_root,
config=config,
)
return 0 if code == 0 else 1
except Exception:
pass
# STORE/HASH MODE (preferred): use -store and hashes; do not require file paths.
if store_name and is_folder_store and store_root is not None:
try:
with API_folder_store(store_root) as db:
# Mode 1: no explicit king -> first is king, rest are alts
if not king_hash:
first_hash = None
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
return 1
if not h:
continue
if not first_hash:
first_hash = h
continue
# directional alt -> king by default for local DB
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, first_hash, str(rel_type), bidirectional=bidirectional)
return 0
# Mode 2: explicit king
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
return 1
if not h or h == king_hash:
continue
bidirectional = str(rel_type).lower() != "alt"
db.set_relationship_by_hash(h, king_hash, str(rel_type), bidirectional=bidirectional)
return 0
except Exception as exc:
log(f"Failed to set store relationships: {exc}", file=sys.stderr)
return 1
if store_name and (not is_folder_store):
# Hydrus store/hash mode
if hydrus_client is None:
log("Hydrus client unavailable for this store", file=sys.stderr)
return 1
# Verify hashes exist in this Hydrus backend to prevent cross-store edges.
if king_hash and (not _hydrus_hash_exists(hydrus_client, king_hash)):
log(f"Cross-store relationship blocked: king hash not found in store '{store_name}'", file=sys.stderr)
return 1
# Mode 1: first is king
if not king_hash:
first_hash = None
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
return 1
if not h:
continue
if not first_hash:
first_hash = h
if not _hydrus_hash_exists(hydrus_client, first_hash):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
return 1
continue
if h != first_hash:
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
return 1
hydrus_client.set_relationship(h, first_hash, str(rel_type))
return 0
# Mode 2: explicit king
for item in items_to_process:
h, item_store = _extract_hash_and_store(item)
if item_store and store_name and str(item_store) != str(store_name):
log(f"Cross-store relationship blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
return 1
if not h or h == king_hash:
continue
if not _hydrus_hash_exists(hydrus_client, h):
log(f"Cross-store relationship blocked: hash not found in store '{store_name}'", file=sys.stderr)
return 1
hydrus_client.set_relationship(h, king_hash, str(rel_type))
return 0
# Process each item in the list (legacy path-based mode)
for item in items_to_process:
# Extract hash and path from current item
file_hash = None
file_path_from_result = None
@@ -277,9 +698,83 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)
# Legacy LOCAL STORAGE MODE: Handle relationships for local files
# (kept for -path sidecar workflows; store/hash mode above is preferred)
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
local_storage_path = get_local_storage_path(config) if config else None
use_local_storage = bool(local_storage_path)
local_storage_root: Optional[Path] = None
if local_storage_path:
try:
local_storage_root = Path(local_storage_path)
except Exception:
local_storage_root = None
if use_local_storage and file_path_from_result:
try:
file_path_obj = Path(str(file_path_from_result))
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
if not file_path_obj.exists():
# Not a local file; fall through to Hydrus if possible.
file_path_obj = None
if file_path_obj is not None:
try:
if local_storage_root is None:
log("Local storage path unavailable", file=sys.stderr)
return 1
with LocalLibrarySearchOptimizer(local_storage_root) as opt:
if opt.db is None:
log("Local storage DB unavailable", file=sys.stderr)
return 1
if king_hash:
normalized_king = _normalise_hash_hex(str(king_hash))
if not normalized_king:
log(f"King hash invalid: {king_hash}", file=sys.stderr)
return 1
king_file_path = opt.db.search_hash(normalized_king)
if not king_file_path:
log(f"King hash not found in local DB: {king_hash}", file=sys.stderr)
return 1
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, king_file_path, rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
else:
# Original behavior: first becomes king, rest become alts
try:
king_path = ctx.load_value("relationship_king_path")
except Exception:
king_path = None
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
continue
except Exception:
pass
if king_path and king_path != str(file_path_obj):
bidirectional = str(rel_type).lower() != "alt"
opt.db.set_relationship(file_path_obj, Path(king_path), rel_type, bidirectional=bidirectional)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
continue
# PIPELINE MODE with Hydrus: Track relationships using hash
if file_hash and hydrus_client:
file_hash = _normalise_hash_hex(file_hash)
file_hash = _normalise_hash_hex(str(file_hash) if file_hash is not None else None)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
@@ -292,7 +787,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
_refresh_relationship_view_if_current(file_hash, file_path_from_result, king_hash, config)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, king_hash, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
@@ -320,58 +815,12 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
)
_refresh_relationship_view_if_current(file_hash, file_path_from_result, existing_king, config)
_refresh_relationship_view_if_current(file_hash, str(file_path_from_result) if file_path_from_result is not None else None, existing_king, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
# LOCAL STORAGE MODE: Handle relationships for local files
elif use_local_storage and file_path_from_result:
try:
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
if king_hash:
# king_hash is a file path from _resolve_king_reference (or a Hydrus hash)
king_file_path = Path(str(king_hash)) if king_hash else None
if king_file_path and king_file_path.exists():
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, king_file_path, rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
else:
log(f"King file not found or invalid: {king_hash}", file=sys.stderr)
return 1
else:
# Original behavior: first becomes king, rest become alts
try:
king_path = ctx.load_value("relationship_king_path")
except Exception:
king_path = None
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
continue # Move to next item
except Exception:
pass
if king_path and king_path != str(file_path_obj):
try:
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, Path(king_path), rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
# If we get here, we didn't have a usable local path and Hydrus isn't available/usable.
return 0
@@ -395,12 +844,12 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Build Hydrus client
try:
client = hydrus_wrapper.get_client(config)
hydrus_client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
if hydrus_client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
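# Editor's sketch (not part of the diff): the cross-store guard repeated through the
# store/hash mode above boils down to "every item that declares a store must match the
# target store"; items without store context pass through. The helper name is hypothetical.
def same_store(target_store, item_stores):
    return all(not s or str(s) == str(target_store) for s in item_stores)

assert same_store("local", ["local", None, "local"]) is True
assert same_store("local", ["local", "hydrus-main"]) is False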

View File

@@ -272,8 +272,16 @@ class Add_Tag(Cmdlet):
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Check for --all flag
include_temp = parsed.get("all", False)
# If add-tag is in the middle of a pipeline (has downstream stages), default to
# including temp files. This enables common flows like:
# @N | download-media | add-tag ... | add-file ...
store_override = parsed.get("store")
stage_ctx = ctx.get_stage_context()
has_downstream = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
include_temp = bool(parsed.get("all", False))
if has_downstream and not include_temp and not store_override:
include_temp = True
# Normalize input to list
results = normalize_result_input(result)
@@ -352,8 +360,6 @@ class Add_Tag(Cmdlet):
total_added = 0
total_modified = 0
store_override = parsed.get("store")
store_registry = Store(config)
for res in results:
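# Editor's sketch (not part of the diff): the new include_temp default in compact form.
# Temp files are included automatically only when add-tag has downstream stages and no
# -store override; an explicit --all always wins. The helper name is hypothetical.
def default_include_temp(all_flag, has_downstream, store_override):
    return bool(all_flag) or (has_downstream and not store_override)

assert default_include_temp(False, True, None) is True      # mid-pipeline: include temp
assert default_include_temp(False, True, "local") is False  # -store given: keep default
assert default_include_temp(False, False, None) is False    # last stage: only --all includes temp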

View File

@@ -8,14 +8,16 @@ from SYS.logger import log
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
from Store import Store
CMDLET = Cmdlet(
name="check-file-status",
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
usage="check-file-status [-hash <sha256>]",
usage="check-file-status [-hash <sha256>] [-store <name>]",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
],
detail=[
"- Shows whether file is active in Hydrus or marked as deleted",
@@ -34,6 +36,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Parse arguments
override_hash: str | None = None
override_store: str | None = None
i = 0
while i < len(args):
token = args[i]
@@ -42,24 +45,57 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
override_hash = str(args[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args):
override_store = str(args[i + 1]).strip()
i += 2
continue
i += 1
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
store_name: str | None = override_store
if not store_name:
if isinstance(result, dict):
store_name = str(result.get("store") or "").strip() or None
else:
store_name = str(getattr(result, "store", "") or "").strip() or None
if override_hash:
hash_hex = normalize_hash(override_hash)
else:
if isinstance(result, dict):
hash_hex = normalize_hash(result.get("hash") or result.get("hash_hex"))
else:
hash_hex = normalize_hash(getattr(result, "hash", None) or getattr(result, "hash_hex", None))
if not hash_hex:
log("No hash provided and no result selected", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
client = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "fetch_file_metadata"):
client = candidate
except Exception:
client = None
if client is None:
log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return 1
else:
client = hydrus_wrapper.get_client(config)
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
result_data = client.fetch_file_metadata(hashes=[hash_hex])
if not result_data.get("metadata"):

View File

@@ -7,6 +7,7 @@ from pathlib import Path
from SYS.logger import debug, log
from Store.Folder import Folder
from Store import Store
from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help
from API import HydrusNetwork as hydrus_wrapper
import pipeline as ctx
@@ -119,30 +120,48 @@ class Delete_File(Cmdlet):
should_try_hydrus = False
if should_try_hydrus and hash_hex:
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return False
client = None
if store:
# Store specified: do not fall back to a global/default Hydrus client.
try:
registry = Store(config)
backend = registry[str(store)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "_post"):
client = candidate
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
return False
if client is None:
if not local_deleted:
log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
return False
else:
# No store context; use default Hydrus client.
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return False
if client is None:
if not local_deleted:
log("Hydrus client unavailable", file=sys.stderr)
return False
else:
payload: Dict[str, Any] = {"hashes": [hash_hex]}
if reason:
payload["reason"] = reason
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
debug(f"Deleted from Hydrus: {preview}", file=sys.stderr)
except Exception as exc:
# If it's not in Hydrus (e.g. 404 or similar), that's fine
if not local_deleted:
return False
payload: Dict[str, Any] = {"hashes": [hash_hex]}
if reason:
payload["reason"] = reason
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
debug(f"Deleted from Hydrus: {preview}", file=sys.stderr)
except Exception:
# If it's not in Hydrus (e.g. 404 or similar), that's fine
if not local_deleted:
return False
if hydrus_deleted and hash_hex:
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')

View File

@@ -10,11 +10,65 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help
from API.folder import LocalLibrarySearchOptimizer
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, normalize_hash, normalize_result_input, get_field, should_show_help
from API.folder import API_folder_store
from Store import Store
from config import get_local_storage_path
def _extract_hash(item: Any) -> Optional[str]:
h = get_field(item, "hash_hex") or get_field(item, "hash") or get_field(item, "file_hash")
return normalize_hash(str(h)) if h else None
def _upsert_relationships(db: API_folder_store, file_hash: str, relationships: Dict[str, Any]) -> None:
conn = db.connection
if conn is None:
raise RuntimeError("Store DB connection is not initialized")
cursor = conn.cursor()
cursor.execute(
"""
INSERT INTO metadata (hash, relationships)
VALUES (?, ?)
ON CONFLICT(hash) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
""",
(file_hash, json.dumps(relationships) if relationships else "{}"),
)
def _remove_reverse_link(db: API_folder_store, *, src_hash: str, dst_hash: str, rel_type: str) -> None:
meta = db.get_metadata(dst_hash) or {}
rels = meta.get("relationships") if isinstance(meta, dict) else None
if not isinstance(rels, dict) or not rels:
return
key_to_edit: Optional[str] = None
for k in list(rels.keys()):
if str(k).lower() == str(rel_type).lower():
key_to_edit = str(k)
break
if not key_to_edit:
return
bucket = rels.get(key_to_edit)
if not isinstance(bucket, list) or not bucket:
return
new_bucket = [h for h in bucket if str(h).lower() != str(src_hash).lower()]
if new_bucket:
rels[key_to_edit] = new_bucket
else:
try:
del rels[key_to_edit]
except Exception:
rels[key_to_edit] = []
_upsert_relationships(db, dst_hash, rels)
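For reference, the relationships column written by _upsert_relationships holds a JSON object keyed by relationship type, each value a list of sha256 hashes. A worked example of the reverse-edge pruning above (hashes are truncated placeholders):

# dst_hash metadata before: {"alt": ["aaaa…", "bbbb…"], "king": ["cccc…"]}
# _remove_reverse_link(db, src_hash="aaaa…", dst_hash=dst_hash, rel_type="alt")
# dst_hash metadata after:  {"alt": ["bbbb…"], "king": ["cccc…"]}
# If the bucket had held only "aaaa…", the "alt" key would be dropped entirely before the upsert.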
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
@@ -84,109 +138,223 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed_args = parse_cmdlet_args(args, CMDLET)
delete_all_flag = parsed_args.get("all", False)
rel_type_filter = parsed_args.get("type")
override_store = parsed_args.get("store")
override_hash = parsed_args.get("hash")
raw_path = parsed_args.get("path")
# Normalize input
results = normalize_result_input(result)
# Allow store/hash-first usage when no pipeline items were provided
if (not results) and override_hash:
raw = str(override_hash)
parts = [p.strip() for p in raw.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (normalize_hash(p) for p in parts) if h]
if not hashes:
log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr)
return 1
if not override_store:
log("-store is required when using -hash without piped items", file=sys.stderr)
return 1
results = [{"hash": h, "store": str(override_store)} for h in hashes]
if not results:
# Legacy -path mode below may still apply
if raw_path:
results = [{"file_path": raw_path}]
else:
log("No results to process", file=sys.stderr)
return 1
# Decide store (for same-store enforcement + folder-store DB routing)
store_name: Optional[str] = str(override_store).strip() if override_store else None
if not store_name:
stores = {str(get_field(r, "store")) for r in results if get_field(r, "store")}
if len(stores) == 1:
store_name = next(iter(stores))
elif len(stores) > 1:
log("Multiple stores detected in pipeline; use -store to choose one", file=sys.stderr)
return 1
deleted_count = 0
# STORE/HASH FIRST: folder-store DB deletion (preferred)
if store_name:
backend = None
store_root: Optional[Path] = None
try:
store = Store(config)
backend = store[str(store_name)]
loc = getattr(backend, "location", None)
if callable(loc):
store_root = Path(str(loc()))
except Exception:
backend = None
store_root = None
if store_root is not None:
try:
with API_folder_store(store_root) as db:
conn = db.connection
if conn is None:
raise RuntimeError("Store DB connection is not initialized")
for single_result in results:
# Enforce same-store when items carry store info
item_store = get_field(single_result, "store")
if item_store and str(item_store) != str(store_name):
log(f"Cross-store delete blocked: item store '{item_store}' != '{store_name}'", file=sys.stderr)
return 1
file_hash = _extract_hash(single_result)
if not file_hash:
# Try path -> hash lookup within this store
fp = (
get_field(single_result, "file_path")
or get_field(single_result, "path")
or get_field(single_result, "target")
)
if fp:
try:
file_hash = db.get_file_hash(Path(str(fp)))
except Exception:
file_hash = None
if not file_hash:
log("Could not extract file hash for deletion (use -hash or ensure pipeline includes hash)", file=sys.stderr)
return 1
meta = db.get_metadata(file_hash) or {}
rels = meta.get("relationships") if isinstance(meta, dict) else None
if not isinstance(rels, dict) or not rels:
continue
if delete_all_flag:
# remove reverse edges for all types
for rt, hashes in list(rels.items()):
if not isinstance(hashes, list):
continue
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
rels = {}
elif rel_type_filter:
# delete one type (case-insensitive key match)
key_to_delete: Optional[str] = None
for k in list(rels.keys()):
if str(k).lower() == str(rel_type_filter).lower():
key_to_delete = str(k)
break
if not key_to_delete:
continue
hashes = rels.get(key_to_delete)
if isinstance(hashes, list):
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
return 1
_upsert_relationships(db, file_hash, rels)
conn.commit()
_refresh_relationship_view_if_current(file_hash, None, None, config)
deleted_count += 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error deleting store relationships: {exc}", file=sys.stderr)
return 1
# LEGACY PATH MODE (single local DB)
# Get storage path
local_storage_path = get_local_storage_path(config)
if not local_storage_path:
log("Local storage path not configured", file=sys.stderr)
return 1
# Normalize input
results = normalize_result_input(result)
if not results:
log("No results to process", file=sys.stderr)
return 1
deleted_count = 0
for single_result in results:
try:
# Get file path from result
file_path_from_result = (
get_field(single_result, "file_path")
or get_field(single_result, "path")
or get_field(single_result, "target")
or (str(single_result) if not isinstance(single_result, dict) else None)
)
if not file_path_from_result:
log("Could not extract file path from result", file=sys.stderr)
return 1
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
with LocalLibrarySearchOptimizer(local_storage_path) as db:
file_id = db.db.get_file_id(file_path_obj)
if not file_id:
try:
with API_folder_store(Path(local_storage_path)) as db:
conn = db.connection
if conn is None:
raise RuntimeError("Store DB connection is not initialized")
for single_result in results:
# Get file path from result
file_path_from_result = (
get_field(single_result, "file_path")
or get_field(single_result, "path")
or get_field(single_result, "target")
or (str(single_result) if not isinstance(single_result, dict) else None)
)
if not file_path_from_result:
log("Could not extract file path from result", file=sys.stderr)
return 1
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
try:
file_hash = db.get_file_hash(file_path_obj)
except Exception:
file_hash = None
file_hash = normalize_hash(str(file_hash)) if file_hash else None
if not file_hash:
log(f"File not in database: {file_path_obj.name}", file=sys.stderr)
continue
# Get current relationships
cursor = db.db.connection.cursor()
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (file_id,))
row = cursor.fetchone()
if not row:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
meta = db.get_metadata(file_hash) or {}
rels = meta.get("relationships") if isinstance(meta, dict) else None
if not isinstance(rels, dict) or not rels:
continue
relationships_str = row[0]
if not relationships_str:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
try:
relationships = json.loads(relationships_str)
except json.JSONDecodeError:
log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr)
continue
if not isinstance(relationships, dict):
relationships = {}
# Determine what to delete
if delete_all_flag:
# Delete all relationships
deleted_types = list(relationships.keys())
relationships = {}
log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr)
for rt, hashes in list(rels.items()):
if not isinstance(hashes, list):
continue
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(rt))
rels = {}
elif rel_type_filter:
# Delete specific type
if rel_type_filter in relationships:
deleted_count_for_type = len(relationships[rel_type_filter])
del relationships[rel_type_filter]
log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr)
else:
log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr)
key_to_delete: Optional[str] = None
for k in list(rels.keys()):
if str(k).lower() == str(rel_type_filter).lower():
key_to_delete = str(k)
break
if not key_to_delete:
continue
hashes = rels.get(key_to_delete)
if isinstance(hashes, list):
for other_hash in hashes:
other_norm = normalize_hash(str(other_hash))
if other_norm:
_remove_reverse_link(db, src_hash=file_hash, dst_hash=other_norm, rel_type=str(key_to_delete))
try:
del rels[key_to_delete]
except Exception:
rels[key_to_delete] = []
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
return 1
# Save updated relationships
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (file_id, json.dumps(relationships) if relationships else None))
db.db.connection.commit()
_refresh_relationship_view_if_current(None, str(file_path_obj), None, config)
_upsert_relationships(db, file_hash, rels)
conn.commit()
_refresh_relationship_view_if_current(file_hash, str(file_path_obj), None, config)
deleted_count += 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0
@@ -201,7 +369,9 @@ CMDLET = Cmdlet(
summary="Remove relationships from files.",
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg("path", type="string", description="Specify the local file path (legacy mode, if not piping a result)."),
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
],

View File

@@ -215,6 +215,38 @@ class Download_File(Cmdlet):
if availability or reason:
msg += f" (availability={availability or ''} reason={reason or ''})"
log(msg, file=sys.stderr)
# Fallback: run a LibGen title search so the user can pick an alternative source.
try:
title_text = str(title or "").strip()
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
if title_text:
log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr)
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
# Use plain title text (LibGen mirrors can be finicky with fielded query prefixes).
fallback_query = title_text
ret = _SEARCH_PROVIDER_CMDLET.exec(
None,
["-provider", "libgen", "-query", fallback_query],
config,
)
# download-file is treated as an action command by the pipeline printer.
# Promote the search-provider table to a display overlay so it renders.
try:
table = pipeline_context.get_last_result_table()
items = pipeline_context.get_last_result_items()
if table is not None:
pipeline_context.set_last_result_table_overlay(table, items)
except Exception:
pass
return int(ret)
except Exception:
pass
continue
# Fallback: if we have a direct HTTP URL, download it directly

View File

@@ -23,6 +23,7 @@ import re
import string
import subprocess
import sys
import tempfile
import time
import traceback
from typing import Any, Dict, Iterator, List, Optional
@@ -274,17 +275,28 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]
if opts.clip_sections:
sections = []
for section_range in opts.clip_sections.split(','):
sections: List[str] = []
def _secs_to_hms(seconds: float) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
for section_range in str(opts.clip_sections).split(","):
section_range = section_range.strip()
if not section_range:
continue
try:
start_s, end_s = [int(x) for x in section_range.split('-')]
def _secs_to_hms(s: int) -> str:
minutes, seconds = divmod(s, 60)
hours, minutes = divmod(minutes, 60)
return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
start_s_raw, end_s_raw = section_range.split("-", 1)
start_s = float(start_s_raw.strip())
end_s = float(end_s_raw.strip())
if start_s >= end_s:
continue
sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}")
except (ValueError, AttributeError):
pass
continue
if sections:
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
@@ -1150,6 +1162,7 @@ class Download_Media(Cmdlet):
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
SharedArgs.PATH
],
detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
exec=self.run,
@@ -1215,11 +1228,11 @@ class Download_Media(Cmdlet):
mode = "audio" if parsed.get("audio") else "video"
# Parse clip range if specified
clip_range = None
# Parse clip range(s) if specified
clip_ranges: Optional[List[tuple[int, int]]] = None
if clip_spec:
clip_range = self._parse_time_range(clip_spec)
if not clip_range:
clip_ranges = self._parse_time_ranges(str(clip_spec))
if not clip_ranges:
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
return 1
@@ -1738,7 +1751,7 @@ class Download_Media(Cmdlet):
# Download each URL
downloaded_count = 0
clip_sections_spec = self._build_clip_sections_spec(clip_range)
clip_sections_spec = self._build_clip_sections_spec(clip_ranges)
for url in supported_url:
try:
@@ -1789,15 +1802,58 @@ class Download_Media(Cmdlet):
result_obj = _download_with_timeout(opts, timeout_seconds=300)
debug(f"Download completed, building pipe object...")
# Emit one PipeObject per downloaded file (playlists/albums return a list)
results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
debug(f"Emitting {len(results_to_emit)} result(s) to pipeline...")
# Expand result set:
# - playlists return a list
# - section clips return a single DownloadMediaResult with `paths` populated
results_to_emit: List[Any] = []
if isinstance(result_obj, list):
results_to_emit = list(result_obj)
else:
paths = getattr(result_obj, "paths", None)
if isinstance(paths, list) and paths:
# Create one DownloadMediaResult per section file
for p in paths:
try:
p_path = Path(p)
except Exception:
continue
if not p_path.exists() or p_path.is_dir():
continue
try:
hv = sha256_file(p_path)
except Exception:
hv = None
results_to_emit.append(
DownloadMediaResult(
path=p_path,
info=getattr(result_obj, "info", {}) or {},
tag=list(getattr(result_obj, "tag", []) or []),
source_url=getattr(result_obj, "source_url", None) or opts.url,
hash_value=hv,
)
)
else:
results_to_emit = [result_obj]
# Build PipeObjects first so we can attach cross-clip relationships.
pipe_objects: List[Dict[str, Any]] = []
for downloaded in results_to_emit:
pipe_objects.append(self._build_pipe_object(downloaded, url, opts))
# If this is a clip download, decorate titles/tags so the title: tag is clip-based.
# Relationship tags are only added when multiple clips exist.
try:
if clip_ranges and len(pipe_objects) == len(clip_ranges):
source_hash = self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
except Exception:
pass
debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...")
stage_ctx = pipeline_context.get_stage_context()
emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
for downloaded in results_to_emit:
pipe_obj_dict = self._build_pipe_object(downloaded, url, opts)
for pipe_obj_dict in pipe_objects:
# Only emit when there is a downstream stage.
# This keeps `download-media` from producing a result table when run standalone.
if emit_enabled:
@@ -1808,7 +1864,7 @@ class Download_Media(Cmdlet):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
downloaded_count += len(results_to_emit)
downloaded_count += len(pipe_objects)
debug("✓ Downloaded and emitted")
except DownloadError as e:
@@ -1828,62 +1884,120 @@ class Download_Media(Cmdlet):
return 1
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
"""Resolve the output directory from storage location or config."""
storage_location = parsed.get("storage")
"""Resolve the output directory.
# Priority 1: --storage flag
if storage_location:
Rules:
- If user passes `-path`, use that directory (override).
- Otherwise default to a temp directory (config["temp"] if present, else OS temp).
"""
# Priority 1: explicit output directory override
path_override = parsed.get("path")
if path_override:
try:
return SharedArgs.resolve_storage(storage_location)
candidate = Path(str(path_override)).expanduser()
# If user passed a file path, treat its parent as output dir.
if candidate.suffix:
candidate = candidate.parent
candidate.mkdir(parents=True, exist_ok=True)
debug(f"Using output directory override: {candidate}")
return candidate
except Exception as e:
log(f"Invalid storage location: {e}", file=sys.stderr)
log(f"Invalid -path output directory: {e}", file=sys.stderr)
return None
# Priority 2: Config default output/temp directory
# Priority 2: config-provided temp/output directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
temp_value = (config or {}).get("temp") if isinstance(config, dict) else None
except Exception:
final_output_dir = Path.home() / "Videos"
debug(f"Using default directory: {final_output_dir}")
temp_value = None
if temp_value:
try:
candidate = Path(str(temp_value)).expanduser()
candidate.mkdir(parents=True, exist_ok=True)
debug(f"Using config temp directory: {candidate}")
return candidate
except Exception as e:
log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr)
return None
# Priority 3: OS temp fallback
try:
final_output_dir.mkdir(parents=True, exist_ok=True)
candidate = Path(tempfile.gettempdir()) / "Medios-Macina"
candidate.mkdir(parents=True, exist_ok=True)
debug(f"Using OS temp directory: {candidate}")
return candidate
except Exception as e:
log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
log(f"Cannot create OS temp directory: {e}", file=sys.stderr)
return None
return final_output_dir
def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]:
"""Parse clip specs into a list of (start_seconds, end_seconds).
def _parse_time_range(self, spec: str) -> Optional[tuple]:
"""Parse 'MM:SS-MM:SS' format into (start_seconds, end_seconds)."""
try:
parts = spec.split("-")
if len(parts) != 2:
Supported inputs:
- "MM:SS-MM:SS"
- "HH:MM:SS-HH:MM:SS"
- seconds: "280-300"
- multiple ranges separated by commas: "4:40-5:00,5:15-5:25"
"""
def _to_seconds(ts: str) -> Optional[int]:
ts = str(ts).strip()
if not ts:
return None
def to_seconds(ts: str) -> int:
ts = ts.strip()
if ":" in ts:
mm, ss = ts.split(":")
return int(mm) * 60 + int(ss)
return int(ts)
start = to_seconds(parts[0])
end = to_seconds(parts[1])
return (start, end) if start < end else None
except Exception:
return None
if ":" in ts:
parts = [p.strip() for p in ts.split(":")]
if len(parts) == 2:
hh_s = "0"
mm_s, ss_s = parts
elif len(parts) == 3:
hh_s, mm_s, ss_s = parts
else:
return None
try:
hours = int(hh_s)
minutes = int(mm_s)
seconds = float(ss_s)
total = (hours * 3600) + (minutes * 60) + seconds
return int(total)
except Exception:
return None
try:
return int(float(ts))
except Exception:
return None
ranges: List[tuple[int, int]] = []
if not spec:
return ranges
for piece in str(spec).split(","):
piece = piece.strip()
if not piece:
continue
if "-" not in piece:
return []
start_s, end_s = [p.strip() for p in piece.split("-", 1)]
start = _to_seconds(start_s)
end = _to_seconds(end_s)
if start is None or end is None or start >= end:
return []
ranges.append((start, end))
return ranges
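A quick sanity check of the parser above against the documented inputs (return values are what this implementation produces):

# self._parse_time_ranges("4:40-5:00,5:15-5:25")  -> [(280, 300), (315, 325)]
# self._parse_time_ranges("01:02:03-01:02:30")    -> [(3723, 3750)]
# self._parse_time_ranges("280-300")              -> [(280, 300)]
# self._parse_time_ranges("5:00-4:40")            -> []  (start >= end invalidates the whole spec)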
def _build_clip_sections_spec(
self,
clip_range: Optional[tuple],
clip_ranges: Optional[List[tuple[int, int]]],
) -> Optional[str]:
"""Convert parsed clip range into downloader spec (seconds)."""
ranges: List[str] = []
if clip_range:
ranges.append(f"{clip_range[0]}-{clip_range[1]}")
if clip_ranges:
for start_s, end_s in clip_ranges:
ranges.append(f"{start_s}-{end_s}")
return ",".join(ranges) if ranges else None
def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
@@ -1926,6 +2040,159 @@ class Download_Media(Cmdlet):
"media_kind": "video" if opts.mode == "video" else "audio",
}
@staticmethod
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
if not value or not isinstance(value, str):
return None
candidate = value.strip().lower()
if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate):
return candidate
return None
@classmethod
def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]:
if not isinstance(hit, dict):
return None
for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"):
v = hit.get(key)
normalized = cls._normalise_hash_hex(str(v) if v is not None else None)
if normalized:
return normalized
return None
@classmethod
def _find_existing_hash_for_url(
cls,
storage: Any,
canonical_url: str,
*,
hydrus_available: bool,
) -> Optional[str]:
"""Best-effort lookup of an existing stored item hash by url:<canonical_url>.
Used to make the stored source video the king for multi-clip relationships.
"""
if storage is None or not canonical_url:
return None
try:
from Store.HydrusNetwork import HydrusNetwork
except Exception:
HydrusNetwork = None # type: ignore
try:
backend_names = list(storage.list_searchable_backends() or [])
except Exception:
backend_names = []
for backend_name in backend_names:
try:
backend = storage[backend_name]
except Exception:
continue
try:
if str(backend_name).strip().lower() == "temp":
continue
except Exception:
pass
try:
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available:
continue
except Exception:
pass
try:
hits = backend.search(f"url:{canonical_url}", limit=5) or []
except Exception:
hits = []
for hit in hits:
extracted = cls._extract_hash_from_search_hit(hit)
if extracted:
return extracted
return None
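The hit-hash extraction used here accepts several key spellings; an illustrative search hit (fields other than the hash key are incidental):

# backend.search("url:https://example.com/watch?v=abc123", limit=5) might yield hits like:
# {"hash": "<64-hex sha256>", "title": "…", "store": "hydrus"}
# any of "hash", "hash_hex", "file_hash", "hydrus_hash" is accepted and normalised to lowercase 64-hex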
@staticmethod
def _format_timecode(seconds: int, *, force_hours: bool) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
if force_hours:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
@classmethod
def _format_clip_range(cls, start_s: int, end_s: int) -> str:
force_hours = bool(start_s >= 3600 or end_s >= 3600)
return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}"
@classmethod
def _apply_clip_decorations(
cls,
pipe_objects: List[Dict[str, Any]],
clip_ranges: List[tuple[int, int]],
*,
source_king_hash: Optional[str],
) -> None:
"""Apply clip:{range} tags/titles and relationship metadata for multi-clip downloads.
- Sets the clip title (and title: tag) to exactly `clip:{range}`.
- Adds `clip:{range}` tag.
- Sets `relationships` on each emitted item (king hash first, then alt hashes)
so downstream can persist relationships into a DB/API without storing relationship tags.
"""
if not pipe_objects or len(pipe_objects) != len(clip_ranges):
return
# Always apply clip titles/tags (even for a single clip).
for po, (start_s, end_s) in zip(pipe_objects, clip_ranges):
clip_range = cls._format_clip_range(start_s, end_s)
clip_tag = f"clip:{clip_range}"
# Title: make it generic/consistent for clips.
po["title"] = clip_tag
tags = po.get("tag")
if not isinstance(tags, list):
tags = []
# Replace any existing title: tags with title:<clip_tag>
tags = [t for t in tags if not str(t).strip().lower().startswith("title:")]
# Relationships must not be stored as tags.
tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")]
tags.insert(0, f"title:{clip_tag}")
# Ensure clip tag exists
if clip_tag not in tags:
tags.append(clip_tag)
po["tag"] = tags
# Relationship tagging only makes sense when multiple clips exist.
if len(pipe_objects) < 2:
return
hashes: List[str] = []
for po in pipe_objects:
h = cls._normalise_hash_hex(str(po.get("hash") or ""))
hashes.append(h or "")
# Determine king: prefer an existing source video hash if present; else first clip becomes king.
king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None
if not king_hash:
king_hash = hashes[0] if hashes and hashes[0] else None
if not king_hash:
return
alt_hashes: List[str] = [h for h in hashes if h and h != king_hash]
if not alt_hashes:
return
# Carry relationship metadata through the pipeline without using tags.
rel_payload = {"king": [king_hash], "alt": list(alt_hashes)}
for po in pipe_objects:
po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)}
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib

View File

@@ -43,18 +43,105 @@ class Download_Torrent(Cmdlet):
output_dir = Path(parsed.get("output") or Path.home() / "Downloads")
wait_timeout = int(float(parsed.get("wait", 600)))
background_mode = parsed.get("background", False)
api_key = config.get("alldebrid_api_key")
api_key = None
try:
from Provider.alldebrid import _get_debrid_api_key # type: ignore
api_key = _get_debrid_api_key(config)
except Exception:
api_key = None
if not api_key:
log("AllDebrid API key not configured", file=sys.stderr)
log("AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)", file=sys.stderr)
return 1
for magnet_url in magnet_args:
if background_mode:
self._start_background_worker(magnet_url, output_dir, config, api_key, wait_timeout)
log(f"⧗ Torrent download queued in background: {magnet_url}")
else:
self._download_torrent_worker(str(uuid.uuid4()), magnet_url, output_dir, config, api_key, wait_timeout)
# Foreground mode: submit quickly, then continue processing in background
# so we return control to the REPL immediately.
worker_id = str(uuid.uuid4())
magnet_id = self._submit_magnet(worker_id, magnet_url, api_key)
if magnet_id <= 0:
continue
self._start_background_magnet_worker(worker_id, magnet_id, output_dir, api_key, wait_timeout)
log(f"⧗ Torrent processing started (ID: {magnet_id})")
return 0
@staticmethod
def _submit_magnet(worker_id: str, magnet_url: str, api_key: str) -> int:
"""Submit a magnet and return its AllDebrid magnet ID.
This is intentionally fast so the caller can return to the REPL.
"""
try:
from API.alldebrid import AllDebridClient
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return 0
log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
return magnet_id
except Exception as e:
log(f"[Worker {worker_id}] Magnet submit failed: {e}", file=sys.stderr)
return 0
def _start_background_magnet_worker(self, worker_id: str, magnet_id: int, output_dir: Path, api_key: str, wait_timeout: int) -> None:
thread = threading.Thread(
target=self._download_magnet_worker,
args=(worker_id, magnet_id, output_dir, api_key, wait_timeout),
daemon=True,
name=f"TorrentWorker_{worker_id}",
)
thread.start()
@staticmethod
def _download_magnet_worker(
worker_id: str,
magnet_id: int,
output_dir: Path,
api_key: str,
wait_timeout: int = 600,
) -> None:
"""Poll AllDebrid magnet status until ready, then download the files."""
try:
from API.alldebrid import AllDebridClient
client = AllDebridClient(api_key)
# Poll for ready status (simplified)
import time
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
break
time.sleep(5)
elapsed += 5
if elapsed >= wait_timeout:
log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr)
return
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
if file_url and file_name:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
except Exception as e:
log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr)
@staticmethod
def _download_torrent_worker(
worker_id: str,
@@ -119,7 +206,7 @@ class Download_Torrent(Cmdlet):
thread = threading.Thread(
target=self._download_torrent_worker,
args=(worker_id, magnet_url, output_dir, config, api_key, wait_timeout),
daemon=False,
daemon=True,
name=f"TorrentWorker_{worker_id}",
)
thread.start()

View File

@@ -2,10 +2,12 @@ from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import os
import sys
import shutil
import subprocess
import webbrowser
from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log, debug
@@ -25,8 +27,8 @@ class Get_File(Cmdlet):
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("-path", description="Output directory path (default: from config)"),
CmdletArg("-name", description="Output filename (default: from metadata title)"),
SharedArgs.PATH,
CmdletArg("name", description="Output filename (default: from metadata title)"),
],
detail=[
"- Exports file from storage backend to local path",
@@ -79,32 +81,23 @@ class Get_File(Cmdlet):
log(f"Error: File metadata not found for hash {file_hash[:12]}...")
return 1
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
# Determine output filename
if output_name:
filename = output_name
else:
# Use title from metadata, sanitize it
title = metadata.get("title", "export")
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith('.'):
ext = '.' + ext
filename += ext
# Determine output directory
if output_path:
output_dir = Path(output_path).expanduser()
else:
output_dir = resolve_output_dir(config)
debug(f"[get-file] Output dir: {output_dir}")
# Create output directory
output_dir.mkdir(parents=True, exist_ok=True)
def resolve_display_title() -> str:
candidates = [
get_field(result, "title"),
get_field(result, "name"),
get_field(result, "filename"),
(metadata.get("title") if isinstance(metadata, dict) else None),
(metadata.get("name") if isinstance(metadata, dict) else None),
(metadata.get("filename") if isinstance(metadata, dict) else None),
]
for candidate in candidates:
if candidate is None:
continue
text = str(candidate).strip()
if text:
return text
return ""
debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)")
@@ -115,15 +108,20 @@ class Get_File(Cmdlet):
# Check if backend returned a URL (HydrusNetwork case)
if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")):
log(f"File opened in browser: {source_path}", file=sys.stderr)
ctx.emit(f"Opened in browser: {source_path}")
# Hydrus backend returns a URL; open it only for this explicit user action.
try:
webbrowser.open(source_path)
except Exception as exc:
log(f"Error opening browser: {exc}", file=sys.stderr)
else:
log(f"Opened in browser: {source_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": filename,
"title": resolve_display_title() or "Opened",
})
return 0
@@ -131,32 +129,58 @@ class Get_File(Cmdlet):
if isinstance(source_path, str):
source_path = Path(source_path)
# Determine output directory
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...")
return 1
# Folder store UX: without -path, just open the file in the default app.
# Only export/copy when -path is explicitly provided.
backend_name = type(backend).__name__
is_folder_backend = backend_name.lower() == "folder"
if is_folder_backend and not output_path:
display_title = resolve_display_title() or source_path.stem or "Opened"
ext_for_emit = metadata.get("ext") or source_path.suffix.lstrip(".")
self._open_file_default(source_path)
log(f"Opened: {source_path}", file=sys.stderr)
ctx.emit({
"hash": file_hash,
"store": store_name,
"path": str(source_path),
"title": str(display_title),
"ext": str(ext_for_emit or ""),
})
debug("[get-file] Completed successfully")
return 0
# Otherwise: export/copy to output_dir.
if output_path:
output_dir = Path(output_path).expanduser()
else:
output_dir = resolve_output_dir(config)
debug(f"[get-file] Output dir: {output_dir}")
# Create output directory
output_dir.mkdir(parents=True, exist_ok=True)
# Build full output path
dest_path = output_dir / filename
# Make path unique if file exists
dest_path = self._unique_path(dest_path)
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...")
return 1
# Determine output filename (only when exporting)
if output_name:
filename = output_name
else:
title = (metadata.get("title") if isinstance(metadata, dict) else None) or resolve_display_title() or "export"
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith('.'):
ext = '.' + ext
filename += ext
dest_path = self._unique_path(output_dir / filename)
# Copy file to destination
debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr)
shutil.copy2(source_path, dest_path)
ctx.emit(f"Exported to: {dest_path}")
log(f"Exported: {dest_path}", file=sys.stderr)
# Emit result for pipeline
@@ -169,6 +193,19 @@ class Get_File(Cmdlet):
debug(f"[get-file] Completed successfully")
return 0
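In practice the new Folder-store behaviour looks like this (illustrative invocations; "my-folder" is a placeholder store name):

get-file -store my-folder -hash <sha256>                 # opens the file with the OS default app
get-file -store my-folder -hash <sha256> -path ~/Export  # copies it into ~/Export instead of opening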
def _open_file_default(self, path: Path) -> None:
"""Open a local file in the OS default application."""
try:
if sys.platform.startswith("win"):
os.startfile(str(path)) # type: ignore[attr-defined]
return
if sys.platform == "darwin":
subprocess.Popen(["open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
return
subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except Exception as exc:
log(f"Error opening file: {exc}", file=sys.stderr)
def _sanitize_filename(self, name: str) -> str:
"""Sanitize filename by removing invalid characters."""

View File

@@ -10,10 +10,11 @@ from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field
from API.folder import API_folder_store
from config import get_local_storage_path
from result_table import ResultTable
from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
@@ -24,6 +25,7 @@ CMDLET = Cmdlet(
],
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
],
detail=[
"- Lists relationship data as returned by Hydrus or Local DB.",
@@ -36,8 +38,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse -hash override
# Parse -hash and -store override
override_hash: str | None = None
override_store: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
@@ -46,11 +49,20 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
if low in {"-store", "--store", "store"} and i + 1 < len(args_list):
override_store = str(args_list[i + 1]).strip()
i += 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Handle @N selection which creates a list
# This cmdlet is single-subject; require disambiguation when multiple items are provided.
if isinstance(result, list):
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -hash", file=sys.stderr)
return 1
else:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, store}
@@ -65,256 +77,170 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return
found_relationships.append(entry)
# Check for local file first
file_path = None
if isinstance(result, dict):
file_path = result.get("file_path") or result.get("path")
source_title = result.get("title") or result.get("name") or "Unknown"
elif hasattr(result, "file_path"):
file_path = result.file_path
source_title = getattr(result, "title", "Unknown")
local_db_checked = False
if file_path and not override_hash:
try:
path_obj = Path(file_path)
if not source_title or source_title == "Unknown":
source_title = path_obj.name
print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
if path_obj.exists():
storage_path = get_local_storage_path(config)
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with API_folder_store(storage_path) as db:
file_hash = db.get_file_hash(path_obj)
metadata = db.get_metadata(file_hash) if file_hash else None
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
# h is now a file hash (not a path)
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
# Resolve hash to file path
resolved_path = db.search_hash(h)
title = h[:16] + "..."
path = None
if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
tags = db.get_tags(h)
found_title = False
for t in tags:
if t.lower().startswith('title:'):
title = t[6:].strip()
found_title = True
break
if not found_title:
title = resolved_path.stem
except Exception:
title = resolved_path.stem
entry_type = "king" if rel_type.lower() == "alt" else rel_type
_add_relationship({
"hash": h,
"type": entry_type,
"title": title,
"path": path,
"store": "local"
})
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
# Also add the king/parent itself if not already in results
existing_parent = None
for r in found_relationships:
if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
existing_parent = r
break
if not existing_parent:
parent_title = parent_path_obj.stem
try:
parent_hash = db.get_file_hash(parent_path_obj)
if parent_hash:
parent_tags = db.get_tags(parent_hash)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
_add_relationship({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"store": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
if rel_type.lower() == "alt":
existing_parent['type'] = "king"
# 1. Check forward relationships from parent (siblings)
parent_hash = db.get_file_hash(parent_path_obj)
parent_metadata = db.get_metadata(parent_hash) if parent_hash else None
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue
# Check if already added (case-insensitive hash/path check)
if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue
# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_hash = db.get_file_hash(child_path_obj)
if child_hash:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"store": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to parent but parent doesn't point to siblings
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
# Skip if already added (check by path/hash, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue
child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_hash = db.get_file_hash(child_path_obj)
if child_hash:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"store": "local"
})
except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)
# Store/hash-first subject resolution
store_name: Optional[str] = override_store
if not store_name:
store_name = get_field(result, "store")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
if not source_title or source_title == "Unknown":
source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown")
local_db_checked = False
if store_name and hash_hex:
try:
store = Store(config)
backend = store[str(store_name)]
# Folder store relationships
# IMPORTANT: only treat the Folder backend as a local DB store.
# Other backends may expose a location() method but are not SQLite folder stores.
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
storage_path = Path(str(backend.location()))
with API_folder_store(storage_path) as db:
local_db_checked = True
# Update source title from tags if possible
try:
tags = db.get_tags(hash_hex)
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
source_title = t[6:].strip()
break
except Exception:
pass
metadata = db.get_metadata(hash_hex)
rels = (metadata or {}).get("relationships")
king_hashes: list[str] = []
# Forward relationships
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
if not isinstance(hashes, list):
continue
for related_hash in hashes:
related_hash = normalize_hash(str(related_hash))
if not related_hash or related_hash == hash_hex:
continue
entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type)
if entry_type == "king":
king_hashes.append(related_hash)
related_title = related_hash[:16] + "..."
try:
rel_tags = db.get_tags(related_hash)
for t in rel_tags:
if isinstance(t, str) and t.lower().startswith("title:"):
related_title = t[6:].strip()
break
except Exception:
pass
_add_relationship({
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
})
# Reverse relationships (alts pointing to this hash)
try:
reverse_children = db.find_files_pointing_to_hash(hash_hex)
except Exception:
reverse_children = []
for child in reverse_children or []:
child_hash = normalize_hash(str(child.get("hash") or ""))
rel_type = str(child.get("type") or "").strip().lower()
if not child_hash or child_hash == hash_hex:
continue
child_title = child_hash[:16] + "..."
try:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if isinstance(t, str) and t.lower().startswith("title:"):
child_title = t[6:].strip()
break
except Exception:
pass
entry_type = "alt" if rel_type == "alt" else (rel_type or "related")
_add_relationship({
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
})
# Siblings (alts that share the same king)
for king_hash in king_hashes:
try:
siblings = db.find_files_pointing_to_hash(king_hash)
except Exception:
siblings = []
for sib in siblings or []:
sib_hash = normalize_hash(str(sib.get("hash") or ""))
sib_type = str(sib.get("type") or "").strip().lower()
if not sib_hash or sib_hash in {hash_hex, king_hash}:
continue
sib_title = sib_hash[:16] + "..."
try:
sib_tags = db.get_tags(sib_hash)
for t in sib_tags:
if isinstance(t, str) and t.lower().startswith("title:"):
sib_title = t[6:].strip()
break
except Exception:
pass
entry_type = "alt" if sib_type == "alt" else (sib_type or "related")
_add_relationship({
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
})
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "store": "local"
# })
except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)
log(f"Error checking store relationships: {e}", file=sys.stderr)
# If we found local relationships, we can stop or merge with Hydrus?
# For now, if we found local ones, let's show them.
# But if the file is also in Hydrus, we might want those too.
# Let's try Hydrus if we have a hash.
hash_hex = get_hash_for_operation(override_hash, result)
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
if hash_hex and not local_db_checked:
try:
client = hydrus_wrapper.get_client(config)
client = None
store_label = "hydrus"
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
store_label = str(store_name)
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_relationships"):
client = candidate
except Exception:
client = None
if client is None:
log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return 1
else:
client = hydrus_wrapper.get_client(config)
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
@@ -322,38 +248,66 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names
# 0: potential duplicates, 1: false positives, 2: false positives (alternates),
# 3: duplicates, 4: alternatives, 8: king
# This mapping is approximate based on Hydrus API docs/behavior
# Map Hydrus relationship IDs to names.
# For /manage_file_relationships/get_file_relationships, the Hydrus docs define:
# 0=potential duplicates, 1=false positives, 3=alternates, 8=duplicates
# Additionally, this endpoint includes metadata keys like 'king'/'is_king'.
rel_map = {
"0": "potential duplicate",
"0": "potential",
"1": "false positive",
"2": "false positive",
"3": "duplicate",
"4": "alternative",
"8": "king"
"3": "alternate",
"8": "duplicate",
}
for rel_type_id, hash_list in this_file_rels.items():
# Skip metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
for rel_type_id, rel_value in this_file_rels.items():
key = str(rel_type_id)
# Handle metadata keys explicitly.
if key in {"is_king", "king_is_on_file_domain", "king_is_local"}:
continue
rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
# Check if we already have this hash from local DB
if not any(r['hash'] == rel_hash for r in found_relationships):
found_relationships.append({
"hash": rel_hash,
"type": rel_name,
"title": rel_hash, # Can't resolve title easily without another API call
"path": None,
"store": "hydrus"
})
# Some Hydrus responses provide a direct king hash under the 'king' key.
if key == "king":
king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None
if king_hash and king_hash != hash_hex:
if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships):
found_relationships.append({
"hash": king_hash,
"type": "king",
"title": king_hash,
"path": None,
"store": store_label,
})
continue
rel_name = rel_map.get(key, f"type-{key}")
# The relationship value is typically a list of hashes.
if isinstance(rel_value, list):
for rel_hash in rel_value:
rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None
if not rel_hash_norm or rel_hash_norm == hash_hex:
continue
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm, # Can't resolve title easily without another API call
"path": None,
"store": store_label,
})
# Defensive: sometimes the API may return a single hash string.
elif isinstance(rel_value, str):
rel_hash_norm = normalize_hash(rel_value)
if rel_hash_norm and rel_hash_norm != hash_hex:
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm,
"path": None,
"store": store_label,
})
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
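For reference, the relationship walk above reduces to a standalone function like the sketch below; the payload is invented for illustration and hash normalization is simplified to strip/lowercase, so this is not the exact helper used in the cmdlet.

# Minimal sketch: turning a Hydrus-style relationship payload into rows.
# IDs follow the documented mapping used above (0=potential, 1=false positive,
# 3=alternate, 8=duplicate), plus the 'king' metadata key.
REL_MAP = {"0": "potential", "1": "false positive", "3": "alternate", "8": "duplicate"}
META_KEYS = {"is_king", "king_is_on_file_domain", "king_is_local"}

def parse_relationships(this_file_rels: dict, own_hash: str, store_label: str) -> list:
    rows = []
    seen = set()

    def add(rel_hash: str, rel_name: str) -> None:
        h = rel_hash.strip().lower()  # simplified stand-in for normalize_hash
        if h and h != own_hash and h not in seen:
            seen.add(h)
            rows.append({"hash": h, "type": rel_name, "title": h, "path": None, "store": store_label})

    for key, value in this_file_rels.items():
        key = str(key)
        if key in META_KEYS:
            continue
        if key == "king" and isinstance(value, str):
            add(value, "king")
            continue
        rel_name = REL_MAP.get(key, f"type-{key}")
        for rel_hash in (value if isinstance(value, list) else [value]):
            if isinstance(rel_hash, str):
                add(rel_hash, rel_name)
    return rows

# Example payload (made up for illustration):
example = {"is_king": True, "king": "a" * 64, "3": ["b" * 64], "8": ["c" * 64]}
print(parse_relationships(example, own_hash="d" * 64, store_label="hydrus"))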
@@ -374,7 +328,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
elif t == 'derivative':
return 1
elif t == 'alternative':
elif t in {'alternative', 'alternate', 'alt'}:
return 2
elif t == 'duplicate':
return 3
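Read on its own, this ordering helper might look like the sketch below; the branch that returns 0 is cut off by the hunk, so matching it to 'original' is an assumption.

def relationship_rank(rel_type: str) -> int:
    """Illustrative sort key: lower ranks sort first (assumed ordering)."""
    t = (rel_type or "").strip().lower()
    if t == "original":          # assumption: the truncated first branch
        return 0
    if t == "derivative":
        return 1
    if t in {"alternative", "alternate", "alt"}:
        return 2
    if t == "duplicate":
        return 3
    return 4  # unknown types sort last (assumption)

rows = [{"type": "duplicate"}, {"type": "alt"}, {"type": "original"}]
rows.sort(key=lambda r: relationship_rank(r["type"]))
print([r["type"] for r in rows])  # ['original', 'alt', 'duplicate']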
@@ -400,22 +354,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"relationship_type": item['type'],
"store": item['store']
}
if item['path']:
res_obj["path"] = item['path']
res_obj["file_path"] = item['path']
res_obj["target"] = item['path']
else:
# If Hydrus, target is hash
res_obj["target"] = item['hash']
# Target is always hash in store/hash-first mode
res_obj["target"] = item['hash']
pipeline_results.append(res_obj)
# Set selection args
# If it has a path, we can use it directly. If hash, maybe get-file -hash?
if item['path']:
table.set_row_selection_args(i, [item['path']])
else:
table.set_row_selection_args(i, ["-hash", item['hash']])
table.set_row_selection_args(i, ["-store", str(item['store']), "-hash", item['hash']])
ctx.set_last_result_table(table, pipeline_results)
print(table)
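Viewed in isolation, the hash-first emission above reduces to the following; the ResultTable plumbing is project-specific and omitted, so this sketch only shows the row dicts and the per-row selection args.

# Sketch: hash-first rows and selection args (field names mirror the loop above).
found = [{"hash": "b" * 64, "type": "duplicate", "title": "b" * 64, "path": None, "store": "hydrus"}]
pipeline_results = []
selection_args = []
for item in found:
    pipeline_results.append({
        "hash": item["hash"],
        "relationship_type": item["type"],
        "store": item["store"],
        "target": item["hash"],  # target is always the hash in store/hash-first mode
    })
    # "@N" re-addresses the row by store + hash instead of a filesystem path.
    selection_args.append(["-store", str(item["store"]), "-hash", item["hash"]])
print(pipeline_results[0]["target"][:8], selection_args[0][:2])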

View File

@@ -27,10 +27,10 @@ from ._shared import (
import pipeline as ctx
try:
from PyPDF2 import PdfWriter, PdfReader
HAS_PYPDF2 = True
from pypdf import PdfWriter, PdfReader
HAS_PYPDF = True
except ImportError:
HAS_PYPDF2 = False
HAS_PYPDF = False
PdfWriter = None
PdfReader = None
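With the import guard now targeting pypdf, a minimal merge along the lines of _merge_pdf could look like this sketch (placeholder file names, happy-path only; pypdf is assumed to be installed).

from pathlib import Path
from pypdf import PdfReader, PdfWriter

def merge_pdfs(files, output: Path) -> bool:
    """Copy every page of every input PDF into a single output file."""
    writer = PdfWriter()
    try:
        for pdf_path in files:
            reader = PdfReader(str(pdf_path))
            for page in reader.pages:
                writer.add_page(page)
        with open(output, "wb") as handle:
            writer.write(handle)
        return True
    except Exception:
        return False

# merge_pdfs([Path("a.pdf"), Path("b.pdf")], Path("merged.pdf"))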
@@ -753,8 +753,8 @@ def _merge_text(files: List[Path], output: Path) -> bool:
def _merge_pdf(files: List[Path], output: Path) -> bool:
"""Merge PDF files."""
if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None):
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
if (not HAS_PYPDF) or (PdfWriter is None) or (PdfReader is None):
log("pypdf is required for PDF merging. Install with: pip install pypdf", file=sys.stderr)
return False
try:

View File

@@ -10,6 +10,7 @@ import contextlib
import hashlib
import sys
import time
from datetime import datetime
import httpx
from dataclasses import dataclass, field
from pathlib import Path
@@ -547,7 +548,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# ========================================================================
piped_results = normalize_result_input(result)
url_to_process = []
url_to_process: List[Tuple[str, Any]] = []
# Extract url from piped results
if piped_results:
@@ -559,17 +560,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
if url:
url_to_process.append(str(url))
url_to_process.append((str(url), item))
# Use positional arguments if no pipeline input
if not url_to_process and positional_url:
url_to_process = positional_url
url_to_process = [(u, None) for u in positional_url]
if not url_to_process:
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
return 1
debug(f"[_run] url to process: {url_to_process}")
debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
@@ -623,7 +624,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# PROCESS url AND CAPTURE SCREENSHOTS
# ========================================================================
for url in url_to_process:
def _extract_item_tags(item: Any) -> List[str]:
if item is None:
return []
raw = get_field(item, 'tag')
if isinstance(raw, list):
return [str(t) for t in raw if t is not None and str(t).strip()]
if isinstance(raw, str) and raw.strip():
return [raw.strip()]
return []
def _extract_item_title(item: Any) -> str:
if item is None:
return ""
for key in ("title", "name", "filename"):
val = get_field(item, key)
if val is None:
continue
text = str(val).strip()
if text:
return text
return ""
def _clean_title(text: str) -> str:
value = (text or "").strip()
if value.lower().startswith("screenshot:"):
value = value.split(":", 1)[1].strip()
return value
for url, origin_item in url_to_process:
# Validate URL format
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
@@ -660,15 +689,34 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
capture_date = ""
try:
capture_date = datetime.fromtimestamp(screenshot_result.path.stat().st_mtime).date().isoformat()
except Exception:
capture_date = datetime.now().date().isoformat()
upstream_title = _clean_title(_extract_item_title(origin_item))
display_title = upstream_title or url
upstream_tags = _extract_item_tags(origin_item)
filtered_upstream_tags = [
t for t in upstream_tags
if not str(t).strip().lower().startswith(("type:", "date:"))
]
merged_tags = unique_preserve_order(
["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags
)
pipe_obj = create_pipe_object_result(
source='screenshot',
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
title=f"Screenshot: {Path(screenshot_result.path).name}",
title=display_title,
hash_value=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
tag=merged_tags,
extra={
'source_url': url,
'archive_url': screenshot_result.archive_url,
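A compact reproduction of the tag-merging step above; unique_preserve_order lives in the shared helpers, so an inline stand-in is used here.

from datetime import datetime

def unique_preserve_order_local(values):
    """Inline stand-in for the shared unique_preserve_order helper (assumption)."""
    seen, out = set(), []
    for v in values:
        if v not in seen:
            seen.add(v)
            out.append(v)
    return out

upstream_tags = ["type:video", "date:2024-01-01", "artist:someone", "artist:someone"]
capture_date = datetime.now().date().isoformat()
# Drop upstream type:/date: tags so the screenshot's own values win.
filtered = [t for t in upstream_tags if not t.strip().lower().startswith(("type:", "date:"))]
merged_tags = unique_preserve_order_local(["type:screenshot", f"date:{capture_date}"] + filtered)
print(merged_tags)  # screenshot type/date first, then de-duplicated upstream tags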

View File

@@ -1,4 +1,4 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube)."""
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
@@ -25,23 +25,27 @@ class Search_Provider(Cmdlet):
def __init__(self):
super().__init__(
name="search-provider",
summary="Search external providers (bandcamp, libgen, soulseek, youtube)",
usage="search-provider <provider> <query> [-limit N]",
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube"),
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
],
detail=[
"Search external content providers:",
"- alldebrid: List your AllDebrid account folders (magnets). Select @N to view files.",
" Example: search-provider -provider alldebrid \"*\"",
" Example: search-provider -provider alldebrid -open 123 \"*\"",
"- bandcamp: Search for music albums/tracks",
" Example: search-provider bandcamp \"artist:altrusian grace\"",
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
"- libgen: Search Library Genesis for books",
" Example: search-provider libgen \"python programming\"",
" Example: search-provider -provider libgen \"python programming\"",
"- soulseek: Search P2P network for music",
" Example: search-provider soulseek \"pink floyd\"",
" Example: search-provider -provider soulseek \"pink floyd\"",
"- youtube: Search YouTube for videos",
" Example: search-provider youtube \"tutorial\"",
" Example: search-provider -provider youtube \"tutorial\"",
"",
"Query syntax:",
"- bandcamp: Use 'artist:Name' to search by artist",
@@ -50,7 +54,7 @@ class Search_Provider(Cmdlet):
"- youtube: Plain text search",
"",
"Results can be piped to other cmdlet:",
" search-provider bandcamp \"artist:grace\" | @1 | download-data",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-data",
],
exec=self.run
)
@@ -61,10 +65,62 @@ class Search_Provider(Cmdlet):
if should_show_help(args):
ctx.emit(self.__dict__)
return 0
# Parse arguments
if len(args) < 2:
log("Error: search-provider requires <provider> and <query> arguments", file=sys.stderr)
args_list = [str(a) for a in (args or [])]
# Dynamic flag variants from cmdlet arg definitions.
flag_registry = self.build_flag_registry()
provider_flags = {f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})}
query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
open_flags = {f.lower() for f in (flag_registry.get("open") or {"-open", "--open"})}
provider_name: Optional[str] = None
query: Optional[str] = None
limit = 50
open_id: Optional[int] = None
positionals: List[str] = []
i = 0
while i < len(args_list):
token = args_list[i]
low = token.lower()
if low in provider_flags and i + 1 < len(args_list):
provider_name = args_list[i + 1]
i += 2
elif low in query_flags and i + 1 < len(args_list):
query = args_list[i + 1]
i += 2
elif low in limit_flags and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50", file=sys.stderr)
limit = 50
i += 2
elif low in open_flags and i + 1 < len(args_list):
try:
open_id = int(args_list[i + 1])
except ValueError:
log(f"Warning: Invalid open value '{args_list[i + 1]}', ignoring", file=sys.stderr)
open_id = None
i += 2
elif not token.startswith("-"):
positionals.append(token)
i += 1
else:
i += 1
# Backwards-compatible positional form: search-provider <provider> <query>
if provider_name is None and positionals:
provider_name = positionals[0]
positionals = positionals[1:]
if query is None and positionals:
query = " ".join(positionals).strip() or None
if not provider_name or not query:
log("Error: search-provider requires a provider and query", file=sys.stderr)
log(f"Usage: {self.usage}", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
@@ -73,17 +129,6 @@ class Search_Provider(Cmdlet):
log(f" {status} {name}", file=sys.stderr)
return 1
provider_name = args[0]
query = args[1]
# Parse optional limit
limit = 50
if len(args) >= 4 and args[2] in ("-limit", "--limit"):
try:
limit = int(args[3])
except ValueError:
log(f"Warning: Invalid limit value '{args[3]}', using default 50", file=sys.stderr)
debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}")
# Get provider
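A stripped-down model of the new parsing loop, covering both the flagged form and the legacy positional form; build_flag_registry is project-specific, so the flag spellings are hard-coded here for illustration.

from typing import List, Optional, Tuple

def parse_search_args(args: List[str]) -> Tuple[Optional[str], Optional[str], int]:
    provider: Optional[str] = None
    query: Optional[str] = None
    limit = 50
    positionals: List[str] = []
    i = 0
    while i < len(args):
        low = args[i].lower()
        if low in {"-provider", "--provider"} and i + 1 < len(args):
            provider, i = args[i + 1], i + 2
        elif low in {"-limit", "--limit"} and i + 1 < len(args):
            try:
                limit = int(args[i + 1])
            except ValueError:
                limit = 50
            i += 2
        elif not args[i].startswith("-"):
            positionals.append(args[i])
            i += 1
        else:
            i += 1
    # Backwards-compatible positional form: <provider> <query...>
    if provider is None and positionals:
        provider, positionals = positionals[0], positionals[1:]
    if query is None and positionals:
        query = " ".join(positionals).strip() or None
    return provider, query, limit

print(parse_search_args(["bandcamp", "artist:grace", "-limit", "10"]))
print(parse_search_args(["-provider", "alldebrid", "*"]))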
@@ -120,13 +165,34 @@ class Search_Provider(Cmdlet):
importlib.reload(result_table)
from result_table import ResultTable
table_title = f"Search: {query} [{provider_name}]"
provider_text = str(provider_name or "").strip()
provider_lower = provider_text.lower()
if provider_lower == "youtube":
provider_label = "Youtube"
elif provider_lower == "openlibrary":
provider_label = "OpenLibrary"
else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
if provider_lower == "alldebrid" and open_id is not None:
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
else:
table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_name.lower() in ('youtube', 'openlibrary')
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name)
table.set_source_command("search-provider", list(args))
debug(f"[search-provider] Calling {provider_name}.search()")
results = provider.search(query, limit=limit)
if provider_lower == "alldebrid":
if open_id is not None:
# Second-stage: show files for selected folder/magnet.
results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id})
else:
# Default: show folders (magnets) so user can select @N.
results = provider.search(query, limit=limit, filters={"view": "folders"})
else:
results = provider.search(query, limit=limit)
debug(f"[search-provider] Got {len(results)} results")
if not results:
@@ -143,11 +209,26 @@ class Search_Provider(Cmdlet):
if 'table' not in item_dict:
item_dict['table'] = provider_name
row_index = len(table.rows)
table.add_result(search_result) # ResultTable handles SearchResult objects
# For AllDebrid folder rows, allow @N to open and show files.
try:
if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder":
magnet_id = None
meta = getattr(search_result, "full_metadata", None)
if isinstance(meta, dict):
magnet_id = meta.get("magnet_id")
if magnet_id is not None:
table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"])
except Exception:
pass
results_list.append(item_dict)
ctx.emit(item_dict)
ctx.set_last_result_table(table, results_list)
# Ensure @N selection expands against this newly displayed table.
ctx.set_current_stage_table(table)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
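The two-stage AllDebrid flow (folders first, then files for a chosen magnet) can be summarized by the driver below; the filters payload mirrors the calls above, while the provider object is a toy stand-in rather than the real search provider.

def alldebrid_results(provider, query: str, limit: int, open_id=None):
    """Folders view by default; files view when an -open magnet id was given."""
    if open_id is not None:
        # Second stage: list files inside the selected folder/magnet.
        return provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id})
    # First stage: list the account's folders (magnets) so the user can pick @N.
    return provider.search(query, limit=limit, filters={"view": "folders"})

class FakeProvider:
    """Illustrative stand-in for the real AllDebrid search provider."""
    def search(self, query, limit=50, filters=None):
        view = (filters or {}).get("view", "folders")
        return [f"{view}-result"]

print(alldebrid_results(FakeProvider(), "*", 50))
print(alldebrid_results(FakeProvider(), "*", 50, open_id=123))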
@@ -165,5 +246,8 @@ class Search_Provider(Cmdlet):
return 1
# Register cmdlet instance
Search_Provider_Instance = Search_Provider()
# Register cmdlet instance (catalog + REPL autocomplete expect a module-level CMDLET)
CMDLET = Search_Provider()
# Backwards-compatible alias
Search_Provider_Instance = CMDLET

View File

@@ -1,7 +1,7 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple
from typing import Any, Dict, Sequence, List, Optional
from pathlib import Path
from collections import OrderedDict
import re
@@ -10,7 +10,7 @@ import sys
from SYS.logger import log, debug
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help, normalize_hash, first_title_tag
from ._shared import Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag
import pipeline as ctx
@@ -24,21 +24,16 @@ class Search_Store(Cmdlet):
super().__init__(
name="search-store",
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]",
usage="search-store [query] [-store BACKEND] [-limit N]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
CmdletArg("type", description="Filter by type: audio, video, image, document"),
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"),
SharedArgs.STORE,
],
detail=[
"Search across storage backends: Folder stores and Hydrus instances",
"Use -store to search a specific backend by name",
"URL search: url:* (any URL) or url:<value> (URL substring)",
"Filter results by: tag, size, type, duration",
"Results include hash for downstream commands (get-file, add-tag, etc.)",
"Examples:",
"search-store foo # Search all storage backends",
@@ -46,8 +41,6 @@ class Search_Store(Cmdlet):
"search-store -store test 'video' # Search 'test' folder store",
"search-store 'url:*' # Files that have any URL",
"search-store 'url:youtube.com' # Files whose URL contains substring",
"search-store song -type audio # Search for audio files",
"search-store movie -tag action # Search with tag filter",
],
exec=self.run,
)
@@ -136,12 +129,14 @@ class Search_Store(Cmdlet):
args_list = [str(arg) for arg in (args or [])]
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
flag_registry = self.build_flag_registry()
store_flags = {f.lower() for f in (flag_registry.get("store") or {"-store", "--store"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
# Parse arguments
query = ""
_tag_filters: List[str] = []
_size_filter: Optional[Tuple[str, int]] = None
_duration_filter: Optional[Tuple[str, float]] = None
_type_filter: Optional[str] = None
storage_backend: Optional[str] = None
limit = 100
searched_backends: List[str] = []
@@ -150,21 +145,15 @@ class Search_Store(Cmdlet):
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-store", "--store"} and i + 1 < len(args_list):
if low in store_flags and i + 1 < len(args_list):
storage_backend = args_list[i + 1]
i += 2
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
_tag_filters.append(args_list[i + 1])
i += 2
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
elif low in limit_flags and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
limit = 100
i += 2
elif low in {"-type", "--type"} and i + 1 < len(args_list):
_type_filter = args_list[i + 1].lower()
i += 2
elif not arg.startswith("-"):
query = f"{query} {arg}".strip() if query else arg
i += 1
@@ -268,12 +257,16 @@ class Search_Store(Cmdlet):
# Resolve a path/URL string if possible
path_str: Optional[str] = None
# IMPORTANT: avoid calling get_file() for remote backends.
# For Hydrus, get_file() returns a browser URL (and may include access keys),
# which should not be pulled during search/refresh.
try:
maybe_path = resolved_backend.get_file(h)
if isinstance(maybe_path, Path):
path_str = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
path_str = maybe_path
if type(resolved_backend).__name__ == "Folder":
maybe_path = resolved_backend.get_file(h)
if isinstance(maybe_path, Path):
path_str = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
path_str = maybe_path
except Exception:
path_str = None
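Distilled, the guard above only asks a backend for a local path when it is a Folder store, so Hydrus-style backends never surface browser URLs or access keys during search; the backend classes below are toy stand-ins.

from pathlib import Path
from typing import Optional

class Folder:
    def get_file(self, h: str) -> Path:
        return Path("/library") / f"{h}.bin"

class Hydrus:
    def get_file(self, h: str) -> str:
        return f"https://hydrus.example/get_files/file?hash={h}&access_key=SECRET"

def resolve_local_path(backend, h: str) -> Optional[str]:
    """Return a filesystem path only for Folder backends; remote backends yield None."""
    try:
        if type(backend).__name__ == "Folder":
            maybe_path = backend.get_file(h)
            if isinstance(maybe_path, (Path, str)) and str(maybe_path):
                return str(maybe_path)
    except Exception:
        pass
    return None

print(resolve_local_path(Folder(), "ab" * 32))   # a real path
print(resolve_local_path(Hydrus(), "ab" * 32))   # None -- the URL is never surfaced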