This commit is contained in:
nose
2025-12-16 01:45:01 -08:00
parent a03eb0d1be
commit 9873280f0e
36 changed files with 4911 additions and 1225 deletions


@@ -5,6 +5,7 @@ from pathlib import Path
import sys
import shutil
import tempfile
import re
import models
import pipeline as ctx
@@ -65,6 +66,15 @@ class Add_File(Cmdlet):
stage_ctx = ctx.get_stage_context()
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
# Determine if -store targets a registered backend (vs a filesystem export path).
is_storage_backend_location = False
if location:
try:
store_probe = Store(config)
is_storage_backend_location = location in (store_probe.list_backends() or [])
except Exception:
is_storage_backend_location = False
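# e.g. `-store local` matches a name from list_backends() -> backend ingest,
# while `-store ~/exports` does not -> plain filesystem export
# ("local" and "~/exports" are hypothetical values).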
# Decide which items to process.
# - If user provided -path, treat this invocation as single-item.
# - Otherwise, if piped input is a list, ingest each item.
@@ -81,13 +91,15 @@ class Add_File(Cmdlet):
debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")
collected_payloads: List[Dict[str, Any]] = []
pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
successes = 0
failures = 0
# Only run the search-store refresh when add-file is the last stage.
# In the middle of a pipeline, downstream cmdlets should receive the emitted
# storage payload directly (no need to re-search and risk duplicate emits).
auto_search_store_after_add = bool(is_last_stage) and len(items_to_process) == 1
# When add-file -store is the last stage, always show a final search-store table.
# This is especially important for multi-item ingests (e.g., multi-clip downloads)
# so the user always gets a selectable ResultTable.
want_final_search_store = bool(is_last_stage) and bool(is_storage_backend_location) and bool(location)
auto_search_store_after_add = False
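# Net effect of the two flags above (assumed semantics):
#   last stage + registered backend  -> one consolidated search-store table at the end
#   last stage + filesystem export   -> per-item output only, no refresh table
#   mid-pipeline                     -> payloads flow straight to the next stage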
for item in items_to_process:
pipe_obj = coerce_to_pipe_object(item, path_arg)
@@ -217,7 +229,8 @@ class Add_File(Cmdlet):
config,
delete_after_item,
collect_payloads=collected_payloads,
suppress_last_stage_overlay=is_last_stage and len(items_to_process) > 1,
collect_relationship_pairs=pending_relationship_pairs,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
)
else:
@@ -243,16 +256,38 @@ class Add_File(Cmdlet):
except Exception:
pass
# If we processed multiple storage ingests, present a single consolidated overlay table.
if is_last_stage and len(items_to_process) > 1 and collected_payloads:
# Always end add-file -store (when last stage) by showing the canonical store table.
# This keeps output consistent and ensures @N selection works for multi-item ingests.
if want_final_search_store and collected_payloads:
try:
from result_table import ResultTable
table = ResultTable("Result")
hashes: List[str] = []
for payload in collected_payloads:
table.add_result(payload)
# Make this the active selectable table so @.. returns here (and playlist table is kept in history).
ctx.set_last_result_table(table, collected_payloads, subject=collected_payloads)
h = payload.get("hash") if isinstance(payload, dict) else None
if isinstance(h, str) and len(h) == 64:
hashes.append(h)
# Deduplicate while preserving order
seen: set[str] = set()
hashes = [h for h in hashes if not (h in seen or seen.add(h))]
refreshed_items = Add_File._try_emit_search_store_by_hashes(
store=str(location),
hash_values=hashes,
config=config,
)
if not refreshed_items:
# Fallback: at least show the add-file payloads as a display overlay
from result_table import ResultTable
table = ResultTable("Result")
for payload in collected_payloads:
table.add_result(payload)
ctx.set_last_result_table_overlay(table, collected_payloads, subject=collected_payloads)
except Exception:
pass
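# Illustrative call (hypothetical store name and hashes) mirroring the refresh above:
#   refreshed = Add_File._try_emit_search_store_by_hashes(
#       store="local", hash_values=["a" * 64, "b" * 64], config=config)
#   # Runs `search-store -store local "hash:<h1>,<h2>"` and returns its rows,
#   # or None, in which case the raw add-file payloads are shown instead.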
# Persist relationships into backend DB/API.
if pending_relationship_pairs:
try:
Add_File._apply_pending_relationships(pending_relationship_pairs, config)
except Exception:
pass
@@ -260,6 +295,259 @@ class Add_File(Cmdlet):
return 0
return 1
@staticmethod
def _try_emit_search_store_by_hashes(*, store: str, hash_values: List[str], config: Dict[str, Any]) -> Optional[List[Any]]:
"""Run search-store for a list of hashes and promote the table to a display overlay.
Returns the emitted search-store payload items on success, else None.
"""
hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64]
if not store or not hashes:
return None
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
query = "hash:" + ",".join(hashes)
args = ["-store", str(store), query]
log(f"[add-file] Refresh: search-store -store {store} \"{query}\"", file=sys.stderr)
# Run search-store under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1, worker_id=getattr(prev_ctx, "worker_id", None))
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return None
# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if is_last:
try:
table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if table is not None and items:
ctx.set_last_result_table_overlay(table, items, subject={"store": store, "hash": hashes})
except Exception:
pass
return emitted_items
except Exception as exc:
debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
return None
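# Pattern sketch of the stage-context swap used above (`nested_cmdlet` is a
# stand-in for any cmdlet object):
#   prev_ctx = ctx.get_stage_context()
#   ctx.set_stage_context(ctx.PipelineStageContext(stage_index=0, total_stages=1))
#   try:
#       code = nested_cmdlet.run(None, args, config)  # its ctx.emit() hits the temp context
#   finally:
#       ctx.set_stage_context(prev_ctx)  # the outer stage is always restored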
@staticmethod
def _parse_relationship_tag_king_alts(tag_value: str) -> tuple[Optional[str], List[str]]:
"""Parse a relationship tag into (king_hash, alt_hashes).
Supported formats:
- New: relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>
- Old: relationship: hash(king)<KING_HASH>,hash(alt)<ALT_HASH>...
  e.g. relationship: hash(king)KING,hash(alt)ALT
For the local DB we treat the first hash listed as the king.
"""
if not isinstance(tag_value, str):
return None, []
raw = tag_value.strip()
if not raw:
return None, []
# Normalize input: ensure we only look at the RHS after "relationship:"
rhs = raw
if ":" in raw:
prefix, rest = raw.split(":", 1)
if prefix.strip().lower() == "relationship":
rhs = rest.strip()
# Old typed format: hash(type)HEX
typed = re.findall(r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?", rhs)
if typed:
king: Optional[str] = None
alts: List[str] = []
for rel_type, h in typed:
h_norm = str(h).strip().lower()
if rel_type.strip().lower() == "king":
king = h_norm
elif rel_type.strip().lower() in {"alt", "related"}:
alts.append(h_norm)
# If the tag omitted king but had hashes, fall back to first hash.
if not king:
all_hashes = [str(h).strip().lower() for _, h in typed]
king = all_hashes[0] if all_hashes else None
alts = [h for h in all_hashes[1:] if h]
# Dedupe alts while preserving order
seen: set[str] = set()
alts = [h for h in alts if h and len(h) == 64 and not (h in seen or seen.add(h))]
if king and len(king) == 64:
return king, [h for h in alts if h != king]
return None, []
# New format: a simple list of hashes, first is king.
hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs)
hashes = [h.strip().lower() for h in hashes if isinstance(h, str)]
if not hashes:
return None, []
king = hashes[0]
alts = hashes[1:]
seen2: set[str] = set()
alts = [h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h))]
return king, [h for h in alts if h != king]
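# Examples (hypothetical 64-hex hashes):
#   _parse_relationship_tag_king_alts("relationship: " + "a" * 64 + "," + "b" * 64)
#     -> ("a" * 64, ["b" * 64])   # new format: first hash is the king
#   _parse_relationship_tag_king_alts("relationship: hash(king)" + "a" * 64 + ",hash(alt)" + "b" * 64)
#     -> ("a" * 64, ["b" * 64])   # old typed format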
@staticmethod
def _parse_relationships_king_alts(relationships: Dict[str, Any]) -> tuple[Optional[str], List[str]]:
"""Parse a PipeObject.relationships dict into (king_hash, alt_hashes).
Supported shapes:
- {"king": [KING], "alt": [ALT1, ALT2]}
- {"king": KING, "alt": ALT} (strings)
- Also treats "related" hashes as alts for persistence purposes.
"""
if not isinstance(relationships, dict) or not relationships:
return None, []
def _first_hash(val: Any) -> Optional[str]:
if isinstance(val, str):
h = val.strip().lower()
return h if len(h) == 64 else None
if isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
return h
return None
def _many_hashes(val: Any) -> List[str]:
out: List[str] = []
if isinstance(val, str):
h = val.strip().lower()
if len(h) == 64:
out.append(h)
elif isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
out.append(h)
return out
king = _first_hash(relationships.get("king"))
if not king:
return None, []
alts = _many_hashes(relationships.get("alt"))
alts.extend(_many_hashes(relationships.get("related")))
seen: set[str] = set()
alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))]
return king, alts
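# Example (hypothetical hashes):
#   _parse_relationships_king_alts({"king": ["a" * 64], "alt": ["b" * 64], "related": "c" * 64})
#     -> ("a" * 64, ["b" * 64, "c" * 64])   # "related" hashes are persisted as alts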
@staticmethod
def _apply_pending_relationships(pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any]) -> None:
"""Persist relationships into the appropriate backend DB/API.
- Folder stores: write to the per-store SQLite DB (directional alt->king).
- Hydrus stores: call Hydrus relationship API.
"""
if not pending:
return
try:
store = Store(config)
except Exception:
return
for backend_name, pairs in pending.items():
if not pairs:
continue
try:
backend = store[str(backend_name)]
except Exception:
continue
backend_type = type(backend).__name__.lower()
# Folder-backed local DB
location_fn = getattr(backend, "location", None)
is_folder = type(backend).__name__ == "Folder" and callable(location_fn)
if is_folder and location_fn is not None:
try:
root = Path(str(location_fn())).expanduser()
with API_folder_store(root) as db:
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
# Hash-first write to the store DB; skipped when either hash is missing from this store's DB.
try:
db.set_relationship_by_hash(str(alt_hash), str(king_hash), "alt", bidirectional=False)
except Exception:
continue
processed_pairs.add((alt_hash, king_hash))
except Exception:
pass
continue
# Hydrus
if "hydrus" in backend_type or hasattr(backend, "_client"):
client: Any = getattr(backend, "_client", None)
# Do not fall back to a global/default Hydrus client here; relationships must not be cross-store.
if client is None or not hasattr(client, "set_relationship"):
continue
def _hash_exists(hash_hex: str) -> bool:
try:
if not hasattr(client, "fetch_file_metadata"):
return False
payload = client.fetch_file_metadata(
hashes=[hash_hex],
include_service_keys_to_tags=False,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
include_notes=False,
)
meta = payload.get("metadata") if isinstance(payload, dict) else None
return bool(isinstance(meta, list) and meta)
except Exception:
return False
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
try:
alt_norm = str(alt_hash).strip().lower()
king_norm = str(king_hash).strip().lower()
if len(alt_norm) != 64 or len(king_norm) != 64:
continue
if not _hash_exists(alt_norm) or not _hash_exists(king_norm):
continue
client.set_relationship(alt_norm, king_norm, "alt")
processed_pairs.add((alt_hash, king_hash))
except Exception:
pass
continue
# Other backends: no-op
_ = backend_type
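# Shape of the pending map and what persistence does (hypothetical backend name):
#   pending = {"local": {("b" * 64, "a" * 64)}}   # (alt_hash, king_hash) pairs per backend
#   Add_File._apply_pending_relationships(pending, config)
#   # Folder backend -> db.set_relationship_by_hash(alt, king, "alt", bidirectional=False)
#   # Hydrus backend -> client.set_relationship(alt, king, "alt"), only after both
#   #                   hashes are confirmed to exist in that store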
@staticmethod
def _resolve_source(
result: Any,
@@ -310,7 +598,10 @@ class Add_File(Cmdlet):
debug(f"Resolved pipe_path: {pipe_path_str}")
if pipe_path_str.startswith("hydrus:"):
file_hash = pipe_path_str.split(":", 1)[1]
media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
store_name = getattr(pipe_obj, "store", None)
if not store_name and isinstance(pipe_obj.extra, dict):
store_name = pipe_obj.extra.get("store")
media_path, success = Add_File._fetch_hydrus_path(file_hash, config, store_name=str(store_name).strip() if store_name else None)
return media_path, file_hash if success else None
if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
return pipe_path_str, None
@@ -331,7 +622,10 @@ class Add_File(Cmdlet):
hydrus_hash = hydrus_hash or pipe_obj.hash
if hydrus_hash and hydrus_hash != "unknown":
media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config)
store_name = getattr(pipe_obj, "store", None)
if not store_name and isinstance(pipe_obj.extra, dict):
store_name = pipe_obj.extra.get("store")
media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config, store_name=str(store_name).strip() if store_name else None)
return media_path, str(hydrus_hash) if success else None
# Try from result (if it's a string path or URL)
@@ -395,13 +689,32 @@ class Add_File(Cmdlet):
return None, None
@staticmethod
def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]:
def _fetch_hydrus_path(
file_hash: str,
config: Dict[str, Any],
store_name: Optional[str] = None,
) -> Tuple[Optional[Path], bool]:
"""Fetch the physical path of a file from Hydrus using its hash."""
if not file_hash:
return None, False
try:
client = hydrus_wrapper.get_client(config)
client = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_path"):
client = candidate
except Exception:
client = None
if client is None:
log(f"❌ Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return None, False
else:
client = hydrus_wrapper.get_client(config)
if not client:
log("❌ Hydrus client not available", file=sys.stderr)
return None, False
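# Illustrative calls (hypothetical store name; h is a 64-hex hash):
#   Add_File._fetch_hydrus_path(h, config, store_name="home-hydrus")  # strictly that store's client
#   Add_File._fetch_hydrus_path(h, config)                            # shared hydrus_wrapper client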
@@ -630,7 +943,7 @@ class Add_File(Cmdlet):
pass
@staticmethod
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> bool:
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> Optional[List[Any]]:
"""Run search-store for a single hash so the final table/payload is consistent.
Important: `add-file` is treated as an action command by the CLI, so the CLI only
@@ -638,7 +951,7 @@ class Add_File(Cmdlet):
this copies the resulting table into the display overlay (when this is the last
stage) so the canonical store table is what the user sees and can select from.
Returns True if search-store ran successfully, else False.
Returns the emitted search-store payload items on success, else None.
"""
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
@@ -653,10 +966,11 @@ class Add_File(Cmdlet):
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return False
return None
# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
@@ -671,10 +985,10 @@ class Add_File(Cmdlet):
except Exception:
pass
return True
return emitted_items
except Exception as exc:
debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
return False
return None
@staticmethod
def _prepare_metadata(
@@ -735,6 +1049,39 @@ class Add_File(Cmdlet):
file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)
# Relationships must not be stored as tags.
# If relationship tags exist (legacy sidecar format), capture them into PipeObject.relationships
# and strip them from the final tag list.
relationship_tags = [
t for t in merged_tags
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if relationship_tags:
try:
if not isinstance(getattr(pipe_obj, "relationships", None), dict) or not pipe_obj.relationships:
king: Optional[str] = None
alts: List[str] = []
for rel_tag in relationship_tags:
k, a = Add_File._parse_relationship_tag_king_alts(rel_tag)
if k and not king:
king = k
if a:
alts.extend(a)
if king:
seen_alt: set[str] = set()
alts = [h for h in alts if h and h != king and len(h) == 64 and not (h in seen_alt or seen_alt.add(h))]
payload: Dict[str, Any] = {"king": [king]}
if alts:
payload["alt"] = alts
pipe_obj.relationships = payload
except Exception:
pass
merged_tags = [
t for t in merged_tags
if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
]
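# Worked example (hypothetical tags), assuming pipe_obj.relationships was empty: given
#   merged_tags = ["artist:x", "relationship: " + "a" * 64 + "," + "b" * 64]
# the block above yields
#   pipe_obj.relationships == {"king": ["a" * 64], "alt": ["b" * 64]}
#   merged_tags == ["artist:x"]   # relationship: tags never reach the tag store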
# Persist back to PipeObject
pipe_obj.tag = merged_tags
if preferred_title and not pipe_obj.title:
@@ -880,6 +1227,19 @@ class Add_File(Cmdlet):
return None
debug(f"[add-file] Starting soulseek download: {username} -> {filename}")
# Read Soulseek login credentials from config (client credentials), separate from peer username.
try:
from config import get_soulseek_username, get_soulseek_password
client_user = get_soulseek_username(config) or ""
client_pass = get_soulseek_password(config) or ""
except Exception:
client_user = ""
client_pass = ""
if not client_user or not client_pass:
debug("[add-file] ERROR: Soulseek credentials missing (set [provider=soulseek] username/password in config.conf)")
return None
# Determine output directory (prefer downloads folder in config)
output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads")
@@ -900,6 +1260,8 @@ class Add_File(Cmdlet):
username=username,
filename=filename,
output_dir=output_dir,
client_username=client_user,
client_password=client_pass,
timeout=1200 # 20 minutes
)
)
@@ -937,9 +1299,15 @@ class Add_File(Cmdlet):
f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
if f_hash:
try:
client = hydrus_wrapper.get_client(config)
if client:
client.associate_url(f_hash, hoster_url)
# Only associate when we can target an explicit Hydrus store backend.
# Do not fall back to a global/default Hydrus client.
store_name = getattr(pipe_obj, "store", None)
if store_name:
store = Store(config)
backend = store[str(store_name)]
client = getattr(backend, "_client", None)
if client is not None and hasattr(client, "associate_url"):
client.associate_url(str(f_hash), hoster_url)
except Exception:
pass
@@ -984,6 +1352,7 @@ class Add_File(Cmdlet):
delete_after: bool,
*,
collect_payloads: Optional[List[Dict[str, Any]]] = None,
collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_store: bool = True,
) -> int:
@@ -1018,6 +1387,21 @@ class Add_File(Cmdlet):
# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
# Collect relationship pairs for post-ingest DB/API persistence.
if collect_relationship_pairs is not None:
rels = Add_File._get_relationships(result, pipe_obj)
if isinstance(rels, dict) and rels:
king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
if king_hash and alt_hashes:
bucket = collect_relationship_pairs.setdefault(str(backend_name), set())
for alt_hash in alt_hashes:
if alt_hash and alt_hash != king_hash:
bucket.add((alt_hash, king_hash))
# Relationships must never be stored as tags.
if isinstance(tags, list) and tags:
tags = [t for t in tags if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))]
# Call backend's add_file with full metadata
# Backend returns hash as identifier
@@ -1030,13 +1414,16 @@ class Add_File(Cmdlet):
log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
stored_path: Optional[str] = None
# IMPORTANT: avoid calling get_file() for remote backends.
# For Hydrus, get_file() returns a browser URL (often with an access key) and should
# only be invoked by explicit user commands (e.g. get-file).
try:
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
# Some backends may return a browser URL
stored_path = maybe_path
if type(backend).__name__ == "Folder":
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
stored_path = maybe_path
except Exception:
stored_path = None
@@ -1119,12 +1506,16 @@ class Add_File(Cmdlet):
# Show the add-file summary (overlay only) but let search-store provide the downstream payload.
Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=False)
ok = Add_File._try_emit_search_store_by_hash(
refreshed_items = Add_File._try_emit_search_store_by_hash(
store=backend_name,
hash_value=resolved_hash,
config=config,
)
if not ok:
if refreshed_items:
# Re-emit the canonical store rows so downstream stages receive them.
for emitted in refreshed_items:
ctx.emit(emitted)
else:
# Fall back to emitting the add-file payload so downstream stages still receive an item.
ctx.emit(payload)
else: