Files
Medios-Macina/cmdlet/add_file.py

1687 lines
71 KiB
Python
Raw Normal View History

2025-11-25 20:09:33 -08:00
from __future__ import annotations
2025-12-14 00:53:52 -08:00
from typing import Any, Dict, Optional, Sequence, Tuple, List
2025-11-25 20:09:33 -08:00
from pathlib import Path
import sys
2025-12-11 12:47:30 -08:00
import shutil
2025-12-14 00:53:52 -08:00
import tempfile
2025-12-16 01:45:01 -08:00
import re
2025-11-25 20:09:33 -08:00
import models
import pipeline as ctx
2025-12-11 19:04:02 -08:00
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
2025-12-14 00:53:52 -08:00
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
2025-12-11 19:04:02 -08:00
from Store import Store
2025-11-25 20:09:33 -08:00
from ._shared import (
2025-12-11 12:47:30 -08:00
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
2025-12-11 23:21:45 -08:00
extract_tag_from_result, extract_title_from_result, extract_url_from_result,
2025-12-11 19:04:02 -08:00
merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object
2025-11-25 20:09:33 -08:00
)
2025-12-11 23:21:45 -08:00
from ._shared import collapse_namespace_tag
2025-12-11 19:04:02 -08:00
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
2025-12-11 12:47:30 -08:00
from metadata import write_metadata
2025-11-25 20:09:33 -08:00
2025-12-14 00:53:52 -08:00
# Canonical supported filetypes for all stores/cmdlets
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
class Add_File(Cmdlet):
"""Add file into the DB"""
def __init__(self) -> None:
    """Describe the add-file cmdlet (arguments, usage, help text) and register it."""
    # Cmdlet-specific arguments; the shared ones come from SharedArgs.
    provider_arg = CmdletArg(
        name="provider",
        type="string",
        required=False,
        description="File hosting provider (e.g., 0x0)",
        alias="prov",
    )
    delete_arg = CmdletArg(
        name="delete",
        type="flag",
        required=False,
        description="Delete file after successful upload",
        alias="del",
    )

    # Extended help lines shown alongside the summary/usage.
    help_lines = [
        "- Storage location options (use -storage):",
        " hydrus: Upload to Hydrus database with metadata tagging",
        " local: Copy file to local directory",
        " <path>: Copy file to specified directory",
        "- File provider options (use -provider):",
        " 0x0: Upload to 0x0.st for temporary hosting",
    ]

    super().__init__(
        name="add-file",
        summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
        usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
        arg=[
            SharedArgs.PATH,
            SharedArgs.STORE,
            SharedArgs.HASH,
            provider_arg,
            delete_arg,
        ],
        detail=help_lines,
        exec=self.run,
    )
    self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Main execution entry point: ingest one or more items.

    Each item is resolved to a local file or URL, then routed to exactly one
    destination: download delegation (URLs), a file provider (``-provider``),
    a registered store backend, or a local export directory (``-store``).

    Args:
        result: Piped input from the previous stage. A non-empty list is
            ingested item by item unless ``-path`` was supplied, in which
            case the invocation is treated as a single item.
        args: Raw cmdlet arguments (parsed with ``parse_cmdlet_args``).
        config: Configuration mapping handed to stores and providers.

    Returns:
        ``0`` when at least one item succeeded, otherwise ``1``.
    """
    url_schemes = ("http://", "https://", "magnet:", "torrent:")

    parsed = parse_cmdlet_args(args, self)
    path_arg = parsed.get("path")
    location = parsed.get("store")
    provider_name = parsed.get("provider")
    delete_after = parsed.get("delete", False)

    stage_ctx = ctx.get_stage_context()
    is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))

    # Determine if -store targets a registered backend (vs a filesystem export path).
    is_storage_backend_location = False
    if location:
        try:
            store_probe = Store(config)
            is_storage_backend_location = location in (store_probe.list_backends() or [])
        except Exception:
            is_storage_backend_location = False

    # Decide which items to process.
    # - If user provided -path, treat this invocation as single-item.
    # - Otherwise, if piped input is a list, ingest each item.
    if path_arg:
        items_to_process: List[Any] = [result]
    elif isinstance(result, list) and result:
        items_to_process = list(result)
    else:
        items_to_process = [result]

    debug(f"[add-file] INPUT result type={type(result).__name__}")
    if isinstance(result, list):
        debug(f"[add-file] INPUT result is list with {len(result)} items")
    debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")

    collected_payloads: List[Dict[str, Any]] = []
    pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
    successes = 0
    failures = 0

    # When add-file -store is the last stage, always show a final search-store table.
    # This is especially important for multi-item ingests (e.g., multi-clip downloads)
    # so the user always gets a selectable ResultTable.
    want_final_search_store = bool(is_last_stage) and bool(is_storage_backend_location) and bool(location)
    auto_search_store_after_add = False

    for item in items_to_process:
        pipe_obj = coerce_to_pipe_object(item, path_arg)

        temp_dir_to_cleanup: Optional[Path] = None
        delete_after_item = delete_after
        try:
            media_path_or_url, file_hash = self._resolve_source(item, path_arg, pipe_obj, config)
            debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
            if not media_path_or_url:
                failures += 1
                continue

            # Update pipe_obj with resolved path
            pipe_obj.path = str(media_path_or_url)

            # URL targets: prefer provider-aware download for OpenLibrary selections.
            if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(url_schemes):
                table = None
                full_metadata = None
                if isinstance(pipe_obj.extra, dict):
                    table = pipe_obj.extra.get("table")
                    full_metadata = pipe_obj.extra.get("full_metadata")

                is_openlibrary = (str(table or "").lower() == "openlibrary") or (
                    "openlibrary.org/books/" in media_path_or_url.lower()
                )
                if is_openlibrary:
                    # Enrich tags from OpenLibrary metadata so the stored file has book tags (author/pages/etc).
                    # Best-effort: a failure here must not block the download itself.
                    try:
                        from Provider.openlibrary import OpenLibrary as _OpenLibrary

                        olid = None
                        archive_id = None
                        if isinstance(full_metadata, dict):
                            olid = full_metadata.get("openlibrary_id") or full_metadata.get("openlibrary")
                            archive_id = full_metadata.get("archive_id")
                        if not olid:
                            m = re.search(r"/books/(OL\d+M)", str(media_path_or_url), flags=re.IGNORECASE)
                            if m:
                                olid = m.group(1)

                        scraped_tags: List[str] = []
                        if olid:
                            scraped_tags.extend(_OpenLibrary.scrape_openlibrary_metadata(str(olid)) or [])
                        if archive_id:
                            scraped_tags.append(f"internet_archive:{archive_id}")
                        if scraped_tags:
                            existing = list(pipe_obj.tag or [])
                            pipe_obj.tag = merge_sequences(existing, scraped_tags, case_sensitive=False)
                    except Exception as exc:
                        debug(f"[add-file] OpenLibrary tag enrichment failed: {type(exc).__name__}: {exc}")

                    from ProviderCore.registry import get_search_provider
                    from ProviderCore.base import SearchResult

                    provider = get_search_provider("openlibrary", config)
                    if provider is None:
                        log("[add-file] OpenLibrary provider not available", file=sys.stderr)
                        failures += 1
                        continue

                    # Download into a throwaway directory; it is removed in ``finally``.
                    temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))
                    sr = SearchResult(
                        table="openlibrary",
                        title=str(getattr(pipe_obj, "title", None) or "Unknown"),
                        path=str(media_path_or_url),
                        full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
                    )
                    downloaded = provider.download(sr, temp_dir_to_cleanup)
                    if downloaded is None:
                        log("[add-file] OpenLibrary download failed", file=sys.stderr)
                        failures += 1
                        continue

                    downloaded_path = Path(downloaded)
                    if downloaded_path.exists() and downloaded_path.is_dir():
                        log(
                            "[add-file] OpenLibrary download produced a directory (missing img2pdf?). Cannot ingest.",
                            file=sys.stderr,
                        )
                        failures += 1
                        continue

                    media_path_or_url = str(downloaded_path)
                    pipe_obj.path = str(downloaded_path)
                    delete_after_item = True

            # For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
            if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(url_schemes):
                code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url
            if not self._validate_source(media_path):
                failures += 1
                continue

            if provider_name:
                code = self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after_item)
                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            if location:
                try:
                    store = Store(config)
                    backends = store.list_backends()
                    if location in backends:
                        code = self._handle_storage_backend(
                            item,
                            media_path,
                            location,
                            pipe_obj,
                            config,
                            delete_after_item,
                            collect_payloads=collected_payloads,
                            collect_relationship_pairs=pending_relationship_pairs,
                            suppress_last_stage_overlay=want_final_search_store,
                            auto_search_store=auto_search_store_after_add,
                        )
                    else:
                        code = self._handle_local_export(media_path, location, pipe_obj, config, delete_after_item)
                except Exception as exc:
                    debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
                    log(f"Invalid location: {location}", file=sys.stderr)
                    failures += 1
                    continue

                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            log("No destination specified", file=sys.stderr)
            failures += 1
        finally:
            if temp_dir_to_cleanup is not None:
                try:
                    shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
                except Exception as exc:
                    debug(f"[add-file] Failed to remove temp dir {temp_dir_to_cleanup}: {exc}")

    # Always end add-file -store (when last stage) by showing the canonical store table.
    # This keeps output consistent and ensures @N selection works for multi-item ingests.
    if want_final_search_store and collected_payloads:
        try:
            candidate_hashes: List[str] = []
            for payload in collected_payloads:
                h = payload.get("hash") if isinstance(payload, dict) else None
                if isinstance(h, str) and len(h) == 64:
                    candidate_hashes.append(h)
            # Deduplicate while preserving order.
            hashes = list(dict.fromkeys(candidate_hashes))

            refreshed_items = Add_File._try_emit_search_store_by_hashes(
                store=str(location),
                hash_values=hashes,
                config=config,
            )
            if not refreshed_items:
                # Fallback: at least show the add-file payloads as a display overlay
                from result_table import ResultTable

                table = ResultTable("Result")
                for payload in collected_payloads:
                    table.add_result(payload)
                ctx.set_last_result_table_overlay(table, collected_payloads, subject=collected_payloads)
        except Exception as exc:
            debug(f"[add-file] Failed to build final result table: {type(exc).__name__}: {exc}")

    # Persist relationships into backend DB/API.
    if pending_relationship_pairs:
        try:
            Add_File._apply_pending_relationships(pending_relationship_pairs, config)
        except Exception as exc:
            debug(f"[add-file] Failed to persist relationships: {type(exc).__name__}: {exc}")

    if successes > 0:
        return 0
    return 1
2025-12-11 12:47:30 -08:00
2025-12-16 01:45:01 -08:00
@staticmethod
def _try_emit_search_store_by_hashes(*, store: str, hash_values: List[str], config: Dict[str, Any]) -> Optional[List[Any]]:
    """Run search-store for several hashes and promote its table to a display overlay.

    Only 64-character string hashes are queried. Returns the items emitted by
    search-store on success; returns None when there is nothing to query, when
    search-store exits non-zero, or when anything raises.
    """
    hashes = []
    for candidate in hash_values or []:
        if isinstance(candidate, str) and len(candidate) == 64:
            hashes.append(candidate)
    if not (store and hashes):
        return None

    try:
        from cmdlet.search_store import CMDLET as search_store_cmdlet

        query = "hash:" + ",".join(hashes)
        log(f"[add-file] Refresh: search-store -store {store} \"{query}\"", file=sys.stderr)

        # Run search-store under a temporary stage context so its ctx.emit() calls
        # stay out of the outer add-file pipeline stage.
        outer_ctx = ctx.get_stage_context()
        scratch_ctx = ctx.PipelineStageContext(
            stage_index=0,
            total_stages=1,
            worker_id=getattr(outer_ctx, "worker_id", None),
        )
        ctx.set_stage_context(scratch_ctx)
        try:
            exit_code = search_store_cmdlet.run(None, ["-store", str(store), query], config)
            emitted_items = list(getattr(scratch_ctx, "emits", []) or [])
        finally:
            # Always restore the caller's stage context.
            ctx.set_stage_context(outer_ctx)

        if exit_code != 0:
            return None

        # Promote the search-store result to a display overlay so the CLI prints it
        # for action commands like add-file.
        current_ctx = ctx.get_stage_context()
        if current_ctx is None or bool(getattr(current_ctx, "is_last_stage", False)):
            try:
                last_table = ctx.get_last_result_table()
                last_items = ctx.get_last_result_items()
                if last_table is not None and last_items:
                    ctx.set_last_result_table_overlay(
                        last_table,
                        last_items,
                        subject={"store": store, "hash": hashes},
                    )
            except Exception:
                pass
        return emitted_items
    except Exception as exc:
        debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
        return None
@staticmethod
def _parse_relationship_tag_king_alts(tag_value: str) -> tuple[Optional[str], List[str]]:
    """Split a relationship tag into ``(king_hash, alt_hashes)``.

    Accepted spellings (hashes are 64 hex characters, returned lowercase):
      - list form:  ``relationship: <KING>,<ALT>,<ALT>`` - first hash is the king
      - typed form: ``relationship: hash(king)<KING>,hash(alt)<ALT>...``
        ("related" entries count as alts; if no king is marked, the first
        typed hash becomes the king and the rest become alts)

    Returns ``(None, [])`` when the value is not a string or holds no hash.
    Alts are de-duplicated in order and never contain the king.
    """
    nothing: tuple[Optional[str], List[str]] = (None, [])
    if not isinstance(tag_value, str):
        return nothing
    text = tag_value.strip()
    if not text:
        return nothing

    # Only look at what follows a leading "relationship:" namespace, if present.
    prefix, colon, remainder = text.partition(":")
    body = remainder.strip() if colon and prefix.strip().lower() == "relationship" else text

    def _clean_alts(candidates: List[str], king_hash: str) -> List[str]:
        # Order-preserving de-dup of well-formed hashes, minus the king itself.
        unique = dict.fromkeys(c for c in candidates if c and len(c) == 64)
        return [c for c in unique if c != king_hash]

    # Typed form: hash(type)HEX, optionally wrapped in angle brackets.
    typed_pairs = re.findall(r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?", body)
    if typed_pairs:
        king: Optional[str] = None
        alts: List[str] = []
        for kind, digest in typed_pairs:
            kind_norm = kind.strip().lower()
            digest_norm = str(digest).strip().lower()
            if kind_norm == "king":
                king = digest_norm
            elif kind_norm in {"alt", "related"}:
                alts.append(digest_norm)

        if not king:
            # No explicit king: the first hash listed takes that role.
            ordered = [str(digest).strip().lower() for _, digest in typed_pairs]
            king = ordered[0] if ordered else None
            alts = [d for d in ordered[1:] if d]

        if king and len(king) == 64:
            return king, _clean_alts(alts, king)
        return None, []

    # List form: plain hashes, first one is the king.
    plain = [found.strip().lower() for found in re.findall(r"\b[a-fA-F0-9]{64}\b", body)]
    if not plain:
        return None, []
    head = plain[0]
    return head, _clean_alts(plain[1:], head)
@staticmethod
def _parse_relationships_king_alts(relationships: Dict[str, Any]) -> tuple[Optional[str], List[str]]:
    """Extract ``(king_hash, alt_hashes)`` from a relationships mapping.

    Values under "king", "alt" and "related" may each be a single string or a
    list of strings; only entries that are 64 characters long after
    strip/lowercase are kept. "related" hashes are treated as alts. Without a
    usable king the result is ``(None, [])``. Alts are de-duplicated in order
    and never include the king.
    """
    if not isinstance(relationships, dict) or not relationships:
        return None, []

    def _hashes_in(value: Any) -> List[str]:
        # Accept a lone string or a list; ignore every other shape.
        if isinstance(value, str):
            entries = [value]
        elif isinstance(value, list):
            entries = [entry for entry in value if isinstance(entry, str)]
        else:
            entries = []
        cleaned = (entry.strip().lower() for entry in entries)
        return [digest for digest in cleaned if len(digest) == 64]

    king_candidates = _hashes_in(relationships.get("king"))
    if not king_candidates:
        return None, []
    king = king_candidates[0]

    combined = _hashes_in(relationships.get("alt")) + _hashes_in(relationships.get("related"))
    alts = [digest for digest in dict.fromkeys(combined) if digest != king]
    return king, alts
@staticmethod
def _apply_pending_relationships(pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any]) -> None:
    """Persist queued relationship pairs into the backend each belongs to.

    Args:
        pending: Maps a backend name to a set of ``(alt_hash, king_hash)`` pairs.
        config: Configuration mapping used to construct the ``Store``.

    Behaviour per backend:
      - ``Folder`` backends exposing a callable ``location``: each pair is
        written through ``API_folder_store(...).set_relationship_by_hash``
        as a one-directional "alt" link.
      - Backends that look like Hydrus (type name contains "hydrus" or a
        ``_client`` attribute exists): hashes are normalised, both files must
        exist according to ``fetch_file_metadata``, then
        ``client.set_relationship(alt, king, "alt")`` is called. Only the
        backend's own ``_client`` is used so relationships never cross stores.
      - Anything else is ignored.

    The whole operation is best-effort: failures are reported through
    ``debug`` and the remaining pairs/backends are still processed.
    """
    if not pending:
        return
    try:
        store = Store(config)
    except Exception as exc:
        debug(f"[add-file] Relationships skipped; store unavailable: {type(exc).__name__}: {exc}")
        return

    def _hydrus_has_file(client: Any, hash_hex: str, cache: Dict[str, bool]) -> bool:
        # Memoise definite answers so a king shared by many alts is queried once.
        if hash_hex in cache:
            return cache[hash_hex]
        try:
            if not hasattr(client, "fetch_file_metadata"):
                return False
            payload = client.fetch_file_metadata(
                hashes=[hash_hex],
                include_service_keys_to_tags=False,
                include_file_url=False,
                include_duration=False,
                include_size=False,
                include_mime=False,
                include_notes=False,
            )
            meta = payload.get("metadata") if isinstance(payload, dict) else None
            found = bool(isinstance(meta, list) and meta)
        except Exception:
            # Lookup errors are not cached, so a later pair may retry.
            return False
        cache[hash_hex] = found
        return found

    for backend_name, pairs in pending.items():
        if not pairs:
            continue
        try:
            backend = store[str(backend_name)]
        except Exception as exc:
            debug(f"[add-file] Relationships skipped; backend '{backend_name}' unavailable: {exc}")
            continue

        backend_type = type(backend).__name__

        # Folder-backed local DB
        location_fn = getattr(backend, "location", None)
        if backend_type == "Folder" and callable(location_fn):
            try:
                root = Path(str(location_fn())).expanduser()
                with API_folder_store(root) as db:
                    for alt_hash, king_hash in sorted(pairs):
                        if not alt_hash or not king_hash or alt_hash == king_hash:
                            continue
                        # Hash-first store DB write; a failure only skips this pair.
                        try:
                            db.set_relationship_by_hash(str(alt_hash), str(king_hash), "alt", bidirectional=False)
                        except Exception as exc:
                            debug(f"[add-file] Folder relationship write failed ({backend_name}): {exc}")
            except Exception as exc:
                debug(f"[add-file] Folder relationship persistence failed ({backend_name}): {exc}")
            continue

        # Hydrus
        if "hydrus" in backend_type.lower() or hasattr(backend, "_client"):
            client: Any = getattr(backend, "_client", None)
            # Do not fall back to a global/default Hydrus client here; relationships must not be cross-store.
            if client is None or not hasattr(client, "set_relationship"):
                continue

            exists_cache: Dict[str, bool] = {}
            applied: set[tuple[str, str]] = set()
            for alt_hash, king_hash in sorted(pairs):
                if not alt_hash or not king_hash:
                    continue
                try:
                    alt_norm = str(alt_hash).strip().lower()
                    king_norm = str(king_hash).strip().lower()
                    if len(alt_norm) != 64 or len(king_norm) != 64:
                        continue
                    # Compare after normalisation so case/whitespace variants cannot
                    # produce a self-relationship or a duplicate API call.
                    if alt_norm == king_norm or (alt_norm, king_norm) in applied:
                        continue
                    if not _hydrus_has_file(client, alt_norm, exists_cache):
                        continue
                    if not _hydrus_has_file(client, king_norm, exists_cache):
                        continue
                    client.set_relationship(alt_norm, king_norm, "alt")
                    applied.add((alt_norm, king_norm))
                except Exception as exc:
                    debug(f"[add-file] Hydrus relationship write failed ({backend_name}): {exc}")
            continue

        # Other backends: nothing to persist.
2025-12-11 12:47:30 -08:00
@staticmethod
def _resolve_source(
    result: Any,
    path_arg: Optional[str],
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
) -> Tuple[Optional[Path | str], Optional[str]]:
    """Resolve the source file path (or URL) from args or the pipeline result.

    Candidates are tried in this order; the first that matches wins:
      1. ``hash`` + ``store`` on a dict result, looked up in that store backend.
      2. The explicit ``path_arg``.
      3. ``pipe_obj.path`` (``hydrus:<hash>`` reference, URL, or filesystem path).
      4. ``pipe_obj.url`` when it is a URL.
      5. A hash from ``pipe_obj.extra`` / ``pipe_obj.hash`` fetched via Hydrus.
      6. ``result`` itself when it is a string.
      7. The first element of a list ``result`` (string, dict or PipeObject).

    ``pipe_obj.path`` is updated when a filesystem path (or a store-provided
    URL in step 1) is chosen.

    Returns:
        ``(media_path_or_url, file_hash)``. The first element is a ``Path``
        for local files, a ``str`` for URLs, or ``None`` when nothing could
        be resolved. The hash is ``None`` when it is not known.
    """
    url_schemes = ("http://", "https://", "magnet:", "torrent:")

    def _is_url(value: Any) -> bool:
        # True for strings starting with one of the supported URL schemes.
        return isinstance(value, str) and value.lower().startswith(url_schemes)

    def _hydrus_store_name() -> Optional[str]:
        # Store name attached to the pipe object, falling back to extra["store"].
        name = getattr(pipe_obj, "store", None)
        if not name and isinstance(pipe_obj.extra, dict):
            name = pipe_obj.extra.get("store")
        return str(name).strip() if name else None

    # PRIORITY 1: Try hash+store from result dict (most reliable for @N selections)
    if isinstance(result, dict):
        result_hash = result.get("hash")
        result_store = result.get("store")
        if result_hash and result_store:
            debug(
                f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}"
            )
            try:
                store = Store(config)
                if result_store in store.list_backends():
                    backend = store[result_store]
                    media_path = backend.get_file(result_hash)
                    if isinstance(media_path, Path) and media_path.exists():
                        pipe_obj.path = str(media_path)
                        return media_path, str(result_hash)
                    if _is_url(media_path):
                        pipe_obj.path = media_path
                        return media_path, str(result_hash)
            except Exception as exc:
                debug(f"[add-file] Failed to retrieve via hash+store: {exc}")

    # PRIORITY 2: Try explicit path argument
    if path_arg:
        media_path = Path(path_arg)
        pipe_obj.path = str(media_path)
        debug(f"[add-file] Using explicit path argument: {media_path}")
        return media_path, None

    # PRIORITY 3: Try from pipe_obj.path (check file first before URL)
    pipe_path = getattr(pipe_obj, "path", None)
    if pipe_path:
        pipe_path_str = str(pipe_path)
        debug(f"Resolved pipe_path: {pipe_path_str}")
        if pipe_path_str.startswith("hydrus:"):
            file_hash = pipe_path_str.split(":", 1)[1]
            media_path, success = Add_File._fetch_hydrus_path(
                file_hash, config, store_name=_hydrus_store_name()
            )
            return media_path, file_hash if success else None
        if _is_url(pipe_path_str):
            return pipe_path_str, None
        return Path(pipe_path_str), None

    # PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
    pipe_url = getattr(pipe_obj, "url", None)
    if _is_url(pipe_url):
        debug(f"Detected URL in pipe_obj.url: {pipe_url}")
        return pipe_url, None

    # Try from hydrus hash in pipe_obj.extra or hash
    hydrus_hash = None
    if isinstance(pipe_obj.extra, dict):
        hydrus_hash = pipe_obj.extra.get("hydrus_hash") or pipe_obj.extra.get("hash")
    hydrus_hash = hydrus_hash or pipe_obj.hash
    if hydrus_hash and hydrus_hash != "unknown":
        media_path, success = Add_File._fetch_hydrus_path(
            str(hydrus_hash), config, store_name=_hydrus_store_name()
        )
        return media_path, str(hydrus_hash) if success else None

    # Try from result (if it's a string path or URL)
    if isinstance(result, str):
        debug(f"Checking result string: {result}")
        # Check if result is a URL before treating as file path
        if _is_url(result):
            debug(f"Detected URL in result string: {result}")
            return result, None  # Return URL string directly
        media_path = Path(result)
        pipe_obj.path = str(media_path)
        return media_path, None

    # Try from result if it's a list (pipeline emits multiple results)
    if isinstance(result, list) and result:
        first_item = result[0]

        # If the first item is a string, it's either a URL or a file path
        if isinstance(first_item, str):
            debug(f"Checking result list[0]: {first_item}")
            if _is_url(first_item):
                debug(f"Detected URL in result list: {first_item}")
                return first_item, None  # Return URL string directly
            media_path = Path(first_item)
            pipe_obj.path = str(media_path)
            return media_path, None

        # If the first item is a dict, interpret it as a PipeObject-style result
        if isinstance(first_item, dict):
            # Look for path or path-like keys
            path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
            # If the dict includes a 'paths' list (multi-part/section download), prefer the first file
            paths_val = first_item.get("paths")
            if not path_candidate and isinstance(paths_val, (list, tuple)) and paths_val:
                path_candidate = paths_val[0]
            if path_candidate:
                debug(f"Resolved path from result dict: {path_candidate}")
                # Keep URLs as strings: Path() would collapse "//" and corrupt them.
                if _is_url(path_candidate):
                    return path_candidate, first_item.get("hash")
                try:
                    media_path = Path(path_candidate)
                    pipe_obj.path = str(media_path)
                    return media_path, first_item.get("hash")
                except Exception:
                    # Fallback to returning string if not a path
                    return str(path_candidate), first_item.get("hash")

        # If first item is a PipeObject object
        try:
            if isinstance(first_item, models.PipeObject):
                path_candidate = getattr(first_item, "path", None)
                if path_candidate:
                    debug(f"Resolved path from PipeObject: {path_candidate}")
                    # Same URL guard as for dict items above.
                    if _is_url(path_candidate):
                        return path_candidate, getattr(first_item, "hash", None)
                    media_path = Path(path_candidate)
                    pipe_obj.path = str(media_path)
                    return media_path, getattr(first_item, "hash", None)
        except Exception:
            pass

    debug(f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}")
    log("File path could not be resolved")
    return None, None
@staticmethod
def _fetch_hydrus_path(
    file_hash: str,
    config: Dict[str, Any],
    store_name: Optional[str] = None,
) -> Tuple[Optional[Path], bool]:
    """Ask Hydrus where the file for ``file_hash`` lives on disk.

    When ``store_name`` is given, only that store's own ``_client`` is used
    (no fallback to a default client); otherwise the client comes from
    ``hydrus_wrapper.get_client(config)``.

    Returns ``(path, True)`` when Hydrus reports a path that exists, and
    ``(None, False)`` in every other case (each failure is logged to stderr).
    """
    if not file_hash:
        return None, False

    try:
        if store_name:
            # Store specified: do not fall back to a global/default Hydrus client.
            hydrus = None
            try:
                scoped = getattr(Store(config)[str(store_name)], "_client", None)
                if scoped is not None and hasattr(scoped, "get_file_path"):
                    hydrus = scoped
            except Exception:
                hydrus = None
            if hydrus is None:
                log(f"❌ Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
                return None, False
        else:
            hydrus = hydrus_wrapper.get_client(config)

        if not hydrus:
            log("❌ Hydrus client not available", file=sys.stderr)
            return None, False

        reported = hydrus.get_file_path(file_hash).get("path")
        if not reported:
            log("❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
            return None, False

        resolved = Path(reported)
        if not resolved.exists():
            log(f"❌ Hydrus file path does not exist: {resolved}", file=sys.stderr)
            return None, False

        log(f"✓ Retrieved Hydrus file path: {resolved}", file=sys.stderr)
        return resolved, True
    except Exception as exc:
        log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
        return None, False
@staticmethod
def _validate_source(media_path: Optional[Path]) -> bool:
    """Return True when the source is a URL or an existing file of a supported type.

    URL-like targets (http/https/magnet/torrent) are accepted without any
    filesystem check. Local targets must exist, be regular files, and have a
    suffix listed in ``SUPPORTED_MEDIA_EXTENSIONS``; otherwise the reason is
    logged and False is returned.
    """
    if media_path is None:
        return False

    # URL targets skip file existence and extension checks.
    if str(media_path).lower().startswith(("http://", "https://", "magnet:", "torrent:")):
        return True

    is_regular_file = media_path.exists() and media_path.is_file()
    if not is_regular_file:
        log(f"File not found: {media_path}")
        return False

    suffix = media_path.suffix.lower()
    if suffix in SUPPORTED_MEDIA_EXTENSIONS:
        return True

    log(f"❌ Unsupported file type: {suffix}", file=sys.stderr)
    return False
@staticmethod
def _is_url_target(media_path: Optional[Path]) -> bool:
    """Return True when the target's text form starts with http:// or https://."""
    if not media_path:
        return False
    return str(media_path).lower().startswith(("http://", "https://"))
def _delegate_to_download_data(
    self,
    result: Any,
    url_str: str,
    location: Optional[str],
    provider_name: Optional[str],
    args: Sequence[str],
    config: Dict[str, Any],
) -> int:
    """Hand a URL target over to the download-media cmdlet and return its exit code.

    The forwarded argument list is: the URL, then the caller's original
    ``args``, then any ``_selection_args`` found on the result (dict key) or
    on its ``extra`` dict. ``location`` and ``provider_name`` are accepted
    but not forwarded by this method.
    """
    log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)

    # Reuse the globally-registered cmdlet instance to avoid duplicative registration
    from cmdlet.download_media import CMDLET as dl_cmdlet

    # URL first, followed by whatever arguments add-file itself received.
    forwarded = [url_str, *(args or [])]

    # Selection arguments (like -item from an @N selection) ride along when present.
    if isinstance(result, dict) and "_selection_args" in result:
        selection = result["_selection_args"]
    else:
        extra = getattr(result, "extra", None)
        has_selection = isinstance(extra, dict) and "_selection_args" in extra
        selection = extra["_selection_args"] if has_selection else None
    if selection:
        forwarded.extend(selection)

    # download-media doesn't support -storage flag
    # It downloads to the configured directory, then add-file will handle storage
    # Note: Provider uploads (0x0) are not supported via this path
    return dl_cmdlet.run(None, forwarded, config)
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
    """Collect normalized URLs for an item, taking the first source that yields any.

    Sources, in order: ``pipe_obj.url``, ``pipe_obj.extra["url"]``,
    ``result["url"]`` (dict results only), then ``extract_url_from_result``.
    Every candidate goes through ``metadata.normalize_urls``.
    """
    from metadata import normalize_urls

    # 1) Explicit PipeObject.url
    try:
        found = normalize_urls(getattr(pipe_obj, "url", None))
    except Exception:
        found = []
    if found:
        return found

    # 2) extra["url"] on the pipe object (errors leave the previous value in place)
    try:
        if isinstance(pipe_obj.extra, dict):
            found = normalize_urls(pipe_obj.extra.get("url"))
    except Exception:
        pass
    if found:
        return found

    # 3) "url" key of a dict result
    if isinstance(result, dict):
        found = normalize_urls(result.get("url"))
        if found:
            return found

    # 4) Last resort: the shared extractor helper
    return normalize_urls(extract_url_from_result(result))
2025-12-11 12:47:30 -08:00
@staticmethod
def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
    """Return relationship data for an item, or None when none can be found.

    Tries ``pipe_obj.get_relationships()``, then a truthy ``"relationships"``
    entry on a dict result, then ``extract_relationships(result)``. Errors
    from the first and last source are swallowed.
    """
    from_pipe = None
    try:
        # ``or None`` folds any empty/falsy answer into "not found".
        from_pipe = pipe_obj.get_relationships() or None
    except Exception:
        from_pipe = None
    if from_pipe is not None:
        return from_pipe

    if isinstance(result, dict):
        declared = result.get("relationships")
        if declared:
            return declared

    try:
        return extract_relationships(result)
    except Exception:
        return None
@staticmethod
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
    """Return the item's duration in seconds, or None when unknown.

    ``pipe_obj.duration`` is preferred; otherwise ``extract_duration(result)``
    is consulted (errors there yield None). Accepted value shapes: positive
    numbers, numeric strings, and "MM:SS" / "HH:MM:SS" strings.
    """

    def _to_seconds(raw: Any) -> Optional[float]:
        if raw is None:
            return None
        if isinstance(raw, (int, float)):
            return float(raw) if raw > 0 else None
        if not isinstance(raw, str):
            return None

        text = raw.strip()
        if not text:
            return None

        # Plain numeric string: non-positive values are rejected outright.
        try:
            number = float(text)
        except ValueError:
            number = None
        if number is not None:
            return number if number > 0 else None

        # Clock notation: two or three all-digit fields separated by colons.
        if ":" in text:
            fields = [piece.strip() for piece in text.split(":") if piece.strip()]
            if len(fields) in {2, 3} and all(piece.isdigit() for piece in fields):
                total = 0
                for amount in (int(piece) for piece in fields):
                    total = total * 60 + amount
                return float(total)
        return None

    from_pipe = _to_seconds(getattr(pipe_obj, "duration", None))
    if from_pipe is not None:
        return from_pipe

    try:
        return _to_seconds(extract_duration(result))
    except Exception:
        return None
@staticmethod
def _update_pipe_object_destination(
    pipe_obj: models.PipeObject,
    *,
    hash_value: str,
    store: str,
    path: Optional[str],
    tag: List[str],
    title: Optional[str],
    extra_updates: Optional[Dict[str, Any]] = None,
) -> None:
    """Stamp destination details onto ``pipe_obj`` in place.

    ``hash``, ``store``, ``path`` and ``tag`` are always overwritten; ``title``
    only when a truthy value is given. ``extra_updates`` is merged into an
    existing ``extra`` dict, or becomes the new ``extra`` when the current
    value is not a dict.
    """
    assignments = (
        ("hash", hash_value),
        ("store", store),
        ("path", path),
        ("tag", tag),
    )
    for attribute, value in assignments:
        setattr(pipe_obj, attribute, value)

    if title:
        pipe_obj.title = title

    updates = extra_updates or {}
    if isinstance(pipe_obj.extra, dict):
        pipe_obj.extra.update(updates)
    else:
        pipe_obj.extra = dict(updates)
@staticmethod
def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
    """Log a formatted view of ``pipe_obj`` to stderr, emit it as a dict, and clear the stage table."""
    from result_table import format_result

    rendered = format_result(pipe_obj, title="Result")
    log(rendered, file=sys.stderr)

    payload = pipe_obj.to_dict()
    ctx.emit(payload)
    ctx.set_current_stage_table(None)
2025-12-13 12:09:50 -08:00
@staticmethod
def _emit_storage_result(payload: Dict[str, Any], *, overlay: bool = True, emit: bool = True) -> None:
    """Emit a storage-style result payload and optionally show it as an overlay table.

    - With ``emit`` set, the dict is passed to ``ctx.emit`` for downstream stages.
    - With ``overlay`` set and this being the last stage (or no stage context),
      a one-row "Result" table is installed as the display overlay so ``@1``
      selects this payload. If building the table fails, only the selectable
      items are set.
    """
    if emit:
        ctx.emit(payload)

    current = ctx.get_stage_context()
    final_stage = current is None or bool(getattr(current, "is_last_stage", False))
    if not (final_stage and overlay):
        return

    try:
        from result_table import ResultTable

        single_row = ResultTable("Result")
        single_row.add_result(payload)
        # Overlay so @1 refers to this add-file result without overwriting search history
        ctx.set_last_result_table_overlay(single_row, [payload], subject=payload)
    except Exception:
        # If table rendering fails, still keep @ selection items
        try:
            ctx.set_last_result_items_only([payload])
        except Exception:
            pass
2025-12-14 00:53:52 -08:00
@staticmethod
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> Optional[List[Any]]:
    """Run ``search-store`` for one hash and return the items it emitted.

    The search runs under a temporary stage context, which is swapped back out
    afterwards, so its emits are captured there rather than on the caller's
    stage. When the caller's stage is the last one (or there is no stage
    context), the table and items left behind by search-store are copied into
    the display overlay.

    Returns the list of items captured from the temporary context on success,
    or ``None`` when search-store returns a non-zero code or anything raises.
    """
    try:
        from cmdlet.search_store import CMDLET as search_store_cmdlet

        search_args = ["-store", str(store), f"hash:{str(hash_value)}"]
        log(f"[add-file] Refresh: search-store -store {store} \"hash:{hash_value}\"", file=sys.stderr)

        # Swap in a scratch stage context for the duration of the search.
        outer_ctx = ctx.get_stage_context()
        scratch_ctx = ctx.PipelineStageContext(
            stage_index=0,
            total_stages=1,
            worker_id=getattr(outer_ctx, "worker_id", None),
        )
        ctx.set_stage_context(scratch_ctx)
        try:
            exit_code = search_store_cmdlet.run(None, search_args, config)
            captured = list(getattr(scratch_ctx, "emits", []) or [])
        finally:
            ctx.set_stage_context(outer_ctx)

        if exit_code != 0:
            return None

        # Promote the search-store table to the display overlay on the last stage.
        active_ctx = ctx.get_stage_context()
        on_last_stage = active_ctx is None or bool(getattr(active_ctx, "is_last_stage", False))
        if on_last_stage:
            try:
                table = ctx.get_last_result_table()
                rows = ctx.get_last_result_items()
                if table is not None and rows:
                    ctx.set_last_result_table_overlay(
                        table,
                        rows,
                        subject={"store": store, "hash": hash_value},
                    )
            except Exception:
                pass

        return captured
    except Exception as exc:
        debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
        return None
2025-12-14 00:53:52 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _prepare_metadata(
    result: Any,
    media_path: Path,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
) -> Tuple[List[str], List[str], Optional[str], Optional[str]]:
    """Prepare tags, url, title and hash for the file.

    Tags come from ``pipe_obj.tag`` (falling back to ``extract_tag_from_result``)
    merged with the tags loaded by ``_load_sidecar_bundle``. ``title:`` tags are
    removed from both sources and a single ``title:<preferred_title>`` tag is
    appended when a title is known. ``relationship:`` tags are stripped from the
    final list; when ``pipe_obj.relationships`` is empty they are parsed into it.

    Side effects on *pipe_obj*: ``tag`` is replaced, ``title`` and ``hash`` are
    filled in when empty, and ``extra["url"]`` is set if not already present.

    Returns (tags, url, preferred_title, file_hash)
    """

    def is_title_tag(value: Any) -> bool:
        return str(value).strip().lower().startswith("title:")

    def is_relationship_tag(value: Any) -> bool:
        return isinstance(value, str) and value.strip().lower().startswith("relationship:")

    def normalize_title_tag(tag: Any) -> Any:
        # is_title_tag() guarantees a colon, so the split always has 2 parts.
        if is_title_tag(tag):
            value = str(tag).split(":", 1)[1].replace("_", " ").strip()
            return f"title:{value}"
        return tag

    tags_from_result = list(pipe_obj.tag or [])
    if not tags_from_result:
        try:
            tags_from_result = list(extract_tag_from_result(result) or [])
        except Exception:
            tags_from_result = []

    url_from_result = Add_File._get_url(result, pipe_obj)

    # Title preference: pipe object, then first non-empty title: tag, then the result.
    preferred_title = pipe_obj.title
    if not preferred_title:
        for t in tags_from_result:
            if is_title_tag(t):
                # str(t): the tag was matched through str(), so split it the same
                # way instead of assuming it is a str instance.
                candidate = str(t).split(":", 1)[1].strip().replace("_", " ").strip()
                if candidate:
                    preferred_title = candidate
                    break
    if not preferred_title:
        preferred_title = extract_title_from_result(result)
    if preferred_title:
        preferred_title = str(preferred_title).replace("_", " ").strip()

    store = getattr(pipe_obj, "store", None)
    _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
        media_path, store, config
    )

    tags_from_result_no_title = [t for t in tags_from_result if not is_title_tag(t)]
    sidecar_tags = collapse_namespace_tag(
        [normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last"
    )
    sidecar_tags_filtered = [t for t in sidecar_tags if not is_title_tag(t)]

    merged_tags = merge_sequences(tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True)
    if preferred_title:
        merged_tags.append(f"title:{preferred_title}")

    merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
    file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)

    # Relationships must not be stored as tags.
    # If relationship tags exist (legacy sidecar format), capture them into
    # PipeObject.relationships and strip them from the final tag list.
    relationship_tags = [t for t in merged_tags if is_relationship_tag(t)]
    if relationship_tags:
        try:
            existing_rels = getattr(pipe_obj, "relationships", None)
            if not isinstance(existing_rels, dict) or not existing_rels:
                king: Optional[str] = None
                alts: List[str] = []
                for rel_tag in relationship_tags:
                    k, a = Add_File._parse_relationship_tag_king_alts(rel_tag)
                    if k and not king:
                        king = k
                    if a:
                        alts.extend(a)
                if king:
                    # Keep 64-character alt hashes only, de-duplicated in order,
                    # and never list the king as its own alternate.
                    seen_alt: set[str] = set()
                    unique_alts: List[str] = []
                    for h in alts:
                        if h and h != king and len(h) == 64 and h not in seen_alt:
                            seen_alt.add(h)
                            unique_alts.append(h)
                    payload: Dict[str, Any] = {"king": [king]}
                    if unique_alts:
                        payload["alt"] = unique_alts
                    pipe_obj.relationships = payload
        except Exception as exc:
            # Best effort: a malformed legacy tag must not abort the add.
            debug(f"[add-file] Could not parse relationship tags: {type(exc).__name__}: {exc}")
        merged_tags = [t for t in merged_tags if not is_relationship_tag(t)]

    # Persist back to PipeObject
    pipe_obj.tag = merged_tags
    if preferred_title and not pipe_obj.title:
        pipe_obj.title = preferred_title
    if file_hash and not pipe_obj.hash:
        pipe_obj.hash = file_hash
    if isinstance(pipe_obj.extra, dict):
        pipe_obj.extra.setdefault("url", merged_url)

    return merged_tags, merged_url, preferred_title, file_hash
@staticmethod
def _handle_local_export(
    media_path: Path,
    location: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    delete_after: bool,
) -> int:
    """Handle exporting to a specific local path (Copy).

    The file is copied (never moved) into ``location`` under a title-derived
    name, existing sidecars are copied alongside it, fresh sidecar/metadata
    files are written, and the updated pipe object is emitted. The source is
    removed only when ``delete_after`` is true.

    Returns 0 on success, 1 when the destination is invalid or cannot be
    created, or when the copy fails.
    """
    try:
        destination_root = Path(location)
    except Exception as exc:
        log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
        return 1

    log(f"Exporting to local path: {destination_root}", file=sys.stderr)

    result = None
    tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)

    # Determine Filename (Title-based)
    title_value = title
    if not title_value:
        # Try to find title in tags
        title_tag = next((t for t in tags if str(t).strip().lower().startswith("title:")), None)
        if title_tag:
            # The tag was matched through str(), so split it the same way.
            title_value = str(title_tag).split(":", 1)[1].strip()
    if not title_value:
        title_value = media_path.stem.replace("_", " ").strip()

    safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
    base_name = safe_title or media_path.stem
    new_name = base_name + media_path.suffix

    # A destination that cannot be created (permissions, a file in the way)
    # is reported like every other failure in this handler.
    try:
        destination_root.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        log(f"❌ Failed to create destination '{destination_root}': {exc}", file=sys.stderr)
        return 1

    target_path = destination_root / new_name
    if target_path.exists():
        target_path = unique_path(target_path)

    # COPY Operation (Safe Export)
    try:
        shutil.copy2(str(media_path), target_path)
    except Exception as exc:
        log(f"❌ Failed to export file: {exc}", file=sys.stderr)
        return 1

    # Copy Sidecars
    Add_File._copy_sidecars(media_path, target_path)

    # Ensure hash for exported copy
    if not f_hash:
        try:
            f_hash = sha256_file(target_path)
        except Exception:
            f_hash = None

    # Write Metadata Sidecars (since it's an export)
    relationships = Add_File._get_relationships(result, pipe_obj)
    try:
        write_sidecar(target_path, tags, url, f_hash)
        write_metadata(target_path, hash_value=f_hash, url=url, relationships=relationships or [])
    except Exception as exc:
        # Best effort: the export itself succeeded, so only note the failure.
        debug(f"[add-file] Failed to write export sidecars for {target_path}: {type(exc).__name__}: {exc}")

    # Update PipeObject and emit
    extra_updates: Dict[str, Any] = {
        "url": url,
        "export_path": str(destination_root),
    }
    if relationships:
        extra_updates["relationships"] = relationships

    chosen_title = title or title_value or pipe_obj.title or target_path.name

    Add_File._update_pipe_object_destination(
        pipe_obj,
        hash_value=f_hash or "unknown",
        store="local",
        path=str(target_path),
        tag=tags,
        title=chosen_title,
        extra_updates=extra_updates,
    )
    Add_File._emit_pipe_object(pipe_obj)

    # Cleanup
    # Only delete if explicitly requested!
    Add_File._cleanup_after_success(media_path, delete_source=delete_after)

    return 0
2025-12-11 12:47:30 -08:00
2025-12-11 19:04:02 -08:00
@staticmethod
def _download_soulseek_file(
    result: Any,
    config: Dict[str, Any]
) -> Optional[Path]:
    """Download a file from a Soulseek peer.

    Reads the peer ``username`` and ``filename`` from the result's
    ``full_metadata`` (a dict key, an entry of ``result.extra``, or a direct
    attribute), reads the client login from config, and runs the asynchronous
    ``download_soulseek_file`` to completion on an event loop.

    Returns whatever the download coroutine returns, or ``None`` when the
    metadata or credentials are missing or any error occurs.
    """
    try:
        import asyncio
        from ProviderCore.registry import download_soulseek_file

        # Extract metadata from result
        full_metadata: Any = None
        if isinstance(result, dict):
            full_metadata = result.get("full_metadata")
        elif isinstance(getattr(result, "extra", None), dict) and "full_metadata" in result.extra:
            full_metadata = result.extra.get("full_metadata")
        elif hasattr(result, "full_metadata"):
            # Direct attribute access (fallback)
            full_metadata = getattr(result, "full_metadata", None)
        # Anything that is not a dict (e.g. an explicit None) counts as
        # "no metadata" instead of failing on .get() below.
        if not isinstance(full_metadata, dict):
            full_metadata = {}

        username = full_metadata.get("username")
        filename = full_metadata.get("filename")

        if not username or not filename:
            debug(f"[add-file] ERROR: Could not extract soulseek metadata from result (type={type(result).__name__})")
            extra_val = getattr(result, "extra", None)
            if isinstance(extra_val, dict):
                debug(f"[add-file] Result extra keys: {list(extra_val.keys())}")
            return None

        debug(f"[add-file] Starting soulseek download: {username} -> {filename}")

        # Read Soulseek login credentials from config (client credentials), separate from peer username.
        try:
            from config import get_soulseek_username, get_soulseek_password
            client_user = get_soulseek_username(config) or ""
            client_pass = get_soulseek_password(config) or ""
        except Exception:
            client_user = ""
            client_pass = ""

        if not client_user or not client_pass:
            debug("[add-file] ERROR: Soulseek credentials missing (set [provider=soulseek] username/password in config.conf)")
            return None

        # Determine output directory (prefer downloads folder in config)
        configured_dir = config.get("output_dir")
        output_dir = Path(configured_dir) if isinstance(configured_dir, str) else Path("./downloads")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Run async download in event loop; create a new loop when the
        # current one is closed or unavailable.
        try:
            loop = asyncio.get_event_loop()
            if loop.is_closed():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        downloaded_path = loop.run_until_complete(
            download_soulseek_file(
                username=username,
                filename=filename,
                output_dir=output_dir,
                client_username=client_user,
                client_password=client_pass,
                timeout=1200  # 20 minutes
            )
        )

        return downloaded_path

    except Exception as e:
        import traceback

        log(f"[add-file] Soulseek download error: {type(e).__name__}: {e}", file=sys.stderr)
        # Emit the real traceback, not just the exception message again.
        debug(f"[add-file] Soulseek download traceback: {traceback.format_exc()}")
        return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _handle_provider_upload(
    media_path: Path,
    provider_name: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    delete_after: bool,
) -> int:
    """Handle uploading to a file provider (e.g. 0x0).

    Uploads ``media_path`` through the named provider, tries (best effort) to
    associate the returned URL with the file in the store named on the pipe
    object, then updates and emits the pipe object.

    Returns 0 on success, 1 when the provider is unavailable or the upload
    raises.
    """
    from ProviderCore.registry import get_file_provider

    log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)

    try:
        file_provider = get_file_provider(provider_name, config)
        if not file_provider:
            log(f"File provider '{provider_name}' not available", file=sys.stderr)
            return 1

        hoster_url = file_provider.upload(str(media_path))
        log(f"File uploaded: {hoster_url}", file=sys.stderr)

        # Associate URL with Hydrus if possible
        f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
        if f_hash:
            try:
                # Only associate when we can target an explicit Hydrus store backend.
                # Do not fall back to a global/default Hydrus client.
                store_name = getattr(pipe_obj, "store", None)
                if store_name:
                    store = Store(config)
                    backend = store[str(store_name)]
                    client = getattr(backend, "_client", None)
                    if client is not None and hasattr(client, "associate_url"):
                        client.associate_url(str(f_hash), hoster_url)
            except Exception:
                pass

    except Exception as exc:
        log(f"Upload failed: {exc}", file=sys.stderr)
        return 1

    # Update PipeObject and emit
    extra_updates: Dict[str, Any] = {
        "provider": provider_name,
        "provider_url": hoster_url,
    }
    if isinstance(pipe_obj.extra, dict):
        # Also track hoster URL as a url for downstream steps
        known_urls = pipe_obj.extra.get("url") or []
        if isinstance(known_urls, str):
            # A single URL stored as a bare string must stay one entry;
            # list("http://...") would split it into characters.
            known_urls = [known_urls]
        existing_known = list(known_urls)
        if hoster_url and hoster_url not in existing_known:
            existing_known.append(hoster_url)
        extra_updates["url"] = existing_known

    file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
    Add_File._update_pipe_object_destination(
        pipe_obj,
        hash_value=f_hash or "unknown",
        store=provider_name or "provider",
        path=file_path,
        tag=pipe_obj.tag,
        title=pipe_obj.title or (media_path.name if media_path else None),
        extra_updates=extra_updates,
    )
    Add_File._emit_pipe_object(pipe_obj)

    Add_File._cleanup_after_success(media_path, delete_source=delete_after)
    return 0
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _handle_storage_backend(
    result: Any,
    media_path: Path,
    backend_name: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    delete_after: bool,
    *,
    collect_payloads: Optional[List[Dict[str, Any]]] = None,
    collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
    suppress_last_stage_overlay: bool = False,
    auto_search_store: bool = True,
) -> int:
    """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.).

    Args:
        result: Upstream result passed through to the metadata helpers.
        media_path: File to ingest.
        backend_name: Key used to look the backend up in ``Store(config)``.
        pipe_obj: Pipe object updated in place with the destination.
        config: Application configuration.
        delete_after: Delete the source after a successful ingest.
        collect_payloads: When given, the result payload is appended to it.
        collect_relationship_pairs: When given, ``(alt_hash, king_hash)`` pairs
            are added to the set stored under ``backend_name``.
        suppress_last_stage_overlay: Disable the add-file summary overlay.
        auto_search_store: Refresh the result through search-store and emit
            its rows instead of the add-file payload when a hash is known.

    Returns:
        0 on success, 1 when anything in the ingest raises.
    """
    log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)

    delete_after_effective = bool(delete_after)
    if not delete_after_effective:
        # When download-media is piped into add-file, the downloaded artifact is a temp file.
        # After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
        try:
            if (
                str(backend_name or "").strip().lower() != "temp"
                and getattr(pipe_obj, "is_temp", False)
                and getattr(pipe_obj, "action", None) == "cmdlet:download-media"
            ):
                from config import resolve_output_dir
                temp_dir = resolve_output_dir(config)
                try:
                    if media_path.resolve().is_relative_to(temp_dir.expanduser().resolve()):
                        delete_after_effective = True
                        debug(f"[add-file] Auto-delete temp source after ingest: {media_path}")
                except Exception:
                    # If path resolution fails, fall back to non-destructive behavior
                    pass
        except Exception:
            pass

    try:
        store = Store(config)
        backend = store[backend_name]

        # Prepare metadata from pipe_obj and sidecars
        tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)

        # Collect relationship pairs for post-ingest DB/API persistence.
        if collect_relationship_pairs is not None:
            rels = Add_File._get_relationships(result, pipe_obj)
            if isinstance(rels, dict) and rels:
                king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
                if king_hash and alt_hashes:
                    bucket = collect_relationship_pairs.setdefault(str(backend_name), set())
                    for alt_hash in alt_hashes:
                        if alt_hash and alt_hash != king_hash:
                            bucket.add((alt_hash, king_hash))

        # Relationships must never be stored as tags.
        if isinstance(tags, list) and tags:
            tags = [
                t for t in tags
                if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
            ]

        # Call backend's add_file with full metadata
        # Backend returns hash as identifier
        file_identifier = backend.add_file(
            media_path,
            title=title,
            tag=tags,
            url=url
        )
        log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)

        stored_path: Optional[str] = None
        # IMPORTANT: avoid calling get_file() for remote backends.
        # For Hydrus, get_file() returns a browser URL (often with an access key) and should
        # only be invoked by explicit user commands (e.g. get-file).
        try:
            if type(backend).__name__ == "Folder":
                maybe_path = backend.get_file(file_identifier)
                if isinstance(maybe_path, Path):
                    stored_path = str(maybe_path)
                elif isinstance(maybe_path, str) and maybe_path:
                    stored_path = maybe_path
        except Exception:
            stored_path = None

        # The identifier is used as the hash only when it is a 64-character
        # string. The type guard keeps a non-string identifier (e.g. None)
        # from raising in len() after the file was already ingested.
        identifier_is_hash = isinstance(file_identifier, str) and len(file_identifier) == 64

        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=file_identifier if identifier_is_hash else (f_hash or "unknown"),
            store=backend_name,
            path=stored_path,
            tag=tags,
            title=title or pipe_obj.title or media_path.name,
            extra_updates={
                "url": url,
            },
        )

        # Emit a search-store-like payload for consistent tables and natural piping.
        # Keep hash/store for downstream commands (get-tag, get-file, etc.).
        resolved_hash = file_identifier if identifier_is_hash else (f_hash or file_identifier or "unknown")

        # If we have url(s), ensure they get associated with the destination file.
        # This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
        if url:
            try:
                backend.add_url(resolved_hash, list(url))
            except Exception:
                pass

        meta: Dict[str, Any] = {}
        try:
            meta = backend.get_metadata(resolved_hash) or {}
        except Exception:
            meta = {}

        # Determine size bytes: first usable metadata key, else the source file size.
        size_bytes: Optional[int] = None
        for key in ("size_bytes", "size", "filesize", "file_size"):
            try:
                raw_size = meta.get(key)
                if raw_size is not None:
                    size_bytes = int(raw_size)
                    break
            except Exception:
                pass
        if size_bytes is None:
            try:
                size_bytes = int(media_path.stat().st_size)
            except Exception:
                size_bytes = None

        # Determine title/ext
        title_out = (
            meta.get("title")
            or title
            or pipe_obj.title
            or media_path.stem
            or media_path.name
        )
        ext_out = (meta.get("ext") or media_path.suffix.lstrip("."))

        payload: Dict[str, Any] = {
            "title": title_out,
            "ext": str(ext_out or ""),
            "size_bytes": size_bytes,
            "store": backend_name,
            "hash": resolved_hash,
            # Preserve extra fields for downstream commands (kept hidden by default table rules)
            "path": stored_path,
            "tag": list(tags or []),
            "url": list(url or []),
        }

        if collect_payloads is not None:
            try:
                collect_payloads.append(payload)
            except Exception:
                pass

        # Keep the add-file 1-row summary overlay (when last stage), then emit the
        # canonical search-store payload/table for piping/selection consistency.
        if auto_search_store and resolved_hash and resolved_hash != "unknown":
            # Show the add-file summary (overlay only) but let search-store provide the downstream payload.
            Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=False)

            refreshed_items = Add_File._try_emit_search_store_by_hash(
                store=backend_name,
                hash_value=resolved_hash,
                config=config,
            )
            if refreshed_items:
                # Re-emit the canonical store rows so downstream stages receive them.
                for emitted in refreshed_items:
                    ctx.emit(emitted)
            else:
                # Fall back to emitting the add-file payload so downstream stages still receive an item.
                ctx.emit(payload)
        else:
            Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=True)

        Add_File._cleanup_after_success(media_path, delete_source=delete_after_effective)
        return 0

    except Exception as exc:
        log(f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        return 1
2025-12-11 12:47:30 -08:00
# --- Helpers ---
@staticmethod
def _load_sidecar_bundle(
    media_path: Path,
    store: Optional[str],
    config: Dict[str, Any],
) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]:
    """Load previously stored metadata for *media_path*.

    When ``store`` is "local" (case-insensitive) the folder-store database is
    consulted first; a hit returns ``(None, hash, tags, url)``. Otherwise, or
    when that lookup yields nothing or fails, a sidecar file found by
    ``find_sidecar`` is read and returned as ``(sidecar_path, hash, tags, url)``.

    Returns ``(None, None, [], [])`` when nothing could be loaded.
    """
    if store and store.lower() == "local":
        try:
            from config import get_local_storage_path

            library_root = get_local_storage_path(config)
            if library_root:
                with API_folder_store(Path(library_root)) as db:
                    known_hash = db.get_file_hash(media_path)
                    if known_hash:
                        db_tags = db.get_tags(known_hash) or []
                        db_meta = db.get_metadata(known_hash) or {}
                        db_url = db_meta.get("url") or []
                        db_hash = db_meta.get("hash") or known_hash
                        if db_tags or db_url or db_hash:
                            return None, db_hash, db_tags, db_url
        except Exception:
            # Database lookup is best effort; fall through to the sidecar file.
            pass

    try:
        sidecar = find_sidecar(media_path)
        if sidecar and sidecar.exists():
            side_hash, side_tags, side_url = read_sidecar(sidecar)
            return sidecar, side_hash, side_tags or [], side_url or []
    except Exception:
        pass

    return None, None, [], []
@staticmethod
def _resolve_file_hash(
result: Any,
media_path: Path,
pipe_obj: models.PipeObject,
fallback_hash: Optional[str],
) -> Optional[str]:
if pipe_obj.hash and pipe_obj.hash != "unknown":
return pipe_obj.hash
if fallback_hash:
return fallback_hash
if isinstance(result, dict):
candidate = result.get('hash')
if candidate:
return str(candidate)
2025-11-25 20:09:33 -08:00
try:
2025-12-11 12:47:30 -08:00
return sha256_file(media_path)
except Exception:
return None
@staticmethod
def _resolve_media_kind(path: Path) -> str:
# Reusing logic
suffix = path.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
return 'audio'
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
return 'video'
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
return 'image'
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
return 'document'
return 'other'
@staticmethod
def _persist_local_metadata(
    library_root: Path,
    dest_path: Path,
    tags: List[str],
    url: List[str],
    f_hash: Optional[str],
    relationships: Any,
    duration: Any,
    media_kind: str,
):
    """Save file info and tags for *dest_path* into the folder store at *library_root*.

    The stored size is read from disk (``None`` when ``stat`` fails with
    ``OSError``). A failure in ``save_file_info`` is logged to stderr and not
    re-raised.
    """
    try:
        size_on_disk = dest_path.stat().st_size
    except OSError:
        size_on_disk = None

    file_info = {
        'hash': f_hash,
        'url': url,
        'relationships': relationships or [],
        'duration': duration,
        'size': size_on_disk,
        'ext': dest_path.suffix.lower(),
        'media_type': media_kind,
        'media_kind': media_kind,
    }

    with API_folder_store(library_root) as db:
        try:
            db.save_file_info(dest_path, file_info, tags)
        except Exception as exc:
            log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr)
@staticmethod
def _copy_sidecars(source_path: Path, target_path: Path):
possible_sidecars = [
source_path.with_suffix(source_path.suffix + ".json"),
source_path.with_name(source_path.name + ".tag"),
source_path.with_name(source_path.name + ".metadata"),
source_path.with_name(source_path.name + ".notes"),
]
for sc in possible_sidecars:
try:
if sc.exists():
suffix_part = sc.name.replace(source_path.name, "", 1)
dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(sc), dest_sidecar)
except Exception:
pass
2025-12-07 00:21:30 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _cleanup_after_success(media_path: Path, delete_source: bool):
if not delete_source:
return
# Check if it's a temp file that should always be deleted
is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name
if delete_source or is_temp_merge:
log(f"Deleting source file...", file=sys.stderr)
try:
media_path.unlink()
Add_File._cleanup_sidecar_files(media_path)
except Exception as exc:
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
@staticmethod
def _cleanup_sidecar_files(media_path: Path):
targets = [
media_path.parent / (media_path.name + '.metadata'),
media_path.parent / (media_path.name + '.notes'),
media_path.parent / (media_path.name + '.tag'),
]
for target in targets:
try:
if target.exists():
target.unlink()
except Exception:
pass
# Module-level add-file cmdlet instance, exposed under the name CMDLET.
# NOTE(review): any registration presumably happens inside Cmdlet.__init__ or
# in whatever imports this module - confirm; it is not visible from here.
CMDLET = Add_File()