Files
Medios-Macina/cmdlet/file/add.py
T

3204 lines
124 KiB
Python
Raw Normal View History

2025-11-25 20:09:33 -08:00
from __future__ import annotations
2026-01-02 02:28:59 -08:00
from typing import Any, Dict, Optional, Sequence, Tuple, List
2025-11-25 20:09:33 -08:00
from pathlib import Path
import sys
2025-12-11 12:47:30 -08:00
import shutil
2026-01-04 02:23:50 -08:00
import tempfile
2025-12-16 01:45:01 -08:00
import re
2026-01-24 01:38:12 -08:00
from urllib.parse import urlparse
2025-11-25 20:09:33 -08:00
from SYS import models
from SYS import pipeline as ctx
from SYS.logger import log, debug, debug_panel
2026-03-25 22:39:30 -07:00
from SYS.payload_builders import build_table_result_payload
2025-12-22 02:11:53 -08:00
from SYS.pipeline_progress import PipelineProgress
2026-03-25 22:39:30 -07:00
from SYS.result_publication import overlay_existing_result_table, publish_result_table
from SYS.rich_display import show_available_plugins_panel, show_plugin_config_panel
2025-12-14 00:53:52 -08:00
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
2026-05-23 13:49:47 -07:00
from PluginCore.backend_registry import BackendRegistry
2026-05-26 15:32:01 -07:00
from API.HTTP import download_direct_file
2026-05-04 18:41:01 -07:00
from .. import _shared as sh
2025-12-16 23:23:43 -08:00
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
2026-05-26 15:32:01 -07:00
collapse_namespace_tags = sh.collapse_namespace_tags
2026-01-12 04:05:52 -08:00
resolve_target_dir = sh.resolve_target_dir
resolve_media_kind_by_extension = sh.resolve_media_kind_by_extension
coerce_to_path = sh.coerce_to_path
build_pipeline_preview = sh.build_pipeline_preview
get_field = sh.get_field
2026-01-24 01:38:12 -08:00
from SYS.utils import sha256_file, unique_path, sanitize_filename
2025-11-25 20:09:33 -08:00
2025-12-14 00:53:52 -08:00
# Canonical supported filetypes for all stores/cmdlets
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
2025-11-25 20:09:33 -08:00
class _CommandDependencies:
2026-05-23 13:49:47 -07:00
"""Command-scope cache for the backend registry and plugin instances."""
def __init__(self, config: Dict[str, Any]) -> None:
self.config = config
2026-05-23 13:49:47 -07:00
self._backend_registry: Optional[BackendRegistry] = None
self._plugins: Dict[str, Any] = {}
2026-05-23 13:49:47 -07:00
def get_backend_registry(self) -> Optional[BackendRegistry]:
"""Lazily initialize and return the command-scope backend registry."""
if self._backend_registry is None:
try:
2026-05-23 13:49:47 -07:00
self._backend_registry = BackendRegistry(self.config)
except Exception:
2026-05-23 13:49:47 -07:00
self._backend_registry = None
return self._backend_registry
def get_plugin(self, name: str) -> Optional[Any]:
"""Cached plugin lookup by name."""
2026-05-21 16:19:17 -07:00
from PluginCore.registry import get_plugin
norm_name = str(name or "").strip().lower()
if not norm_name:
return None
if norm_name in self._plugins:
return self._plugins[norm_name]
plugin = get_plugin(norm_name, self.config)
self._plugins[norm_name] = plugin
return plugin
def get_plugin_with_capability(self, name: str, capability: str) -> Optional[Any]:
"""Cached plugin lookup with capability check."""
2026-05-21 16:19:17 -07:00
from PluginCore.registry import get_plugin_with_capability
norm_name = str(name or "").strip().lower()
if not norm_name:
return None
cache_key = f"{norm_name}#{capability}"
if cache_key in self._plugins:
return self._plugins[cache_key]
plugin = get_plugin_with_capability(norm_name, capability, self.config)
self._plugins[cache_key] = plugin
return plugin
2026-02-25 17:35:38 -08:00
# Lower-case URL/URI scheme prefixes, stored as an immutable tuple.
# NOTE(review): presumably used to tell remote sources apart from local
# filesystem paths — the consumer is not visible in this part of the file; confirm.
_REMOTE_URL_PREFIXES: tuple[str, ...] = (
    "http://", "https://", "ftp://", "ftps://", "magnet:", "torrent:", "tidal:", "hydrus:",
)
2026-01-02 02:28:59 -08:00
2025-12-30 23:19:02 -08:00
def _maybe_apply_florencevision_tags(
media_path: Path,
tags: List[str],
config: Dict[str, Any],
pipe_obj: Optional[models.PipeObject] = None,
) -> List[str]:
2026-05-26 15:32:01 -07:00
"""Optionally auto-tag images using the FlorenceVision plugin helper.
2025-12-30 23:19:02 -08:00
Controlled via config:
2026-05-26 15:32:01 -07:00
[plugin=florencevision]
2025-12-30 23:19:02 -08:00
enabled=true
strict=false
If strict=false (default), failures log a warning and return the original tags.
If strict=true, failures raise to abort the ingest.
"""
2026-01-02 02:28:59 -08:00
strict = False
2025-12-30 23:19:02 -08:00
try:
2026-05-26 15:32:01 -07:00
plugin_block = (config or {}).get("plugin")
fv_block = plugin_block.get("florencevision") if isinstance(plugin_block, dict) else None
2025-12-30 23:19:02 -08:00
enabled = False
if isinstance(fv_block, dict):
enabled = bool(fv_block.get("enabled"))
strict = bool(fv_block.get("strict"))
if not enabled:
return tags
2026-05-26 15:32:01 -07:00
from plugins.florencevision import FlorenceVisionTool
2025-12-30 23:19:02 -08:00
# Special-case: if this file was produced by the `screen-shot` cmdlet,
# OCR is more useful than caption/detection for tagging screenshots.
cfg_for_tool: Dict[str, Any] = config
try:
action = str(getattr(pipe_obj, "action", "") or "") if pipe_obj is not None else ""
cmdlet_name = ""
if action.lower().startswith("cmdlet:"):
cmdlet_name = action.split(":", 1)[1].strip().lower()
if cmdlet_name in {"screen-shot", "screen_shot", "screenshot"}:
2026-05-26 15:32:01 -07:00
plugin_block2 = dict((config or {}).get("plugin") or {})
fv_block2 = dict(plugin_block2.get("florencevision") or {})
2025-12-30 23:19:02 -08:00
fv_block2["task"] = "ocr"
2026-05-26 15:32:01 -07:00
plugin_block2["florencevision"] = fv_block2
2025-12-30 23:19:02 -08:00
cfg_for_tool = dict(config or {})
2026-05-26 15:32:01 -07:00
cfg_for_tool["plugin"] = plugin_block2
2025-12-30 23:19:02 -08:00
except Exception:
cfg_for_tool = config
fv = FlorenceVisionTool(cfg_for_tool)
if not fv.enabled() or not fv.applicable_path(media_path):
return tags
auto_tags = fv.tags_for_file(media_path)
# Capture caption (if any) into PipeObject notes for downstream persistence.
try:
caption_text = getattr(fv, "last_caption", None)
if caption_text and pipe_obj is not None:
if not isinstance(pipe_obj.extra, dict):
pipe_obj.extra = {}
notes = pipe_obj.extra.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("caption", caption_text)
pipe_obj.extra["notes"] = notes
except Exception:
pass
if not auto_tags:
return tags
merged = merge_sequences(tags or [], auto_tags, case_sensitive=False)
debug(f"[add-file] FlorenceVision added {len(auto_tags)} tag(s)")
return merged
except Exception as exc:
# Decide strictness from config if we couldn't read it above.
strict2 = False
try:
tool_block = (config or {}).get("tool")
fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None
strict2 = bool(fv_block.get("strict")) if isinstance(fv_block, dict) else False
except Exception:
strict2 = False
if strict or strict2:
raise
log(f"[add-file] Warning: FlorenceVision tagging failed: {exc}", file=sys.stderr)
return tags
2025-12-11 12:47:30 -08:00
class Add_File(Cmdlet):
"""Add file into the DB"""
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
def __init__(self) -> None:
    """Describe the add-file cmdlet (arguments, help text, examples) and register it."""
    # Accepted arguments, in the order they are declared to the base class.
    arguments = [
        CmdletArg(
            name="source",
            type="string",
            required=False,
            description="Local file or directory path to ingest or scan.",
        ),
        SharedArgs.INSTANCE,
        SharedArgs.URL,
        SharedArgs.PLUGIN,
        CmdletArg(
            name="delete",
            type="flag",
            required=False,
            description="Delete file after successful upload",
            alias="del",
        ),
    ]

    # Long-form help lines.
    detail_lines = [
        "Note: add-file ingests local files. To fetch remote sources, use download-file and pipe into add-file.",
        "- Store options (use -instance without -plugin):",
        "  hydrus: Upload to Hydrus database with metadata tagging",
        "- Plugin options (use -plugin):",
        "  local: Copy file to a configured local destination or direct path via -instance",
        "  0x0: Upload to 0x0.st for temporary hosting",
        "  file.io: Upload to file.io for temporary hosting",
        "  internetarchive: Upload to archive.org (optional tag: ia:<identifier> to upload into an existing item)",
        "- Use a positional source path with -instance and -plugin to target a named provider config: add-file C:\\Media\\file.pdf -plugin ftp -instance archive",
    ]

    # Usage examples shown in help output.
    example_lines = [
        'download-file "https://themathesontrust.org/papers/christianity/alcock-alphabet1.pdf" | add-file -instance tutorial',
        '@1 | add-file -plugin local -instance C:\\Users\\Me\\Downloads',
        'add-file C:\\Media\\report.pdf -plugin ftp -instance archive',
    ]

    super().__init__(
        name="add-file",
        summary="Ingest a local media file to a configured store or plugin destination.",
        usage="add-file (<source> | <piped>) (-instance <store-name> | -plugin <plugin> [-instance <name|path>]) [-delete]",
        arg=arguments,
        detail=detail_lines,
        examples=example_lines,
        exec=self.run,
    )
    self.register()
2025-11-25 20:09:33 -08:00
2026-05-24 12:32:57 -07:00
@staticmethod
def _uses_legacy_path_flag(args: Sequence[str]) -> bool:
for token in args or []:
lowered = str(token or "").strip().lower()
if lowered in {"-path", "--path", "-p"}:
return True
return False
@staticmethod
def _legacy_path_flag_message() -> str:
return (
"add-file no longer supports -path. Pass the source file or directory as a positional argument, "
"and use -plugin local -instance <name|path> for local export."
)
2025-12-11 12:47:30 -08:00
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Ingest one or more local files into a store backend or plugin destination.

    Items come from (in priority order) a comma-separated positional source
    list, a directory scan of the positional source, the single positional
    source, or the piped ``result``.

    Args:
        result: Piped input from the previous stage (single item, list, or None).
        args: Raw cmdlet argument tokens.
        config: Application configuration mapping.

    Returns:
        0 when at least one item was ingested, when the directory selector
        table was displayed, or when there is no input at all while running
        inside a pipeline stage; 1 otherwise (including use of the removed
        -path flag or an unresolvable destination).
    """
    if Add_File._uses_legacy_path_flag(args):
        log(Add_File._legacy_path_flag_message(), file=sys.stderr)
        return 1

    parsed = parse_cmdlet_args(args, self)
    progress = PipelineProgress(ctx)

    # Initialize command-scope dependency context (caches Store/plugins)
    deps = _CommandDependencies(config)
    storage_registry = deps.get_backend_registry()

    source_arg = parsed.get("source")
    # `location` and `plugin_instance` both start from the same -instance value;
    # they diverge below once the destination kind is known.
    location = parsed.get("instance")
    plugin_instance = parsed.get("instance")
    source_url_arg = parsed.get("url")
    plugin_name = parsed.get("plugin")
    delete_after = parsed.get("delete", False)
    local_export_destination: Optional[str] = None
    if plugin_name and not plugin_instance and location:
        plugin_instance = location

    stage_ctx = ctx.get_stage_context()
    is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
    has_downstream_stage = bool(stage_ctx is not None and not is_last_stage)

    # Directory-mode selector:
    # - Terminal use: `add-file <DIR> -instance X` shows a selectable table.
    # - Pipelined use: `add-file <DIR> -instance X | ...` processes the full batch
    #   immediately so downstream stages receive the uploaded items.
    # - Selection replay: `@N` re-runs add-file with `file1,file2,...` as the source token.
    dir_scan_mode = False
    dir_scan_results: Optional[List[Dict[str, Any]]] = None
    explicit_source_list_results: Optional[List[Dict[str, Any]]] = None

    if source_arg and location and not plugin_name:
        # Support comma-separated source lists: "file1,file2,file3"
        # This is the mechanism used by @N expansion for directory tables.
        try:
            source_text = str(source_arg)
        except Exception:
            source_text = ""

        if "," in source_text:
            parts = [p.strip().strip('"') for p in source_text.split(",")]
            parts = [p for p in parts if p]
            batch: List[Dict[str, Any]] = []
            for p in parts:
                try:
                    file_path = Path(p)
                except Exception:
                    continue
                if not file_path.exists() or not file_path.is_file():
                    continue
                ext = file_path.suffix.lower()
                if ext not in SUPPORTED_MEDIA_EXTENSIONS:
                    continue
                try:
                    hv = sha256_file(file_path)
                except Exception:
                    continue
                try:
                    size = file_path.stat().st_size
                except Exception:
                    size = 0
                batch.append(
                    {
                        "path": file_path,
                        "name": file_path.name,
                        "hash": hv,
                        "size": size,
                        "ext": ext,
                    }
                )
            if batch:
                explicit_source_list_results = batch
                # Clear source_arg so add-file doesn't treat it as a single path.
                source_arg = None
        else:
            # Directory scan (selector table, no ingest yet)
            try:
                candidate_dir = Path(str(source_arg))
                if candidate_dir.exists() and candidate_dir.is_dir():
                    dir_scan_mode = True
                    debug(
                        f"[add-file] Scanning directory for batch add: {candidate_dir}"
                    )
                    dir_scan_results = Add_File._scan_directory_for_files(
                        candidate_dir
                    )
                    if dir_scan_results:
                        debug(
                            f"[add-file] Found {len(dir_scan_results)} supported files in directory"
                        )
                        # Clear source_arg so it doesn't trigger single-item mode.
                        source_arg = None
            except Exception as exc:
                debug(f"[add-file] Directory scan failed: {exc}")

    # Nothing piped in and nothing to read from disk: inside a pipeline stage
    # this is treated as a no-op success instead of an error.
    if result is None and not source_arg and not explicit_source_list_results and not dir_scan_results:
        try:
            if ctx.get_stage_context() is not None:
                return 0
        except Exception:
            pass

    # Determine if -instance targets a registered backend (vs a filesystem export path).
    is_storage_backend_location = False
    if location:
        try:
            backend_registry_for_lookup = storage_registry or deps.get_backend_registry()
            is_storage_backend_location = Add_File._resolve_backend_by_name(backend_registry_for_lookup, str(location)) is not None
        except Exception:
            is_storage_backend_location = False

    # -instance without -plugin that is not a known backend: try to treat it
    # as a local export target, otherwise fail.
    if location and not plugin_name and not is_storage_backend_location:
        resolved_local_instance, resolved_local_path = Add_File._resolve_local_export_plugin_target(
            location,
            config,
            deps=deps,
            require_explicit=True,
        )
        if resolved_local_path:
            plugin_name = "local"
            plugin_instance = resolved_local_instance or str(location)
            location = None
            local_export_destination = resolved_local_path
        else:
            log(
                f"Storage backend '{location}' not found. Use -plugin local -instance <name|path> for local export or configure that store backend.",
                file=sys.stderr,
            )
            return 1

    normalized_plugin_name = Add_File._normalize_provider_key(plugin_name)
    if normalized_plugin_name == "local":
        resolved_local_instance, resolved_local_path = Add_File._resolve_local_export_plugin_target(
            plugin_instance or location,
            config,
            deps=deps,
            require_explicit=bool(plugin_instance or location),
        )
        if not resolved_local_path:
            requested_local = str(plugin_instance or location or "").strip() or "<default>"
            log(
                f"Local destination '{requested_local}' is not configured. Use -plugin local -instance <name|path>.",
                file=sys.stderr,
            )
            return 1
        plugin_name = "local"
        plugin_instance = resolved_local_instance or str(plugin_instance or location or "").strip() or None
        location = None
        local_export_destination = resolved_local_path

    plugin_storage_backend = None
    if plugin_name:
        plugin_storage_backend = Add_File._resolve_plugin_storage_backend(
            plugin_name,
            plugin_instance,
            config,
            store_instance=storage_registry,
            deps=deps,
        )
    effective_storage_backend_name = plugin_storage_backend or (
        str(location) if location and is_storage_backend_location else None
    )

    # Decide which items to process.
    # - If directory scan was performed, use those results
    # - If user provided a positional source path, treat this invocation as single-item.
    # - Otherwise, if piped input is a list, ingest each item.
    items_to_process: List[Any]
    if explicit_source_list_results:
        items_to_process = explicit_source_list_results
        debug(f"[add-file] Using {len(items_to_process)} files from source list")
    elif dir_scan_results:
        items_to_process = dir_scan_results
        debug(f"[add-file] Using {len(items_to_process)} files from directory scan")
    elif source_arg:
        items_to_process = [result]
    elif isinstance(result, list) and result:
        items_to_process = list(result)
    else:
        items_to_process = [result]

    total_items = len(items_to_process) if isinstance(items_to_process, list) else 0
    processed_items = 0

    try:
        ui, _ = progress.ui_and_pipe_index()
        if ui is not None and total_items:
            preview_items = (
                list(items_to_process)
                if isinstance(items_to_process, list) else [items_to_process]
            )
            progress.begin_pipe(
                total_items=total_items,
                items_preview=preview_items,
            )
    except Exception:
        pass

    try:
        if total_items:
            progress.set_percent(0)
    except Exception:
        pass

    # Minimal step-based progress for single-item runs.
    # Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
    use_steps = False
    steps_started = False
    try:
        ui, _ = progress.ui_and_pipe_index()
        use_steps = (ui is not None) and (len(items_to_process) == 1)
        if use_steps:
            progress.begin_steps(5)
            steps_started = True
    except Exception:
        use_steps = False

    # add-file is ingestion-only: it does not download URLs here.
    should_present_directory_selector = bool(dir_scan_mode and not has_downstream_stage)
    if dir_scan_mode and has_downstream_stage:
        debug(
            "[add-file] Continuing with directory batch ingest because downstream stages exist"
        )

    # If this invocation was terminal directory selector mode, show a selectable table and stop.
    # The user then runs @N (optionally piped), which replays add-file with selected source paths.
    if should_present_directory_selector:
        try:
            from SYS.result_table import Table

            # NOTE(review): when -instance was resolved to a local export above,
            # `location` is None here, so the replayed command carries -plugin
            # without -instance — confirm that is intended.
            base_args: List[str] = []
            if plugin_name:
                base_args.extend(["-plugin", str(plugin_name)])
            if location:
                base_args.extend(["-instance", str(location)])
            if source_url_arg:
                base_args.extend(["-url", str(source_url_arg)])
            if bool(delete_after):
                base_args.append("-delete")

            table = Table(title="Files in Directory", preserve_order=True)
            table.set_table("add-file.directory")
            table.set_source_command("add-file", base_args)

            rows: List[Dict[str, Any]] = []
            for file_info in dir_scan_results or []:
                p = file_info.get("path")
                hp = str(file_info.get("hash") or "")
                name = str(file_info.get("name") or "unknown")
                try:
                    clean_title = Path(name).stem
                except Exception:
                    clean_title = name
                ext = str(file_info.get("ext") or "").lstrip(".")
                size = file_info.get("size", 0)
                row_item = build_table_result_payload(
                    title=clean_title,
                    columns=[
                        ("Title", clean_title),
                        ("Hash", hp),
                        ("Size", size),
                        ("Ext", ext),
                    ],
                    selection_args=[str(p) if p is not None else ""],
                    path=str(p) if p is not None else "",
                    hash=hp,
                )
                rows.append(row_item)
                table.add_result(row_item)

            ctx.set_current_stage_table(table)
            ctx.set_last_result_table(
                table,
                rows,
                subject={
                    "table": "add-file.directory"
                }
            )
            log(f"✓ Found {len(rows)} files. Select with @N (e.g., @1 or @1-3).")
            return 0
        except Exception as exc:
            # Falls through to the ingest loop below when the table cannot be shown.
            debug(
                f"[add-file] Failed to display directory scan result table: {exc}"
            )

    collected_payloads: List[Dict[str, Any]] = []
    pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
    pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {}
    pending_tag_associations: Dict[str, List[tuple[str, List[str]]]] = {}
    successes = 0
    failures = 0

    # When add-file -instance is the last stage, always show a final search-file table.
    # This is especially important for multi-item ingests (e.g., multi-clip downloads)
    # so the user always gets a selectable ResultTable.
    live_progress = None
    try:
        live_progress = ctx.get_live_progress()
    except Exception:
        live_progress = None

    want_final_search_file = (
        bool(is_last_stage)
        and bool(effective_storage_backend_name)
        and bool(live_progress)
    )
    auto_search_file_after_add = False

    # When ingesting multiple items into a backend store, defer URL association and
    # apply it once at the end (bulk) to avoid per-item URL API calls.
    defer_url_association = (
        bool(effective_storage_backend_name)
        and len(items_to_process) > 1
    )

    for idx, item in enumerate(items_to_process, 1):
        # Fixed: the positional source is held in `source_arg`; the previous
        # name (`path_arg`) is not defined anywhere in this method.
        pipe_obj = coerce_to_pipe_object(item, source_arg)

        # Merge URLs given with -url into whatever the item already carries.
        if source_url_arg:
            try:
                cli_urls = [u.strip() for u in str(source_url_arg).split(",") if u and u.strip()]
                merged_urls: List[str] = []
                if isinstance(getattr(pipe_obj, "extra", None), dict):
                    existing_url = pipe_obj.extra.get("url")
                    if isinstance(existing_url, list):
                        merged_urls.extend(str(u) for u in existing_url if u)
                    elif isinstance(existing_url, str) and existing_url.strip():
                        merged_urls.append(existing_url.strip())
                else:
                    pipe_obj.extra = {}
                merged_urls = sh.merge_urls(merged_urls, cli_urls)
                if merged_urls:
                    pipe_obj.extra["url"] = merged_urls
            except Exception:
                pass

        # Best-effort progress label; never allowed to abort the ingest.
        try:
            label = pipe_obj.title
            if not label and pipe_obj.path:
                try:
                    label = Path(str(pipe_obj.path)).name
                except Exception:
                    label = pipe_obj.path
            if not label:
                label = "file"
            if total_items:
                pending_pct = int(round(((idx - 1) / max(1, total_items)) * 100))
                progress.set_percent(pending_pct)
                progress.set_status(f"adding {idx}/{total_items}: {label}")
        except Exception:
            pass

        temp_dir_to_cleanup: Optional[Path] = None
        delete_after_item = delete_after
        try:
            if use_steps and steps_started:
                progress.step("resolving source")

            export_destination = (
                Path(local_export_destination)
                if local_export_destination
                else Path(location)
                if location and not is_storage_backend_location
                else None
            )
            media_path, file_hash, temp_dir_to_cleanup = self._resolve_source(
                item,
                source_arg,
                pipe_obj,
                config,
                export_destination=export_destination,
                store_instance=storage_registry,
                deps=deps,
            )
            if not media_path and plugin_name:
                media_path, file_hash, temp_dir_to_cleanup = Add_File._download_piped_source(
                    pipe_obj, config, storage_registry, deps=deps
                )
            if not media_path:
                failures += 1
                continue

            # Update pipe_obj with resolved path
            pipe_obj.path = str(media_path)

            # Local/plugin exports can accept any file type.
            # Storage backends stay restricted to SUPPORTED_MEDIA_EXTENSIONS.
            allow_all_files = not bool(effective_storage_backend_name)
            if not self._validate_source(media_path, allow_all_extensions=allow_all_files):
                failures += 1
                continue

            if use_steps and steps_started:
                if not file_hash:
                    progress.step("hashing file")
                progress.step("ingesting file")

            if plugin_name:
                if effective_storage_backend_name:
                    code = self._handle_storage_backend(
                        item,
                        media_path,
                        effective_storage_backend_name,
                        pipe_obj,
                        config,
                        delete_after_item,
                        collect_payloads=collected_payloads,
                        collect_relationship_pairs=pending_relationship_pairs,
                        defer_url_association=defer_url_association,
                        pending_url_associations=pending_url_associations,
                        defer_tag_association=defer_url_association,
                        pending_tag_associations=pending_tag_associations,
                        suppress_last_stage_overlay=want_final_search_file,
                        auto_search_file=auto_search_file_after_add,
                        store_instance=storage_registry,
                    )
                else:
                    code = self._handle_plugin_upload(
                        media_path,
                        plugin_name,
                        plugin_instance,
                        pipe_obj,
                        config,
                        delete_after_item
                    )
                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            if location:
                try:
                    backend_registry = storage_registry or deps.get_backend_registry()
                    resolved_backend = Add_File._resolve_backend_by_name(backend_registry, str(location))
                    if resolved_backend is not None:
                        code = self._handle_storage_backend(
                            item,
                            media_path,
                            location,
                            pipe_obj,
                            config,
                            delete_after_item,
                            collect_payloads=collected_payloads,
                            collect_relationship_pairs=pending_relationship_pairs,
                            defer_url_association=defer_url_association,
                            pending_url_associations=pending_url_associations,
                            defer_tag_association=defer_url_association,
                            pending_tag_associations=pending_tag_associations,
                            suppress_last_stage_overlay=want_final_search_file,
                            auto_search_file=auto_search_file_after_add,
                            store_instance=storage_registry,
                        )
                    else:
                        log(f"Invalid storage backend: {location}", file=sys.stderr)
                        code = 1
                except Exception as exc:
                    debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
                    log(f"Invalid location: {location}", file=sys.stderr)
                    failures += 1
                    continue
                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            log("No destination specified", file=sys.stderr)
            failures += 1
        finally:
            # Runs for every item, including those skipped with `continue`.
            if temp_dir_to_cleanup is not None:
                try:
                    shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
                except Exception:
                    pass
            processed_items += 1
            try:
                pct = int(round((processed_items / max(1, total_items)) * 100))
                progress.set_percent(pct)
                if processed_items >= total_items:
                    progress.clear_status()
            except Exception:
                pass

    # Apply deferred url associations (bulk) before showing the final store table.
    if pending_url_associations:
        try:
            Add_File._apply_pending_url_associations(
                pending_url_associations,
                config,
                store_instance=storage_registry
            )
        except Exception:
            pass

    # Apply deferred tag associations (bulk) if collected
    if pending_tag_associations:
        try:
            Add_File._apply_pending_tag_associations(
                pending_tag_associations,
                config,
                store_instance=storage_registry
            )
        except Exception:
            pass

    # Always end add-file -instance (when last stage) by showing item detail panels.
    # Legacy search-file refresh is no longer used for final display.
    if want_final_search_file and collected_payloads:
        try:
            # Stop the live pipeline progress UI before rendering the details panels.
            # This prevents the progress display from lingering on screen.
            Add_File._stop_live_progress_for_terminal_render()

            subject = collected_payloads[0] if len(collected_payloads) == 1 else collected_payloads
            # Use helper to display items and make them @-selectable
            from .._shared import display_and_persist_items
            display_and_persist_items(collected_payloads, title="Result", subject=subject)
            try:
                ctx.set_last_result_items_only(list(collected_payloads))
            except Exception:
                pass
        except Exception:
            pass

    # Persist relationships into backend DB/API.
    if pending_relationship_pairs:
        try:
            Add_File._apply_pending_relationships(
                pending_relationship_pairs,
                config,
                store_instance=storage_registry,
                deps=deps
            )
        except Exception:
            pass

    if use_steps and steps_started:
        progress.step("finalized")
        # Clear the status so it doesn't linger in the UI
        progress.clear_status()

    if successes > 0:
        return 0
    return 1
2025-12-11 12:47:30 -08:00
2025-12-16 01:45:01 -08:00
@staticmethod
2025-12-30 23:19:02 -08:00
def _try_emit_search_file_by_hashes(
*,
2026-05-03 21:20:05 -07:00
instance: str,
hash_values: List[str],
config: Dict[str,
2026-01-12 04:05:52 -08:00
Any],
2026-05-23 13:49:47 -07:00
store_instance: Optional[BackendRegistry] = None,
2025-12-29 17:05:03 -08:00
) -> Optional[List[Any]]:
2025-12-30 23:19:02 -08:00
"""Run search-file for a list of hashes and promote the table to a display overlay.
2025-12-16 01:45:01 -08:00
2025-12-30 23:19:02 -08:00
Returns the emitted search-file payload items on success, else None.
2025-12-16 01:45:01 -08:00
"""
hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64]
2026-05-03 21:20:05 -07:00
if not instance or not hashes:
2025-12-16 01:45:01 -08:00
return None
try:
2026-05-04 18:41:01 -07:00
from cmdlet.file.search import CMDLET as search_file_cmdlet
2025-12-16 01:45:01 -08:00
query = "hash:" + ",".join(hashes)
2026-05-03 21:20:05 -07:00
args = ["-instance", str(instance), "-internal-refresh", query]
debug(f'[add-file] Refresh: search-file -instance {instance} "{query}"')
2025-12-16 01:45:01 -08:00
2025-12-30 23:19:02 -08:00
# Run search-file under a temporary stage context so its ctx.emit() calls
2025-12-16 01:45:01 -08:00
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
2025-12-29 17:05:03 -08:00
temp_ctx = ctx.PipelineStageContext(
stage_index=0,
total_stages=1,
pipe_index=0,
worker_id=getattr(prev_ctx,
"worker_id",
None),
2025-12-29 17:05:03 -08:00
)
2025-12-16 01:45:01 -08:00
ctx.set_stage_context(temp_ctx)
try:
2025-12-30 23:19:02 -08:00
code = search_file_cmdlet.run(None, args, config)
2025-12-16 01:45:01 -08:00
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return None
2025-12-30 23:19:02 -08:00
# Promote the search-file result to a display overlay so the CLI prints it
2025-12-16 01:45:01 -08:00
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx
is None) or bool(getattr(stage_ctx,
"is_last_stage",
False))
2025-12-16 01:45:01 -08:00
if is_last:
try:
table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if table is not None and items:
2026-01-15 16:26:22 -08:00
# If we have a single item refresh, render it as a panel immediately
# and suppress the table output from the CLI runner.
if len(items) == 1:
try:
from SYS.rich_display import render_item_details_panel
render_item_details_panel(items[0])
setattr(table, "_rendered_by_cmdlet", True)
except Exception as exc:
debug(f"[add-file] Item details render failed: {exc}")
2026-03-25 22:39:30 -07:00
publish_result_table(
ctx,
table,
items,
subject={
2026-05-03 21:20:05 -07:00
"store": instance,
"hash": hashes
2026-03-25 22:39:30 -07:00
},
overlay=True,
2025-12-29 17:05:03 -08:00
)
2025-12-16 01:45:01 -08:00
except Exception:
pass
return emitted_items
except Exception as exc:
2025-12-29 17:05:03 -08:00
debug(
2025-12-30 23:19:02 -08:00
f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
2025-12-29 17:05:03 -08:00
)
2025-12-16 01:45:01 -08:00
return None
@staticmethod
def _parse_relationship_tag_king_alts(
tag_value: str
) -> tuple[Optional[str],
List[str]]:
2025-12-16 01:45:01 -08:00
"""Parse a relationship tag into (king_hash, alt_hashes).
Supported formats:
- New: relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>
- Old: relationship: hash(king)<KING_HASH>,hash(alt)<ALT_HASH>...
relationship: hash(king)KING,hash(alt)ALT
For the local DB we treat the first hash listed as the king.
"""
if not isinstance(tag_value, str):
return None, []
raw = tag_value.strip()
if not raw:
return None, []
# Normalize input: ensure we only look at the RHS after "relationship:"
rhs = raw
if ":" in raw:
prefix, rest = raw.split(":", 1)
if prefix.strip().lower() == "relationship":
rhs = rest.strip()
# Old typed format: hash(type)HEX
typed = re.findall(r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?", rhs)
if typed:
king: Optional[str] = None
alts: List[str] = []
for rel_type, h in typed:
h_norm = str(h).strip().lower()
if rel_type.strip().lower() == "king":
king = h_norm
elif rel_type.strip().lower() in {"alt",
"related"}:
2025-12-16 01:45:01 -08:00
alts.append(h_norm)
# If the tag omitted king but had hashes, fall back to first hash.
if not king:
all_hashes = [str(h).strip().lower() for _, h in typed]
king = all_hashes[0] if all_hashes else None
alts = [h for h in all_hashes[1:] if h]
# Dedupe alts while preserving order
seen: set[str] = set()
alts = [
h for h in alts
if h and len(h) == 64 and not (h in seen or seen.add(h))
]
2025-12-16 01:45:01 -08:00
if king and len(king) == 64:
return king, [h for h in alts if h != king]
return None, []
# New format: a simple list of hashes, first is king.
hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs)
hashes = [h.strip().lower() for h in hashes if isinstance(h, str)]
if not hashes:
return None, []
king = hashes[0]
alts = hashes[1:]
seen2: set[str] = set()
alts = [
h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h))
]
2025-12-16 01:45:01 -08:00
return king, [h for h in alts if h != king]
@staticmethod
2025-12-29 17:05:03 -08:00
def _parse_relationships_king_alts(
relationships: Dict[str,
Any],
) -> tuple[Optional[str],
List[str]]:
2025-12-16 01:45:01 -08:00
"""Parse a PipeObject.relationships dict into (king_hash, alt_hashes).
Supported shapes:
- {"king": [KING], "alt": [ALT1, ALT2]}
- {"king": KING, "alt": ALT} (strings)
- Also treats "related" hashes as alts for persistence purposes.
"""
if not isinstance(relationships, dict) or not relationships:
return None, []
def _first_hash(val: Any) -> Optional[str]:
if isinstance(val, str):
h = val.strip().lower()
return h if len(h) == 64 else None
if isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
return h
return None
def _many_hashes(val: Any) -> List[str]:
out: List[str] = []
if isinstance(val, str):
h = val.strip().lower()
if len(h) == 64:
out.append(h)
elif isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
out.append(h)
return out
king = _first_hash(relationships.get("king"))
if not king:
return None, []
alts = _many_hashes(relationships.get("alt"))
alts.extend(_many_hashes(relationships.get("related")))
seen: set[str] = set()
alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))]
return king, alts
@staticmethod
2025-12-29 17:05:03 -08:00
def _apply_pending_relationships(
pending: Dict[str,
set[tuple[str,
str]]],
config: Dict[str,
2026-01-12 04:05:52 -08:00
Any],
2026-05-23 13:49:47 -07:00
store_instance: Optional[BackendRegistry] = None,
deps: Optional[_CommandDependencies] = None,
2025-12-29 17:05:03 -08:00
) -> None:
2026-01-02 02:28:59 -08:00
"""Persist relationships to backends that support relationships.
2025-12-16 01:45:01 -08:00
2026-01-02 02:28:59 -08:00
This delegates to an optional backend method: `set_relationship(alt, king, kind)`.
2025-12-16 01:45:01 -08:00
"""
if not pending:
return
if deps is None:
deps = _CommandDependencies(config)
2025-12-16 01:45:01 -08:00
try:
2026-05-23 13:49:47 -07:00
backend_registry = store_instance if store_instance is not None else deps.get_backend_registry()
2025-12-16 01:45:01 -08:00
except Exception:
return
for backend_name, pairs in pending.items():
if not pairs:
continue
try:
2026-05-23 13:49:47 -07:00
backend = backend_registry[str(backend_name)]
2025-12-16 01:45:01 -08:00
except Exception:
continue
2026-05-04 18:41:01 -07:00
if not bool(getattr(backend, "supports_relationship_association", False)):
continue
2026-01-02 02:28:59 -08:00
setter = getattr(backend, "set_relationship", None)
if not callable(setter):
2025-12-16 01:45:01 -08:00
continue
2026-01-02 02:28:59 -08:00
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
alt_norm = str(alt_hash).strip().lower()
king_norm = str(king_hash).strip().lower()
if len(alt_norm) != 64 or len(king_norm) != 64:
continue
try:
setter(alt_norm, king_norm, "alt")
processed_pairs.add((alt_hash, king_hash))
except Exception:
2025-12-16 01:45:01 -08:00
continue
2026-01-04 02:23:50 -08:00
@staticmethod
def _maybe_download_backend_file(
backend: Any,
file_hash: str,
pipe_obj: models.PipeObject,
2026-05-24 12:32:57 -07:00
*,
output_dir: Optional[Path] = None,
2026-01-04 02:23:50 -08:00
) -> Tuple[Optional[Path], Optional[Path]]:
"""Best-effort fetch of a backend file when get_file returns a URL.
Returns (downloaded_path, temp_dir_to_cleanup).
"""
downloader = getattr(backend, "download_to_temp", None)
if not callable(downloader):
return None, None
tmp_dir: Optional[Path] = None
try:
2026-01-14 18:23:20 -08:00
# Extract suffix from pipe_obj path to avoid .tmp rejections
suffix = None
if pipe_obj.path:
try:
suffix = Path(pipe_obj.path).suffix
except Exception:
pass
# Extract suffix from metadata if available (fallback)
if not suffix:
metadata = getattr(pipe_obj, "metadata", {})
if isinstance(metadata, dict):
suffix = metadata.get("ext")
2026-01-14 21:53:07 -08:00
2026-05-24 12:32:57 -07:00
download_root = output_dir
if download_root is None:
tmp_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
download_root = tmp_dir
if download_root is None:
return None, None
# Introspect downloader to pass supported args.
2026-01-14 18:23:20 -08:00
import inspect
2026-01-14 21:53:07 -08:00
2026-01-14 18:23:20 -08:00
sig = inspect.signature(downloader)
2026-05-24 12:32:57 -07:00
kwargs = {"temp_root": download_root}
2026-01-14 18:23:20 -08:00
if "suffix" in sig.parameters:
2026-01-14 21:53:07 -08:00
kwargs["suffix"] = suffix
2026-05-24 12:32:57 -07:00
pipeline_progress = PipelineProgress(ctx)
transfer_label = "peer transfer"
try:
transfer_label = str(getattr(pipe_obj, "title", "") or "").strip() or transfer_label
except Exception:
transfer_label = "peer transfer"
if "pipeline_progress" in sig.parameters:
kwargs["pipeline_progress"] = pipeline_progress
if "transfer_label" in sig.parameters:
kwargs["transfer_label"] = transfer_label
if "progress_callback" in sig.parameters:
2026-01-14 21:53:07 -08:00
def _cb(done, total):
2026-05-24 12:32:57 -07:00
try:
total_val = int(total) if total is not None else None
except Exception:
total_val = None
try:
if int(done or 0) <= 0:
pipeline_progress.begin_transfer(
label=transfer_label,
total=total_val,
)
except Exception:
pass
try:
pipeline_progress.update_transfer(
label=transfer_label,
completed=int(done or 0),
total=total_val,
)
except Exception:
pass
2026-01-14 21:53:07 -08:00
kwargs["progress_callback"] = _cb
downloaded = downloader(str(file_hash), **kwargs)
2026-01-04 02:23:50 -08:00
if isinstance(downloaded, Path) and downloaded.exists():
2026-05-24 12:32:57 -07:00
if output_dir is not None:
pipe_obj.is_temp = False
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra["_direct_export_download"] = True
else:
pipe_obj.extra = {"_direct_export_download": True}
return downloaded, None
2026-01-04 02:23:50 -08:00
pipe_obj.is_temp = True
return downloaded, tmp_dir
except Exception:
pass
if tmp_dir is not None:
try:
shutil.rmtree(tmp_dir, ignore_errors=True)
except Exception:
pass
return None, None
2026-04-17 16:17:16 -07:00
@staticmethod
def _download_remote_backend_url(
remote_url: str,
pipe_obj: models.PipeObject,
*,
file_hash: Optional[str] = None,
output_dir: Optional[Path] = None,
) -> Tuple[Optional[Path], Optional[Path]]:
"""Best-effort fetch of a remote backend URL.
Returns (downloaded_path, temp_dir_to_cleanup).
When ``output_dir`` is provided, the file is downloaded directly there and no
temp cleanup path is returned.
"""
url_text = str(remote_url or "").strip()
if not url_text:
return None, None
if not url_text.lower().startswith(_REMOTE_URL_PREFIXES):
return None, None
2026-05-04 15:58:24 -07:00
# This helper performs generic HTTP downloads only.
# Non-HTTP schemes (e.g. hydrus://, tidal:) should be handled by
# plugin-specific resolvers via _maybe_download_plugin_result.
if not url_text.lower().startswith(("http://", "https://")):
return None, None
2026-04-17 16:17:16 -07:00
tmp_dir: Optional[Path] = None
try:
download_root = output_dir
if download_root is None:
tmp_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
download_root = tmp_dir
suggested_name = Add_File._build_provider_filename(
pipe_obj,
fallback_hash=file_hash,
source_url=url_text,
)
pipeline_progress = PipelineProgress(ctx)
2026-05-24 12:32:57 -07:00
try:
destination_label = str(download_root) if download_root is not None else "temporary workspace"
pipeline_progress.set_status(f"downloading {suggested_name} to {destination_label}")
except Exception:
pass
2026-04-17 16:17:16 -07:00
2026-05-26 15:32:01 -07:00
downloaded = download_direct_file(
2026-04-17 16:17:16 -07:00
url_text,
download_root,
quiet=False,
suggested_filename=suggested_name,
pipeline_progress=pipeline_progress,
)
downloaded_path = getattr(downloaded, "path", None)
if isinstance(downloaded_path, Path) and downloaded_path.exists():
if output_dir is not None:
pipe_obj.is_temp = False
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra["_direct_export_download"] = True
else:
pipe_obj.extra = {"_direct_export_download": True}
return downloaded_path, None
pipe_obj.is_temp = True
return downloaded_path, tmp_dir
except Exception:
pass
2026-05-24 12:32:57 -07:00
finally:
try:
PipelineProgress(ctx).clear_status()
except Exception:
pass
2026-04-17 16:17:16 -07:00
if tmp_dir is not None:
try:
shutil.rmtree(tmp_dir, ignore_errors=True)
except Exception:
pass
return None, None
2026-01-24 01:38:12 -08:00
@staticmethod
def _build_provider_filename(
    pipe_obj: models.PipeObject,
    fallback_hash: Optional[str] = None,
    source_url: Optional[str] = None,
) -> str:
    """Build a sanitized download filename for a pipe object.

    The stem is the first non-blank value among the object's ``title``,
    ``extra["name"]``/``extra["title"]`` and
    ``metadata["title"]``/``metadata["name"]``; failing that, the first eight
    characters of ``fallback_hash``; failing that, ``"download"``.

    The extension comes from ``metadata["ext"]``/``metadata["extension"]``,
    then ``extra["ext"]``, then the path component of ``source_url``. It is
    appended (with a leading dot) unless the name already ends with it.
    """
    extra = getattr(pipe_obj, "extra", {})
    metadata = getattr(pipe_obj, "metadata", {})
    has_extra = isinstance(extra, dict)
    has_metadata = isinstance(metadata, dict)

    # Candidate titles, in priority order.
    raw_names = [getattr(pipe_obj, "title", "")]
    if has_extra:
        raw_names.append(extra.get("name") or extra.get("title"))
    if has_metadata:
        raw_names.append(metadata.get("title") or metadata.get("name"))

    stem = ""
    for raw in raw_names:
        if not raw:
            continue
        as_text = str(raw)
        if not as_text:
            continue
        stem = as_text.strip()
        if stem:
            break
    if not stem and fallback_hash:
        stem = fallback_hash[:8]

    safe_name = sanitize_filename(stem or "download")

    ext = ""
    if has_metadata:
        ext = metadata.get("ext") or metadata.get("extension") or ""
    if not ext and has_extra:
        ext = extra.get("ext") or ""
    if not ext and source_url:
        try:
            ext = Path(urlparse(source_url).path).suffix.lstrip(".")
        except Exception:
            ext = ""

    if ext:
        dotted = str(ext)
        if not dotted.startswith("."):
            dotted = "." + dotted.lstrip(".")
        already_present = safe_name.lower().endswith(dotted.lower())
        if not already_present:
            safe_name = f"{safe_name}{dotted}"

    return safe_name or "download"
2026-01-23 21:32:34 -08:00
@staticmethod
2026-05-03 21:20:05 -07:00
def _resolve_backend_by_name(instance: Any, backend_name: str) -> Optional[Any]:
if not instance or not backend_name:
2026-01-23 21:32:34 -08:00
return None
try:
2026-05-03 21:20:05 -07:00
return instance[backend_name]
2026-01-23 21:32:34 -08:00
except Exception:
pass
target = str(backend_name or "").strip().lower()
if not target:
return None
try:
2026-05-03 21:20:05 -07:00
for candidate in instance.list_backends():
2026-01-23 21:32:34 -08:00
if isinstance(candidate, str) and candidate.strip().lower() == target:
try:
2026-05-03 21:20:05 -07:00
return instance[candidate]
2026-01-23 21:32:34 -08:00
except Exception:
continue
except Exception:
pass
return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _resolve_source(
    result: Any,
    source_arg: Optional[str],
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    export_destination: Optional[Path] = None,
    store_instance: Optional[Any] = None,
    deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
    """Resolve the source file path from the positional source arg or pipeline result.

    Resolution order:
      1a. An existing local file named by ``result["path"]`` (dict results).
      1b. ``hash`` + ``store`` on the result: ask the backend for the file,
          downloading it when the backend only yields a string/URL.
      2.  Generic coercion: ``source_arg`` > ``pipe_obj.path`` > the result
          itself. Remote sources go to the plugin resolver first, then to the
          generic HTTP download.
      3.  Plugin resolver as a last resort.

    Args:
        result: Pipeline result (dict, list, or object).
        source_arg: Explicit positional source, if any.
        pipe_obj: Pipe object; its ``path`` is updated to the resolved file.
        config: Configuration used to build dependencies when ``deps`` is None.
        export_destination: When set, downloads are written directly there.
        store_instance: Optional backend registry overriding the one from ``deps``.
        deps: Optional pre-built command dependencies.

    Returns:
        ``(media_path, file_hash, temp_dir_to_cleanup)``; all ``None`` when the
        source cannot be resolved.
    """
    # PRIORITY 1a: Prefer an explicit local path when it exists.
    # This avoids unnecessary backend.get_file(hash) probes (and remote fallbacks)
    # for freshly downloaded temp files that are not yet present in the source store.
    if isinstance(result, dict):
        r_path = result.get("path")
        r_hash = result.get("hash")
        if r_path:
            try:
                p = coerce_to_path(r_path)
                if p.exists() and p.is_file():
                    pipe_obj.path = str(p)
                    return p, str(r_hash) if r_hash else None, None
            except Exception:
                pass

    # PRIORITY 1b: Try hash+store from result (fetch from backend)
    r_hash = get_field(result, "hash") or get_field(result, "file_hash")
    r_store = get_field(result, "store")
    if r_hash and r_store:
        try:
            if deps is None:
                deps = _CommandDependencies(config)
            backend_registry = store_instance or deps.get_backend_registry()
            backend = Add_File._resolve_backend_by_name(backend_registry, r_store)
            if backend is not None:
                mp = backend.get_file(r_hash)
                if isinstance(mp, Path) and mp.exists():
                    pipe_obj.path = str(mp)
                    return mp, str(r_hash), None
                if isinstance(mp, str) and mp.strip():
                    # The string may be a local filesystem path...
                    try:
                        mp_path = Path(str(mp))
                    except Exception:
                        mp_path = None
                    if mp_path is not None and mp_path.exists() and mp_path.is_file():
                        pipe_obj.path = str(mp_path)
                        return mp_path, str(r_hash), None

                    # ...or something only the backend knows how to download...
                    dl_path, tmp_dir = Add_File._maybe_download_backend_file(
                        backend,
                        str(r_hash),
                        pipe_obj,
                        output_dir=export_destination,
                    )
                    if dl_path and dl_path.exists():
                        pipe_obj.path = str(dl_path)
                        return dl_path, str(r_hash), tmp_dir

                    # ...or a plain HTTP(S) URL.
                    dl_path, tmp_dir = Add_File._download_remote_backend_url(
                        str(mp),
                        pipe_obj,
                        file_hash=str(r_hash),
                        output_dir=export_destination,
                    )
                    if dl_path and dl_path.exists():
                        pipe_obj.path = str(dl_path)
                        return dl_path, str(r_hash), tmp_dir
        except Exception as exc:
            debug(f"[add-file] _resolve_source backend fetch failed for {r_store}/{r_hash}: {exc}")

    # PRIORITY 2: Generic Coercion (Path arg > PipeObject > Result)
    candidate: Optional[Path] = None
    # Keep the untouched source text as well: pathlib collapses repeated
    # slashes, so str(Path("https://host/x")) no longer starts with
    # "https://" and is not a usable URL any more.
    raw_source = ""
    if source_arg:
        raw_source = str(source_arg).strip()
        candidate = Path(source_arg)
    elif pipe_obj.path:
        raw_source = str(pipe_obj.path).strip()
        candidate = Path(pipe_obj.path)

    if not candidate:
        # Unwrap list if needed
        obj = result[0] if isinstance(result, list) and result else result
        if obj:
            try:
                candidate = coerce_to_path(obj)
            except ValueError:
                pass

    if candidate:
        source_text = raw_source or str(candidate)
        if source_text.lower().startswith(_REMOTE_URL_PREFIXES):
            # For remote sources, prefer plugin-specific resolvers first
            # (e.g. hydrus://), then generic HTTP fallback.
            downloaded_path, hash_hint, tmp_dir = Add_File._maybe_download_plugin_result(
                result,
                pipe_obj,
                config,
                deps=deps,
            )
            if downloaded_path:
                pipe_obj.path = str(downloaded_path)
                return downloaded_path, hash_hint, tmp_dir

            dl_path, tmp_dir = Add_File._download_remote_backend_url(
                source_text,
                pipe_obj,
                file_hash=get_field(result, "hash") or get_field(result, "file_hash"),
                output_dir=export_destination,
            )
            if dl_path:
                pipe_obj.path = str(dl_path)
                hash_hint = get_field(result, "hash") or get_field(result, "file_hash")
                return dl_path, hash_hint, tmp_dir

            log("add-file could not auto-fetch remote source. Use download-file first.", file=sys.stderr)
            return None, None, None

        pipe_obj.path = str(candidate)
        # Retain hash from input if available to avoid re-hashing
        hash_hint = get_field(result, "hash") or get_field(result, "file_hash") or getattr(pipe_obj, "hash", None)
        return candidate, hash_hint, None

    downloaded_path, hash_hint, tmp_dir = Add_File._maybe_download_plugin_result(
        result,
        pipe_obj,
        config,
        deps=deps,
    )
    if downloaded_path:
        pipe_obj.path = str(downloaded_path)
        return downloaded_path, hash_hint, tmp_dir

    debug(f"No resolution path matched. result type={type(result).__name__}")
    log("File path could not be resolved")
    return None, None, None
2025-12-07 00:21:30 -08:00
2026-01-31 23:22:30 -08:00
@staticmethod
def _normalize_provider_key(value: Optional[Any]) -> Optional[str]:
if value is None:
return None
try:
normalized = str(value).strip().lower()
except Exception:
return None
if not normalized:
return None
if "." in normalized:
normalized = normalized.split(".", 1)[0]
return normalized
2026-05-24 12:32:57 -07:00
@staticmethod
def validate_preflight_args(
    args: Sequence[str],
    config: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Check add-file arguments before the command runs.

    Args:
        args: Raw add-file argument list.
        config: Configuration mapping; anything that is not a dict is treated
            as an empty configuration.

    Returns:
        ``None`` when no problem was found, otherwise a
        ``"Pipeline error: ..."`` message for the first problem: legacy path
        flag, unparsable arguments, unknown storage backend, unknown or
        non-upload plugin, or an unconfigured local destination.
    """
    cfg = config if isinstance(config, dict) else {}

    if Add_File._uses_legacy_path_flag(args):
        return f"Pipeline error: {Add_File._legacy_path_flag_message()}"

    try:
        parsed = parse_cmdlet_args(args, CMDLET)
    except Exception as exc:
        return f"Pipeline error: invalid add-file arguments: {exc}"

    deps = _CommandDependencies(cfg)
    storage_registry = deps.get_backend_registry()

    # Both names are read from the same "-instance" argument.
    location = parsed.get("instance")
    plugin_instance = parsed.get("instance")
    plugin_name = parsed.get("plugin")

    known_backend = False
    if location:
        try:
            registry = storage_registry or deps.get_backend_registry()
            match = Add_File._resolve_backend_by_name(registry, str(location))
            known_backend = match is not None
        except Exception:
            known_backend = False

    # An instance without a plugin that is not a store backend may still be a
    # local export target.
    if location and not plugin_name and not known_backend:
        _, local_path = Add_File._resolve_local_export_plugin_target(
            location,
            cfg,
            deps=deps,
            require_explicit=True,
        )
        if local_path:
            return None
        return (
            f"Pipeline error: storage backend '{location}' not found. "
            "Use -plugin local -instance <name|path> for local export or configure that store backend."
        )

    plugin_key = Add_File._normalize_provider_key(plugin_name)
    if not plugin_key:
        return None

    if deps.get_plugin_with_capability(plugin_key, "upload") is None:
        if deps.get_plugin(plugin_key) is None:
            return f"Pipeline error: unknown upload plugin '{plugin_name}'."
        if plugin_key == "loc":
            return (
                "Pipeline error: plugin 'loc' does not support add-file/upload. "
                "Use -plugin local -instance <name|path> for local export."
            )
        return f"Pipeline error: plugin '{plugin_key}' does not support add-file/upload."

    if plugin_key == "local":
        target = plugin_instance or location
        requested_local = str(target or "").strip() or "<default>"
        _, local_path = Add_File._resolve_local_export_plugin_target(
            target,
            cfg,
            deps=deps,
            require_explicit=bool(target),
        )
        if not local_path:
            return (
                f"Pipeline error: local destination '{requested_local}' is not configured. "
                "Use -plugin local -instance <name|path>."
            )

    return None
2026-04-30 18:56:22 -07:00
@staticmethod
def _resolve_plugin_storage_backend(
    plugin_name: Optional[Any],
    instance_name: Optional[Any],
    config: Dict[str, Any],
    *,
    store_instance: Optional[Any] = None,
    deps: Optional[_CommandDependencies] = None,
) -> Optional[str]:
    """Ask an upload plugin which storage backend an instance name maps to.

    The plugin's optional ``resolve_backend`` is called with the explicit
    instance name plus ``storage``/``require_explicit`` keywords, falling back
    to a positional-only call on ``TypeError``. When the plugin also exposes
    ``is_backend``, the resolved backend must pass that check.

    Returns:
        The resolved backend name, or ``None`` when the plugin is unknown,
        has no resolver, resolves nothing, rejects the backend, or raises.
    """
    plugin_key = Add_File._normalize_provider_key(plugin_name)
    if not plugin_key:
        return None

    if deps is None:
        deps = _CommandDependencies(config)

    provider = deps.get_plugin_with_capability(plugin_key, "upload")
    if provider is None:
        return None

    resolve_backend = getattr(provider, "resolve_backend", None)
    if not callable(resolve_backend):
        return None

    explicit_instance = str(instance_name or "").strip() or None

    # A registry that cannot be built is passed on as None rather than failing.
    try:
        if store_instance is not None:
            backend_registry = store_instance
        else:
            backend_registry = BackendRegistry(config)
    except Exception:
        backend_registry = None

    try:
        resolved_name, backend = resolve_backend(
            explicit_instance,
            storage=backend_registry,
            require_explicit=bool(explicit_instance),
        )
    except TypeError:
        # Resolver does not take the keyword arguments: retry positionally.
        try:
            resolved_name, backend = resolve_backend(explicit_instance)
        except Exception:
            return None
    except Exception:
        return None

    if backend is None:
        return None

    resolved_text = str(resolved_name or explicit_instance or "").strip()
    if not resolved_text:
        return None

    is_backend = getattr(provider, "is_backend", None)
    if callable(is_backend):
        try:
            accepted = is_backend(backend, resolved_text)
        except Exception:
            return None
        if not accepted:
            return None

    return resolved_text
@staticmethod
def _resolve_local_export_plugin_target(
requested: Optional[Any],
config: Dict[str, Any],
*,
deps: Optional[_CommandDependencies] = None,
require_explicit: bool = False,
) -> tuple[Optional[str], Optional[str]]:
if deps is None:
deps = _CommandDependencies(config)
file_provider = deps.get_plugin_with_capability("local", "upload")
if file_provider is None:
return None, None
resolver = getattr(file_provider, "resolve_destination", None)
if not callable(resolver):
return None, None
requested_text = str(requested or "").strip() or None
try:
resolved_name, settings = resolver(
requested_text,
require_explicit=require_explicit,
)
except TypeError:
try:
resolved_name, settings = resolver(requested_text)
except Exception:
return None, None
except Exception:
return None, None
path_value = str((settings or {}).get("path") or "").strip()
if not path_value:
return None, None
resolved_text = str(resolved_name or requested_text or "").strip() or None
return resolved_text, path_value
2026-01-31 23:22:30 -08:00
@staticmethod
def _maybe_download_plugin_result(
    result: Any,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
    """Let the plugin that produced a result download it.

    The plugin key is the first non-empty normalized value among
    ``pipe_obj.plugin``, the result's ``plugin`` field and the result's
    ``table`` field. The plugin's ``resolve_pipe_result_download`` does the
    actual work.

    Returns:
        Whatever the plugin returns, or ``(None, None, None)`` when no plugin
        key or plugin is found, or the plugin helper raises.
    """
    nothing = (None, None, None)

    key_sources = (
        pipe_obj.plugin,
        get_field(result, "plugin"),
        get_field(result, "table"),
    )
    plugin_key = None
    for raw_key in key_sources:
        normalized_key = Add_File._normalize_provider_key(raw_key)
        if normalized_key:
            plugin_key = normalized_key
            break
    if not plugin_key:
        return nothing

    if deps is None:
        deps = _CommandDependencies(config)

    plugin = deps.get_plugin(plugin_key)
    if plugin is None:
        return nothing

    try:
        return plugin.resolve_pipe_result_download(result, pipe_obj)
    except Exception as exc:
        debug(f"[add-file] Plugin '{plugin_key}' download helper failed: {exc}")
        return nothing
2026-01-31 23:22:30 -08:00
2026-01-23 21:32:34 -08:00
@staticmethod
def _download_piped_source(
2026-01-23 21:32:34 -08:00
pipe_obj: models.PipeObject,
config: Dict[str, Any],
store_instance: Optional[Any],
deps: Optional[_CommandDependencies] = None,
2026-01-23 21:32:34 -08:00
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
r_hash = str(getattr(pipe_obj, "hash", None) or getattr(pipe_obj, "file_hash", None) or "").strip()
r_store = str(getattr(pipe_obj, "store", None) or "").strip()
if not (r_hash and r_store):
return None, None, None
if deps is None:
deps = _CommandDependencies(config)
2026-05-23 13:49:47 -07:00
backend_registry = store_instance or deps.get_backend_registry()
backend = Add_File._resolve_backend_by_name(backend_registry, r_store) if backend_registry is not None else None
2026-01-23 21:32:34 -08:00
if backend is None:
return None, None, None
try:
source = backend.get_file(r_hash.lower())
if isinstance(source, Path) and source.exists():
pipe_obj.path = str(source)
return source, str(r_hash), None
if isinstance(source, str) and source.strip():
dl_path, tmp_dir = Add_File._maybe_download_backend_file(
backend, str(r_hash), pipe_obj
)
if dl_path and dl_path.exists():
return dl_path, str(r_hash), tmp_dir
2026-01-24 01:38:12 -08:00
source_url = str(source).strip()
if source_url.lower().startswith(("http://", "https://")):
download_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
try:
filename = Add_File._build_provider_filename(
pipe_obj,
str(r_hash),
source_url,
)
2026-05-26 15:32:01 -07:00
downloaded = download_direct_file(
2026-01-24 01:38:12 -08:00
source_url,
download_dir,
quiet=True,
suggested_filename=filename,
)
downloaded_path = downloaded.path
if downloaded_path and downloaded_path.exists():
pipe_obj.is_temp = True
pipe_obj.path = str(downloaded_path)
return downloaded_path, str(r_hash), download_dir
except Exception as exc:
debug(f"[add-file] Provider download failed: {exc}")
try:
shutil.rmtree(download_dir, ignore_errors=True)
except Exception:
pass
2026-01-23 21:32:34 -08:00
except Exception:
pass
return None, None, None
2025-12-27 06:05:07 -08:00
@staticmethod
2026-01-18 13:10:31 -08:00
def _scan_directory_for_files(directory: Path, compute_hash: bool = True) -> List[Dict[str, Any]]:
2025-12-27 06:05:07 -08:00
"""Scan a directory for supported media files and return list of file info dicts.
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
Each dict contains:
- path: Path object
- name: filename
2026-01-18 13:10:31 -08:00
- hash: sha256 hash (or None if compute_hash=False)
2025-12-27 06:05:07 -08:00
- size: file size in bytes
- ext: file extension
"""
if not directory.exists() or not directory.is_dir():
return []
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
files_info: List[Dict[str, Any]] = []
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
try:
for item in directory.iterdir():
if not item.is_file():
continue
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
ext = item.suffix.lower()
if ext not in SUPPORTED_MEDIA_EXTENSIONS:
continue
2025-12-29 17:05:03 -08:00
2026-01-18 13:10:31 -08:00
file_hash = None
# Compute hash if requested (computing can be expensive for large dirs)
if compute_hash:
try:
file_hash = sha256_file(item)
except Exception as exc:
debug(f"Failed to hash {item}: {exc}")
# If hashing is required, skip this file; otherwise include without hash
continue
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
# Get file size
try:
size = item.stat().st_size
except Exception:
size = 0
2025-12-29 17:05:03 -08:00
files_info.append(
{
"path": item,
"name": item.name,
"hash": file_hash,
"size": size,
"ext": ext,
}
)
2025-12-27 06:05:07 -08:00
except Exception as exc:
debug(f"Error scanning directory {directory}: {exc}")
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
return files_info
2026-01-09 01:22:06 -08:00
@staticmethod
def _validate_source(media_path: Optional[Path], allow_all_extensions: bool = False) -> bool:
"""Validate that the source file exists and is supported.
Args:
media_path: Path to the file to validate
2026-05-24 12:32:57 -07:00
allow_all_extensions: If True, skip file type filtering for non-backend exports.
If False, only allow SUPPORTED_MEDIA_EXTENSIONS for backend ingest.
2026-01-09 01:22:06 -08:00
"""
2025-12-11 12:47:30 -08:00
if media_path is None:
return False
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}")
return False
2026-05-24 12:32:57 -07:00
# Validate file type only when ingesting into a storage backend.
2026-01-09 01:22:06 -08:00
if not allow_all_extensions:
file_extension = media_path.suffix.lower()
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
return False
2025-12-11 12:47:30 -08:00
return True
2026-01-12 04:05:52 -08:00
@staticmethod
def _is_probable_url(s: Any) -> bool:
"""Check if a string looks like a URL/magnet/identifier (vs a tag/title)."""
if not isinstance(s, str):
return False
val = s.strip().lower()
if not val:
return False
# Obvious schemes
2026-02-25 17:35:38 -08:00
if val.startswith(_REMOTE_URL_PREFIXES):
2026-01-12 04:05:52 -08:00
return True
# Domain-like patterns or local file paths (but we want URLs here)
if "://" in val:
return True
# Hydrus hash-like search queries are NOT urls
if val.startswith("hash:"):
return False
return False
2025-12-11 12:47:30 -08:00
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
    """Collect URL-looking strings for an item from the pipe object and raw result.

    Sources are gathered in this order: ``pipe_obj.url``,
    ``pipe_obj.source_url``, ``pipe_obj.extra["url"]`` (list or string) and
    finally ``extract_url_from_result(result)``.  The combined list is passed
    through ``normalize_urls`` and then filtered with
    ``Add_File._is_probable_url`` so tags or titles do not leak through.
    """
    from SYS.metadata import normalize_urls

    found: List[str] = []

    # Top-level PipeObject fields come first.
    for direct in (pipe_obj.url, pipe_obj.source_url):
        if direct:
            found.append(direct)

    # The "url" entry of the extra dict may be a list or a single string.
    extra = pipe_obj.extra
    if isinstance(extra, dict):
        extra_url = extra.get("url")
        if isinstance(extra_url, list):
            found += [str(entry) for entry in extra_url if entry]
        elif isinstance(extra_url, str):
            found.append(extra_url)

    # Whatever the upstream result (dict or another PipeObject) carries.
    from_result = extract_url_from_result(result)
    if from_result:
        found.extend(from_result)

    return [u for u in normalize_urls(found) if Add_File._is_probable_url(u)]
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _get_relationships(result: Any,
pipe_obj: models.PipeObject) -> Optional[Dict[str,
Any]]:
2025-12-11 12:47:30 -08:00
try:
rels = pipe_obj.get_relationships()
if rels:
return rels
except Exception:
pass
if isinstance(result, dict) and result.get("relationships"):
return result.get("relationships")
try:
return extract_relationships(result)
except Exception:
return None
@staticmethod
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
2025-12-14 00:53:52 -08:00
def _parse_duration(value: Any) -> Optional[float]:
if value is None:
return None
if isinstance(value, (int, float)):
return float(value) if value > 0 else None
if isinstance(value, str):
s = value.strip()
if not s:
return None
try:
candidate = float(s)
return candidate if candidate > 0 else None
except ValueError:
pass
if ":" in s:
parts = [p.strip() for p in s.split(":") if p.strip()]
if len(parts) in {2,
3} and all(p.isdigit() for p in parts):
2025-12-14 00:53:52 -08:00
nums = [int(p) for p in parts]
if len(nums) == 2:
minutes, seconds = nums
return float(minutes * 60 + seconds)
hours, minutes, seconds = nums
return float(hours * 3600 + minutes * 60 + seconds)
return None
parsed = _parse_duration(getattr(pipe_obj, "duration", None))
if parsed is not None:
return parsed
2025-12-11 12:47:30 -08:00
try:
2025-12-14 00:53:52 -08:00
return _parse_duration(extract_duration(result))
2025-12-11 12:47:30 -08:00
except Exception:
return None
2025-12-16 23:23:43 -08:00
@staticmethod
def _get_note_text(result: Any,
pipe_obj: models.PipeObject,
note_name: str) -> Optional[str]:
2025-12-16 23:23:43 -08:00
"""Extract a named note text from a piped item.
Supports:
- pipe_obj.extra["notes"][note_name]
- result["notes"][note_name] for dict results
- pipe_obj.extra[note_name] / result[note_name] as fallback
"""
def _normalize(val: Any) -> Optional[str]:
if val is None:
return None
if isinstance(val, bytes):
try:
val = val.decode("utf-8", errors="ignore")
except Exception:
val = str(val)
if isinstance(val, str):
text = val.strip()
return text if text else None
try:
text = str(val).strip()
return text if text else None
except Exception:
return None
note_key = str(note_name or "").strip()
if not note_key:
return None
# Prefer notes dict on PipeObject.extra (common for cmdlet-emitted dicts)
try:
if isinstance(pipe_obj.extra, dict):
notes_val = pipe_obj.extra.get("notes")
if isinstance(notes_val, dict) and note_key in notes_val:
return _normalize(notes_val.get(note_key))
if note_key in pipe_obj.extra:
return _normalize(pipe_obj.extra.get(note_key))
except Exception:
pass
# Fallback to raw result dict
if isinstance(result, dict):
try:
notes_val = result.get("notes")
if isinstance(notes_val, dict) and note_key in notes_val:
return _normalize(notes_val.get(note_key))
if note_key in result:
return _normalize(result.get(note_key))
except Exception:
pass
return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _update_pipe_object_destination(
pipe_obj: models.PipeObject,
*,
2025-12-11 19:04:02 -08:00
hash_value: str,
2025-12-11 12:47:30 -08:00
store: str,
2026-05-26 15:32:01 -07:00
plugin: Optional[str] = None,
2025-12-11 19:04:02 -08:00
path: Optional[str],
2025-12-11 23:21:45 -08:00
tag: List[str],
2025-12-11 12:47:30 -08:00
title: Optional[str],
extra_updates: Optional[Dict[str,
Any]] = None,
2025-12-11 12:47:30 -08:00
) -> None:
2025-12-11 19:04:02 -08:00
pipe_obj.hash = hash_value
2025-12-11 12:47:30 -08:00
pipe_obj.store = store
2026-05-26 15:32:01 -07:00
pipe_obj.plugin = plugin
2026-04-16 17:18:50 -07:00
pipe_obj.is_temp = False
2025-12-11 19:04:02 -08:00
pipe_obj.path = path
2025-12-11 23:21:45 -08:00
pipe_obj.tag = tag
2025-12-11 12:47:30 -08:00
if title:
pipe_obj.title = title
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra.update(extra_updates or {})
else:
pipe_obj.extra = dict(extra_updates or {})
@staticmethod
def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
    """Emit *pipe_obj* as a dict and, on the final stage, display it.

    The dict form of the object is always emitted and the current stage
    table is cleared.  When there is no stage context, or the stage context
    reports ``is_last_stage``, live progress is stopped and the payload is
    shown via ``display_and_persist_items``; any error during that display
    step is ignored.
    """
    emitted = pipe_obj.to_dict()
    ctx.emit(emitted)
    ctx.set_current_stage_table(None)

    current_stage = ctx.get_stage_context()
    if current_stage is not None and not getattr(current_stage, "is_last_stage", False):
        # More stages follow; they consume the emitted payload.
        return

    try:
        Add_File._stop_live_progress_for_terminal_render()
        from .._shared import display_and_persist_items

        display_and_persist_items([emitted], title="Result", subject=emitted)
    except Exception:
        pass
2026-05-24 12:32:57 -07:00
@staticmethod
def _stop_live_progress_for_terminal_render() -> None:
    """Shut down the live progress display, ignoring every failure.

    Does nothing when no live progress object is available.  Otherwise, in
    order and each step independently guarded: finish the current pipe (when
    the stage context has an integer ``pipe_index``), stop the progress
    object, and clear it from the pipeline context if the context offers
    ``set_live_progress``.
    """
    try:
        progress = ctx.get_live_progress()
    except Exception:
        return
    if progress is None:
        return

    def _finish_current_pipe() -> None:
        stage = ctx.get_stage_context()
        index = getattr(stage, "pipe_index", None)
        if isinstance(index, int):
            progress.finish_pipe(int(index), force_complete=True)

    def _stop() -> None:
        progress.stop()

    def _detach() -> None:
        if hasattr(ctx, "set_live_progress"):
            ctx.set_live_progress(None)

    # A failure in one step must not prevent the following ones.
    for step in (_finish_current_pipe, _stop, _detach):
        try:
            step()
        except Exception:
            pass
2025-12-13 12:09:50 -08:00
@staticmethod
def _emit_storage_result(
    payload: Dict[str, Any],
    *,
    overlay: bool = True,
    emit: bool = True
) -> None:
    """Emit a storage-style result payload and optionally overlay a table.

    - When ``emit`` is true the dict is emitted downstream.
    - When ``overlay`` is true and this is the last stage (or there is no
      stage context), a "Result" table holding the payload is published as
      an overlay so ``@N`` selection refers to it.
    - If building or publishing the table fails, the payload is still
      registered as the last result items on a best-effort basis.
    """
    if emit:
        ctx.emit(payload)

    current_stage = ctx.get_stage_context()
    final_stage = current_stage is None or bool(
        getattr(current_stage, "is_last_stage", False)
    )
    if not (final_stage and overlay):
        return

    try:
        from SYS.result_table import Table

        result_table = Table("Result")
        result_table.add_result(payload)
        # Overlay so @1 refers to this result without overwriting search history.
        publish_result_table(ctx, result_table, [payload], subject=payload, overlay=True)
        return
    except Exception:
        pass

    # Table rendering failed: keep the @ selection items at least.
    try:
        ctx.set_last_result_items_only([payload])
    except Exception:
        pass
2025-12-14 00:53:52 -08:00
@staticmethod
def _try_emit_search_file_by_hash(
    *,
    instance: str,
    hash_value: str,
    config: Dict[str, Any]
) -> Optional[List[Any]]:
    """Run search-file for one hash and return the items it emitted.

    The search cmdlet is executed with ``-instance <instance> hash:<hash>``
    under a temporary single-stage context, so its emits are captured there
    instead of landing in the surrounding add-file stage; the previous stage
    context is always restored afterwards.

    On the last stage (or with no stage context) the existing result table is
    promoted to a display overlay with ``{"store", "hash"}`` as subject.

    Returns:
        The list of captured emits on success; ``None`` when the cmdlet
        returns a non-zero code or anything raises.
    """
    try:
        from cmdlet.file.search import CMDLET as search_file_cmdlet

        search_args = ["-instance", str(instance), f"hash:{str(hash_value)}"]

        # Isolate the nested run from the outer pipeline stage.
        outer_stage = ctx.get_stage_context()
        isolated_stage = ctx.PipelineStageContext(
            stage_index=0,
            total_stages=1,
            pipe_index=0,
            worker_id=getattr(outer_stage, "worker_id", None),
        )
        ctx.set_stage_context(isolated_stage)
        try:
            exit_code = search_file_cmdlet.run(None, search_args, config)
            captured = list(getattr(isolated_stage, "emits", []) or [])
        finally:
            ctx.set_stage_context(outer_stage)

        if exit_code != 0:
            return None

        active_stage = ctx.get_stage_context()
        on_last_stage = active_stage is None or bool(
            getattr(active_stage, "is_last_stage", False)
        )
        if on_last_stage:
            try:
                # The fetched table/items are not used below; the calls are
                # kept so that a failure here still skips the overlay.
                ctx.get_last_result_table()
                ctx.get_last_result_items()
                overlay_existing_result_table(
                    ctx,
                    subject={
                        "store": instance,
                        "hash": hash_value
                    },
                )
            except Exception:
                pass

        return captured
    except Exception as exc:
        debug(
            f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
        )
        return None
2025-12-14 00:53:52 -08:00
2025-12-11 12:47:30 -08:00
# Gathers tags, URLs, a preferred title and a file hash for `media_path` from
# three places (the pipe object, the raw upstream result, and the sidecar
# bundle), merges them, writes the merged values back onto `pipe_obj`, and
# returns them as (tags, urls, preferred_title, file_hash).
@staticmethod
def _prepare_metadata(
result: Any,
media_path: Path,
pipe_obj: models.PipeObject,
config: Dict[str,
Any],
) -> Tuple[List[str],
List[str],
Optional[str],
Optional[str]]:
2025-12-11 12:47:30 -08:00
"""
Prepare tags, url, and title for the file.
Returns (tags, url, preferred_title, file_hash)
"""
2025-12-11 23:21:45 -08:00
# Tags already on the pipe object are used as-is; the raw result is only
# consulted when the pipe object has none (extraction errors give []).
tags_from_result = list(pipe_obj.tag or [])
2025-12-11 12:47:30 -08:00
if not tags_from_result:
try:
2025-12-11 23:21:45 -08:00
tags_from_result = list(extract_tag_from_result(result) or [])
2025-12-11 12:47:30 -08:00
except Exception:
tags_from_result = []
url_from_result = Add_File._get_url(result, pipe_obj)
# Title sources, in order: pipe_obj.title, the first non-empty "title:" tag,
# then extract_title_from_result(result).
preferred_title = pipe_obj.title
if not preferred_title:
for t in tags_from_result:
if str(t).strip().lower().startswith("title:"):
# NOTE(review): the test uses str(t) but the split uses t itself, so a
# non-string tag would raise here -- confirm tags are always strings.
candidate = t.split(":", 1)[1].strip().replace("_", " ").strip()
if candidate:
preferred_title = candidate
break
if not preferred_title:
preferred_title = extract_title_from_result(result)
# Underscores in the title text are replaced with spaces.
if preferred_title:
preferred_title = preferred_title.replace("_", " ").strip()
2025-12-11 19:04:02 -08:00
store = getattr(pipe_obj, "store", None)
2025-12-11 12:47:30 -08:00
# Sidecar bundle supplies (ignored, hash, tags, urls) for this media path.
_, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
2025-12-11 19:04:02 -08:00
media_path, store, config
2025-12-11 12:47:30 -08:00
)
# Rewrites a "title:" tag so underscores in its value become spaces;
# every other tag is returned unchanged.
def normalize_title_tag(tag: str) -> str:
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
2025-12-29 17:05:03 -08:00
# "title:" tags are removed from both tag sources below; a single
# "title:<preferred_title>" tag is appended after the merge instead.
tags_from_result_no_title = [
t for t in tags_from_result
if not str(t).strip().lower().startswith("title:")
2025-12-29 17:05:03 -08:00
]
2026-05-26 15:32:01 -07:00
sidecar_tags = collapse_namespace_tags(
[normalize_title_tag(t) for t in sidecar_tags],
"title",
prefer="last"
2025-12-29 17:05:03 -08:00
)
sidecar_tags_filtered = [
t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")
]
2025-12-11 12:47:30 -08:00
2025-12-29 17:05:03 -08:00
merged_tags = merge_sequences(
tags_from_result_no_title,
sidecar_tags_filtered,
case_sensitive=True
2025-12-29 17:05:03 -08:00
)
2025-12-11 12:47:30 -08:00
if preferred_title:
merged_tags.append(f"title:{preferred_title}")
merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
2026-01-12 04:05:52 -08:00
# Final safety filter: ensures no tags/titles leaked into URL list
merged_url = [u for u in merged_url if Add_File._is_probable_url(u)]
2025-12-11 12:47:30 -08:00
file_hash = Add_File._resolve_file_hash(
result,
media_path,
pipe_obj,
sidecar_hash
)
2025-12-11 12:47:30 -08:00
2025-12-16 01:45:01 -08:00
# Relationships must not be stored as tags.
# If relationship tags exist (legacy sidecar format), capture them into PipeObject.relationships
# and strip them from the final tag list.
relationship_tags = [
t for t in merged_tags
2025-12-16 01:45:01 -08:00
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if relationship_tags:
try:
# Only build relationships from tags when pipe_obj has no non-empty
# relationships dict of its own.
if (not isinstance(getattr(pipe_obj,
"relationships",
None),
dict) or not pipe_obj.relationships):
2025-12-16 01:45:01 -08:00
king: Optional[str] = None
alts: List[str] = []
# The first king found wins; alternates from every tag are pooled.
for rel_tag in relationship_tags:
k, a = Add_File._parse_relationship_tag_king_alts(rel_tag)
if k and not king:
king = k
if a:
alts.extend(a)
if king:
seen_alt: set[str] = set()
2025-12-29 17:05:03 -08:00
# Keeps 64-character alternates that differ from the king, first
# occurrence only: set.add() returns None, so the `or` expression both
# tests membership and records the hash.
alts = [
h for h in alts if h and h != king and len(h) == 64
2025-12-29 17:05:03 -08:00
and not (h in seen_alt or seen_alt.add(h))
]
payload: Dict[str,
Any] = {
"king": [king]
}
2025-12-16 01:45:01 -08:00
if alts:
payload["alt"] = alts
pipe_obj.relationships = payload
except Exception:
pass
merged_tags = [
t for t in merged_tags if
not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
2025-12-16 01:45:01 -08:00
]
2025-12-11 12:47:30 -08:00
# Persist back to PipeObject
# Tags are always overwritten; title and hash are only filled in when the
# pipe object does not already have one.
2025-12-11 23:21:45 -08:00
pipe_obj.tag = merged_tags
2025-12-11 12:47:30 -08:00
if preferred_title and not pipe_obj.title:
pipe_obj.title = preferred_title
if file_hash and not pipe_obj.hash:
pipe_obj.hash = file_hash
if isinstance(pipe_obj.extra, dict):
2026-01-12 04:05:52 -08:00
# Update (don't setdefault) to ensure URLs matched from sidecars or source stores are tracked
pipe_obj.extra["url"] = merged_url
2025-12-11 12:47:30 -08:00
return merged_tags, merged_url, preferred_title, file_hash
2026-05-04 18:41:01 -07:00
@staticmethod
def _normalize_hash_candidate(value: Any) -> str:
text = str(value or "").strip().lower()
if len(text) != 64:
return ""
if any(ch not in "0123456789abcdef" for ch in text):
return ""
return text
@staticmethod
def _find_existing_hash_by_urls(
    backend: Any,
    urls: Sequence[str],
) -> Optional[str]:
    """Best-effort duplicate lookup: find a stored hash for any of *urls*.

    URLs are stripped, filtered with ``Add_File._is_probable_url`` and
    de-duplicated in order.  Two optional backend hooks are then tried:

      1. ``backend.find_hashes_by_url(url)`` for every URL,
      2. ``backend.search("url:<url>", limit=1, minimal=True)`` for every
         URL, reading ``hash`` / ``file_hash`` / ``sha256`` from the first hit.

    Errors raised by either hook are skipped.  Returns the first value that
    ``Add_File._normalize_hash_candidate`` accepts, or ``None``.
    """
    unique_urls: List[str] = []
    for entry in urls or []:
        cleaned = str(entry or "").strip()
        if (
            cleaned
            and Add_File._is_probable_url(cleaned)
            and cleaned not in unique_urls
        ):
            unique_urls.append(cleaned)
    if not unique_urls:
        return None

    # Pass 1: exact URL -> hashes lookup, when the backend provides one.
    exact_lookup = getattr(backend, "find_hashes_by_url", None)
    if callable(exact_lookup):
        for target in unique_urls:
            try:
                found = exact_lookup(target) or []
            except Exception:
                continue
            if isinstance(found, (list, tuple, set)):
                for raw_hash in found:
                    digest = Add_File._normalize_hash_candidate(raw_hash)
                    if digest:
                        return digest

    # Pass 2: generic search using a "url:" query.
    search = getattr(backend, "search", None)
    if callable(search):
        for target in unique_urls:
            try:
                results = search(f"url:{target}", limit=1, minimal=True) or []
            except Exception:
                continue
            if not (isinstance(results, list) and results):
                continue
            first_hit = results[0]
            for field_name in ("hash", "file_hash", "sha256"):
                digest = Add_File._normalize_hash_candidate(get_field(first_hit, field_name))
                if digest:
                    return digest

    return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _emit_plugin_upload_payload(
    upload_payload: Dict[str, Any],
    plugin_name: str,
    instance_name: Optional[str],
    pipe_obj: models.PipeObject,
    media_path: Path,
    delete_after: bool,
) -> int:
    """Apply a dict returned by an upload plugin to *pipe_obj* and emit it.

    Values are taken from the payload first, then from the pipe object, then
    from *media_path* (title/path) or the literal ``"unknown"`` (hash).
    ``plugin``/``instance`` are added to the extra data only when the payload
    did not already provide them.  After updating the pipe object it is
    emitted and ``_cleanup_after_success`` is called with *delete_after*.

    Returns:
        Always 0.
    """
    data = dict(upload_payload or {})

    # Extra data: payload-supplied entries win over the plugin/instance names.
    merged_extra: Dict[str, Any] = {}
    plugin_extra = data.get("extra")
    if isinstance(plugin_extra, dict):
        merged_extra.update(plugin_extra)
    for extra_key, extra_value in (("plugin", plugin_name), ("instance", instance_name)):
        if extra_value:
            merged_extra.setdefault(extra_key, extra_value)

    # "url" may be a single string or a collection; blanks are dropped.
    reported_urls = data.get("url")
    if isinstance(reported_urls, str):
        single_url = reported_urls.strip()
        merged_extra["url"] = [single_url] if single_url else []
    elif isinstance(reported_urls, (list, tuple, set)):
        merged_extra["url"] = [
            text for text in (str(entry).strip() for entry in reported_urls) if text
        ]

    reported_relationships = data.get("relationships")
    if reported_relationships:
        try:
            pipe_obj.relationships = reported_relationships
        except Exception:
            pass

    reported_tags = data.get("tag")
    if isinstance(reported_tags, list):
        final_tags = [str(entry) for entry in reported_tags]
    else:
        final_tags = list(pipe_obj.tag or [])

    final_title = (
        str(data.get("title") or pipe_obj.title or media_path.name).strip()
        or media_path.name
    )
    final_path = str(data.get("path") or pipe_obj.path or media_path).strip()
    final_hash = (
        str(
            data.get("hash")
            or data.get("file_hash")
            or getattr(pipe_obj, "hash", None)
            or "unknown"
        ).strip()
        or "unknown"
    )
    final_store = str(data.get("store") or "").strip()

    final_plugin = data.get("plugin")
    if final_plugin is None and plugin_name:
        final_plugin = plugin_name

    Add_File._update_pipe_object_destination(
        pipe_obj,
        hash_value=final_hash,
        store=final_store,
        plugin=str(final_plugin) if final_plugin else None,
        path=final_path,
        tag=final_tags,
        title=final_title,
        extra_updates=merged_extra,
    )
    Add_File._emit_pipe_object(pipe_obj)
    Add_File._cleanup_after_success(media_path, delete_source=delete_after)
    return 0
2025-12-11 12:47:30 -08:00
# Uploads `media_path` through the upload-capable plugin named `plugin_name`,
# then updates and emits `pipe_obj`. Returns 0 on success and 1 when the
# plugin cannot be obtained or the upload raises.
@staticmethod
def _handle_plugin_upload(
2025-12-11 12:47:30 -08:00
media_path: Path,
plugin_name: str,
instance_name: Optional[str],
2025-12-11 12:47:30 -08:00
pipe_obj: models.PipeObject,
config: Dict[str,
Any],
2025-12-11 12:47:30 -08:00
delete_after: bool,
) -> int:
"""Handle uploading via an upload plugin (e.g. 0x0)."""
2026-05-21 16:19:17 -07:00
from PluginCore.registry import (
get_plugin_with_capability,
list_plugin_names_with_capability,
list_plugins_with_capability,
)
2025-11-25 20:09:33 -08:00
try:
file_provider = get_plugin_with_capability(plugin_name, "upload", config)
2025-12-11 12:47:30 -08:00
# No usable plugin: show the config panel when the name is a known upload
# plugin, otherwise log an error; enabled alternatives may be listed.
if not file_provider:
available_map = list_plugins_with_capability("upload", config)
known_upload_plugins = set(list_plugin_names_with_capability("upload"))
available_uploads = [name for name, enabled in available_map.items() if enabled and name in known_upload_plugins]
if str(plugin_name or "").strip().lower() in known_upload_plugins:
show_plugin_config_panel([plugin_name])
else:
log(f"Upload plugin '{plugin_name}' is not available or does not support upload", file=sys.stderr)
if available_uploads:
show_available_plugins_panel(sorted(available_uploads))
2025-12-11 12:47:30 -08:00
return 1
2025-11-25 20:09:33 -08:00
# Keyword arguments passed to every plugin's upload() call.
upload_kwargs: Dict[str, Any] = {
"pipe_obj": pipe_obj,
"instance": instance_name,
}
2026-05-24 12:32:57 -07:00
pipeline_progress = PipelineProgress(ctx)
normalized_plugin_name = Add_File._normalize_provider_key(plugin_name)
f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
# The "local" plugin additionally receives prepared metadata; f_hash is
# replaced by the value _prepare_metadata returns.
if normalized_plugin_name == "local":
result = None
tags, urls, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
relationships = Add_File._get_relationships(result, pipe_obj)
direct_export_download = False
# The "_direct_export_download" flag is popped, so it is removed from
# pipe_obj.extra once read.
try:
if isinstance(pipe_obj.extra, dict):
direct_export_download = bool(pipe_obj.extra.pop("_direct_export_download", False))
except Exception:
direct_export_download = False
upload_kwargs.update(
{
"title": title,
"tags": tags,
"urls": urls,
"hash_value": f_hash,
"relationships": relationships,
"direct_export_download": direct_export_download,
2026-05-24 12:32:57 -07:00
"pipeline_progress": pipeline_progress,
}
)
upload_result = file_provider.upload(
str(media_path),
**upload_kwargs,
)
2026-05-04 18:41:01 -07:00
# NOTE(review): the three duplicate_* values below are read from
# pipe_obj.extra but are not referenced again in this function -- confirm
# whether they are still needed.
duplicate_upload = False
duplicate_rule = ""
duplicate_target = ""
try:
if isinstance(getattr(pipe_obj, "extra", None), dict):
duplicate_upload = bool(pipe_obj.extra.get("upload_duplicate"))
duplicate_rule = str(pipe_obj.extra.get("upload_duplicate_rule") or "").strip()
duplicate_target = str(pipe_obj.extra.get("upload_duplicate_target") or "").strip()
except Exception:
duplicate_upload = False
duplicate_rule = ""
duplicate_target = ""
2025-12-11 12:47:30 -08:00
except Exception as exc:
log(f"Upload failed: {exc}", file=sys.stderr)
return 1
# A dict result is handed to _emit_plugin_upload_payload; any other result
# is stringified and treated as the hoster URL.
if isinstance(upload_result, dict):
return Add_File._emit_plugin_upload_payload(
upload_result,
plugin_name,
instance_name,
pipe_obj,
media_path,
delete_after,
)
hoster_url = str(upload_result or "").strip()
2025-12-11 12:47:30 -08:00
# Update PipeObject and emit
extra_updates: Dict[str,
Any] = {
"plugin": plugin_name,
"instance": instance_name,
"plugin_url": hoster_url,
}
2025-12-11 12:47:30 -08:00
if isinstance(pipe_obj.extra, dict):
# Also track hoster URL as a url for downstream steps
existing_known = list(pipe_obj.extra.get("url") or [])
if hoster_url and hoster_url not in existing_known:
existing_known.append(hoster_url)
extra_updates["url"] = existing_known
file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
# store is left empty here: the destination is a plugin, not a store backend.
Add_File._update_pipe_object_destination(
pipe_obj,
2025-12-11 19:04:02 -08:00
hash_value=f_hash or "unknown",
store="",
2026-05-26 15:32:01 -07:00
plugin=plugin_name or None,
2025-12-11 19:04:02 -08:00
path=file_path,
2025-12-11 23:21:45 -08:00
tag=pipe_obj.tag,
2025-12-11 12:47:30 -08:00
title=pipe_obj.title or (media_path.name if media_path else None),
extra_updates=extra_updates,
2025-11-25 20:09:33 -08:00
)
2025-12-11 12:47:30 -08:00
Add_File._emit_pipe_object(pipe_obj)
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
return 0
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _handle_storage_backend(
2025-12-14 00:53:52 -08:00
result: Any,
2025-12-11 12:47:30 -08:00
media_path: Path,
backend_name: str,
pipe_obj: models.PipeObject,
config: Dict[str,
Any],
2025-12-11 12:47:30 -08:00
delete_after: bool,
2025-12-14 00:53:52 -08:00
*,
collect_payloads: Optional[List[Dict[str,
Any]]] = None,
collect_relationship_pairs: Optional[Dict[str,
set[tuple[str,
str]]]] = None,
2025-12-20 23:57:44 -08:00
defer_url_association: bool = False,
pending_url_associations: Optional[Dict[str,
List[tuple[str,
List[str]]]]] = None,
2026-01-19 03:14:30 -08:00
defer_tag_association: bool = False,
pending_tag_associations: Optional[Dict[str,
List[tuple[str,
List[str]]]]] = None,
2025-12-14 00:53:52 -08:00
suppress_last_stage_overlay: bool = False,
2025-12-30 23:19:02 -08:00
auto_search_file: bool = True,
2026-05-23 13:49:47 -07:00
store_instance: Optional[BackendRegistry] = None,
2025-12-11 12:47:30 -08:00
) -> int:
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
2025-12-17 03:16:41 -08:00
##log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
2026-05-23 13:49:47 -07:00
pipeline_progress = PipelineProgress(ctx)
def _set_status(text: str) -> None:
try:
pipeline_progress.set_status(f"{backend_name}: {text}")
except Exception:
pass
def _clear_status() -> None:
try:
pipeline_progress.clear_status()
except Exception:
pass
2025-12-14 00:53:52 -08:00
delete_after_effective = bool(delete_after)
2026-01-12 04:05:52 -08:00
# ... (lines omitted for brevity but I need to keep them contextually correct)
2025-12-14 00:53:52 -08:00
if not delete_after_effective:
# When download-media is piped into add-file, the downloaded artifact is a temp file.
# After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
try:
if (str(backend_name or "").strip().lower() != "temp"
and getattr(pipe_obj,
"is_temp",
False)
and getattr(pipe_obj,
"action",
None) == "cmdlet:download-media"):
from SYS.config import resolve_output_dir
2025-12-29 17:05:03 -08:00
2025-12-14 00:53:52 -08:00
temp_dir = resolve_output_dir(config)
try:
if media_path.resolve().is_relative_to(
temp_dir.expanduser().resolve()):
2025-12-14 00:53:52 -08:00
delete_after_effective = True
debug(
f"[add-file] Auto-delete temp source after ingest: {media_path}"
)
2025-12-14 00:53:52 -08:00
except Exception:
# If path resolution fails, fall back to non-destructive behavior
pass
except Exception:
pass
2025-12-29 17:05:03 -08:00
2025-12-11 12:47:30 -08:00
try:
2026-05-23 13:49:47 -07:00
backend_registry = store_instance if store_instance is not None else BackendRegistry(config)
backend, backend_registry, backend_exc = sh.get_preferred_store_backend(
2026-05-04 18:41:01 -07:00
config,
backend_name,
2026-05-23 13:49:47 -07:00
store_registry=backend_registry,
2026-05-04 18:41:01 -07:00
suppress_debug=True,
)
if backend is None:
raise backend_exc or KeyError(f"Unknown store backend: {backend_name}")
2025-12-29 17:05:03 -08:00
2026-01-31 23:41:47 -08:00
# Use backend properties to drive metadata deferral behavior.
is_remote_backend = getattr(backend, "is_remote", False)
prefer_defer_tags = getattr(backend, "prefer_defer_tags", False)
2026-05-04 18:41:01 -07:00
supports_url_association = bool(getattr(backend, "supports_url_association", False))
supports_note_association = bool(getattr(backend, "supports_note_association", False))
supports_relationship_association = bool(getattr(backend, "supports_relationship_association", False))
2026-01-15 03:20:52 -08:00
2026-01-31 23:41:47 -08:00
# ...
2025-12-11 12:47:30 -08:00
# Prepare metadata from pipe_obj and sidecars
2025-12-29 17:05:03 -08:00
tags, url, title, f_hash = Add_File._prepare_metadata(
result, media_path, pipe_obj, config
)
2025-12-16 01:45:01 -08:00
2025-12-20 02:12:45 -08:00
# If we're moving/copying from one store to another, also copy the source store's
# existing associated URLs so they aren't lost.
try:
from SYS.metadata import normalize_urls
2025-12-20 02:12:45 -08:00
source_store = None
source_hash = None
if isinstance(result, dict):
source_store = result.get("store")
source_hash = result.get("hash")
if not source_store:
source_store = getattr(pipe_obj, "store", None)
if not source_hash:
source_hash = getattr(pipe_obj, "hash", None)
if (not source_hash) and isinstance(pipe_obj.extra, dict):
source_hash = pipe_obj.extra.get("hash")
source_store = str(source_store or "").strip()
source_hash = str(source_hash or "").strip().lower()
if (source_store and source_hash and len(source_hash) == 64
and source_store.lower() != str(backend_name or ""
).strip().lower()):
2025-12-20 02:12:45 -08:00
source_backend = None
try:
if source_store in store.list_backends():
source_backend = store[source_store]
except Exception:
source_backend = None
if source_backend is not None:
try:
src_urls = normalize_urls(
source_backend.get_url(source_hash) or []
)
2025-12-20 02:12:45 -08:00
except Exception:
src_urls = []
try:
dst_urls = normalize_urls(url or [])
except Exception:
dst_urls = []
merged: list[str] = []
seen: set[str] = set()
for u in list(dst_urls or []) + list(src_urls or []):
if not u:
continue
if u in seen:
continue
seen.add(u)
merged.append(u)
url = merged
except Exception:
pass
2025-12-16 01:45:01 -08:00
# Collect relationship pairs for post-ingest DB/API persistence.
2026-05-04 18:41:01 -07:00
if collect_relationship_pairs is not None and supports_relationship_association:
2025-12-16 01:45:01 -08:00
rels = Add_File._get_relationships(result, pipe_obj)
if isinstance(rels, dict) and rels:
king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
if king_hash and alt_hashes:
bucket = collect_relationship_pairs.setdefault(
str(backend_name),
set()
)
2025-12-16 01:45:01 -08:00
for alt_hash in alt_hashes:
if alt_hash and alt_hash != king_hash:
bucket.add((alt_hash, king_hash))
# Relationships must never be stored as tags.
if isinstance(tags, list) and tags:
2025-12-29 17:05:03 -08:00
tags = [
t for t in tags if not (
isinstance(t, str)
and t.strip().lower().startswith("relationship:")
)
2025-12-29 17:05:03 -08:00
]
2025-12-30 23:19:02 -08:00
# Auto-tag (best-effort) BEFORE uploading so tags land with the stored file.
try:
tags = _maybe_apply_florencevision_tags(media_path, list(tags or []), config, pipe_obj=pipe_obj)
pipe_obj.tag = list(tags or [])
except Exception as exc:
# strict mode raises from helper; treat here as a hard failure
log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr)
return 1
2026-01-04 02:23:50 -08:00
upload_tags = tags
2026-01-31 23:41:47 -08:00
if prefer_defer_tags and upload_tags:
2026-01-04 02:23:50 -08:00
upload_tags = []
2026-04-16 17:18:50 -07:00
try:
debug_panel(
"add-file store",
[
("backend", backend_name),
("path", media_path),
("title", title),
("hash_hint", f_hash[:12] if f_hash else "N/A"),
("defer_tags", bool(prefer_defer_tags and tags)),
],
border_style="yellow",
)
except Exception:
pass
2026-01-02 02:28:59 -08:00
2026-05-04 18:41:01 -07:00
duplicate_hash = Add_File._find_existing_hash_by_urls(backend, url)
if duplicate_hash:
debug(
f"[add-file] URL duplicate detected in '{backend_name}', skipping upload and reusing hash {duplicate_hash[:12]}..."
)
file_identifier = duplicate_hash
else:
# Call backend's add_file with full metadata.
# Backend returns hash as identifier. If we already know the hash from _resolve_source
# (which came from download-file emit), pass it to skip re-hashing large files.
file_identifier = backend.add_file(
media_path,
title=title,
tag=upload_tags,
url=[] if ((defer_url_association and url) or (not supports_url_association)) else url,
file_hash=f_hash,
2026-05-23 13:49:47 -07:00
pipeline_progress=pipeline_progress,
transfer_label=title or media_path.name,
2026-05-04 18:41:01 -07:00
)
2025-12-17 03:16:41 -08:00
##log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
2025-12-11 19:04:02 -08:00
stored_path: Optional[str] = None
2026-01-31 23:41:47 -08:00
# IMPORTANT: avoid calling get_file() for remote backends by default to avoid
# unintended network activity or credential exposure in result payloads.
2025-12-11 19:04:02 -08:00
try:
2026-01-31 23:41:47 -08:00
if not is_remote_backend:
# For local backends, resolving the path is cheap and useful.
2025-12-16 01:45:01 -08:00
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
stored_path = maybe_path
2025-12-11 19:04:02 -08:00
except Exception:
stored_path = None
2025-12-29 17:05:03 -08:00
2026-01-18 13:10:31 -08:00
# Compute canonical hash value for downstream use (defensive against non-string returns).
if isinstance(file_identifier, str) and len(file_identifier) == 64:
chosen_hash = file_identifier
else:
chosen_hash = f_hash or (str(file_identifier) if file_identifier is not None else "unknown")
2025-12-11 12:47:30 -08:00
Add_File._update_pipe_object_destination(
pipe_obj,
2026-01-18 13:10:31 -08:00
hash_value=chosen_hash,
2025-12-11 12:47:30 -08:00
store=backend_name,
2025-12-11 19:04:02 -08:00
path=stored_path,
2025-12-11 23:21:45 -08:00
tag=tags,
2025-12-11 12:47:30 -08:00
title=title or pipe_obj.title or media_path.name,
extra_updates={
"url": url,
},
)
2025-12-13 12:09:50 -08:00
2025-12-30 23:19:02 -08:00
# Emit a search-file-like payload for consistent tables and natural piping.
2026-05-24 12:32:57 -07:00
# Keep hash/store for downstream commands (get-tag, download-file, etc.).
2026-01-18 13:10:31 -08:00
resolved_hash = chosen_hash
2025-12-13 12:09:50 -08:00
2026-01-31 23:41:47 -08:00
if prefer_defer_tags and tags:
2026-01-19 03:14:30 -08:00
# Support deferring tag application for batching bulk operations
if defer_tag_association and pending_tag_associations is not None:
try:
pending_tag_associations.setdefault(str(backend_name), []).append((str(resolved_hash), list(tags)))
except Exception:
pass
else:
try:
adder = getattr(backend, "add_tag", None)
if callable(adder):
2026-05-23 13:49:47 -07:00
_set_status("applying deferred tags")
2026-01-19 03:14:30 -08:00
adder(resolved_hash, list(tags))
except Exception as exc:
2026-01-31 23:41:47 -08:00
log(f"[add-file] Post-upload tagging failed for {backend_name}: {exc}", file=sys.stderr)
2026-01-04 02:23:50 -08:00
2025-12-14 00:53:52 -08:00
# If we have url(s), ensure they get associated with the destination file.
# This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
2026-05-04 18:41:01 -07:00
if url and supports_url_association:
2025-12-20 23:57:44 -08:00
if defer_url_association and pending_url_associations is not None:
try:
pending_url_associations.setdefault(
str(backend_name),
[]
).append((str(resolved_hash),
list(url)))
2025-12-20 23:57:44 -08:00
except Exception:
pass
else:
try:
2026-01-15 03:20:52 -08:00
# Folder.add_file already persists URLs, avoid extra DB traffic here.
if not is_folder_backend:
2026-05-23 13:49:47 -07:00
_set_status("associating urls")
2026-01-15 03:20:52 -08:00
backend.add_url(resolved_hash, list(url))
2025-12-20 23:57:44 -08:00
except Exception:
pass
2025-12-14 00:53:52 -08:00
2025-12-16 23:23:43 -08:00
# If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
# persist it automatically like add-note would.
2026-05-23 13:49:47 -07:00
def _write_note(note_name: str, note_text: Optional[str]) -> None:
if not note_text or not supports_note_association:
return
2025-12-16 23:23:43 -08:00
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
2026-05-23 13:49:47 -07:00
_set_status(f"writing {note_name} note")
setter(resolved_hash, note_name, note_text)
2026-01-02 02:28:59 -08:00
except Exception as exc:
debug_panel(
"add-file note write failed",
[
("store", backend_name),
("hash", resolved_hash),
2026-05-23 13:49:47 -07:00
("note", note_name),
("error", exc),
],
border_style="yellow",
)
2026-01-03 03:37:48 -08:00
2026-05-23 13:49:47 -07:00
_write_note("sub", Add_File._get_note_text(result, pipe_obj, "sub"))
_write_note("lyric", Add_File._get_note_text(result, pipe_obj, "lyric"))
_write_note("chapters", Add_File._get_note_text(result, pipe_obj, "chapters"))
_write_note("caption", Add_File._get_note_text(result, pipe_obj, "caption"))
2025-12-30 23:19:02 -08:00
meta: Dict[str,
Any] = {}
2025-12-13 12:09:50 -08:00
try:
2026-01-15 03:20:52 -08:00
if not is_folder_backend:
2026-05-23 13:49:47 -07:00
_set_status("loading stored metadata")
2026-01-15 03:20:52 -08:00
meta = backend.get_metadata(resolved_hash) or {}
2025-12-13 12:09:50 -08:00
except Exception:
meta = {}
# Determine size bytes
size_bytes: Optional[int] = None
for key in ("size_bytes", "size", "filesize", "file_size"):
try:
raw_size = meta.get(key)
if raw_size is not None:
size_bytes = int(raw_size)
break
except Exception:
pass
if size_bytes is None:
try:
size_bytes = int(media_path.stat().st_size)
except Exception:
size_bytes = None
# Determine title/ext
title_out = (
meta.get("title") or title or pipe_obj.title or media_path.stem
or media_path.name
2025-12-13 12:09:50 -08:00
)
2025-12-29 17:05:03 -08:00
ext_out = meta.get("ext") or media_path.suffix.lstrip(".")
2025-12-13 12:09:50 -08:00
payload: Dict[
str,
Any
] = {
2025-12-13 12:09:50 -08:00
"title": title_out,
"ext": str(ext_out or ""),
"size_bytes": size_bytes,
"store": backend_name,
"hash": resolved_hash,
# Preserve extra fields for downstream commands (kept hidden by default table rules)
"path": stored_path,
"tag": list(tags or []),
"url": list(url or []),
}
2025-12-14 00:53:52 -08:00
if collect_payloads is not None:
try:
collect_payloads.append(payload)
except Exception:
pass
# Keep the add-file 1-row summary overlay (when last stage), then emit the
2025-12-30 23:19:02 -08:00
# canonical search-file payload/table for piping/selection consistency.
if auto_search_file and resolved_hash and resolved_hash != "unknown":
# Show the add-file summary (overlay only) but let search-file provide the downstream payload.
2025-12-29 17:05:03 -08:00
Add_File._emit_storage_result(
payload,
overlay=not suppress_last_stage_overlay,
emit=False
2025-12-29 17:05:03 -08:00
)
2025-12-14 00:53:52 -08:00
2025-12-30 23:19:02 -08:00
refreshed_items = Add_File._try_emit_search_file_by_hash(
2026-05-03 21:20:05 -07:00
instance=backend_name,
2025-12-14 00:53:52 -08:00
hash_value=resolved_hash,
config=config,
)
2025-12-16 01:45:01 -08:00
if refreshed_items:
# Re-emit the canonical store rows so downstream stages receive them.
for emitted in refreshed_items:
ctx.emit(emitted)
else:
2025-12-14 00:53:52 -08:00
# Fall back to emitting the add-file payload so downstream stages still receive an item.
ctx.emit(payload)
else:
2025-12-29 17:05:03 -08:00
Add_File._emit_storage_result(
payload,
overlay=not suppress_last_stage_overlay,
emit=True
2025-12-29 17:05:03 -08:00
)
Add_File._cleanup_after_success(
media_path,
delete_source=delete_after_effective
)
2026-05-23 13:49:47 -07:00
_clear_status()
2025-12-11 12:47:30 -08:00
return 0
2025-12-29 17:05:03 -08:00
2025-12-11 12:47:30 -08:00
except Exception as exc:
2026-05-23 13:49:47 -07:00
_clear_status()
log(
f"❌ Failed to add file to backend '{backend_name}': {exc}",
file=sys.stderr
)
2025-12-11 12:47:30 -08:00
import traceback
2025-12-29 17:05:03 -08:00
2025-12-11 12:47:30 -08:00
traceback.print_exc(file=sys.stderr)
return 1
# --- Helpers ---
2025-12-20 23:57:44 -08:00
@staticmethod
def _apply_pending_url_associations(
    pending: Dict[str, List[tuple[str, List[str]]]],
    config: Dict[str, Any],
    store_instance: Optional[BackendRegistry] = None,
) -> None:
    """Flush deferred URL associations, one backend at a time.

    Args:
        pending: Maps a backend name to the ``(hash, urls)`` pairs queued
            for it. ``None`` or an empty mapping is a no-op.
        config: Configuration used to build a ``BackendRegistry`` when none
            is supplied, and forwarded to ``sh.get_store_backend``.
        store_instance: Optional registry to reuse instead of creating one.

    The whole operation is best-effort: a backend that cannot be resolved,
    whose ``supports_url_association`` attribute is missing or falsy, or
    that raises, is skipped without reporting an error.
    """
    if store_instance is not None:
        registry = store_instance
    else:
        try:
            registry = BackendRegistry(config)
        except Exception:
            return

    for store_name, queued_pairs in (pending or {}).items():
        if not queued_pairs:
            continue
        try:
            # The lookup also hands back the registry, which is reused for
            # the remaining backends.
            backend, registry, _ = sh.get_store_backend(
                config,
                store_name,
                store_registry=registry,
            )
            if backend is None:
                continue
            if not getattr(backend, "supports_url_association", False):
                continue

            merged = sh.coalesce_hash_value_pairs(queued_pairs)
            if not merged:
                continue

            # Prefer the bulk API; fall back to per-item calls when it is
            # missing or fails.
            add_many = getattr(backend, "add_url_bulk", None)
            if callable(add_many):
                try:
                    add_many(merged)
                except Exception:
                    pass
                else:
                    continue

            add_one = getattr(backend, "add_url", None)
            if not callable(add_one):
                continue
            for file_hash, urls in merged:
                try:
                    add_one(file_hash, urls)
                except Exception:
                    pass
        except Exception:
            continue
2026-01-19 03:14:30 -08:00
@staticmethod
def _apply_pending_tag_associations(
    pending: Dict[str, List[tuple[str, List[str]]]],
    config: Dict[str, Any],
    store_instance: Optional[BackendRegistry] = None,
) -> None:
    """Flush deferred tag associations through the shared batch helper.

    Args:
        pending: Maps a backend name to the ``(hash, tags)`` pairs queued
            for it; a falsy value is replaced by an empty mapping.
        config: Configuration used to build a ``BackendRegistry`` when none
            is supplied, and forwarded to the batch helper.
        store_instance: Optional registry to reuse instead of creating one.

    The work is delegated to ``sh.run_store_hash_value_batches`` with
    ``add_tags_bulk`` as the bulk method and ``add_tag`` as the per-item
    method. If the registry cannot be created, nothing is applied.
    """
    if store_instance is None:
        try:
            registry = BackendRegistry(config)
        except Exception:
            return
    else:
        registry = store_instance

    queued = pending if pending else {}
    sh.run_store_hash_value_batches(
        config,
        queued,
        bulk_method_name="add_tags_bulk",
        single_method_name="add_tag",
        store_registry=registry,
        pass_config_to_bulk=False,
        pass_config_to_single=False,
    )
2026-01-19 03:14:30 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _load_sidecar_bundle(
    media_path: Path,
    instance: Optional[str],
    config: Dict[str, Any],
) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]:
    """Return an empty sidecar bundle.

    This is currently a stub: all three arguments are ignored, no file is
    read, and the result is always ``(None, None, [], [])`` with fresh
    list objects on every call.
    """
    empty_bundle = (None, None, [], [])
    return empty_bundle
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _resolve_file_hash(
    result: Any,
    media_path: Path,
    pipe_obj: models.PipeObject,
    fallback_hash: Optional[str],
) -> Optional[str]:
    """Pick the best available hash for a file.

    Sources are tried in this order, and the first usable one wins:

    1. ``pipe_obj.hash``, unless it is empty or the literal ``"unknown"``.
    2. ``fallback_hash``, when truthy.
    3. ``result["hash"]`` when ``result`` is a dict, converted to ``str``.
    4. ``sha256_file(media_path)``.

    Returns:
        The selected hash, or ``None`` when hashing the file raises.
    """
    piped_hash = pipe_obj.hash
    if piped_hash and piped_hash != "unknown":
        return piped_hash

    if fallback_hash:
        return fallback_hash

    from_result = result.get("hash") if isinstance(result, dict) else None
    if from_result:
        return str(from_result)

    # Last resort: hash the file contents.
    try:
        digest = sha256_file(media_path)
    except Exception:
        return None
    return digest
@staticmethod
def _resolve_media_kind(path: Path) -> str:
    """Return the media kind for ``path``.

    Thin wrapper that delegates to ``resolve_media_kind_by_extension``.
    """
    media_kind = resolve_media_kind_by_extension(path)
    return media_kind
2025-12-11 12:47:30 -08:00
@staticmethod
def _persist_local_metadata(
    library_root: Path,
    dest_path: Path,
    tags: List[str],
    url: List[str],
    f_hash: Optional[str],
    relationships: Any,
    duration: Any,
    media_kind: str,
):
    """Do nothing; placeholder for persisting local metadata.

    Every argument is accepted and ignored, and ``None`` is returned.
    """
    return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _copy_sidecars(source_path: Path, target_path: Path):
    """Copy the sidecar files found next to ``source_path`` beside ``target_path``.

    The candidates are the source file name extended with ``.json``,
    ``.tag``, ``.metadata`` or ``.notes``. Each candidate that exists is
    copied into ``target_path``'s directory (created if needed) under
    ``target_path``'s name plus the same extension. Copying is best-effort:
    a failure on one sidecar is ignored and the rest are still attempted.
    """
    source_name = source_path.name
    candidates = [source_path.with_suffix(source_path.suffix + ".json")]
    candidates.extend(
        source_path.with_name(source_name + extension)
        for extension in (".tag", ".metadata", ".notes")
    )

    for sidecar in candidates:
        try:
            if not sidecar.exists():
                continue
            # Whatever is left after removing the source file name is the
            # sidecar extension to carry over to the target.
            extension = sidecar.name.replace(source_name, "", 1)
            copy_target = target_path.parent / f"{target_path.name}{extension}"
            copy_target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(sidecar), copy_target)
        except Exception:
            pass
@staticmethod
def _cleanup_after_success(media_path: Path, delete_source: bool):
    """Delete the source file and its sidecars after a successful add.

    The file is removed when ``delete_source`` is truthy, or when its name
    contains ``"(merged)"`` or ``".dlhx_"`` (temporary merge/tracking
    files, which are removed even without an explicit request). Otherwise
    the file is left alone.

    Deletion is best-effort: any error is logged to stderr, not raised.
    Sidecar cleanup only runs after the file itself was removed.
    """
    file_name = media_path.name
    is_temporary = any(marker in file_name for marker in ("(merged)", ".dlhx_"))

    # Nothing asked for deletion and the file does not look temporary.
    if not (delete_source or is_temporary):
        return

    try:
        media_path.unlink()
        Add_File._cleanup_sidecar_files(media_path)
    except Exception as exc:
        log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
2025-12-11 12:47:30 -08:00
@staticmethod
def _cleanup_sidecar_files(media_path: Path):
    """Delete the ``.metadata``, ``.notes`` and ``.tag`` sidecars of a file.

    Each sidecar lives in ``media_path``'s directory and is named after
    the full file name plus the extension. Missing sidecars are skipped
    and any error while removing one is ignored.
    """
    folder = media_path.parent
    base_name = media_path.name
    sidecars = [
        folder / (base_name + extension)
        for extension in (".metadata", ".notes", ".tag")
    ]
    for sidecar in sidecars:
        try:
            if not sidecar.exists():
                continue
            sidecar.unlink()
        except Exception:
            pass
# Module-level Add_File instance, exposed under the name CMDLET.
# NOTE(review): presumably the cmdlet loader picks it up by this name — confirm.
CMDLET = Add_File()