from __future__ import annotations from typing import Any, Dict, Optional, Sequence, Tuple, List from pathlib import Path import sys import shutil import tempfile import re from urllib.parse import urlparse from SYS import models from SYS import pipeline as ctx from SYS.logger import log, debug, debug_panel from SYS.payload_builders import build_table_result_payload from SYS.pipeline_progress import PipelineProgress from SYS.result_publication import overlay_existing_result_table, publish_result_table from SYS.rich_display import show_available_plugins_panel, show_plugin_config_panel from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS from PluginCore.backend_registry import BackendRegistry from API.HTTP import download_direct_file from .. import _shared as sh Cmdlet = sh.Cmdlet CmdletArg = sh.CmdletArg parse_cmdlet_args = sh.parse_cmdlet_args SharedArgs = sh.SharedArgs extract_tag_from_result = sh.extract_tag_from_result extract_title_from_result = sh.extract_title_from_result extract_url_from_result = sh.extract_url_from_result merge_sequences = sh.merge_sequences extract_relationships = sh.extract_relationships extract_duration = sh.extract_duration coerce_to_pipe_object = sh.coerce_to_pipe_object collapse_namespace_tags = sh.collapse_namespace_tags resolve_target_dir = sh.resolve_target_dir resolve_media_kind_by_extension = sh.resolve_media_kind_by_extension coerce_to_path = sh.coerce_to_path build_pipeline_preview = sh.build_pipeline_preview get_field = sh.get_field from SYS.utils import sha256_file, unique_path, sanitize_filename # Canonical supported filetypes for all stores/cmdlets SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS class _CommandDependencies: """Command-scope cache for the backend registry and plugin instances.""" def __init__(self, config: Dict[str, Any]) -> None: self.config = config self._backend_registry: Optional[BackendRegistry] = None self._plugins: Dict[str, Any] = {} def get_backend_registry(self) -> 
Optional[BackendRegistry]: """Lazily initialize and return the command-scope backend registry.""" if self._backend_registry is None: try: self._backend_registry = BackendRegistry(self.config) except Exception: self._backend_registry = None return self._backend_registry def get_plugin(self, name: str) -> Optional[Any]: """Cached plugin lookup by name.""" from PluginCore.registry import get_plugin norm_name = str(name or "").strip().lower() if not norm_name: return None if norm_name in self._plugins: return self._plugins[norm_name] plugin = get_plugin(norm_name, self.config) self._plugins[norm_name] = plugin return plugin def get_plugin_with_capability(self, name: str, capability: str) -> Optional[Any]: """Cached plugin lookup with capability check.""" from PluginCore.registry import get_plugin_with_capability norm_name = str(name or "").strip().lower() if not norm_name: return None cache_key = f"{norm_name}#{capability}" if cache_key in self._plugins: return self._plugins[cache_key] plugin = get_plugin_with_capability(norm_name, capability, self.config) self._plugins[cache_key] = plugin return plugin _REMOTE_URL_PREFIXES: tuple[str, ...] = ( "http://", "https://", "ftp://", "ftps://", "magnet:", "torrent:", "tidal:", "hydrus:", ) def _maybe_apply_florencevision_tags( media_path: Path, tags: List[str], config: Dict[str, Any], pipe_obj: Optional[models.PipeObject] = None, ) -> List[str]: """Optionally auto-tag images using the FlorenceVision plugin helper. Controlled via config: [plugin=florencevision] enabled=true strict=false If strict=false (default), failures log a warning and return the original tags. If strict=true, failures raise to abort the ingest. 
""" strict = False try: plugin_block = (config or {}).get("plugin") fv_block = plugin_block.get("florencevision") if isinstance(plugin_block, dict) else None enabled = False if isinstance(fv_block, dict): enabled = bool(fv_block.get("enabled")) strict = bool(fv_block.get("strict")) if not enabled: return tags from plugins.florencevision import FlorenceVisionTool # Special-case: if this file was produced by the `screen-shot` cmdlet, # OCR is more useful than caption/detection for tagging screenshots. cfg_for_tool: Dict[str, Any] = config try: action = str(getattr(pipe_obj, "action", "") or "") if pipe_obj is not None else "" cmdlet_name = "" if action.lower().startswith("cmdlet:"): cmdlet_name = action.split(":", 1)[1].strip().lower() if cmdlet_name in {"screen-shot", "screen_shot", "screenshot"}: plugin_block2 = dict((config or {}).get("plugin") or {}) fv_block2 = dict(plugin_block2.get("florencevision") or {}) fv_block2["task"] = "ocr" plugin_block2["florencevision"] = fv_block2 cfg_for_tool = dict(config or {}) cfg_for_tool["plugin"] = plugin_block2 except Exception: cfg_for_tool = config fv = FlorenceVisionTool(cfg_for_tool) if not fv.enabled() or not fv.applicable_path(media_path): return tags auto_tags = fv.tags_for_file(media_path) # Capture caption (if any) into PipeObject notes for downstream persistence. try: caption_text = getattr(fv, "last_caption", None) if caption_text and pipe_obj is not None: if not isinstance(pipe_obj.extra, dict): pipe_obj.extra = {} notes = pipe_obj.extra.get("notes") if not isinstance(notes, dict): notes = {} notes.setdefault("caption", caption_text) pipe_obj.extra["notes"] = notes except Exception: pass if not auto_tags: return tags merged = merge_sequences(tags or [], auto_tags, case_sensitive=False) debug(f"[add-file] FlorenceVision added {len(auto_tags)} tag(s)") return merged except Exception as exc: # Decide strictness from config if we couldn't read it above. 
strict2 = False try: tool_block = (config or {}).get("tool") fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None strict2 = bool(fv_block.get("strict")) if isinstance(fv_block, dict) else False except Exception: strict2 = False if strict or strict2: raise log(f"[add-file] Warning: FlorenceVision tagging failed: {exc}", file=sys.stderr) return tags class Add_File(Cmdlet): """Add file into the DB""" def __init__(self) -> None: """Initialize add-file cmdlet.""" super().__init__( name="add-file", summary= "Ingest a local media file to a configured store or plugin destination.", usage= "add-file ( | ) (-instance | -plugin [-instance ]) [-delete]", arg=[ CmdletArg( name="source", type="string", required=False, description="Local file or directory path to ingest or scan.", ), SharedArgs.INSTANCE, SharedArgs.URL, SharedArgs.PLUGIN, CmdletArg( name="delete", type="flag", required=False, description="Delete file after successful upload", alias="del", ), ], detail=[ "Note: add-file ingests local files. 
To fetch remote sources, use download-file and pipe into add-file.", "- Store options (use -instance without -plugin):", " hydrus: Upload to Hydrus database with metadata tagging", "- Plugin options (use -plugin):", " local: Copy file to a configured local destination or direct path via -instance", " 0x0: Upload to 0x0.st for temporary hosting", " file.io: Upload to file.io for temporary hosting", " internetarchive: Upload to archive.org (optional tag: ia: to upload into an existing item)", "- Use a positional source path with -instance and -plugin to target a named provider config: add-file C:\\Media\\file.pdf -plugin ftp -instance archive", ], examples=[ 'download-file "https://themathesontrust.org/papers/christianity/alcock-alphabet1.pdf" | add-file -instance tutorial', '@1 | add-file -plugin local -instance C:\\Users\\Me\\Downloads', 'add-file C:\\Media\\report.pdf -plugin ftp -instance archive', ], exec=self.run, ) self.register() @staticmethod def _uses_legacy_path_flag(args: Sequence[str]) -> bool: for token in args or []: lowered = str(token or "").strip().lower() if lowered in {"-path", "--path", "-p"}: return True return False @staticmethod def _legacy_path_flag_message() -> str: return ( "add-file no longer supports -path. Pass the source file or directory as a positional argument, " "and use -plugin local -instance for local export." 
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Main execution entry point for ``add-file``.

    Resolves the destination (storage backend via ``-instance`` or a plugin
    via ``-plugin``), decides which items to ingest (comma-separated source
    list, directory scan, positional source path, or piped ``result``), and
    then ingests each item while reporting progress.

    Args:
        result: Piped input from the previous stage (single item, list, or None).
        args: Raw cmdlet argument tokens.
        config: Application config mapping.

    Returns:
        0 when at least one item was ingested, when the directory selector
        table was shown, or when there is nothing to do inside a pipeline
        stage; 1 otherwise (including use of the removed ``-path`` flag and
        unresolvable destinations).
    """
    if Add_File._uses_legacy_path_flag(args):
        log(Add_File._legacy_path_flag_message(), file=sys.stderr)
        return 1

    parsed = parse_cmdlet_args(args, self)
    progress = PipelineProgress(ctx)

    # Initialize command-scope dependency context (caches Store/plugins)
    deps = _CommandDependencies(config)
    storage_registry = deps.get_backend_registry()

    source_arg = parsed.get("source")
    location = parsed.get("instance")
    plugin_instance = parsed.get("instance")
    source_url_arg = parsed.get("url")
    plugin_name = parsed.get("plugin")
    delete_after = parsed.get("delete", False)
    local_export_destination: Optional[str] = None

    if plugin_name and not plugin_instance and location:
        plugin_instance = location

    stage_ctx = ctx.get_stage_context()
    is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
    has_downstream_stage = bool(stage_ctx is not None and not is_last_stage)

    # Directory-mode selector:
    # - Terminal use: `add-file -instance X` shows a selectable table.
    # - Pipelined use: `add-file -instance X | ...` processes the full batch
    #   immediately so downstream stages receive the uploaded items.
    # - Selection replay: `@N` re-runs add-file with `file1,file2,...` as the source token.
    dir_scan_mode = False
    dir_scan_results: Optional[List[Dict[str, Any]]] = None
    explicit_source_list_results: Optional[List[Dict[str, Any]]] = None

    if source_arg and location and not plugin_name:
        # Support comma-separated source lists: "file1,file2,file3"
        # This is the mechanism used by @N expansion for directory tables.
        try:
            source_text = str(source_arg)
        except Exception:
            source_text = ""

        if "," in source_text:
            parts = [p.strip().strip('"') for p in source_text.split(",")]
            parts = [p for p in parts if p]

            batch: List[Dict[str, Any]] = []
            for p in parts:
                try:
                    file_path = Path(p)
                except Exception:
                    continue
                if not file_path.exists() or not file_path.is_file():
                    continue
                ext = file_path.suffix.lower()
                if ext not in SUPPORTED_MEDIA_EXTENSIONS:
                    continue
                try:
                    hv = sha256_file(file_path)
                except Exception:
                    continue
                try:
                    size = file_path.stat().st_size
                except Exception:
                    size = 0
                batch.append(
                    {
                        "path": file_path,
                        "name": file_path.name,
                        "hash": hv,
                        "size": size,
                        "ext": ext,
                    }
                )

            if batch:
                explicit_source_list_results = batch
                # Clear source_arg so add-file doesn't treat it as a single path.
                source_arg = None
        else:
            # Directory scan (selector table, no ingest yet)
            try:
                candidate_dir = Path(str(source_arg))
                if candidate_dir.exists() and candidate_dir.is_dir():
                    dir_scan_mode = True
                    debug(
                        f"[add-file] Scanning directory for batch add: {candidate_dir}"
                    )
                    dir_scan_results = Add_File._scan_directory_for_files(
                        candidate_dir
                    )
                    if dir_scan_results:
                        debug(
                            f"[add-file] Found {len(dir_scan_results)} supported files in directory"
                        )
                        # Clear source_arg so it doesn't trigger single-item mode.
                        source_arg = None
            except Exception as exc:
                debug(f"[add-file] Directory scan failed: {exc}")

    # Nothing piped in and nothing to scan: a no-op when running inside a pipeline stage.
    if result is None and not source_arg and not explicit_source_list_results and not dir_scan_results:
        try:
            if ctx.get_stage_context() is not None:
                return 0
        except Exception:
            pass

    # Determine if -instance targets a registered backend (vs a filesystem export path).
    is_storage_backend_location = False
    if location:
        try:
            backend_registry_for_lookup = storage_registry or deps.get_backend_registry()
            is_storage_backend_location = Add_File._resolve_backend_by_name(backend_registry_for_lookup, str(location)) is not None
        except Exception:
            is_storage_backend_location = False

    # -instance without -plugin that is not a known backend: try it as a local export target.
    if location and not plugin_name and not is_storage_backend_location:
        resolved_local_instance, resolved_local_path = Add_File._resolve_local_export_plugin_target(
            location,
            config,
            deps=deps,
            require_explicit=True,
        )
        if resolved_local_path:
            plugin_name = "local"
            plugin_instance = resolved_local_instance or str(location)
            location = None
            local_export_destination = resolved_local_path
        else:
            log(
                f"Storage backend '{location}' not found. Use -plugin local -instance for local export or configure that store backend.",
                file=sys.stderr,
            )
            return 1

    normalized_plugin_name = Add_File._normalize_provider_key(plugin_name)
    if normalized_plugin_name == "local":
        resolved_local_instance, resolved_local_path = Add_File._resolve_local_export_plugin_target(
            plugin_instance or location,
            config,
            deps=deps,
            require_explicit=bool(plugin_instance or location),
        )
        if not resolved_local_path:
            requested_local = str(plugin_instance or location or "").strip() or ""
            log(
                f"Local destination '{requested_local}' is not configured. Use -plugin local -instance .",
                file=sys.stderr,
            )
            return 1
        plugin_name = "local"
        plugin_instance = resolved_local_instance or str(plugin_instance or location or "").strip() or None
        location = None
        local_export_destination = resolved_local_path

    plugin_storage_backend = None
    if plugin_name:
        plugin_storage_backend = Add_File._resolve_plugin_storage_backend(
            plugin_name,
            plugin_instance,
            config,
            store_instance=storage_registry,
            deps=deps,
        )

    effective_storage_backend_name = plugin_storage_backend or (
        str(location) if location and is_storage_backend_location else None
    )

    # Decide which items to process.
    # - If directory scan was performed, use those results
    # - If user provided a positional source path, treat this invocation as single-item.
    # - Otherwise, if piped input is a list, ingest each item.
    if explicit_source_list_results:
        items_to_process = explicit_source_list_results
        debug(f"[add-file] Using {len(items_to_process)} files from source list")
    elif dir_scan_results:
        items_to_process = dir_scan_results
        debug(f"[add-file] Using {len(items_to_process)} files from directory scan")
    elif source_arg:
        items_to_process: List[Any] = [result]
    elif isinstance(result, list) and result:
        items_to_process = list(result)
    else:
        items_to_process = [result]

    total_items = len(items_to_process) if isinstance(items_to_process, list) else 0
    processed_items = 0

    try:
        ui, _ = progress.ui_and_pipe_index()
        if ui is not None and total_items:
            preview_items = (
                list(items_to_process)
                if isinstance(items_to_process, list)
                else [items_to_process]
            )
            progress.begin_pipe(
                total_items=total_items,
                items_preview=preview_items,
            )
    except Exception:
        pass

    try:
        if total_items:
            progress.set_percent(0)
    except Exception:
        pass

    # Minimal step-based progress for single-item runs.
    # Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
    use_steps = False
    steps_started = False
    try:
        ui, _ = progress.ui_and_pipe_index()
        use_steps = (ui is not None) and (len(items_to_process) == 1)
        if use_steps:
            progress.begin_steps(5)
            steps_started = True
    except Exception:
        use_steps = False

    # add-file is ingestion-only: it does not download URLs here.
    should_present_directory_selector = bool(dir_scan_mode and not has_downstream_stage)
    if dir_scan_mode and has_downstream_stage:
        debug(
            "[add-file] Continuing with directory batch ingest because downstream stages exist"
        )

    # If this invocation was terminal directory selector mode, show a selectable table and stop.
    # The user then runs @N (optionally piped), which replays add-file with selected source paths.
    if should_present_directory_selector:
        try:
            from SYS.result_table import Table
            from pathlib import Path as _Path

            base_args: List[str] = []
            if plugin_name:
                base_args.extend(["-plugin", str(plugin_name)])
            if location:
                base_args.extend(["-instance", str(location)])
            if source_url_arg:
                base_args.extend(["-url", str(source_url_arg)])
            if bool(delete_after):
                base_args.append("-delete")

            table = Table(title="Files in Directory", preserve_order=True)
            table.set_table("add-file.directory")
            table.set_source_command("add-file", base_args)

            rows: List[Dict[str, Any]] = []
            for file_info in dir_scan_results or []:
                p = file_info.get("path")
                hp = str(file_info.get("hash") or "")
                name = str(file_info.get("name") or "unknown")
                try:
                    clean_title = _Path(name).stem
                except Exception:
                    clean_title = name
                ext = str(file_info.get("ext") or "").lstrip(".")
                size = file_info.get("size", 0)

                row_item = build_table_result_payload(
                    title=clean_title,
                    columns=[
                        ("Title", clean_title),
                        ("Hash", hp),
                        ("Size", size),
                        ("Ext", ext),
                    ],
                    selection_args=[str(p) if p is not None else ""],
                    path=str(p) if p is not None else "",
                    hash=hp,
                )
                rows.append(row_item)
                table.add_result(row_item)

            ctx.set_current_stage_table(table)
            ctx.set_last_result_table(
                table,
                rows,
                subject={
                    "table": "add-file.directory"
                }
            )
            log(f"✓ Found {len(rows)} files. Select with @N (e.g., @1 or @1-3).")
            return 0
        except Exception as exc:
            debug(
                f"[add-file] Failed to display directory scan result table: {exc}"
            )

    collected_payloads: List[Dict[str, Any]] = []
    pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
    pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {}
    pending_tag_associations: Dict[str, List[tuple[str, List[str]]]] = {}
    successes = 0
    failures = 0

    # When add-file -instance is the last stage, always show a final search-file table.
    # This is especially important for multi-item ingests (e.g., multi-clip downloads)
    # so the user always gets a selectable ResultTable.
    live_progress = None
    try:
        live_progress = ctx.get_live_progress()
    except Exception:
        live_progress = None

    want_final_search_file = (
        bool(is_last_stage)
        and bool(effective_storage_backend_name)
        and bool(live_progress)
    )
    auto_search_file_after_add = False

    # When ingesting multiple items into a backend store, defer URL association and
    # apply it once at the end (bulk) to avoid per-item URL API calls.
    defer_url_association = (
        bool(effective_storage_backend_name) and len(items_to_process) > 1
    )

    for idx, item in enumerate(items_to_process, 1):
        # The positional source path is `source_arg` (None in batch modes, where each
        # item carries its own path); the same value is handed to _resolve_source below.
        pipe_obj = coerce_to_pipe_object(item, source_arg)

        if source_url_arg:
            try:
                from SYS.metadata import normalize_urls

                cli_urls = [u.strip() for u in str(source_url_arg).split(",") if u and u.strip()]
                merged_urls: List[str] = []
                if isinstance(getattr(pipe_obj, "extra", None), dict):
                    existing_url = pipe_obj.extra.get("url")
                    if isinstance(existing_url, list):
                        merged_urls.extend(str(u) for u in existing_url if u)
                    elif isinstance(existing_url, str) and existing_url.strip():
                        merged_urls.append(existing_url.strip())
                else:
                    pipe_obj.extra = {}
                merged_urls = sh.merge_urls(merged_urls, cli_urls)
                if merged_urls:
                    pipe_obj.extra["url"] = merged_urls
            except Exception:
                pass

        try:
            label = pipe_obj.title
            if not label and pipe_obj.path:
                try:
                    label = Path(str(pipe_obj.path)).name
                except Exception:
                    label = pipe_obj.path
            if not label:
                label = "file"
            if total_items:
                pending_pct = int(round(((idx - 1) / max(1, total_items)) * 100))
                progress.set_percent(pending_pct)
                progress.set_status(f"adding {idx}/{total_items}: {label}")
        except Exception:
            pass

        temp_dir_to_cleanup: Optional[Path] = None
        delete_after_item = delete_after
        try:
            if use_steps and steps_started:
                progress.step("resolving source")

            export_destination = (
                Path(local_export_destination)
                if local_export_destination
                else Path(location)
                if location and not is_storage_backend_location
                else None
            )
            media_path, file_hash, temp_dir_to_cleanup = self._resolve_source(
                item,
                source_arg,
                pipe_obj,
                config,
                export_destination=export_destination,
                store_instance=storage_registry,
                deps=deps,
            )
            if not media_path and plugin_name:
                media_path, file_hash, temp_dir_to_cleanup = Add_File._download_piped_source(
                    pipe_obj,
                    config,
                    storage_registry,
                    deps=deps
                )
            if not media_path:
                failures += 1
                continue

            # Update pipe_obj with resolved path
            pipe_obj.path = str(media_path)

            # Local/plugin exports can accept any file type.
            # Storage backends stay restricted to SUPPORTED_MEDIA_EXTENSIONS.
            allow_all_files = not bool(effective_storage_backend_name)
            if not self._validate_source(media_path, allow_all_extensions=allow_all_files):
                failures += 1
                continue

            if use_steps and steps_started:
                if not file_hash:
                    progress.step("hashing file")
                progress.step("ingesting file")

            if plugin_name:
                if effective_storage_backend_name:
                    code = self._handle_storage_backend(
                        item,
                        media_path,
                        effective_storage_backend_name,
                        pipe_obj,
                        config,
                        delete_after_item,
                        collect_payloads=collected_payloads,
                        collect_relationship_pairs=pending_relationship_pairs,
                        defer_url_association=defer_url_association,
                        pending_url_associations=pending_url_associations,
                        defer_tag_association=defer_url_association,
                        pending_tag_associations=pending_tag_associations,
                        suppress_last_stage_overlay=want_final_search_file,
                        auto_search_file=auto_search_file_after_add,
                        store_instance=storage_registry,
                    )
                else:
                    code = self._handle_plugin_upload(
                        media_path,
                        plugin_name,
                        plugin_instance,
                        pipe_obj,
                        config,
                        delete_after_item
                    )
                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            if location:
                try:
                    backend_registry = storage_registry or deps.get_backend_registry()
                    resolved_backend = Add_File._resolve_backend_by_name(backend_registry, str(location))
                    if resolved_backend is not None:
                        code = self._handle_storage_backend(
                            item,
                            media_path,
                            location,
                            pipe_obj,
                            config,
                            delete_after_item,
                            collect_payloads=collected_payloads,
                            collect_relationship_pairs=pending_relationship_pairs,
                            defer_url_association=defer_url_association,
                            pending_url_associations=pending_url_associations,
                            defer_tag_association=defer_url_association,
                            pending_tag_associations=pending_tag_associations,
                            suppress_last_stage_overlay=want_final_search_file,
                            auto_search_file=auto_search_file_after_add,
                            store_instance=storage_registry,
                        )
                    else:
                        log(f"Invalid storage backend: {location}", file=sys.stderr)
                        code = 1
                except Exception as exc:
                    debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
                    log(f"Invalid location: {location}", file=sys.stderr)
                    failures += 1
                    continue

                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            log("No destination specified", file=sys.stderr)
            failures += 1
        finally:
            # Runs for every exit from the item body (including `continue`):
            # drop any temp download dir and advance the progress counters.
            if temp_dir_to_cleanup is not None:
                try:
                    shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
                except Exception:
                    pass
            processed_items += 1
            try:
                pct = int(round((processed_items / max(1, total_items)) * 100))
                progress.set_percent(pct)
                if processed_items >= total_items:
                    progress.clear_status()
            except Exception:
                pass

    # Apply deferred url associations (bulk) before showing the final store table.
    if pending_url_associations:
        try:
            Add_File._apply_pending_url_associations(
                pending_url_associations,
                config,
                store_instance=storage_registry
            )
        except Exception:
            pass

    # Apply deferred tag associations (bulk) if collected
    if pending_tag_associations:
        try:
            Add_File._apply_pending_tag_associations(
                pending_tag_associations,
                config,
                store_instance=storage_registry
            )
        except Exception:
            pass

    # Always end add-file -instance (when last stage) by showing item detail panels.
    # Legacy search-file refresh is no longer used for final display.
    if want_final_search_file and collected_payloads:
        try:
            from SYS.rich_display import render_item_details_panel
            from SYS.result_table import Table

            # Stop the live pipeline progress UI before rendering the details panels.
            # This prevents the progress display from lingering on screen.
            Add_File._stop_live_progress_for_terminal_render()

            subject = collected_payloads[0] if len(collected_payloads) == 1 else collected_payloads

            # Use helper to display items and make them @-selectable
            from .._shared import display_and_persist_items
            display_and_persist_items(collected_payloads, title="Result", subject=subject)
            try:
                ctx.set_last_result_items_only(list(collected_payloads))
            except Exception:
                pass
        except Exception:
            pass

    # Persist relationships into backend DB/API.
    if pending_relationship_pairs:
        try:
            Add_File._apply_pending_relationships(
                pending_relationship_pairs,
                config,
                store_instance=storage_registry,
                deps=deps
            )
        except Exception:
            pass

    if use_steps and steps_started:
        progress.step("finalized")

    # Clear the status so it doesn't linger in the UI
    progress.clear_status()

    if successes > 0:
        return 0
    return 1
prev_ctx = ctx.get_stage_context() temp_ctx = ctx.PipelineStageContext( stage_index=0, total_stages=1, pipe_index=0, worker_id=getattr(prev_ctx, "worker_id", None), ) ctx.set_stage_context(temp_ctx) try: code = search_file_cmdlet.run(None, args, config) emitted_items = list(getattr(temp_ctx, "emits", []) or []) finally: ctx.set_stage_context(prev_ctx) if code != 0: return None # Promote the search-file result to a display overlay so the CLI prints it # for action commands like add-file. stage_ctx = ctx.get_stage_context() is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) if is_last: try: table = ctx.get_last_result_table() items = ctx.get_last_result_items() if table is not None and items: # If we have a single item refresh, render it as a panel immediately # and suppress the table output from the CLI runner. if len(items) == 1: try: from SYS.rich_display import render_item_details_panel render_item_details_panel(items[0]) setattr(table, "_rendered_by_cmdlet", True) except Exception as exc: debug(f"[add-file] Item details render failed: {exc}") publish_result_table( ctx, table, items, subject={ "store": instance, "hash": hashes }, overlay=True, ) except Exception: pass return emitted_items except Exception as exc: debug( f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}" ) return None @staticmethod def _parse_relationship_tag_king_alts( tag_value: str ) -> tuple[Optional[str], List[str]]: """Parse a relationship tag into (king_hash, alt_hashes). Supported formats: - New: relationship: ,, - Old: relationship: hash(king),hash(alt)... relationship: hash(king)KING,hash(alt)ALT For the local DB we treat the first hash listed as the king. 
""" if not isinstance(tag_value, str): return None, [] raw = tag_value.strip() if not raw: return None, [] # Normalize input: ensure we only look at the RHS after "relationship:" rhs = raw if ":" in raw: prefix, rest = raw.split(":", 1) if prefix.strip().lower() == "relationship": rhs = rest.strip() # Old typed format: hash(type)HEX typed = re.findall(r"hash\((\w+)\)?", rhs) if typed: king: Optional[str] = None alts: List[str] = [] for rel_type, h in typed: h_norm = str(h).strip().lower() if rel_type.strip().lower() == "king": king = h_norm elif rel_type.strip().lower() in {"alt", "related"}: alts.append(h_norm) # If the tag omitted king but had hashes, fall back to first hash. if not king: all_hashes = [str(h).strip().lower() for _, h in typed] king = all_hashes[0] if all_hashes else None alts = [h for h in all_hashes[1:] if h] # Dedupe alts while preserving order seen: set[str] = set() alts = [ h for h in alts if h and len(h) == 64 and not (h in seen or seen.add(h)) ] if king and len(king) == 64: return king, [h for h in alts if h != king] return None, [] # New format: a simple list of hashes, first is king. hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs) hashes = [h.strip().lower() for h in hashes if isinstance(h, str)] if not hashes: return None, [] king = hashes[0] alts = hashes[1:] seen2: set[str] = set() alts = [ h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h)) ] return king, [h for h in alts if h != king] @staticmethod def _parse_relationships_king_alts( relationships: Dict[str, Any], ) -> tuple[Optional[str], List[str]]: """Parse a PipeObject.relationships dict into (king_hash, alt_hashes). Supported shapes: - {"king": [KING], "alt": [ALT1, ALT2]} - {"king": KING, "alt": ALT} (strings) - Also treats "related" hashes as alts for persistence purposes. 
""" if not isinstance(relationships, dict) or not relationships: return None, [] def _first_hash(val: Any) -> Optional[str]: if isinstance(val, str): h = val.strip().lower() return h if len(h) == 64 else None if isinstance(val, list): for item in val: if isinstance(item, str): h = item.strip().lower() if len(h) == 64: return h return None def _many_hashes(val: Any) -> List[str]: out: List[str] = [] if isinstance(val, str): h = val.strip().lower() if len(h) == 64: out.append(h) elif isinstance(val, list): for item in val: if isinstance(item, str): h = item.strip().lower() if len(h) == 64: out.append(h) return out king = _first_hash(relationships.get("king")) if not king: return None, [] alts = _many_hashes(relationships.get("alt")) alts.extend(_many_hashes(relationships.get("related"))) seen: set[str] = set() alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))] return king, alts @staticmethod def _apply_pending_relationships( pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any], store_instance: Optional[BackendRegistry] = None, deps: Optional[_CommandDependencies] = None, ) -> None: """Persist relationships to backends that support relationships. This delegates to an optional backend method: `set_relationship(alt, king, kind)`. 
""" if not pending: return if deps is None: deps = _CommandDependencies(config) try: backend_registry = store_instance if store_instance is not None else deps.get_backend_registry() except Exception: return for backend_name, pairs in pending.items(): if not pairs: continue try: backend = backend_registry[str(backend_name)] except Exception: continue if not bool(getattr(backend, "supports_relationship_association", False)): continue setter = getattr(backend, "set_relationship", None) if not callable(setter): continue processed_pairs: set[tuple[str, str]] = set() for alt_hash, king_hash in sorted(pairs): if not alt_hash or not king_hash or alt_hash == king_hash: continue if (alt_hash, king_hash) in processed_pairs: continue alt_norm = str(alt_hash).strip().lower() king_norm = str(king_hash).strip().lower() if len(alt_norm) != 64 or len(king_norm) != 64: continue try: setter(alt_norm, king_norm, "alt") processed_pairs.add((alt_hash, king_hash)) except Exception: continue @staticmethod def _maybe_download_backend_file( backend: Any, file_hash: str, pipe_obj: models.PipeObject, *, output_dir: Optional[Path] = None, ) -> Tuple[Optional[Path], Optional[Path]]: """Best-effort fetch of a backend file when get_file returns a URL. Returns (downloaded_path, temp_dir_to_cleanup). """ downloader = getattr(backend, "download_to_temp", None) if not callable(downloader): return None, None tmp_dir: Optional[Path] = None try: # Extract suffix from pipe_obj path to avoid .tmp rejections suffix = None if pipe_obj.path: try: suffix = Path(pipe_obj.path).suffix except Exception: pass # Extract suffix from metadata if available (fallback) if not suffix: metadata = getattr(pipe_obj, "metadata", {}) if isinstance(metadata, dict): suffix = metadata.get("ext") download_root = output_dir if download_root is None: tmp_dir = Path(tempfile.mkdtemp(prefix="add-file-src-")) download_root = tmp_dir if download_root is None: return None, None # Introspect downloader to pass supported args. 
@staticmethod
def _download_remote_backend_url(
    remote_url: str,
    pipe_obj: models.PipeObject,
    *,
    file_hash: Optional[str] = None,
    output_dir: Optional[Path] = None,
) -> Tuple[Optional[Path], Optional[Path]]:
    """Best-effort fetch of a remote backend URL over HTTP(S).

    Args:
        remote_url: URL to download; anything that is not ``http://`` or
            ``https://`` is rejected.
        pipe_obj: Pipe object used to build the suggested filename; its
            ``is_temp`` (and, for direct exports, ``extra``) is updated on
            success.
        file_hash: Optional hash passed to the filename builder as fallback.
        output_dir: When provided, the file is downloaded directly there.

    Returns:
        ``(downloaded_path, temp_dir_to_cleanup)``. When ``output_dir`` is
        provided no temp cleanup path is returned. ``(None, None)`` when the
        URL is unsupported or the download fails.
    """
    url_text = str(remote_url or "").strip()

    # This helper performs generic HTTP downloads only.
    # Non-HTTP schemes (e.g. hydrus://, tidal:) should be handled by
    # plugin-specific resolvers via _maybe_download_plugin_result.
    if not url_text.lower().startswith(("http://", "https://")):
        return None, None

    tmp_dir: Optional[Path] = None
    try:
        download_root = output_dir
        if download_root is None:
            tmp_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
            download_root = tmp_dir

        suggested_name = Add_File._build_provider_filename(
            pipe_obj,
            fallback_hash=file_hash,
            source_url=url_text,
        )
        pipeline_progress = PipelineProgress(ctx)
        try:
            destination_label = str(download_root)
            pipeline_progress.set_status(f"downloading {suggested_name} to {destination_label}")
        except Exception:
            # Status display is cosmetic; never fail the download over it.
            pass

        downloaded = download_direct_file(
            url_text,
            download_root,
            quiet=False,
            suggested_filename=suggested_name,
            pipeline_progress=pipeline_progress,
        )
        downloaded_path = getattr(downloaded, "path", None)
        if isinstance(downloaded_path, Path) and downloaded_path.exists():
            if output_dir is not None:
                # Downloaded straight into the export destination: the file
                # is the final artifact, not a temp copy.
                pipe_obj.is_temp = False
                if isinstance(pipe_obj.extra, dict):
                    pipe_obj.extra["_direct_export_download"] = True
                else:
                    pipe_obj.extra = {"_direct_export_download": True}
                return downloaded_path, None
            pipe_obj.is_temp = True
            return downloaded_path, tmp_dir
    except Exception as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        debug(f"[add-file] Remote backend download failed: {type(exc).__name__}: {exc}")
    finally:
        try:
            PipelineProgress(ctx).clear_status()
        except Exception:
            pass

    # Failure path only: drop the temp workspace created above.
    if tmp_dir is not None:
        try:
            shutil.rmtree(tmp_dir, ignore_errors=True)
        except Exception:
            pass
    return None, None
@staticmethod
def _resolve_backend_by_name(instance: Any, backend_name: str) -> Optional[Any]:
    """Look up a backend in a registry, tolerating case/whitespace differences.

    The exact ``instance[backend_name]`` lookup is tried first. If that
    raises, the names from ``instance.list_backends()`` are compared
    case-insensitively (after stripping) against ``backend_name``.

    Returns the backend object, or ``None`` when nothing matches or either
    argument is falsy.
    """
    if not (instance and backend_name):
        return None

    try:
        return instance[backend_name]
    except Exception:
        pass

    wanted = str(backend_name or "").strip().lower()
    if not wanted:
        return None

    try:
        for known_name in instance.list_backends():
            if not isinstance(known_name, str):
                continue
            if known_name.strip().lower() != wanted:
                continue
            try:
                return instance[known_name]
            except Exception:
                # A listed name that cannot be fetched: keep scanning.
                continue
    except Exception:
        pass
    return None
Returns (media_path, file_hash, temp_dir_to_cleanup). """ # PRIORITY 1a: Prefer an explicit local path when it exists. # This avoids unnecessary backend.get_file(hash) probes (and remote fallbacks) # for freshly downloaded temp files that are not yet present in the source store. if isinstance(result, dict): r_path = result.get("path") r_hash = result.get("hash") if r_path: try: p = coerce_to_path(r_path) if p.exists() and p.is_file(): pipe_obj.path = str(p) return p, str(r_hash) if r_hash else None, None except Exception: pass # PRIORITY 1b: Try hash+store from result (fetch from backend) r_hash = get_field(result, "hash") or get_field(result, "file_hash") r_store = get_field(result, "store") if r_hash and r_store: try: if deps is None: deps = _CommandDependencies(config) backend_registry = store_instance or deps.get_backend_registry() backend = Add_File._resolve_backend_by_name(backend_registry, r_store) if backend is not None: mp = backend.get_file(r_hash) if isinstance(mp, Path) and mp.exists(): pipe_obj.path = str(mp) return mp, str(r_hash), None if isinstance(mp, str) and mp.strip(): try: mp_path = Path(str(mp)) except Exception: mp_path = None if mp_path is not None and mp_path.exists() and mp_path.is_file(): pipe_obj.path = str(mp_path) return mp_path, str(r_hash), None dl_path, tmp_dir = Add_File._maybe_download_backend_file( backend, str(r_hash), pipe_obj, output_dir=export_destination, ) if dl_path and dl_path.exists(): pipe_obj.path = str(dl_path) return dl_path, str(r_hash), tmp_dir dl_path, tmp_dir = Add_File._download_remote_backend_url( str(mp), pipe_obj, file_hash=str(r_hash), output_dir=export_destination, ) if dl_path and dl_path.exists(): pipe_obj.path = str(dl_path) return dl_path, str(r_hash), tmp_dir except Exception as exc: debug(f"[add-file] _resolve_source backend fetch failed for {r_store}/{r_hash}: {exc}") # PRIORITY 2: Generic Coercion (Path arg > PipeObject > Result) candidate: Optional[Path] = None if source_arg: candidate = 
@staticmethod
def _normalize_provider_key(value: Optional[Any]) -> Optional[str]:
    """Reduce a provider/plugin identifier to its lowercase leading segment.

    The value is stringified, stripped and lowercased; anything from the
    first ``.`` onward is discarded. ``None``, values that cannot be
    stringified, and blank strings yield ``None``.
    """
    if value is None:
        return None

    try:
        key = str(value).strip().lower()
    except Exception:
        return None

    if not key:
        return None
    # partition() returns the whole string as the head when no dot exists.
    head, _dot, _rest = key.partition(".")
    return head
@staticmethod
def _resolve_plugin_storage_backend(
    plugin_name: Optional[Any],
    instance_name: Optional[Any],
    config: Dict[str, Any],
    *,
    store_instance: Optional[Any] = None,
    deps: Optional[_CommandDependencies] = None,
) -> Optional[str]:
    """Resolve the storage backend name an upload plugin maps onto.

    Args:
        plugin_name: Plugin identifier; normalized via
            ``_normalize_provider_key``.
        instance_name: Optional explicit instance requested by the user.
        config: Used to build a ``_CommandDependencies`` when ``deps`` is
            not supplied.
        store_instance: Registry to hand to the plugin's resolver; when
            ``None`` the command-scope registry from ``deps`` is used.
        deps: Optional command-scope dependency cache.

    Returns:
        The resolved backend name, or ``None`` when the plugin has no
        upload capability, exposes no ``resolve_backend``, resolves no
        backend, or its optional ``is_backend`` check rejects the result.
    """
    plugin_key = Add_File._normalize_provider_key(plugin_name)
    if not plugin_key:
        return None

    if deps is None:
        deps = _CommandDependencies(config)

    file_provider = deps.get_plugin_with_capability(plugin_key, "upload")
    if file_provider is None:
        return None

    resolver = getattr(file_provider, "resolve_backend", None)
    if not callable(resolver):
        return None

    explicit_instance = str(instance_name or "").strip() or None

    # Reuse the command-scope registry cached on `deps` rather than
    # constructing a fresh BackendRegistry for every call.
    try:
        backend_registry = (
            store_instance if store_instance is not None else deps.get_backend_registry()
        )
    except Exception:
        backend_registry = None

    try:
        resolved_name, backend = resolver(
            explicit_instance,
            storage=backend_registry,
            require_explicit=bool(explicit_instance),
        )
    except TypeError:
        # Resolver does not accept the keyword form; retry positionally.
        try:
            resolved_name, backend = resolver(explicit_instance)
        except Exception:
            return None
    except Exception:
        return None

    if backend is None:
        return None

    resolved_text = str(resolved_name or explicit_instance or "").strip()
    if not resolved_text:
        return None

    checker = getattr(file_provider, "is_backend", None)
    if callable(checker):
        try:
            if not checker(backend, resolved_text):
                return None
        except Exception:
            return None

    return resolved_text
@staticmethod
def _maybe_download_plugin_result(
    result: Any,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
    """Ask the plugin that produced a result to download it.

    The plugin key is the first non-empty normalized value among
    ``pipe_obj.plugin``, the result's ``plugin`` field and the result's
    ``table`` field. The plugin's ``resolve_pipe_result_download`` return
    value is passed through unchanged.

    Returns ``(None, None, None)`` when no plugin key is found, the plugin
    is unavailable, or the plugin helper raises.
    """
    nothing: Tuple[Optional[Path], Optional[str], Optional[Path]] = (None, None, None)

    raw_sources = (
        pipe_obj.plugin,
        get_field(result, "plugin"),
        get_field(result, "table"),
    )
    normalized_keys = map(Add_File._normalize_provider_key, raw_sources)
    plugin_key = next((key for key in normalized_keys if key), None)
    if not plugin_key:
        return nothing

    if deps is None:
        deps = _CommandDependencies(config)

    plugin = deps.get_plugin(plugin_key)
    if plugin is None:
        return nothing

    try:
        return plugin.resolve_pipe_result_download(result, pipe_obj)
    except Exception as exc:
        debug(f"[add-file] Plugin '{plugin_key}' download helper failed: {exc}")
    return nothing
@staticmethod
def _scan_directory_for_files(directory: Path, compute_hash: bool = True) -> List[Dict[str, Any]]:
    """List the supported media files directly inside ``directory``.

    Only regular files whose lowercased suffix is in
    ``SUPPORTED_MEDIA_EXTENSIONS`` are reported; subdirectories are not
    descended into.

    Each returned dict contains:
        - path: Path object
        - name: filename
        - hash: sha256 hash (``None`` when ``compute_hash`` is False)
        - size: file size in bytes (0 when it cannot be read)
        - ext: lowercased file extension

    A file whose hash cannot be computed (with ``compute_hash=True``) is
    left out. An error while iterating stops the scan and returns what was
    collected so far. A missing or non-directory path yields ``[]``.
    """
    if not (directory.exists() and directory.is_dir()):
        return []

    found: List[Dict[str, Any]] = []
    try:
        for entry in directory.iterdir():
            if not entry.is_file():
                continue

            suffix = entry.suffix.lower()
            if suffix not in SUPPORTED_MEDIA_EXTENSIONS:
                continue

            digest = None
            if compute_hash:
                # Hashing can be expensive for large directories; callers
                # opt out with compute_hash=False.
                try:
                    digest = sha256_file(entry)
                except Exception as exc:
                    debug(f"Failed to hash {entry}: {exc}")
                    continue

            try:
                byte_count = entry.stat().st_size
            except Exception:
                byte_count = 0

            found.append(
                dict(
                    path=entry,
                    name=entry.name,
                    hash=digest,
                    size=byte_count,
                    ext=suffix,
                )
            )
    except Exception as exc:
        debug(f"Error scanning directory {directory}: {exc}")

    return found
@staticmethod
def _is_probable_url(s: Any) -> bool:
    """Check if a string looks like a URL/magnet/identifier (vs a tag/title).

    A value qualifies when, after stripping and lowercasing, it starts with
    one of ``_REMOTE_URL_PREFIXES`` or contains a ``://`` scheme separator.
    Non-strings and blank strings never qualify. Plain ``hash:`` search
    queries match neither rule and are therefore rejected.
    """
    if not isinstance(s, str):
        return False

    val = s.strip().lower()
    if not val:
        return False

    # Known schemes first, then any explicit "scheme://" form.
    return val.startswith(_REMOTE_URL_PREFIXES) or "://" in val
@staticmethod
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
    """Return the item's duration in seconds, or ``None`` when unknown.

    ``pipe_obj.duration`` is consulted first; if it does not parse, the
    value from ``extract_duration(result)`` is tried. Accepted forms are
    positive finite numbers, numeric strings, and ``MM:SS`` / ``HH:MM:SS``
    strings made of ASCII digits. Booleans, non-finite values and
    zero/negative durations are treated as unknown.
    """
    import math

    def _positive(value: Any) -> Optional[float]:
        # Only finite, strictly positive values are meaningful durations.
        try:
            seconds = float(value)
        except OverflowError:
            return None
        return seconds if math.isfinite(seconds) and seconds > 0 else None

    def _parse_duration(value: Any) -> Optional[float]:
        # bool is an int subclass; a flag is not a duration.
        if value is None or isinstance(value, bool):
            return None
        if isinstance(value, (int, float)):
            return _positive(value)
        if not isinstance(value, str):
            return None

        s = value.strip()
        if not s:
            return None

        try:
            seconds: Optional[float] = float(s)
        except ValueError:
            seconds = None
        if seconds is not None:
            return _positive(seconds)

        if ":" in s:
            parts = [p.strip() for p in s.split(":") if p.strip()]
            # isascii() guards against digits such as superscripts that
            # pass isdigit() but make int() raise.
            if len(parts) in {2, 3} and all(p.isascii() and p.isdigit() for p in parts):
                total = 0
                for part in parts:
                    total = total * 60 + int(part)
                return _positive(total)
        return None

    parsed = _parse_duration(getattr(pipe_obj, "duration", None))
    if parsed is not None:
        return parsed
    try:
        return _parse_duration(extract_duration(result))
    except Exception:
        return None
@staticmethod
def _update_pipe_object_destination(
    pipe_obj: models.PipeObject,
    *,
    hash_value: str,
    store: str,
    plugin: Optional[str] = None,
    path: Optional[str],
    tag: List[str],
    title: Optional[str],
    extra_updates: Optional[Dict[str, Any]] = None,
) -> None:
    """Point ``pipe_obj`` at its stored destination.

    Overwrites ``hash``, ``store``, ``plugin``, ``path`` and ``tag``, and
    clears ``is_temp``. ``title`` is only replaced when a non-empty value is
    given. ``extra_updates`` is merged into an existing ``extra`` dict, or
    becomes the new ``extra`` when the current one is not a dict.
    """
    assignments = (
        ("hash", hash_value),
        ("store", store),
        ("plugin", plugin),
        ("is_temp", False),
        ("path", path),
        ("tag", tag),
    )
    for attr_name, attr_value in assignments:
        setattr(pipe_obj, attr_name, attr_value)

    if title:
        pipe_obj.title = title

    if not isinstance(pipe_obj.extra, dict):
        pipe_obj.extra = dict(extra_updates or {})
        return
    pipe_obj.extra.update(extra_updates or {})
@staticmethod
def _stop_live_progress_for_terminal_render() -> None:
    """Shut down the live progress display before printing final output.

    Does nothing when no live progress object is available. Otherwise, in
    order and each independently best-effort: marks the current pipe (by
    the stage context's ``pipe_index``) as finished, stops the live
    progress, and clears it from the pipeline context when
    ``set_live_progress`` exists.
    """
    try:
        progress = ctx.get_live_progress()
    except Exception:
        return
    if progress is None:
        return

    def _complete_current_pipe() -> None:
        index = getattr(ctx.get_stage_context(), "pipe_index", None)
        if isinstance(index, int):
            progress.finish_pipe(int(index), force_complete=True)

    def _halt() -> None:
        progress.stop()

    def _detach() -> None:
        if hasattr(ctx, "set_live_progress"):
            ctx.set_live_progress(None)

    # A failure in one step must not prevent the following ones.
    for step in (_complete_current_pipe, _halt, _detach):
        try:
            step()
        except Exception:
            pass
@staticmethod
def _try_emit_search_file_by_hash(
    *,
    instance: str,
    hash_value: str,
    config: Dict[str, Any]
) -> Optional[List[Any]]:
    """Run search-file for a single hash so the final table/payload is consistent.

    Important: `add-file` is treated as an action command by the CLI, so the CLI only
    prints tables for it when a display overlay exists. After running search-file,
    this copies the resulting table into the display overlay (when this is the last
    stage) so the canonical store table is what the user sees and can select from.

    Args:
        instance: Store instance passed to search-file via ``-instance``.
        hash_value: Hash to search for (sent as ``hash:<value>``).
        config: Configuration forwarded to the search-file cmdlet.

    Returns:
        The emitted search-file payload items on success, else ``None``
        (non-zero exit code or any exception).
    """
    try:
        from cmdlet.file.search import CMDLET as search_file_cmdlet

        args = ["-instance", str(instance), f"hash:{str(hash_value)}"]

        # Run search-file under a temporary stage context so its ctx.emit() calls
        # don't interfere with the outer add-file pipeline stage.
        prev_ctx = ctx.get_stage_context()
        temp_ctx = ctx.PipelineStageContext(
            stage_index=0,
            total_stages=1,
            pipe_index=0,
            worker_id=getattr(prev_ctx, "worker_id", None),
        )
        ctx.set_stage_context(temp_ctx)
        try:
            code = search_file_cmdlet.run(None, args, config)
            emitted_items = list(getattr(temp_ctx, "emits", []) or [])
        finally:
            # Always restore the caller's stage context, even on failure.
            ctx.set_stage_context(prev_ctx)

        if code != 0:
            return None

        # Promote the search-file result to a display overlay so the CLI prints it
        # for action commands like add-file.
        stage_ctx = ctx.get_stage_context()
        is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
        if is_last:
            try:
                overlay_existing_result_table(
                    ctx,
                    subject={
                        "store": instance,
                        "hash": hash_value
                    },
                )
            except Exception:
                # Overlay is display-only; the emitted items are still valid.
                pass

        return emitted_items
    except Exception as exc:
        debug(
            f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
        )
        return None
@staticmethod
def _normalize_hash_candidate(value: Any) -> str:
    """Return ``value`` as a lowercase 64-character hex string, or ``""``.

    The value is stringified (falsy values become the empty string),
    stripped and lowercased. Anything that is not exactly 64 characters
    drawn from ``0-9a-f`` is rejected with an empty string.
    """
    candidate = str(value or "").strip().lower()
    looks_like_sha256 = len(candidate) == 64 and all(
        ch in "0123456789abcdef" for ch in candidate
    )
    return candidate if looks_like_sha256 else ""
raw_urls.strip() else [] extra_updates["url"] = url_values elif isinstance(raw_urls, (list, tuple, set)): url_values = [str(item).strip() for item in raw_urls if str(item).strip()] extra_updates["url"] = url_values relationships = payload.get("relationships") if relationships: try: pipe_obj.relationships = relationships except Exception: pass tags = payload.get("tag") if isinstance(tags, list): tag_values = [str(tag) for tag in tags] else: tag_values = list(pipe_obj.tag or []) title_value = str(payload.get("title") or pipe_obj.title or media_path.name).strip() or media_path.name path_value = str(payload.get("path") or pipe_obj.path or media_path).strip() hash_value = str( payload.get("hash") or payload.get("file_hash") or getattr(pipe_obj, "hash", None) or "unknown" ).strip() or "unknown" store_value = str(payload.get("store") or "").strip() plugin_value = payload.get("plugin") if plugin_value is None and plugin_name: plugin_value = plugin_name Add_File._update_pipe_object_destination( pipe_obj, hash_value=hash_value, store=store_value, plugin=str(plugin_value) if plugin_value else None, path=path_value, tag=tag_values, title=title_value, extra_updates=extra_updates, ) Add_File._emit_pipe_object(pipe_obj) Add_File._cleanup_after_success(media_path, delete_source=delete_after) return 0 @staticmethod def _handle_plugin_upload( media_path: Path, plugin_name: str, instance_name: Optional[str], pipe_obj: models.PipeObject, config: Dict[str, Any], delete_after: bool, ) -> int: """Handle uploading via an upload plugin (e.g. 
0x0).""" from PluginCore.registry import ( get_plugin_with_capability, list_plugin_names_with_capability, list_plugins_with_capability, ) try: file_provider = get_plugin_with_capability(plugin_name, "upload", config) if not file_provider: available_map = list_plugins_with_capability("upload", config) known_upload_plugins = set(list_plugin_names_with_capability("upload")) available_uploads = [name for name, enabled in available_map.items() if enabled and name in known_upload_plugins] if str(plugin_name or "").strip().lower() in known_upload_plugins: show_plugin_config_panel([plugin_name]) else: log(f"Upload plugin '{plugin_name}' is not available or does not support upload", file=sys.stderr) if available_uploads: show_available_plugins_panel(sorted(available_uploads)) return 1 upload_kwargs: Dict[str, Any] = { "pipe_obj": pipe_obj, "instance": instance_name, } pipeline_progress = PipelineProgress(ctx) normalized_plugin_name = Add_File._normalize_provider_key(plugin_name) f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None) if normalized_plugin_name == "local": result = None tags, urls, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config) relationships = Add_File._get_relationships(result, pipe_obj) direct_export_download = False try: if isinstance(pipe_obj.extra, dict): direct_export_download = bool(pipe_obj.extra.pop("_direct_export_download", False)) except Exception: direct_export_download = False upload_kwargs.update( { "title": title, "tags": tags, "urls": urls, "hash_value": f_hash, "relationships": relationships, "direct_export_download": direct_export_download, "pipeline_progress": pipeline_progress, } ) upload_result = file_provider.upload( str(media_path), **upload_kwargs, ) duplicate_upload = False duplicate_rule = "" duplicate_target = "" try: if isinstance(getattr(pipe_obj, "extra", None), dict): duplicate_upload = bool(pipe_obj.extra.get("upload_duplicate")) duplicate_rule = 
@staticmethod
def _handle_storage_backend(
    result: Any,
    media_path: Path,
    backend_name: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    delete_after: bool,
    *,
    collect_payloads: Optional[List[Dict[str, Any]]] = None,
    collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
    defer_url_association: bool = False,
    pending_url_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None,
    defer_tag_association: bool = False,
    pending_tag_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None,
    suppress_last_stage_overlay: bool = False,
    auto_search_file: bool = True,
    store_instance: Optional[BackendRegistry] = None,
) -> int:
    """Handle uploading to a registered storage backend (e.g. 'test' folder store, 'hydrus').

    The method prepares metadata, skips the upload when one of the URLs is
    already known to the backend, otherwise calls ``backend.add_file``, then
    applies deferred tags, URL associations and notes, and finally emits a
    search-file-like payload into the pipeline.

    Args:
        result: Upstream pipeline item.  When it is a dict, its ``store`` and
            ``hash`` keys identify the source store for URL carry-over.
        media_path: Local file to ingest.
        backend_name: Name of the destination backend.
        pipe_obj: Pipe object describing the item; updated in place.
        config: Application configuration.
        delete_after: Delete ``media_path`` after a successful ingest.  This
            is switched on automatically for temp files produced by
            ``download-media`` that live under the resolved output directory
            (unless the destination is the ``temp`` backend).
        collect_payloads: Optional list that receives the emitted payload.
        collect_relationship_pairs: Optional per-backend set that receives
            ``(alt_hash, king_hash)`` pairs for later persistence.
        defer_url_association: Queue URL associations in
            ``pending_url_associations`` instead of applying them now.
        pending_url_associations: Per-backend queue of ``(hash, urls)``.
        defer_tag_association: Queue deferred tags in
            ``pending_tag_associations`` instead of applying them now.
        pending_tag_associations: Per-backend queue of ``(hash, tags)``.
        suppress_last_stage_overlay: Disable the summary overlay.
        auto_search_file: Re-query the backend by hash and emit those rows
            instead of the locally built payload when possible.
        store_instance: Existing backend registry to reuse; a new
            ``BackendRegistry(config)`` is created when omitted.

    Returns:
        ``0`` on success, ``1`` when tagging fails in strict mode or any
        other exception is raised during the ingest.
    """
    pipeline_progress = PipelineProgress(ctx)

    def _set_status(text: str) -> None:
        # Progress reporting is cosmetic; never let it break an ingest.
        try:
            pipeline_progress.set_status(f"{backend_name}: {text}")
        except Exception:
            pass

    def _clear_status() -> None:
        try:
            pipeline_progress.clear_status()
        except Exception:
            pass

    delete_after_effective = bool(delete_after)
    if not delete_after_effective:
        # When download-media is piped into add-file, the downloaded artifact is a temp file.
        # After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
        try:
            if (
                str(backend_name or "").strip().lower() != "temp"
                and getattr(pipe_obj, "is_temp", False)
                and getattr(pipe_obj, "action", None) == "cmdlet:download-media"
            ):
                from SYS.config import resolve_output_dir

                temp_dir = resolve_output_dir(config)
                try:
                    if media_path.resolve().is_relative_to(
                        temp_dir.expanduser().resolve()
                    ):
                        delete_after_effective = True
                        debug(
                            f"[add-file] Auto-delete temp source after ingest: {media_path}"
                        )
                except Exception:
                    # If path resolution fails, fall back to non-destructive behavior
                    pass
        except Exception:
            pass

    try:
        backend_registry = (
            store_instance if store_instance is not None else BackendRegistry(config)
        )
        backend, backend_registry, backend_exc = sh.get_preferred_store_backend(
            config,
            backend_name,
            store_registry=backend_registry,
            suppress_debug=True,
        )
        if backend is None:
            raise backend_exc or KeyError(f"Unknown store backend: {backend_name}")

        # Use backend properties to drive metadata deferral behavior.
        is_remote_backend = getattr(backend, "is_remote", False)
        prefer_defer_tags = getattr(backend, "prefer_defer_tags", False)
        supports_url_association = bool(
            getattr(backend, "supports_url_association", False)
        )
        supports_note_association = bool(
            getattr(backend, "supports_note_association", False)
        )
        supports_relationship_association = bool(
            getattr(backend, "supports_relationship_association", False)
        )
        # NOTE(review): ``is_folder_backend`` used to be read further down
        # without ever being assigned; the NameError was swallowed, which
        # silently disabled post-upload URL association and metadata loading
        # for every backend.  The class-name test below is inferred from the
        # "Folder.add_file already persists URLs" remark — confirm it matches
        # the real folder backend class.
        is_folder_backend = any(
            cls.__name__ == "Folder" for cls in type(backend).__mro__
        )

        # Prepare metadata from pipe_obj and sidecars
        tags, url, title, f_hash = Add_File._prepare_metadata(
            result, media_path, pipe_obj, config
        )

        # If we're moving/copying from one store to another, also copy the source store's
        # existing associated URLs so they aren't lost.
        try:
            from SYS.metadata import normalize_urls

            source_store = None
            source_hash = None
            if isinstance(result, dict):
                source_store = result.get("store")
                source_hash = result.get("hash")
            if not source_store:
                source_store = getattr(pipe_obj, "store", None)
            if not source_hash:
                source_hash = getattr(pipe_obj, "hash", None)
            if (not source_hash) and isinstance(pipe_obj.extra, dict):
                source_hash = pipe_obj.extra.get("hash")

            source_store = str(source_store or "").strip()
            source_hash = str(source_hash or "").strip().lower()
            if (
                source_store
                and source_hash
                and len(source_hash) == 64
                and source_store.lower() != str(backend_name or "").strip().lower()
            ):
                # The previous lookup went through an undefined ``store``
                # name, so it always failed.  Resolve the source backend via
                # the shared helper and the registry already in hand.
                try:
                    source_backend, _src_registry, _src_exc = sh.get_store_backend(
                        config,
                        source_store,
                        store_registry=backend_registry,
                    )
                except Exception:
                    source_backend = None

                if source_backend is not None:
                    try:
                        src_urls = normalize_urls(
                            source_backend.get_url(source_hash) or []
                        )
                    except Exception:
                        src_urls = []
                    try:
                        dst_urls = normalize_urls(url or [])
                    except Exception:
                        dst_urls = []

                    # Destination URLs first, then source URLs; drop blanks
                    # and duplicates while keeping first-seen order.
                    merged: list[str] = []
                    seen: set[str] = set()
                    for u in [*(dst_urls or []), *(src_urls or [])]:
                        if not u or u in seen:
                            continue
                        seen.add(u)
                        merged.append(u)
                    url = merged
        except Exception:
            pass

        # Collect relationship pairs for post-ingest DB/API persistence.
        if collect_relationship_pairs is not None and supports_relationship_association:
            rels = Add_File._get_relationships(result, pipe_obj)
            if isinstance(rels, dict) and rels:
                king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
                if king_hash and alt_hashes:
                    bucket = collect_relationship_pairs.setdefault(
                        str(backend_name), set()
                    )
                    for alt_hash in alt_hashes:
                        if alt_hash and alt_hash != king_hash:
                            bucket.add((alt_hash, king_hash))

        # Relationships must never be stored as tags.
        if isinstance(tags, list) and tags:
            tags = [
                t
                for t in tags
                if not (
                    isinstance(t, str)
                    and t.strip().lower().startswith("relationship:")
                )
            ]

        # Auto-tag (best-effort) BEFORE uploading so tags land with the stored file.
        try:
            tags = _maybe_apply_florencevision_tags(
                media_path, list(tags or []), config, pipe_obj=pipe_obj
            )
            pipe_obj.tag = list(tags or [])
        except Exception as exc:
            # strict mode raises from helper; treat here as a hard failure
            log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr)
            return 1

        # Backends that prefer deferred tagging receive the file without tags;
        # the tags are applied (or queued) after the upload.
        upload_tags = tags
        if prefer_defer_tags and upload_tags:
            upload_tags = []

        try:
            debug_panel(
                "add-file store",
                [
                    ("backend", backend_name),
                    ("path", media_path),
                    ("title", title),
                    ("hash_hint", f_hash[:12] if f_hash else "N/A"),
                    ("defer_tags", bool(prefer_defer_tags and tags)),
                ],
                border_style="yellow",
            )
        except Exception:
            pass

        duplicate_hash = Add_File._find_existing_hash_by_urls(backend, url)
        if duplicate_hash:
            debug(
                f"[add-file] URL duplicate detected in '{backend_name}', skipping upload and reusing hash {duplicate_hash[:12]}..."
            )
            file_identifier = duplicate_hash
        else:
            # Call backend's add_file with full metadata.
            # Backend returns hash as identifier. If the hash is already known,
            # pass it along so large files need not be re-hashed.
            file_identifier = backend.add_file(
                media_path,
                title=title,
                tag=upload_tags,
                url=[]
                if ((defer_url_association and url) or (not supports_url_association))
                else url,
                file_hash=f_hash,
                pipeline_progress=pipeline_progress,
                transfer_label=title or media_path.name,
            )

        stored_path: Optional[str] = None
        # IMPORTANT: avoid calling get_file() for remote backends by default to avoid
        # unintended network activity or credential exposure in result payloads.
        try:
            if not is_remote_backend:
                # For local backends, resolving the path is cheap and useful.
                maybe_path = backend.get_file(file_identifier)
                if isinstance(maybe_path, Path):
                    stored_path = str(maybe_path)
                elif isinstance(maybe_path, str) and maybe_path:
                    stored_path = maybe_path
        except Exception:
            stored_path = None

        # Compute canonical hash value for downstream use (defensive against non-string returns).
        if isinstance(file_identifier, str) and len(file_identifier) == 64:
            chosen_hash = file_identifier
        else:
            chosen_hash = f_hash or (
                str(file_identifier) if file_identifier is not None else "unknown"
            )

        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=chosen_hash,
            store=backend_name,
            path=stored_path,
            tag=tags,
            title=title or pipe_obj.title or media_path.name,
            extra_updates={
                "url": url,
            },
        )

        # Emit a search-file-like payload for consistent tables and natural piping.
        # Keep hash/store for downstream commands (get-tag, download-file, etc.).
        resolved_hash = chosen_hash

        if prefer_defer_tags and tags:
            # Support deferring tag application for batching bulk operations
            if defer_tag_association and pending_tag_associations is not None:
                try:
                    pending_tag_associations.setdefault(str(backend_name), []).append(
                        (str(resolved_hash), list(tags))
                    )
                except Exception:
                    pass
            else:
                try:
                    adder = getattr(backend, "add_tag", None)
                    if callable(adder):
                        _set_status("applying deferred tags")
                        adder(resolved_hash, list(tags))
                except Exception as exc:
                    log(
                        f"[add-file] Post-upload tagging failed for {backend_name}: {exc}",
                        file=sys.stderr,
                    )

        # If we have url(s), ensure they get associated with the destination file.
        # This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
        if url and supports_url_association:
            if defer_url_association and pending_url_associations is not None:
                try:
                    pending_url_associations.setdefault(str(backend_name), []).append(
                        (str(resolved_hash), list(url))
                    )
                except Exception:
                    pass
            else:
                try:
                    # Folder.add_file already persists URLs, avoid extra DB traffic here.
                    if not is_folder_backend:
                        _set_status("associating urls")
                        backend.add_url(resolved_hash, list(url))
                except Exception:
                    pass

        # If a note was provided upstream (e.g., download-media writes notes.sub),
        # persist it automatically like add-note would.
        def _write_note(note_name: str, note_text: Optional[str]) -> None:
            if not note_text or not supports_note_association:
                return
            try:
                setter = getattr(backend, "set_note", None)
                if callable(setter):
                    _set_status(f"writing {note_name} note")
                    setter(resolved_hash, note_name, note_text)
            except Exception as exc:
                debug_panel(
                    "add-file note write failed",
                    [
                        ("store", backend_name),
                        ("hash", resolved_hash),
                        ("note", note_name),
                        ("error", exc),
                    ],
                    border_style="yellow",
                )

        for note_name in ("sub", "lyric", "chapters", "caption"):
            _write_note(note_name, Add_File._get_note_text(result, pipe_obj, note_name))

        meta: Dict[str, Any] = {}
        try:
            if not is_folder_backend:
                _set_status("loading stored metadata")
                meta = backend.get_metadata(resolved_hash) or {}
        except Exception:
            meta = {}
        if not isinstance(meta, dict):
            # Everything below reads ``meta`` with ``.get``; an unexpected
            # return shape must not fail an ingest that already succeeded.
            meta = {}

        # Determine size bytes: first usable metadata key wins, else stat the file.
        size_bytes: Optional[int] = None
        for key in ("size_bytes", "size", "filesize", "file_size"):
            try:
                raw_size = meta.get(key)
                if raw_size is not None:
                    size_bytes = int(raw_size)
                    break
            except Exception:
                pass
        if size_bytes is None:
            try:
                size_bytes = int(media_path.stat().st_size)
            except Exception:
                size_bytes = None

        # Determine title/ext
        title_out = (
            meta.get("title")
            or title
            or pipe_obj.title
            or media_path.stem
            or media_path.name
        )
        ext_out = meta.get("ext") or media_path.suffix.lstrip(".")

        payload: Dict[str, Any] = {
            "title": title_out,
            "ext": str(ext_out or ""),
            "size_bytes": size_bytes,
            "store": backend_name,
            "hash": resolved_hash,
            # Preserve extra fields for downstream commands (kept hidden by default table rules)
            "path": stored_path,
            "tag": list(tags or []),
            "url": list(url or []),
        }

        if collect_payloads is not None:
            try:
                collect_payloads.append(payload)
            except Exception:
                pass

        # Keep the add-file 1-row summary overlay (when last stage), then emit the
        # canonical search-file payload/table for piping/selection consistency.
        if auto_search_file and resolved_hash and resolved_hash != "unknown":
            # Show the add-file summary (overlay only) but let search-file provide the downstream payload.
            Add_File._emit_storage_result(
                payload, overlay=not suppress_last_stage_overlay, emit=False
            )
            refreshed_items = Add_File._try_emit_search_file_by_hash(
                instance=backend_name,
                hash_value=resolved_hash,
                config=config,
            )
            if refreshed_items:
                # Re-emit the canonical store rows so downstream stages receive them.
                for emitted in refreshed_items:
                    ctx.emit(emitted)
            else:
                # Fall back to emitting the add-file payload so downstream stages still receive an item.
                ctx.emit(payload)
        else:
            Add_File._emit_storage_result(
                payload, overlay=not suppress_last_stage_overlay, emit=True
            )

        Add_File._cleanup_after_success(
            media_path, delete_source=delete_after_effective
        )
        _clear_status()
        return 0

    except Exception as exc:
        _clear_status()
        log(
            f"❌ Failed to add file to backend '{backend_name}': {exc}",
            file=sys.stderr,
        )
        import traceback

        traceback.print_exc(file=sys.stderr)
        return 1
single(h, u) except Exception: continue except Exception: continue @staticmethod def _apply_pending_tag_associations( pending: Dict[str, List[tuple[str, List[str]]]], config: Dict[str, Any], store_instance: Optional[BackendRegistry] = None, ) -> None: """Apply deferred tag associations in bulk, grouped per backend.""" try: backend_registry = store_instance if store_instance is not None else BackendRegistry(config) except Exception: return sh.run_store_hash_value_batches( config, pending or {}, bulk_method_name="add_tags_bulk", single_method_name="add_tag", store_registry=backend_registry, pass_config_to_bulk=False, pass_config_to_single=False, ) @staticmethod def _load_sidecar_bundle( media_path: Path, instance: Optional[str], config: Dict[str, Any], ) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]: """Load sidecar metadata.""" return None, None, [], [] @staticmethod def _resolve_file_hash( result: Any, media_path: Path, pipe_obj: models.PipeObject, fallback_hash: Optional[str], ) -> Optional[str]: if pipe_obj.hash and pipe_obj.hash != "unknown": return pipe_obj.hash if fallback_hash: return fallback_hash if isinstance(result, dict): candidate = result.get("hash") if candidate: return str(candidate) try: return sha256_file(media_path) except Exception: return None @staticmethod def _resolve_media_kind(path: Path) -> str: return resolve_media_kind_by_extension(path) @staticmethod def _persist_local_metadata( library_root: Path, dest_path: Path, tags: List[str], url: List[str], f_hash: Optional[str], relationships: Any, duration: Any, media_kind: str, ): pass @staticmethod def _copy_sidecars(source_path: Path, target_path: Path): possible_sidecars = [ source_path.with_suffix(source_path.suffix + ".json"), source_path.with_name(source_path.name + ".tag"), source_path.with_name(source_path.name + ".metadata"), source_path.with_name(source_path.name + ".notes"), ] for sc in possible_sidecars: try: if sc.exists(): suffix_part = 
sc.name.replace(source_path.name, "", 1) dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}" dest_sidecar.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(str(sc), dest_sidecar) except Exception: pass @staticmethod def _cleanup_after_success(media_path: Path, delete_source: bool): # Determine whether this is a temporary merge/tracking file which should be # deleted even when delete_source is False. is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name # If neither explicit delete was requested nor this looks like a temp-merge, # avoid deleting the source file. if not delete_source and not is_temp_merge: return # Attempt deletion (best-effort) try: media_path.unlink() Add_File._cleanup_sidecar_files(media_path) except Exception as exc: log(f"⚠️ Could not delete file: {exc}", file=sys.stderr) @staticmethod def _cleanup_sidecar_files(media_path: Path): targets = [ media_path.parent / (media_path.name + ".metadata"), media_path.parent / (media_path.name + ".notes"), media_path.parent / (media_path.name + ".tag"), ] for target in targets: try: if target.exists(): target.unlink() except Exception: pass # Create and register the cmdlet CMDLET = Add_File()