from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, Tuple, List
from pathlib import Path
from copy import deepcopy
import sys
import shutil
import re

from SYS import models
from SYS import pipeline as ctx
from SYS.logger import log, debug, is_debug_enabled
from SYS.pipeline_progress import PipelineProgress
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store

from . import _shared as sh

# Re-exported helpers from the shared cmdlet module.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag

from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from SYS.metadata import write_metadata

# Canonical supported filetypes for all stores/cmdlets
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS

# Maximum number of characters of a note value shown in debug previews.
DEBUG_PIPE_NOTE_PREVIEW_LENGTH = 256


def _truncate_debug_note_text(value: Any) -> str:
    """Return ``value`` as text, shortened for debug previews.

    Falsy values become the empty string. Text longer than
    ``DEBUG_PIPE_NOTE_PREVIEW_LENGTH`` is cut at that length, right-stripped,
    and suffixed with ``"..."`` so debug output stays compact.
    """
    text = str(value or "")
    if len(text) > DEBUG_PIPE_NOTE_PREVIEW_LENGTH:
        text = text[:DEBUG_PIPE_NOTE_PREVIEW_LENGTH].rstrip() + "..."
    return text
def _sanitize_pipe_object_for_debug(pipe_obj: models.PipeObject) -> models.PipeObject: safe_po = deepcopy(pipe_obj) try: extra = safe_po.extra if isinstance(extra, dict): sanitized = dict(extra) notes = sanitized.get("notes") if isinstance(notes, dict): truncated_notes: Dict[str, str] = {} for note_name, note_value in notes.items(): truncated_notes[str(note_name)] = _truncate_debug_note_text(note_value) sanitized["notes"] = truncated_notes safe_po.extra = sanitized except Exception: pass return safe_po def _maybe_apply_florencevision_tags( media_path: Path, tags: List[str], config: Dict[str, Any], pipe_obj: Optional[models.PipeObject] = None, ) -> List[str]: """Optionally auto-tag images using the FlorenceVision tool. Controlled via config: [tool=florencevision] enabled=true strict=false If strict=false (default), failures log a warning and return the original tags. If strict=true, failures raise to abort the ingest. """ strict = False try: tool_block = (config or {}).get("tool") fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None enabled = False if isinstance(fv_block, dict): enabled = bool(fv_block.get("enabled")) strict = bool(fv_block.get("strict")) if not enabled: return tags from tool.florencevision import FlorenceVisionTool # Special-case: if this file was produced by the `screen-shot` cmdlet, # OCR is more useful than caption/detection for tagging screenshots. 
cfg_for_tool: Dict[str, Any] = config try: action = str(getattr(pipe_obj, "action", "") or "") if pipe_obj is not None else "" cmdlet_name = "" if action.lower().startswith("cmdlet:"): cmdlet_name = action.split(":", 1)[1].strip().lower() if cmdlet_name in {"screen-shot", "screen_shot", "screenshot"}: tool_block2 = dict((config or {}).get("tool") or {}) fv_block2 = dict(tool_block2.get("florencevision") or {}) fv_block2["task"] = "ocr" tool_block2["florencevision"] = fv_block2 cfg_for_tool = dict(config or {}) cfg_for_tool["tool"] = tool_block2 except Exception: cfg_for_tool = config fv = FlorenceVisionTool(cfg_for_tool) if not fv.enabled() or not fv.applicable_path(media_path): return tags auto_tags = fv.tags_for_file(media_path) # Capture caption (if any) into PipeObject notes for downstream persistence. try: caption_text = getattr(fv, "last_caption", None) if caption_text and pipe_obj is not None: if not isinstance(pipe_obj.extra, dict): pipe_obj.extra = {} notes = pipe_obj.extra.get("notes") if not isinstance(notes, dict): notes = {} notes.setdefault("caption", caption_text) pipe_obj.extra["notes"] = notes except Exception: pass if not auto_tags: return tags merged = merge_sequences(tags or [], auto_tags, case_sensitive=False) debug(f"[add-file] FlorenceVision added {len(auto_tags)} tag(s)") return merged except Exception as exc: # Decide strictness from config if we couldn't read it above. 
strict2 = False try: tool_block = (config or {}).get("tool") fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None strict2 = bool(fv_block.get("strict")) if isinstance(fv_block, dict) else False except Exception: strict2 = False if strict or strict2: raise log(f"[add-file] Warning: FlorenceVision tagging failed: {exc}", file=sys.stderr) return tags class Add_File(Cmdlet): """Add file into the DB""" def __init__(self) -> None: """Initialize add-file cmdlet.""" super().__init__( name="add-file", summary= "Ingest a local media file to a store backend, file provider, or local directory.", usage= "add-file (-path | ) (-storage | -provider ) [-delete]", arg=[ SharedArgs.PATH, SharedArgs.STORE, SharedArgs.PROVIDER, CmdletArg( name="delete", type="flag", required=False, description="Delete file after successful upload", alias="del", ), ], detail=[ "Note: add-file ingests local files. To fetch remote sources, use download-file and pipe into add-file.", "- Storage location options (use -storage):", " hydrus: Upload to Hydrus database with metadata tagging", " local: Copy file to local directory", " : Copy file to specified directory", "- File provider options (use -provider):", " 0x0: Upload to 0x0.st for temporary hosting", " file.io: Upload to file.io for temporary hosting", " internetarchive: Upload to archive.org (optional tag: ia: to upload into an existing item)", ], exec=self.run, ) self.register() def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main execution entry point.""" parsed = parse_cmdlet_args(args, self) progress = PipelineProgress(ctx) path_arg = parsed.get("path") location = parsed.get("store") provider_name = parsed.get("provider") delete_after = parsed.get("delete", False) # Convenience: when piping a file into add-file, allow `-path ` # to act as the destination export directory. # Example: screen-shot "https://..." 
| add-file -path "C:\Users\Admin\Desktop" if path_arg and not location and not provider_name: try: candidate_dir = Path(str(path_arg)) if candidate_dir.exists() and candidate_dir.is_dir(): piped_items = result if isinstance(result, list) else [result] has_local_source = False for it in piped_items: try: po = coerce_to_pipe_object(it, None) src = str(getattr(po, "path", "") or "").strip() if not src: continue if src.lower().startswith(("http://", "https://", "magnet:", "torrent:")): continue if Path(src).is_file(): has_local_source = True break except Exception: continue if has_local_source: debug( f"[add-file] Treating -path directory as destination: {candidate_dir}" ) location = str(candidate_dir) path_arg = None except Exception: pass stage_ctx = ctx.get_stage_context() is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) # Directory-mode selector: # - First pass: `add-file -store X -path ` should ONLY show a selectable table. # - Second pass (triggered by @ selection expansion): re-run add-file with `-path file1,file2,...` # and actually ingest/copy. dir_scan_mode = False dir_scan_results: Optional[List[Dict[str, Any]]] = None explicit_path_list_results: Optional[List[Dict[str, Any]]] = None if path_arg and location and not provider_name: # Support comma-separated path lists: -path "file1,file2,file3" # This is the mechanism used by @N expansion for directory tables. 
try: path_text = str(path_arg) except Exception: path_text = "" if "," in path_text: parts = [p.strip().strip('"') for p in path_text.split(",")] parts = [p for p in parts if p] batch: List[Dict[str, Any]] = [] for p in parts: try: file_path = Path(p) except Exception: continue if not file_path.exists() or not file_path.is_file(): continue ext = file_path.suffix.lower() if ext not in SUPPORTED_MEDIA_EXTENSIONS: continue try: hv = sha256_file(file_path) except Exception: continue try: size = file_path.stat().st_size except Exception: size = 0 batch.append( { "path": file_path, "name": file_path.name, "hash": hv, "size": size, "ext": ext, } ) if batch: explicit_path_list_results = batch # Clear path_arg so add-file doesn't treat it as a single path. path_arg = None else: # Directory scan (selector table, no ingest yet) try: candidate_dir = Path(str(path_arg)) if candidate_dir.exists() and candidate_dir.is_dir(): dir_scan_mode = True debug( f"[add-file] Scanning directory for batch add: {candidate_dir}" ) dir_scan_results = Add_File._scan_directory_for_files( candidate_dir ) if dir_scan_results: debug( f"[add-file] Found {len(dir_scan_results)} supported files in directory" ) # Clear path_arg so it doesn't trigger single-item mode. path_arg = None except Exception as exc: debug(f"[add-file] Directory scan failed: {exc}") # Determine if -store targets a registered backend (vs a filesystem export path). is_storage_backend_location = False if location: try: store_probe = Store(config) is_storage_backend_location = location in ( store_probe.list_backends() or [] ) except Exception: is_storage_backend_location = False # Decide which items to process. # - If directory scan was performed, use those results # - If user provided -path (and it was not reinterpreted as destination), treat this invocation as single-item. # - Otherwise, if piped input is a list, ingest each item. 
if explicit_path_list_results: items_to_process = explicit_path_list_results debug(f"[add-file] Using {len(items_to_process)} files from -path list") elif dir_scan_results: items_to_process = dir_scan_results debug(f"[add-file] Using {len(items_to_process)} files from directory scan") elif path_arg: items_to_process: List[Any] = [result] elif isinstance(result, list) and result: items_to_process = list(result) else: items_to_process = [result] # Minimal step-based progress for single-item runs. # Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck". use_steps = False steps_started = False step2_done = False try: ui, _ = progress.ui_and_pipe_index() use_steps = (ui is not None) and (len(items_to_process) == 1) except Exception: use_steps = False debug(f"[add-file] INPUT result type={type(result).__name__}") if isinstance(result, list): debug(f"[add-file] INPUT result is list with {len(result)} items") debug( f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}" ) # add-file is ingestion-only: it does not download URLs here. # Show a concise PipeObject preview when debug logging is enabled to aid pipeline troubleshooting. 
if is_debug_enabled(): preview_items = ( items_to_process if isinstance(items_to_process, list) else [items_to_process] ) max_preview = 5 for idx, item in enumerate(preview_items[:max_preview]): po = item if isinstance(item, models.PipeObject) else None if po is None: try: po = coerce_to_pipe_object(item, path_arg) except Exception: po = None if po is None: debug(f"[add-file] PIPE item[{idx}] preview (non-PipeObject)") continue debug(f"[add-file] PIPE item[{idx}] PipeObject preview") try: safe_po = _sanitize_pipe_object_for_debug(po) safe_po.debug_table() except Exception: pass if len(preview_items) > max_preview: debug( f"[add-file] Skipping {len(preview_items) - max_preview} additional piped item(s) in debug preview" ) # If this invocation was directory selector mode, show a selectable table and stop. # The user then runs @N (optionally piped), which replays add-file with selected paths. if dir_scan_mode: try: from SYS.result_table import ResultTable from pathlib import Path as _Path # Build base args to replay: keep everything except the directory -path. 
base_args: List[str] = [] skip_next = False for tok in list(args or []): if skip_next: skip_next = False continue t = str(tok) if t in {"-path", "--path", "-p"}: skip_next = True continue base_args.append(t) table = ResultTable(title="Files in Directory", preserve_order=True) table.set_table("add-file.directory") table.set_source_command("add-file", base_args) rows: List[Dict[str, Any]] = [] for file_info in dir_scan_results or []: p = file_info.get("path") hp = str(file_info.get("hash") or "") name = str(file_info.get("name") or "unknown") try: clean_title = _Path(name).stem except Exception: clean_title = name ext = str(file_info.get("ext") or "").lstrip(".") size = file_info.get("size", 0) row_item = { "path": str(p) if p is not None else "", "hash": hp, "title": clean_title, "columns": [ ("Title", clean_title), ("Hash", hp), ("Size", size), ("Ext", ext), ], # Used by @N replay (CLI will combine selected rows into -path file1,file2,...) "_selection_args": ["-path", str(p) if p is not None else ""], } rows.append(row_item) table.add_result(row_item) ctx.set_current_stage_table(table) ctx.set_last_result_table( table, rows, subject={ "table": "add-file.directory" } ) log(f"✓ Found {len(rows)} files. Select with @N (e.g., @1 or @1-3).") return 0 except Exception as exc: debug( f"[add-file] Failed to display directory scan result table: {exc}" ) collected_payloads: List[Dict[str, Any]] = [] pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {} pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {} successes = 0 failures = 0 # When add-file -store is the last stage, always show a final search-file table. # This is especially important for multi-item ingests (e.g., multi-clip downloads) # so the user always gets a selectable ResultTable. 
want_final_search_file = ( bool(is_last_stage) and bool(is_storage_backend_location) and bool(location) ) auto_search_file_after_add = False # When ingesting multiple items into a backend store, defer URL association and # apply it once at the end (bulk) to avoid per-item URL API calls. defer_url_association = ( bool(is_storage_backend_location) and bool(location) and len(items_to_process) > 1 ) for item in items_to_process: pipe_obj = coerce_to_pipe_object(item, path_arg) temp_dir_to_cleanup: Optional[Path] = None delete_after_item = delete_after try: media_path, file_hash = self._resolve_source( item, path_arg, pipe_obj, config ) debug( f"[add-file] RESOLVED source: path={media_path}, hash={file_hash[:12] if file_hash else 'N/A'}..." ) if not media_path: failures += 1 continue if use_steps and (not steps_started): progress.begin_steps(3) progress.step("resolving source") steps_started = True # Update pipe_obj with resolved path pipe_obj.path = str(media_path) if not self._validate_source(media_path): failures += 1 continue if provider_name: if str(provider_name).strip().lower() == "matrix": log( "Matrix uploads are handled by .matrix (not add-file).", file=sys.stderr, ) failures += 1 continue code = self._handle_provider_upload( media_path, provider_name, pipe_obj, config, delete_after_item ) if code == 0: successes += 1 else: failures += 1 continue if location: try: store = Store(config) backends = store.list_backends() if location in backends: code = self._handle_storage_backend( item, media_path, location, pipe_obj, config, delete_after_item, collect_payloads=collected_payloads, collect_relationship_pairs=pending_relationship_pairs, defer_url_association=defer_url_association, pending_url_associations=pending_url_associations, suppress_last_stage_overlay=want_final_search_file, auto_search_file=auto_search_file_after_add, ) else: code = self._handle_local_export( media_path, location, pipe_obj, config, delete_after_item ) except Exception as exc: 
debug(f"[add-file] ERROR: Failed to resolve location: {exc}") log(f"Invalid location: {location}", file=sys.stderr) failures += 1 continue if use_steps and steps_started and (not step2_done): progress.step("writing destination") step2_done = True if code == 0: successes += 1 else: failures += 1 continue log("No destination specified", file=sys.stderr) failures += 1 finally: if temp_dir_to_cleanup is not None: try: shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True) except Exception: pass # Apply deferred url associations (bulk) before showing the final store table. if pending_url_associations: try: Add_File._apply_pending_url_associations( pending_url_associations, config ) except Exception: pass # Always end add-file -store (when last stage) by showing the canonical store table. # This keeps output consistent and ensures @N selection works for multi-item ingests. if want_final_search_file and collected_payloads: try: hashes: List[str] = [] for payload in collected_payloads: h = payload.get("hash") if isinstance(payload, dict) else None if isinstance(h, str) and len(h) == 64: hashes.append(h) # Deduplicate while preserving order seen: set[str] = set() hashes = [h for h in hashes if not (h in seen or seen.add(h))] refreshed_items = Add_File._try_emit_search_file_by_hashes( store=str(location), hash_values=hashes, config=config, ) if not refreshed_items: # Fallback: at least show the add-file payloads as a display overlay from SYS.result_table import ResultTable table = ResultTable("Result") for payload in collected_payloads: table.add_result(payload) ctx.set_last_result_table_overlay( table, collected_payloads, subject=collected_payloads ) except Exception: pass # Persist relationships into backend DB/API. 
if pending_relationship_pairs: try: Add_File._apply_pending_relationships( pending_relationship_pairs, config ) except Exception: pass if use_steps and steps_started: progress.step("finalized") if successes > 0: return 0 return 1 @staticmethod def _try_emit_search_file_by_hashes( *, store: str, hash_values: List[str], config: Dict[str, Any] ) -> Optional[List[Any]]: """Run search-file for a list of hashes and promote the table to a display overlay. Returns the emitted search-file payload items on success, else None. """ hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64] if not store or not hashes: return None try: from cmdlet.search_file import CMDLET as search_file_cmdlet query = "hash:" + ",".join(hashes) args = ["-store", str(store), query] debug(f'[add-file] Refresh: search-file -store {store} "{query}"') # Run search-file under a temporary stage context so its ctx.emit() calls # don't interfere with the outer add-file pipeline stage. prev_ctx = ctx.get_stage_context() temp_ctx = ctx.PipelineStageContext( stage_index=0, total_stages=1, pipe_index=0, worker_id=getattr(prev_ctx, "worker_id", None), ) ctx.set_stage_context(temp_ctx) try: code = search_file_cmdlet.run(None, args, config) emitted_items = list(getattr(temp_ctx, "emits", []) or []) finally: ctx.set_stage_context(prev_ctx) if code != 0: return None # Promote the search-file result to a display overlay so the CLI prints it # for action commands like add-file. 
stage_ctx = ctx.get_stage_context() is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) if is_last: try: table = ctx.get_last_result_table() items = ctx.get_last_result_items() if table is not None and items: ctx.set_last_result_table_overlay( table, items, subject={ "store": store, "hash": hashes } ) except Exception: pass return emitted_items except Exception as exc: debug( f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}" ) return None @staticmethod def _parse_relationship_tag_king_alts( tag_value: str ) -> tuple[Optional[str], List[str]]: """Parse a relationship tag into (king_hash, alt_hashes). Supported formats: - New: relationship: ,, - Old: relationship: hash(king),hash(alt)... relationship: hash(king)KING,hash(alt)ALT For the local DB we treat the first hash listed as the king. """ if not isinstance(tag_value, str): return None, [] raw = tag_value.strip() if not raw: return None, [] # Normalize input: ensure we only look at the RHS after "relationship:" rhs = raw if ":" in raw: prefix, rest = raw.split(":", 1) if prefix.strip().lower() == "relationship": rhs = rest.strip() # Old typed format: hash(type)HEX typed = re.findall(r"hash\((\w+)\)?", rhs) if typed: king: Optional[str] = None alts: List[str] = [] for rel_type, h in typed: h_norm = str(h).strip().lower() if rel_type.strip().lower() == "king": king = h_norm elif rel_type.strip().lower() in {"alt", "related"}: alts.append(h_norm) # If the tag omitted king but had hashes, fall back to first hash. if not king: all_hashes = [str(h).strip().lower() for _, h in typed] king = all_hashes[0] if all_hashes else None alts = [h for h in all_hashes[1:] if h] # Dedupe alts while preserving order seen: set[str] = set() alts = [ h for h in alts if h and len(h) == 64 and not (h in seen or seen.add(h)) ] if king and len(king) == 64: return king, [h for h in alts if h != king] return None, [] # New format: a simple list of hashes, first is king. 
hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs) hashes = [h.strip().lower() for h in hashes if isinstance(h, str)] if not hashes: return None, [] king = hashes[0] alts = hashes[1:] seen2: set[str] = set() alts = [ h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h)) ] return king, [h for h in alts if h != king] @staticmethod def _parse_relationships_king_alts( relationships: Dict[str, Any], ) -> tuple[Optional[str], List[str]]: """Parse a PipeObject.relationships dict into (king_hash, alt_hashes). Supported shapes: - {"king": [KING], "alt": [ALT1, ALT2]} - {"king": KING, "alt": ALT} (strings) - Also treats "related" hashes as alts for persistence purposes. """ if not isinstance(relationships, dict) or not relationships: return None, [] def _first_hash(val: Any) -> Optional[str]: if isinstance(val, str): h = val.strip().lower() return h if len(h) == 64 else None if isinstance(val, list): for item in val: if isinstance(item, str): h = item.strip().lower() if len(h) == 64: return h return None def _many_hashes(val: Any) -> List[str]: out: List[str] = [] if isinstance(val, str): h = val.strip().lower() if len(h) == 64: out.append(h) elif isinstance(val, list): for item in val: if isinstance(item, str): h = item.strip().lower() if len(h) == 64: out.append(h) return out king = _first_hash(relationships.get("king")) if not king: return None, [] alts = _many_hashes(relationships.get("alt")) alts.extend(_many_hashes(relationships.get("related"))) seen: set[str] = set() alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))] return king, alts @staticmethod def _apply_pending_relationships( pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any] ) -> None: """Persist relationships to backends that support relationships. This delegates to an optional backend method: `set_relationship(alt, king, kind)`. 
""" if not pending: return try: store = Store(config) except Exception: return for backend_name, pairs in pending.items(): if not pairs: continue try: backend = store[str(backend_name)] except Exception: continue setter = getattr(backend, "set_relationship", None) if not callable(setter): continue processed_pairs: set[tuple[str, str]] = set() for alt_hash, king_hash in sorted(pairs): if not alt_hash or not king_hash or alt_hash == king_hash: continue if (alt_hash, king_hash) in processed_pairs: continue alt_norm = str(alt_hash).strip().lower() king_norm = str(king_hash).strip().lower() if len(alt_norm) != 64 or len(king_norm) != 64: continue try: setter(alt_norm, king_norm, "alt") processed_pairs.add((alt_hash, king_hash)) except Exception: continue @staticmethod def _resolve_source( result: Any, path_arg: Optional[str], pipe_obj: models.PipeObject, config: Dict[str, Any], ) -> Tuple[Optional[Path], Optional[str]]: """Resolve the source file path from args or pipeline result. Returns (media_path, file_hash). 
""" # PRIORITY 1a: Try hash+path from directory scan result (has 'path' and 'hash' keys) if isinstance(result, dict): result_path = result.get("path") result_hash = result.get("hash") # Check if this looks like a directory scan result (has path and hash but no 'store' key) result_store = result.get("store") if result_path and result_hash and not result_store: try: media_path = ( Path(result_path) if not isinstance(result_path, Path) else result_path ) if media_path.exists() and media_path.is_file(): debug( f"[add-file] Using path+hash from directory scan: {media_path}" ) pipe_obj.path = str(media_path) return media_path, str(result_hash) except Exception as exc: debug(f"[add-file] Failed to use directory scan result: {exc}") # PRIORITY 1b: Try hash+store from result dict (most reliable for @N selections) if isinstance(result, dict): result_hash = result.get("hash") result_store = result.get("store") if result_hash and result_store: debug( f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}" ) try: store = Store(config) if result_store in store.list_backends(): backend = store[result_store] media_path = backend.get_file(result_hash) if isinstance(media_path, Path) and media_path.exists(): pipe_obj.path = str(media_path) return media_path, str(result_hash) except Exception as exc: debug(f"[add-file] Failed to retrieve via hash+store: {exc}") # PRIORITY 2: Try explicit path argument if path_arg: media_path = Path(path_arg) pipe_obj.path = str(media_path) debug(f"[add-file] Using explicit path argument: {media_path}") return media_path, None # PRIORITY 3: Try from pipe_obj.path (check file first before URL) pipe_path = getattr(pipe_obj, "path", None) if pipe_path: pipe_path_str = str(pipe_path) debug(f"Resolved pipe_path: {pipe_path_str}") if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:", "hifi:", "hydrus:")): log( "add-file ingests local files only. 
Use download-file first.", file=sys.stderr, ) return None, None return Path(pipe_path_str), None # Try from result (if it's a string path or URL) if isinstance(result, str): debug(f"Checking result string: {result}") # Check if result is a URL before treating as file path if result.lower().startswith(("http://", "https://", "magnet:", "torrent:", "hifi:", "hydrus:")): log( "add-file ingests local files only. Use download-file first.", file=sys.stderr, ) return None, None media_path = Path(result) pipe_obj.path = str(media_path) return media_path, None # Try from result if it's a list (pipeline emits multiple results) if isinstance(result, list) and result: first_item = result[0] # If the first item is a string, it's either a URL or a file path if isinstance(first_item, str): debug(f"Checking result list[0]: {first_item}") if first_item.lower().startswith(("http://", "https://", "magnet:", "torrent:", "hifi:", "hydrus:")): log( "add-file ingests local files only. Use download-file first.", file=sys.stderr, ) return None, None media_path = Path(first_item) pipe_obj.path = str(media_path) return media_path, None # If the first item is a dict, interpret it as a PipeObject-style result if isinstance(first_item, dict): # Look for path or path-like keys path_candidate = ( first_item.get("path") or first_item.get("filepath") or first_item.get("file") ) # If the dict includes a 'paths' list (multi-part/section download), prefer the first file paths_val = first_item.get("paths") if not path_candidate and isinstance(paths_val, (list, tuple)) and paths_val: path_candidate = paths_val[0] if path_candidate: debug(f"Resolved path from result dict: {path_candidate}") try: media_path = Path(path_candidate) pipe_obj.path = str(media_path) return media_path, first_item.get("hash") except Exception: return None, first_item.get("hash") # If first item is a PipeObject object try: # models.PipeObject is an actual class; check attribute presence from SYS import models as _models if 
isinstance(first_item, _models.PipeObject): path_candidate = getattr(first_item, "path", None) if path_candidate: debug(f"Resolved path from PipeObject: {path_candidate}") media_path = Path(path_candidate) pipe_obj.path = str(media_path) return media_path, getattr(first_item, "hash", None) except Exception: pass debug( f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}" ) log("File path could not be resolved") return None, None @staticmethod def _scan_directory_for_files(directory: Path) -> List[Dict[str, Any]]: """Scan a directory for supported media files and return list of file info dicts. Each dict contains: - path: Path object - name: filename - hash: sha256 hash - size: file size in bytes - ext: file extension """ if not directory.exists() or not directory.is_dir(): return [] files_info: List[Dict[str, Any]] = [] try: for item in directory.iterdir(): if not item.is_file(): continue ext = item.suffix.lower() if ext not in SUPPORTED_MEDIA_EXTENSIONS: continue # Compute hash try: file_hash = sha256_file(item) except Exception as exc: debug(f"Failed to hash {item}: {exc}") continue # Get file size try: size = item.stat().st_size except Exception: size = 0 files_info.append( { "path": item, "name": item.name, "hash": file_hash, "size": size, "ext": ext, } ) except Exception as exc: debug(f"Error scanning directory {directory}: {exc}") return files_info @staticmethod def _validate_source(media_path: Optional[Path]) -> bool: """Validate that the source file exists and is supported.""" if media_path is None: return False target_str = str(media_path) # add-file does not accept URL inputs. 
if target_str.lower().startswith(("http://", "https://", "magnet:", "torrent:", "hifi:", "hydrus:")): log("add-file ingests local files only.", file=sys.stderr) return False if not media_path.exists() or not media_path.is_file(): log(f"File not found: {media_path}") return False # Validate file type file_extension = media_path.suffix.lower() if file_extension not in SUPPORTED_MEDIA_EXTENSIONS: log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr) return False return True @staticmethod def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]: from SYS.metadata import normalize_urls # Prefer explicit PipeObject.url if present urls: List[str] = [] try: urls = normalize_urls(getattr(pipe_obj, "url", None)) except Exception: urls = [] # Then check extra.url if not urls: try: if isinstance(pipe_obj.extra, dict): urls = normalize_urls(pipe_obj.extra.get("url")) except Exception: pass # Then check result dict if not urls and isinstance(result, dict): urls = normalize_urls(result.get("url")) # Finally, try extractor helper if not urls: urls = normalize_urls(extract_url_from_result(result)) return urls @staticmethod def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]: try: rels = pipe_obj.get_relationships() if rels: return rels except Exception: pass if isinstance(result, dict) and result.get("relationships"): return result.get("relationships") try: return extract_relationships(result) except Exception: return None @staticmethod def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]: def _parse_duration(value: Any) -> Optional[float]: if value is None: return None if isinstance(value, (int, float)): return float(value) if value > 0 else None if isinstance(value, str): s = value.strip() if not s: return None try: candidate = float(s) return candidate if candidate > 0 else None except ValueError: pass if ":" in s: parts = [p.strip() for p in s.split(":") if p.strip()] if len(parts) in {2, 
3} and all(p.isdigit() for p in parts): nums = [int(p) for p in parts] if len(nums) == 2: minutes, seconds = nums return float(minutes * 60 + seconds) hours, minutes, seconds = nums return float(hours * 3600 + minutes * 60 + seconds) return None parsed = _parse_duration(getattr(pipe_obj, "duration", None)) if parsed is not None: return parsed try: return _parse_duration(extract_duration(result)) except Exception: return None @staticmethod def _get_note_text(result: Any, pipe_obj: models.PipeObject, note_name: str) -> Optional[str]: """Extract a named note text from a piped item. Supports: - pipe_obj.extra["notes"][note_name] - result["notes"][note_name] for dict results - pipe_obj.extra[note_name] / result[note_name] as fallback """ def _normalize(val: Any) -> Optional[str]: if val is None: return None if isinstance(val, bytes): try: val = val.decode("utf-8", errors="ignore") except Exception: val = str(val) if isinstance(val, str): text = val.strip() return text if text else None try: text = str(val).strip() return text if text else None except Exception: return None note_key = str(note_name or "").strip() if not note_key: return None # Prefer notes dict on PipeObject.extra (common for cmdlet-emitted dicts) try: if isinstance(pipe_obj.extra, dict): notes_val = pipe_obj.extra.get("notes") if isinstance(notes_val, dict) and note_key in notes_val: return _normalize(notes_val.get(note_key)) if note_key in pipe_obj.extra: return _normalize(pipe_obj.extra.get(note_key)) except Exception: pass # Fallback to raw result dict if isinstance(result, dict): try: notes_val = result.get("notes") if isinstance(notes_val, dict) and note_key in notes_val: return _normalize(notes_val.get(note_key)) if note_key in result: return _normalize(result.get(note_key)) except Exception: pass return None @staticmethod def _update_pipe_object_destination( pipe_obj: models.PipeObject, *, hash_value: str, store: str, path: Optional[str], tag: List[str], title: Optional[str], extra_updates: 
Optional[Dict[str, Any]] = None, ) -> None: pipe_obj.hash = hash_value pipe_obj.store = store pipe_obj.path = path pipe_obj.tag = tag if title: pipe_obj.title = title if isinstance(pipe_obj.extra, dict): pipe_obj.extra.update(extra_updates or {}) else: pipe_obj.extra = dict(extra_updates or {}) @staticmethod def _emit_pipe_object(pipe_obj: models.PipeObject) -> None: from SYS.result_table import format_result log(format_result(pipe_obj, title="Result"), file=sys.stderr) ctx.emit(pipe_obj.to_dict()) ctx.set_current_stage_table(None) @staticmethod def _emit_storage_result( payload: Dict[str, Any], *, overlay: bool = True, emit: bool = True ) -> None: """Emit a storage-style result payload. - Always emits the dict downstream (when in a pipeline). - If this is the last stage (or not in a pipeline), prints a search-file-like table and sets an overlay table/items for @N selection. """ # Emit for downstream commands (no-op if not in a pipeline) if emit: ctx.emit(payload) stage_ctx = ctx.get_stage_context() is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) if not is_last or not overlay: return try: from SYS.result_table import ResultTable table = ResultTable("Result") table.add_result(payload) # Overlay so @1 refers to this add-file result without overwriting search history ctx.set_last_result_table_overlay(table, [payload], subject=payload) except Exception: # If table rendering fails, still keep @ selection items try: ctx.set_last_result_items_only([payload]) except Exception: pass @staticmethod def _try_emit_search_file_by_hash( *, store: str, hash_value: str, config: Dict[str, Any] ) -> Optional[List[Any]]: """Run search-file for a single hash so the final table/payload is consistent. Important: `add-file` is treated as an action command by the CLI, so the CLI only prints tables for it when a display overlay exists. 
After running search-file, this copies the resulting table into the display overlay (when this is the last stage) so the canonical store table is what the user sees and can select from. Returns the emitted search-file payload items on success, else None. """ try: from cmdlet.search_file import CMDLET as search_file_cmdlet args = ["-store", str(store), f"hash:{str(hash_value)}"] # Run search-file under a temporary stage context so its ctx.emit() calls # don't interfere with the outer add-file pipeline stage. prev_ctx = ctx.get_stage_context() temp_ctx = ctx.PipelineStageContext( stage_index=0, total_stages=1, pipe_index=0, worker_id=getattr(prev_ctx, "worker_id", None), ) ctx.set_stage_context(temp_ctx) try: code = search_file_cmdlet.run(None, args, config) emitted_items = list(getattr(temp_ctx, "emits", []) or []) finally: ctx.set_stage_context(prev_ctx) if code != 0: return None # Promote the search-file result to a display overlay so the CLI prints it # for action commands like add-file. stage_ctx = ctx.get_stage_context() is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) if is_last: try: table = ctx.get_last_result_table() items = ctx.get_last_result_items() if table is not None and items: ctx.set_last_result_table_overlay( table, items, subject={ "store": store, "hash": hash_value } ) except Exception: pass return emitted_items except Exception as exc: debug( f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}" ) return None @staticmethod def _prepare_metadata( result: Any, media_path: Path, pipe_obj: models.PipeObject, config: Dict[str, Any], ) -> Tuple[List[str], List[str], Optional[str], Optional[str]]: """ Prepare tags, url, and title for the file. 
Returns (tags, url, preferred_title, file_hash) """ tags_from_result = list(pipe_obj.tag or []) if not tags_from_result: try: tags_from_result = list(extract_tag_from_result(result) or []) except Exception: tags_from_result = [] url_from_result = Add_File._get_url(result, pipe_obj) preferred_title = pipe_obj.title if not preferred_title: for t in tags_from_result: if str(t).strip().lower().startswith("title:"): candidate = t.split(":", 1)[1].strip().replace("_", " ").strip() if candidate: preferred_title = candidate break if not preferred_title: preferred_title = extract_title_from_result(result) if preferred_title: preferred_title = preferred_title.replace("_", " ").strip() store = getattr(pipe_obj, "store", None) _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle( media_path, store, config ) def normalize_title_tag(tag: str) -> str: if str(tag).strip().lower().startswith("title:"): parts = tag.split(":", 1) if len(parts) == 2: value = parts[1].replace("_", " ").strip() return f"title:{value}" return tag tags_from_result_no_title = [ t for t in tags_from_result if not str(t).strip().lower().startswith("title:") ] sidecar_tags = collapse_namespace_tag( [normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last" ) sidecar_tags_filtered = [ t for t in sidecar_tags if not str(t).strip().lower().startswith("title:") ] merged_tags = merge_sequences( tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True ) if preferred_title: merged_tags.append(f"title:{preferred_title}") merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False) file_hash = Add_File._resolve_file_hash( result, media_path, pipe_obj, sidecar_hash ) # Relationships must not be stored as tags. # If relationship tags exist (legacy sidecar format), capture them into PipeObject.relationships # and strip them from the final tag list. 
# NOTE(review): physical line structure above is mangled (multiple methods
# per physical line), so only comments were added; code is byte-identical.
# The span covers: _update_pipe_object_destination (merges extra_updates into
# pipe_obj.extra), _emit_pipe_object (prints a formatted result table, emits
# the dict, clears the stage table), _emit_storage_result (emits downstream
# and, on the last stage, installs an overlay table so @N selection works),
# _try_emit_search_file_by_hash (re-runs search-file under a temporary
# PipelineStageContext so the canonical store table is what gets displayed),
# and the bulk of _prepare_metadata (tag/url/title merging between the piped
# result and sidecar data, with title: namespace tags collapsed to one).
# NOTE(review): byte-identical span, comments only (line structure is
# mangled). Covers: the relationship-tag tail of _prepare_metadata (legacy
# "relationship:" tags are parsed into a {king: [...], alt: [...]} payload on
# pipe_obj.relationships — alt hashes are deduped, must be 64 chars, and must
# differ from the king — then stripped from the final tag list);
# _handle_local_export (copy-based export: sanitized title-derived filename,
# sidecar copy, sidecar/metadata rewrite, emit, and deletion only when
# explicitly requested); _handle_provider_upload (uploads through a
# registered file provider and records the hoster URL into extra["url"]);
# and the opening of _handle_storage_backend.
relationship_tags = [ t for t in merged_tags if isinstance(t, str) and t.strip().lower().startswith("relationship:") ] if relationship_tags: try: if (not isinstance(getattr(pipe_obj, "relationships", None), dict) or not pipe_obj.relationships): king: Optional[str] = None alts: List[str] = [] for rel_tag in relationship_tags: k, a = Add_File._parse_relationship_tag_king_alts(rel_tag) if k and not king: king = k if a: alts.extend(a) if king: seen_alt: set[str] = set() alts = [ h for h in alts if h and h != king and len(h) == 64 and not (h in seen_alt or seen_alt.add(h)) ] payload: Dict[str, Any] = { "king": [king] } if alts: payload["alt"] = alts pipe_obj.relationships = payload except Exception: pass merged_tags = [ t for t in merged_tags if not (isinstance(t, str) and t.strip().lower().startswith("relationship:")) ] # Persist back to PipeObject pipe_obj.tag = merged_tags if preferred_title and not pipe_obj.title: pipe_obj.title = preferred_title if file_hash and not pipe_obj.hash: pipe_obj.hash = file_hash if isinstance(pipe_obj.extra, dict): pipe_obj.extra.setdefault("url", merged_url) return merged_tags, merged_url, preferred_title, file_hash @staticmethod def _handle_local_export( media_path: Path, location: str, pipe_obj: models.PipeObject, config: Dict[str, Any], delete_after: bool, ) -> int: """Handle exporting to a specific local path (Copy).""" try: destination_root = Path(location) except Exception as exc: log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr) return 1 log(f"Exporting to local path: {destination_root}", file=sys.stderr) result = None tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config) # Determine Filename (Title-based) title_value = title if not title_value: # Try to find title in tags title_tag = next( (t for t in tags if str(t).strip().lower().startswith("title:")), None ) if title_tag: title_value = title_tag.split(":", 1)[1].strip() if not title_value: title_value = 
media_path.stem.replace("_", " ").strip() safe_title = "".join( c for c in title_value if c.isalnum() or c in " ._-()[]{}'`" ).strip() base_name = safe_title or media_path.stem new_name = base_name + media_path.suffix destination_root.mkdir(parents=True, exist_ok=True) target_path = destination_root / new_name if target_path.exists(): target_path = unique_path(target_path) # COPY Operation (Safe Export) try: shutil.copy2(str(media_path), target_path) except Exception as exc: log(f"❌ Failed to export file: {exc}", file=sys.stderr) return 1 # Copy Sidecars Add_File._copy_sidecars(media_path, target_path) # Ensure hash for exported copy if not f_hash: try: f_hash = sha256_file(target_path) except Exception: f_hash = None # Write Metadata Sidecars (since it's an export) relationships = Add_File._get_relationships(result, pipe_obj) try: write_sidecar(target_path, tags, url, f_hash) write_metadata( target_path, hash_value=f_hash, url=url, relationships=relationships or [] ) except Exception: pass # Update PipeObject and emit extra_updates = { "url": url, "export_path": str(destination_root), } if relationships: extra_updates["relationships"] = relationships chosen_title = title or title_value or pipe_obj.title or target_path.name Add_File._update_pipe_object_destination( pipe_obj, hash_value=f_hash or "unknown", store="local", path=str(target_path), tag=tags, title=chosen_title, extra_updates=extra_updates, ) Add_File._emit_pipe_object(pipe_obj) # Cleanup # Only delete if explicitly requested! Add_File._cleanup_after_success(media_path, delete_source=delete_after) return 0 @staticmethod def _handle_provider_upload( media_path: Path, provider_name: str, pipe_obj: models.PipeObject, config: Dict[str, Any], delete_after: bool, ) -> int: """Handle uploading to a file provider (e.g. 
0x0).""" from ProviderCore.registry import get_file_provider log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr) try: file_provider = get_file_provider(provider_name, config) if not file_provider: log(f"File provider '{provider_name}' not available", file=sys.stderr) return 1 hoster_url = file_provider.upload(str(media_path), pipe_obj=pipe_obj) log(f"File uploaded: {hoster_url}", file=sys.stderr) f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None) except Exception as exc: log(f"Upload failed: {exc}", file=sys.stderr) return 1 # Update PipeObject and emit extra_updates: Dict[str, Any] = { "provider": provider_name, "provider_url": hoster_url, } if isinstance(pipe_obj.extra, dict): # Also track hoster URL as a url for downstream steps existing_known = list(pipe_obj.extra.get("url") or []) if hoster_url and hoster_url not in existing_known: existing_known.append(hoster_url) extra_updates["url"] = existing_known file_path = pipe_obj.path or (str(media_path) if media_path else None) or "" Add_File._update_pipe_object_destination( pipe_obj, hash_value=f_hash or "unknown", store=provider_name or "provider", path=file_path, tag=pipe_obj.tag, title=pipe_obj.title or (media_path.name if media_path else None), extra_updates=extra_updates, ) Add_File._emit_pipe_object(pipe_obj) Add_File._cleanup_after_success(media_path, delete_source=delete_after) return 0 @staticmethod def _handle_storage_backend( result: Any, media_path: Path, backend_name: str, pipe_obj: models.PipeObject, config: Dict[str, Any], delete_after: bool, *, collect_payloads: Optional[List[Dict[str, Any]]] = None, collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None, defer_url_association: bool = False, pending_url_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None, suppress_last_stage_overlay: bool = False, auto_search_file: bool = True, ) -> int: """Handle uploading to a registered storage backend (e.g., 'test' folder store, 
# NOTE(review): byte-identical span, comments only (line structure is
# mangled). Middle section of _handle_storage_backend: temp sources produced
# by cmdlet:download-media are auto-deleted after a successful ingest (unless
# the target backend is "temp"); URLs already associated in a source store
# are merged into the destination's URL list; relationship pairs are
# collected for post-ingest persistence; "relationship:" tags are stripped
# (never stored as tags); FlorenceVision auto-tagging runs before upload
# (strict mode aborts with exit code 1); backend.add_file is called with the
# merged metadata; sub/chapters/caption notes are written via set_note when
# the backend supports it; and the payload's size is resolved from backend
# metadata with a stat() fallback.
try: from SYS.metadata import normalize_urls source_store = None source_hash = None if isinstance(result, dict): source_store = result.get("store") source_hash = result.get("hash") if not source_store: source_store = getattr(pipe_obj, "store", None) if not source_hash: source_hash = getattr(pipe_obj, "hash", None) if (not source_hash) and isinstance(pipe_obj.extra, dict): source_hash = pipe_obj.extra.get("hash") source_store = str(source_store or "").strip() source_hash = str(source_hash or "").strip().lower() if (source_store and source_hash and len(source_hash) == 64 and source_store.lower() != str(backend_name or "" ).strip().lower()): source_backend = None try: if source_store in store.list_backends(): source_backend = store[source_store] except Exception: source_backend = None if source_backend is not None: try: src_urls = normalize_urls( source_backend.get_url(source_hash) or [] ) except Exception: src_urls = [] try: dst_urls = normalize_urls(url or []) except Exception: dst_urls = [] merged: list[str] = [] seen: set[str] = set() for u in list(dst_urls or []) + list(src_urls or []): if not u: continue if u in seen: continue seen.add(u) merged.append(u) url = merged except Exception: pass # Collect relationship pairs for post-ingest DB/API persistence. if collect_relationship_pairs is not None: rels = Add_File._get_relationships(result, pipe_obj) if isinstance(rels, dict) and rels: king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels) if king_hash and alt_hashes: bucket = collect_relationship_pairs.setdefault( str(backend_name), set() ) for alt_hash in alt_hashes: if alt_hash and alt_hash != king_hash: bucket.add((alt_hash, king_hash)) # Relationships must never be stored as tags. if isinstance(tags, list) and tags: tags = [ t for t in tags if not ( isinstance(t, str) and t.strip().lower().startswith("relationship:") ) ] # Auto-tag (best-effort) BEFORE uploading so tags land with the stored file. 
try: tags = _maybe_apply_florencevision_tags(media_path, list(tags or []), config, pipe_obj=pipe_obj) pipe_obj.tag = list(tags or []) except Exception as exc: # strict mode raises from helper; treat here as a hard failure log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr) return 1 debug( f"[add-file] Storing into backend '{backend_name}' path='{media_path}' title='{title}'" ) # Call backend's add_file with full metadata # Backend returns hash as identifier file_identifier = backend.add_file( media_path, title=title, tag=tags, url=[] if (defer_url_association and url) else url, ) debug( f"[add-file] backend.add_file returned identifier {file_identifier} (len={len(str(file_identifier)) if file_identifier is not None else 'None'})" ) ##log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr) stored_path: Optional[str] = None # IMPORTANT: avoid calling get_file() for remote backends. # For Hydrus, get_file() returns a browser URL (often with an access key) and should # only be invoked by explicit user commands (e.g. get-file). try: if type(backend).__name__ == "Folder": maybe_path = backend.get_file(file_identifier) if isinstance(maybe_path, Path): stored_path = str(maybe_path) elif isinstance(maybe_path, str) and maybe_path: stored_path = maybe_path except Exception: stored_path = None Add_File._update_pipe_object_destination( pipe_obj, hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown", store=backend_name, path=stored_path, tag=tags, title=title or pipe_obj.title or media_path.name, extra_updates={ "url": url, }, ) # Emit a search-file-like payload for consistent tables and natural piping. # Keep hash/store for downstream commands (get-tag, get-file, etc.). resolved_hash = ( file_identifier if len(file_identifier) == 64 else (f_hash or file_identifier or "unknown") ) # If we have url(s), ensure they get associated with the destination file. 
# This mirrors `add-url` behavior but avoids emitting extra pipeline noise. if url: if defer_url_association and pending_url_associations is not None: try: pending_url_associations.setdefault( str(backend_name), [] ).append((str(resolved_hash), list(url))) except Exception: pass else: try: backend.add_url(resolved_hash, list(url)) except Exception: pass # If a subtitle note was provided upstream (e.g., download-media writes notes.sub), # persist it automatically like add-note would. sub_note = Add_File._get_note_text(result, pipe_obj, "sub") if sub_note: try: setter = getattr(backend, "set_note", None) if callable(setter): debug( f"[add-file] Writing sub note (len={len(str(sub_note))}) to {backend_name}:{resolved_hash}" ) setter(resolved_hash, "sub", sub_note) except Exception as exc: debug(f"[add-file] sub note write failed: {exc}") chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters") if chapters_note: try: setter = getattr(backend, "set_note", None) if callable(setter): debug( f"[add-file] Writing chapters note (len={len(str(chapters_note))}) to {backend_name}:{resolved_hash}" ) setter(resolved_hash, "chapters", chapters_note) except Exception as exc: debug(f"[add-file] chapters note write failed: {exc}") caption_note = Add_File._get_note_text(result, pipe_obj, "caption") if caption_note: try: setter = getattr(backend, "set_note", None) if callable(setter): debug( f"[add-file] Writing caption note (len={len(str(caption_note))}) to {backend_name}:{resolved_hash}" ) setter(resolved_hash, "caption", caption_note) except Exception as exc: debug(f"[add-file] caption note write failed: {exc}") meta: Dict[str, Any] = {} try: meta = backend.get_metadata(resolved_hash) or {} except Exception: meta = {} # Determine size bytes size_bytes: Optional[int] = None for key in ("size_bytes", "size", "filesize", "file_size"): try: raw_size = meta.get(key) if raw_size is not None: size_bytes = int(raw_size) break except Exception: pass if size_bytes is None: try: 
size_bytes = int(media_path.stat().st_size) except Exception: size_bytes = None # Determine title/ext title_out = ( meta.get("title") or title or pipe_obj.title or media_path.stem or media_path.name ) ext_out = meta.get("ext") or media_path.suffix.lstrip(".") payload: Dict[ str, Any ] = { "title": title_out, "ext": str(ext_out or ""), "size_bytes": size_bytes, "store": backend_name, "hash": resolved_hash, # Preserve extra fields for downstream commands (kept hidden by default table rules) "path": stored_path, "tag": list(tags or []), "url": list(url or []), } if collect_payloads is not None: try: collect_payloads.append(payload) except Exception: pass # Keep the add-file 1-row summary overlay (when last stage), then emit the # canonical search-file payload/table for piping/selection consistency. if auto_search_file and resolved_hash and resolved_hash != "unknown": # Show the add-file summary (overlay only) but let search-file provide the downstream payload. Add_File._emit_storage_result( payload, overlay=not suppress_last_stage_overlay, emit=False ) refreshed_items = Add_File._try_emit_search_file_by_hash( store=backend_name, hash_value=resolved_hash, config=config, ) if refreshed_items: # Re-emit the canonical store rows so downstream stages receive them. for emitted in refreshed_items: ctx.emit(emitted) else: # Fall back to emitting the add-file payload so downstream stages still receive an item. 
# NOTE(review): byte-identical span, comments only (line structure is
# mangled). Covers: the tail of _handle_storage_backend (fallback payload
# emit, cleanup, and the catch-all error handler that logs a traceback and
# returns 1); _apply_pending_url_associations (deferred URL association,
# grouped per backend with per-hash dedup, preferring a bulk add_url_bulk and
# falling back to per-item add_url); _load_sidecar_bundle (for the "local"
# store it consults the folder DB first, otherwise it reads a sidecar file);
# _resolve_file_hash (pipe hash -> sidecar hash -> result["hash"] ->
# sha256_file); _resolve_media_kind (extension -> audio/video/image/document/
# other); and the start of _persist_local_metadata.
ctx.emit(payload) else: Add_File._emit_storage_result( payload, overlay=not suppress_last_stage_overlay, emit=True ) Add_File._cleanup_after_success( media_path, delete_source=delete_after_effective ) return 0 except Exception as exc: log( f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr ) import traceback traceback.print_exc(file=sys.stderr) return 1 # --- Helpers --- @staticmethod def _apply_pending_url_associations( pending: Dict[str, List[tuple[str, List[str]]]], config: Dict[str, Any] ) -> None: """Apply deferred URL associations in bulk, grouped per backend.""" try: store = Store(config) except Exception: return for backend_name, pairs in (pending or {}).items(): if not pairs: continue try: backend = store[backend_name] except Exception: continue # Merge URLs per hash and de-duplicate. merged: Dict[str, List[str]] = {} for file_hash, urls in pairs: h = str(file_hash or "").strip().lower() if len(h) != 64: continue url_list: List[str] = [] try: for u in urls or []: s = str(u or "").strip() if s: url_list.append(s) except Exception: url_list = [] if not url_list: continue bucket = merged.setdefault(h, []) seen = set(bucket) for u in url_list: if u in seen: continue seen.add(u) bucket.append(u) items: List[tuple[str, List[str]]] = [(h, u) for h, u in merged.items() if u] if not items: continue bulk = getattr(backend, "add_url_bulk", None) if callable(bulk): try: bulk(items) continue except Exception: pass single = getattr(backend, "add_url", None) if callable(single): for h, u in items: try: single(h, u) except Exception: continue @staticmethod def _load_sidecar_bundle( media_path: Path, store: Optional[str], config: Dict[str, Any], ) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]: """Load sidecar metadata.""" if store and store.lower() == "local": try: from SYS.config import get_local_storage_path db_root = get_local_storage_path(config) if db_root: with API_folder_store(Path(db_root)) as db: file_hash = 
db.get_file_hash(media_path) if file_hash: tags = db.get_tags(file_hash) or [] metadata = db.get_metadata(file_hash) or {} url = metadata.get("url") or [] f_hash = metadata.get("hash") or file_hash if tags or url or f_hash: return None, f_hash, tags, url except Exception: pass try: sidecar_path = find_sidecar(media_path) if sidecar_path and sidecar_path.exists(): h, t, u = read_sidecar(sidecar_path) return sidecar_path, h, t or [], u or [] except Exception: pass return None, None, [], [] @staticmethod def _resolve_file_hash( result: Any, media_path: Path, pipe_obj: models.PipeObject, fallback_hash: Optional[str], ) -> Optional[str]: if pipe_obj.hash and pipe_obj.hash != "unknown": return pipe_obj.hash if fallback_hash: return fallback_hash if isinstance(result, dict): candidate = result.get("hash") if candidate: return str(candidate) try: return sha256_file(media_path) except Exception: return None @staticmethod def _resolve_media_kind(path: Path) -> str: # Reusing logic suffix = path.suffix.lower() if suffix in {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka"}: return "audio" if suffix in { ".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv", }: return "video" if suffix in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}: return "image" if suffix in {".pdf", ".epub", ".txt", ".mobi", ".azw3", ".cbz", ".cbr", ".doc", ".docx"}: return "document" return "other" @staticmethod def _persist_local_metadata( library_root: Path, dest_path: Path, tags: List[str], url: List[str], f_hash: Optional[str], relationships: Any, duration: Any, media_kind: str, ): payload = { "hash": f_hash, "url": url, "relationships": relationships or [], "duration": duration, "size": None, "ext": dest_path.suffix.lower(), "media_type": media_kind, "media_kind": media_kind, } try: payload["size"] = dest_path.stat().st_size except OSError: payload["size"] = None with API_folder_store(library_root) as db: try: 
# NOTE(review): byte-identical span, comments only (line structure is
# mangled). Covers: the tail of _persist_local_metadata (save_file_info under
# the folder DB context manager, logging on failure); _copy_sidecars
# (duplicates .json/.tag/.metadata/.notes companions next to the target);
# _cleanup_after_success — note the inner `if delete_source or is_temp_merge`
# is always true after the early `return` on `not delete_source` above, so
# the is_temp_merge branch is dead code; worth simplifying — TODO confirm
# with the author before changing; _cleanup_sidecar_files; and the
# module-level CMDLET registration instance.
db.save_file_info(dest_path, payload, tags) except Exception as exc: log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr) @staticmethod def _copy_sidecars(source_path: Path, target_path: Path): possible_sidecars = [ source_path.with_suffix(source_path.suffix + ".json"), source_path.with_name(source_path.name + ".tag"), source_path.with_name(source_path.name + ".metadata"), source_path.with_name(source_path.name + ".notes"), ] for sc in possible_sidecars: try: if sc.exists(): suffix_part = sc.name.replace(source_path.name, "", 1) dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}" dest_sidecar.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(str(sc), dest_sidecar) except Exception: pass @staticmethod def _cleanup_after_success(media_path: Path, delete_source: bool): if not delete_source: return # Check if it's a temp file that should always be deleted is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name if delete_source or is_temp_merge: ##log(f"Deleting source file...", file=sys.stderr) try: media_path.unlink() Add_File._cleanup_sidecar_files(media_path) except Exception as exc: log(f"⚠️ Could not delete file: {exc}", file=sys.stderr) @staticmethod def _cleanup_sidecar_files(media_path: Path): targets = [ media_path.parent / (media_path.name + ".metadata"), media_path.parent / (media_path.name + ".notes"), media_path.parent / (media_path.name + ".tag"), ] for target in targets: try: if target.exists(): target.unlink() except Exception: pass # Create and register the cmdlet CMDLET = Add_File()