2025-11-25 20:09:33 -08:00
from __future__ import annotations
2026-01-02 02:28:59 -08:00
from typing import Any , Dict , Optional , Sequence , Tuple , List
2025-11-25 20:09:33 -08:00
from pathlib import Path
import sys
2025-12-11 12:47:30 -08:00
import shutil
2026-01-04 02:23:50 -08:00
import tempfile
2025-12-16 01:45:01 -08:00
import re
2026-01-24 01:38:12 -08:00
from urllib . parse import urlparse
2025-11-25 20:09:33 -08:00
2025-12-29 23:28:15 -08:00
from SYS import models
from SYS import pipeline as ctx
2026-05-14 17:15:13 -07:00
from SYS . logger import log , debug , debug_panel
2026-03-25 22:39:30 -07:00
from SYS . payload_builders import build_table_result_payload
2025-12-22 02:11:53 -08:00
from SYS . pipeline_progress import PipelineProgress
2026-03-25 22:39:30 -07:00
from SYS . result_publication import overlay_existing_result_table , publish_result_table
2026-04-28 22:20:54 -07:00
from SYS . rich_display import show_available_plugins_panel , show_plugin_config_panel
2025-12-14 00:53:52 -08:00
from SYS . utils_constant import ALL_SUPPORTED_EXTENSIONS
2026-05-23 13:49:47 -07:00
from PluginCore . backend_registry import BackendRegistry
2026-01-24 01:38:12 -08:00
from API . HTTP import _download_direct_file
2026-05-04 18:41:01 -07:00
from . . import _shared as sh
2025-12-16 23:23:43 -08:00
# Short local names for helpers exposed by the shared cmdlet module (`sh`),
# so the rest of this file can reference them without the `sh.` prefix.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag
2026-01-12 04:05:52 -08:00
# More aliases from the shared cmdlet module (`sh`): path/target resolution,
# pipeline preview and field access helpers.
resolve_target_dir = sh.resolve_target_dir
resolve_media_kind_by_extension = sh.resolve_media_kind_by_extension
coerce_to_path = sh.coerce_to_path
build_pipeline_preview = sh.build_pipeline_preview
get_field = sh.get_field
2026-01-24 01:38:12 -08:00
from SYS . utils import sha256_file , unique_path , sanitize_filename
2025-11-25 20:09:33 -08:00
2025-12-14 00:53:52 -08:00
# Canonical supported filetypes for all stores/cmdlets.
# This is a plain alias of SYS.utils_constant.ALL_SUPPORTED_EXTENSIONS; add-file
# checks lower-cased file suffixes against it before ingesting into a store.
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
2025-11-25 20:09:33 -08:00
2026-05-03 17:29:32 -07:00
class _CommandDependencies :
2026-05-23 13:49:47 -07:00
""" Command-scope cache for the backend registry and plugin instances. """
2026-05-03 17:29:32 -07:00
def __init__ ( self , config : Dict [ str , Any ] ) - > None :
self . config = config
2026-05-23 13:49:47 -07:00
self . _backend_registry : Optional [ BackendRegistry ] = None
2026-05-03 17:29:32 -07:00
self . _plugins : Dict [ str , Any ] = { }
2026-05-23 13:49:47 -07:00
def get_backend_registry ( self ) - > Optional [ BackendRegistry ] :
""" Lazily initialize and return the command-scope backend registry. """
if self . _backend_registry is None :
2026-05-03 17:29:32 -07:00
try :
2026-05-23 13:49:47 -07:00
self . _backend_registry = BackendRegistry ( self . config )
2026-05-03 17:29:32 -07:00
except Exception :
2026-05-23 13:49:47 -07:00
self . _backend_registry = None
return self . _backend_registry
2026-05-03 17:29:32 -07:00
def get_plugin ( self , name : str ) - > Optional [ Any ] :
""" Cached plugin lookup by name. """
2026-05-21 16:19:17 -07:00
from PluginCore . registry import get_plugin
2026-05-03 17:29:32 -07:00
norm_name = str ( name or " " ) . strip ( ) . lower ( )
if not norm_name :
return None
if norm_name in self . _plugins :
return self . _plugins [ norm_name ]
plugin = get_plugin ( norm_name , self . config )
self . _plugins [ norm_name ] = plugin
return plugin
def get_plugin_with_capability ( self , name : str , capability : str ) - > Optional [ Any ] :
""" Cached plugin lookup with capability check. """
2026-05-21 16:19:17 -07:00
from PluginCore . registry import get_plugin_with_capability
2026-05-03 17:29:32 -07:00
norm_name = str ( name or " " ) . strip ( ) . lower ( )
if not norm_name :
return None
cache_key = f " { norm_name } # { capability } "
if cache_key in self . _plugins :
return self . _plugins [ cache_key ]
plugin = get_plugin_with_capability ( norm_name , capability , self . config )
self . _plugins [ cache_key ] = plugin
return plugin
2026-02-25 17:35:38 -08:00
# URL/URI prefixes: the network schemes plus the application-specific
# "magnet:", "torrent:", "tidal:" and "hydrus:" forms.
# NOTE(review): not referenced in the visible part of this module; presumably
# used further down to tell remote sources from local paths — confirm.
_REMOTE_URL_PREFIXES: tuple[str, ...] = (
    "http://", "https://", "ftp://", "ftps://", "magnet:", "torrent:", "tidal:", "hydrus:",
)
2026-01-02 02:28:59 -08:00
2025-12-30 23:19:02 -08:00
def _maybe_apply_florencevision_tags (
media_path : Path ,
tags : List [ str ] ,
config : Dict [ str , Any ] ,
pipe_obj : Optional [ models . PipeObject ] = None ,
) - > List [ str ] :
""" Optionally auto-tag images using the FlorenceVision tool.
Controlled via config:
[tool=florencevision]
enabled=true
strict=false
If strict=false (default), failures log a warning and return the original tags.
If strict=true, failures raise to abort the ingest.
"""
2026-01-02 02:28:59 -08:00
strict = False
2025-12-30 23:19:02 -08:00
try :
tool_block = ( config or { } ) . get ( " tool " )
fv_block = tool_block . get ( " florencevision " ) if isinstance ( tool_block , dict ) else None
enabled = False
if isinstance ( fv_block , dict ) :
enabled = bool ( fv_block . get ( " enabled " ) )
strict = bool ( fv_block . get ( " strict " ) )
if not enabled :
return tags
from tool . florencevision import FlorenceVisionTool
# Special-case: if this file was produced by the `screen-shot` cmdlet,
# OCR is more useful than caption/detection for tagging screenshots.
cfg_for_tool : Dict [ str , Any ] = config
try :
action = str ( getattr ( pipe_obj , " action " , " " ) or " " ) if pipe_obj is not None else " "
cmdlet_name = " "
if action . lower ( ) . startswith ( " cmdlet: " ) :
cmdlet_name = action . split ( " : " , 1 ) [ 1 ] . strip ( ) . lower ( )
if cmdlet_name in { " screen-shot " , " screen_shot " , " screenshot " } :
tool_block2 = dict ( ( config or { } ) . get ( " tool " ) or { } )
fv_block2 = dict ( tool_block2 . get ( " florencevision " ) or { } )
fv_block2 [ " task " ] = " ocr "
tool_block2 [ " florencevision " ] = fv_block2
cfg_for_tool = dict ( config or { } )
cfg_for_tool [ " tool " ] = tool_block2
except Exception :
cfg_for_tool = config
fv = FlorenceVisionTool ( cfg_for_tool )
if not fv . enabled ( ) or not fv . applicable_path ( media_path ) :
return tags
auto_tags = fv . tags_for_file ( media_path )
# Capture caption (if any) into PipeObject notes for downstream persistence.
try :
caption_text = getattr ( fv , " last_caption " , None )
if caption_text and pipe_obj is not None :
if not isinstance ( pipe_obj . extra , dict ) :
pipe_obj . extra = { }
notes = pipe_obj . extra . get ( " notes " )
if not isinstance ( notes , dict ) :
notes = { }
notes . setdefault ( " caption " , caption_text )
pipe_obj . extra [ " notes " ] = notes
except Exception :
pass
if not auto_tags :
return tags
merged = merge_sequences ( tags or [ ] , auto_tags , case_sensitive = False )
debug ( f " [add-file] FlorenceVision added { len ( auto_tags ) } tag(s) " )
return merged
except Exception as exc :
# Decide strictness from config if we couldn't read it above.
strict2 = False
try :
tool_block = ( config or { } ) . get ( " tool " )
fv_block = tool_block . get ( " florencevision " ) if isinstance ( tool_block , dict ) else None
strict2 = bool ( fv_block . get ( " strict " ) ) if isinstance ( fv_block , dict ) else False
except Exception :
strict2 = False
if strict or strict2 :
raise
log ( f " [add-file] Warning: FlorenceVision tagging failed: { exc } " , file = sys . stderr )
return tags
2025-12-11 12:47:30 -08:00
class Add_File ( Cmdlet ) :
""" Add file into the DB """
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
def __init__(self) -> None:
    """Declare the add-file cmdlet's arguments and help text, then register it."""
    arguments = [
        CmdletArg(
            name="source",
            type="string",
            required=False,
            description="Local file or directory path to ingest or scan.",
        ),
        SharedArgs.INSTANCE,
        SharedArgs.URL,
        SharedArgs.PLUGIN,
        CmdletArg(
            name="delete",
            type="flag",
            required=False,
            description="Delete file after successful upload",
            alias="del",
        ),
    ]

    detail_lines = [
        "Note: add-file ingests local files. To fetch remote sources, use download-file and pipe into add-file.",
        "- Store options (use -instance without -plugin):",
        "hydrus: Upload to Hydrus database with metadata tagging",
        "- Plugin options (use -plugin):",
        "local: Copy file to a configured local destination or direct path via -instance",
        "0x0: Upload to 0x0.st for temporary hosting",
        "file.io: Upload to file.io for temporary hosting",
        "internetarchive: Upload to archive.org (optional tag: ia:<identifier> to upload into an existing item)",
        "- Use a positional source path with -instance and -plugin to target a named provider config: add-file C:\\Media\\file.pdf -plugin ftp -instance archive",
    ]

    example_lines = [
        'download-file "https://themathesontrust.org/papers/christianity/alcock-alphabet1.pdf" | add-file -instance tutorial',
        '@1 | add-file -plugin local -instance C:\\Users\\Me\\Downloads',
        'add-file C:\\Media\\report.pdf -plugin ftp -instance archive',
    ]

    super().__init__(
        name="add-file",
        summary="Ingest a local media file to a configured store or plugin destination.",
        usage="add-file (<source> | <piped>) (-instance <store-name> | -plugin <plugin> [-instance <name|path>]) [-delete]",
        arg=arguments,
        detail=detail_lines,
        examples=example_lines,
        exec=self.run,
    )
    self.register()
2025-11-25 20:09:33 -08:00
2026-05-24 12:32:57 -07:00
@staticmethod
def _uses_legacy_path_flag ( args : Sequence [ str ] ) - > bool :
for token in args or [ ] :
lowered = str ( token or " " ) . strip ( ) . lower ( )
if lowered in { " -path " , " --path " , " -p " } :
return True
return False
@staticmethod
def _legacy_path_flag_message ( ) - > str :
return (
" add-file no longer supports -path. Pass the source file or directory as a positional argument, "
" and use -plugin local -instance <name|path> for local export. "
)
2025-12-11 12:47:30 -08:00
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Main execution entry point for add-file.

    Args:
        result: Piped input from the previous stage; a non-empty list is
            ingested item by item, anything else is treated as one item.
        args: Raw command-line tokens for this cmdlet.
        config: Application configuration mapping.

    Returns:
        0 when at least one item was ingested, when the directory selector
        table was shown, or when there is no input inside a pipeline stage.
        1 on a usage error, an unresolved destination, or when no item
        succeeded.
    """
    if Add_File._uses_legacy_path_flag(args):
        log(Add_File._legacy_path_flag_message(), file=sys.stderr)
        return 1

    parsed = parse_cmdlet_args(args, self)
    progress = PipelineProgress(ctx)

    # Initialize command-scope dependency context (caches Store/plugins).
    deps = _CommandDependencies(config)
    storage_registry = deps.get_backend_registry()

    source_arg = parsed.get("source")
    # -instance is read twice: as a store name (`location`) and as a plugin
    # instance; which reading applies is decided further down.
    location = parsed.get("instance")
    plugin_instance = parsed.get("instance")
    source_url_arg = parsed.get("url")
    plugin_name = parsed.get("plugin")
    delete_after = parsed.get("delete", False)
    local_export_destination: Optional[str] = None
    if plugin_name and not plugin_instance and location:
        plugin_instance = location

    stage_ctx = ctx.get_stage_context()
    is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
    has_downstream_stage = bool(stage_ctx is not None and not is_last_stage)

    # Directory-mode selector:
    # - Terminal use: `add-file <DIR> -instance X` shows a selectable table.
    # - Pipelined use: `add-file <DIR> -instance X | ...` processes the full batch
    #   immediately so downstream stages receive the uploaded items.
    # - Selection replay: `@N` re-runs add-file with `file1,file2,...` as the source token.
    dir_scan_mode = False
    dir_scan_results: Optional[List[Dict[str, Any]]] = None
    explicit_source_list_results: Optional[List[Dict[str, Any]]] = None

    if source_arg and location and not plugin_name:
        # Support comma-separated source lists: "file1,file2,file3".
        # This is the mechanism used by @N expansion for directory tables.
        try:
            source_text = str(source_arg)
        except Exception:
            source_text = ""

        if "," in source_text:
            parts = [p.strip().strip('"') for p in source_text.split(",")]
            parts = [p for p in parts if p]

            batch: List[Dict[str, Any]] = []
            for p in parts:
                try:
                    file_path = Path(p)
                except Exception:
                    continue
                if not file_path.exists() or not file_path.is_file():
                    continue
                ext = file_path.suffix.lower()
                if ext not in SUPPORTED_MEDIA_EXTENSIONS:
                    continue
                try:
                    hv = sha256_file(file_path)
                except Exception:
                    continue
                try:
                    size = file_path.stat().st_size
                except Exception:
                    size = 0
                batch.append(
                    {
                        "path": file_path,
                        "name": file_path.name,
                        "hash": hv,
                        "size": size,
                        "ext": ext,
                    }
                )

            if batch:
                explicit_source_list_results = batch
                # Clear source_arg so add-file doesn't treat it as a single path.
                source_arg = None
        else:
            # Directory scan (selector table, no ingest yet).
            try:
                candidate_dir = Path(str(source_arg))
                if candidate_dir.exists() and candidate_dir.is_dir():
                    dir_scan_mode = True
                    debug(f"[add-file] Scanning directory for batch add: {candidate_dir}")
                    dir_scan_results = Add_File._scan_directory_for_files(candidate_dir)
                    if dir_scan_results:
                        debug(
                            f"[add-file] Found {len(dir_scan_results)} supported files in directory"
                        )
                        # Clear source_arg so it doesn't trigger single-item mode.
                        source_arg = None
            except Exception as exc:
                debug(f"[add-file] Directory scan failed: {exc}")

    # Nothing piped and nothing named: inside a pipeline stage this is a no-op.
    if (
        result is None
        and not source_arg
        and not explicit_source_list_results
        and not dir_scan_results
    ):
        try:
            if ctx.get_stage_context() is not None:
                return 0
        except Exception:
            pass

    # Determine if -instance targets a registered backend (vs a filesystem export path).
    is_storage_backend_location = False
    if location:
        try:
            backend_registry_for_lookup = storage_registry or deps.get_backend_registry()
            is_storage_backend_location = (
                Add_File._resolve_backend_by_name(backend_registry_for_lookup, str(location))
                is not None
            )
        except Exception:
            is_storage_backend_location = False

    # -instance without -plugin that is not a known store: try it as a local export target.
    if location and not plugin_name and not is_storage_backend_location:
        resolved_local_instance, resolved_local_path = Add_File._resolve_local_export_plugin_target(
            location,
            config,
            deps=deps,
            require_explicit=True,
        )
        if resolved_local_path:
            plugin_name = "local"
            plugin_instance = resolved_local_instance or str(location)
            location = None
            local_export_destination = resolved_local_path
        else:
            log(
                f"Storage backend '{location}' not found. Use -plugin local -instance <name|path> for local export or configure that store backend.",
                file=sys.stderr,
            )
            return 1

    normalized_plugin_name = Add_File._normalize_provider_key(plugin_name)
    if normalized_plugin_name == "local":
        resolved_local_instance, resolved_local_path = Add_File._resolve_local_export_plugin_target(
            plugin_instance or location,
            config,
            deps=deps,
            require_explicit=bool(plugin_instance or location),
        )
        if not resolved_local_path:
            requested_local = str(plugin_instance or location or "").strip() or "<default>"
            log(
                f"Local destination '{requested_local}' is not configured. Use -plugin local -instance <name|path>.",
                file=sys.stderr,
            )
            return 1
        plugin_name = "local"
        plugin_instance = (
            resolved_local_instance
            or str(plugin_instance or location or "").strip()
            or None
        )
        location = None
        local_export_destination = resolved_local_path

    plugin_storage_backend = None
    if plugin_name:
        plugin_storage_backend = Add_File._resolve_plugin_storage_backend(
            plugin_name,
            plugin_instance,
            config,
            store_instance=storage_registry,
            deps=deps,
        )
    effective_storage_backend_name = plugin_storage_backend or (
        str(location) if location and is_storage_backend_location else None
    )

    # Decide which items to process.
    # - If a source list or directory scan was performed, use those results.
    # - If user provided a positional source path, treat this invocation as single-item.
    # - Otherwise, if piped input is a list, ingest each item.
    items_to_process: List[Any]
    if explicit_source_list_results:
        items_to_process = explicit_source_list_results
        debug(f"[add-file] Using {len(items_to_process)} files from source list")
    elif dir_scan_results:
        items_to_process = dir_scan_results
        debug(f"[add-file] Using {len(items_to_process)} files from directory scan")
    elif source_arg:
        items_to_process = [result]
    elif isinstance(result, list) and result:
        items_to_process = list(result)
    else:
        items_to_process = [result]

    total_items = len(items_to_process) if isinstance(items_to_process, list) else 0
    processed_items = 0

    try:
        ui, _ = progress.ui_and_pipe_index()
        if ui is not None and total_items:
            preview_items = (
                list(items_to_process)
                if isinstance(items_to_process, list) else [items_to_process]
            )
            progress.begin_pipe(
                total_items=total_items,
                items_preview=preview_items,
            )
    except Exception:
        pass

    try:
        if total_items:
            progress.set_percent(0)
    except Exception:
        pass

    # Minimal step-based progress for single-item runs.
    # Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
    use_steps = False
    steps_started = False
    try:
        ui, _ = progress.ui_and_pipe_index()
        use_steps = (ui is not None) and (len(items_to_process) == 1)
        if use_steps:
            progress.begin_steps(5)
            steps_started = True
    except Exception:
        use_steps = False

    # add-file is ingestion-only: it does not download URLs here.
    should_present_directory_selector = bool(dir_scan_mode and not has_downstream_stage)
    if dir_scan_mode and has_downstream_stage:
        debug(
            "[add-file] Continuing with directory batch ingest because downstream stages exist"
        )

    # If this invocation was terminal directory selector mode, show a selectable table and stop.
    # The user then runs @N (optionally piped), which replays add-file with selected source paths.
    if should_present_directory_selector:
        try:
            from SYS.result_table import Table

            base_args: List[str] = []
            if plugin_name:
                base_args.extend(["-plugin", str(plugin_name)])
            if location:
                base_args.extend(["-instance", str(location)])
            if source_url_arg:
                base_args.extend(["-url", str(source_url_arg)])
            if bool(delete_after):
                base_args.append("-delete")

            table = Table(title="Files in Directory", preserve_order=True)
            table.set_table("add-file.directory")
            table.set_source_command("add-file", base_args)

            rows: List[Dict[str, Any]] = []
            for file_info in dir_scan_results or []:
                p = file_info.get("path")
                hp = str(file_info.get("hash") or "")
                name = str(file_info.get("name") or "unknown")
                try:
                    clean_title = Path(name).stem
                except Exception:
                    clean_title = name
                ext = str(file_info.get("ext") or "").lstrip(".")
                size = file_info.get("size", 0)

                row_item = build_table_result_payload(
                    title=clean_title,
                    columns=[
                        ("Title", clean_title),
                        ("Hash", hp),
                        ("Size", size),
                        ("Ext", ext),
                    ],
                    selection_args=[str(p) if p is not None else ""],
                    path=str(p) if p is not None else "",
                    hash=hp,
                )
                rows.append(row_item)
                table.add_result(row_item)

            ctx.set_current_stage_table(table)
            ctx.set_last_result_table(
                table,
                rows,
                subject={"table": "add-file.directory"},
            )
            log(f"✓ Found {len(rows)} files. Select with @N (e.g., @1 or @1-3).")
            return 0
        except Exception as exc:
            # Fall through to a normal batch ingest if the table cannot be shown.
            debug(f"[add-file] Failed to display directory scan result table: {exc}")

    collected_payloads: List[Dict[str, Any]] = []
    pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
    pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {}
    pending_tag_associations: Dict[str, List[tuple[str, List[str]]]] = {}
    successes = 0
    failures = 0

    # When add-file -instance is the last stage, always show a final result table.
    # This is especially important for multi-item ingests (e.g., multi-clip downloads)
    # so the user always gets a selectable ResultTable.
    live_progress = None
    try:
        live_progress = ctx.get_live_progress()
    except Exception:
        live_progress = None

    want_final_search_file = (
        bool(is_last_stage)
        and bool(effective_storage_backend_name)
        and bool(live_progress)
    )
    auto_search_file_after_add = False

    # When ingesting multiple items into a backend store, defer URL association and
    # apply it once at the end (bulk) to avoid per-item URL API calls.
    defer_url_association = (
        bool(effective_storage_backend_name)
        and len(items_to_process) > 1
    )

    for idx, item in enumerate(items_to_process, 1):
        # Fix: this previously passed the undefined name `path_arg` (left over
        # from before the positional `source` argument replaced -path), which
        # raised NameError for every item.
        pipe_obj = coerce_to_pipe_object(item, source_arg)

        if source_url_arg:
            # Merge -url values (comma-separated) into the item's existing URLs.
            try:
                # NOTE(review): `normalize_urls` is imported but unused; kept so
                # import failures keep skipping this best-effort block as before.
                from SYS.metadata import normalize_urls

                cli_urls = [u.strip() for u in str(source_url_arg).split(",") if u and u.strip()]
                merged_urls: List[str] = []
                if isinstance(getattr(pipe_obj, "extra", None), dict):
                    existing_url = pipe_obj.extra.get("url")
                    if isinstance(existing_url, list):
                        merged_urls.extend(str(u) for u in existing_url if u)
                    elif isinstance(existing_url, str) and existing_url.strip():
                        merged_urls.append(existing_url.strip())
                else:
                    pipe_obj.extra = {}
                merged_urls = sh.merge_urls(merged_urls, cli_urls)
                if merged_urls:
                    pipe_obj.extra["url"] = merged_urls
            except Exception:
                pass

        try:
            label = pipe_obj.title
            if not label and pipe_obj.path:
                try:
                    label = Path(str(pipe_obj.path)).name
                except Exception:
                    label = pipe_obj.path
            if not label:
                label = "file"
            if total_items:
                pending_pct = int(round(((idx - 1) / max(1, total_items)) * 100))
                progress.set_percent(pending_pct)
            progress.set_status(f"adding {idx}/{total_items}: {label}")
        except Exception:
            pass

        temp_dir_to_cleanup: Optional[Path] = None
        delete_after_item = delete_after
        try:
            if use_steps and steps_started:
                progress.step("resolving source")

            export_destination = (
                Path(local_export_destination)
                if local_export_destination
                else Path(location)
                if location and not is_storage_backend_location
                else None
            )
            media_path, file_hash, temp_dir_to_cleanup = self._resolve_source(
                item,
                source_arg,
                pipe_obj,
                config,
                export_destination=export_destination,
                store_instance=storage_registry,
                deps=deps,
            )
            if not media_path and plugin_name:
                media_path, file_hash, temp_dir_to_cleanup = Add_File._download_piped_source(
                    pipe_obj, config, storage_registry, deps=deps
                )
            if not media_path:
                failures += 1
                continue

            # Update pipe_obj with resolved path.
            pipe_obj.path = str(media_path)

            # Local/plugin exports can accept any file type.
            # Storage backends stay restricted to SUPPORTED_MEDIA_EXTENSIONS.
            allow_all_files = not bool(effective_storage_backend_name)
            if not self._validate_source(media_path, allow_all_extensions=allow_all_files):
                failures += 1
                continue

            if use_steps and steps_started:
                if not file_hash:
                    progress.step("hashing file")
                progress.step("ingesting file")

            if plugin_name:
                if effective_storage_backend_name:
                    # The plugin resolves to a storage backend: ingest through it.
                    code = self._handle_storage_backend(
                        item,
                        media_path,
                        effective_storage_backend_name,
                        pipe_obj,
                        config,
                        delete_after_item,
                        collect_payloads=collected_payloads,
                        collect_relationship_pairs=pending_relationship_pairs,
                        defer_url_association=defer_url_association,
                        pending_url_associations=pending_url_associations,
                        defer_tag_association=defer_url_association,
                        pending_tag_associations=pending_tag_associations,
                        suppress_last_stage_overlay=want_final_search_file,
                        auto_search_file=auto_search_file_after_add,
                        store_instance=storage_registry,
                    )
                else:
                    code = self._handle_plugin_upload(
                        media_path,
                        plugin_name,
                        plugin_instance,
                        pipe_obj,
                        config,
                        delete_after_item,
                    )
                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            if location:
                try:
                    backend_registry = storage_registry or deps.get_backend_registry()
                    resolved_backend = Add_File._resolve_backend_by_name(
                        backend_registry, str(location)
                    )
                    if resolved_backend is not None:
                        code = self._handle_storage_backend(
                            item,
                            media_path,
                            location,
                            pipe_obj,
                            config,
                            delete_after_item,
                            collect_payloads=collected_payloads,
                            collect_relationship_pairs=pending_relationship_pairs,
                            defer_url_association=defer_url_association,
                            pending_url_associations=pending_url_associations,
                            defer_tag_association=defer_url_association,
                            pending_tag_associations=pending_tag_associations,
                            suppress_last_stage_overlay=want_final_search_file,
                            auto_search_file=auto_search_file_after_add,
                            store_instance=storage_registry,
                        )
                    else:
                        log(f"Invalid storage backend: {location}", file=sys.stderr)
                        code = 1
                except Exception as exc:
                    debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
                    log(f"Invalid location: {location}", file=sys.stderr)
                    failures += 1
                    continue

                if code == 0:
                    successes += 1
                else:
                    failures += 1
                continue

            log("No destination specified", file=sys.stderr)
            failures += 1
        finally:
            if temp_dir_to_cleanup is not None:
                try:
                    shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
                except Exception:
                    pass
            # Progress accounting lives in `finally` so that the `continue`
            # exits above still count the item.
            processed_items += 1
            try:
                pct = int(round((processed_items / max(1, total_items)) * 100))
                progress.set_percent(pct)
                if processed_items >= total_items:
                    progress.clear_status()
            except Exception:
                pass

    # Apply deferred url associations (bulk) before showing the final store table.
    if pending_url_associations:
        try:
            Add_File._apply_pending_url_associations(
                pending_url_associations,
                config,
                store_instance=storage_registry,
            )
        except Exception:
            pass

    # Apply deferred tag associations (bulk) if collected.
    if pending_tag_associations:
        try:
            Add_File._apply_pending_tag_associations(
                pending_tag_associations,
                config,
                store_instance=storage_registry,
            )
        except Exception:
            pass

    # Always end add-file -instance (when last stage) by showing item detail panels.
    # Legacy search-file refresh is no longer used for final display.
    if want_final_search_file and collected_payloads:
        try:
            # NOTE(review): these two names are imported but unused; kept so
            # import failures keep skipping this best-effort block as before.
            from SYS.rich_display import render_item_details_panel
            from SYS.result_table import Table

            # Stop the live pipeline progress UI before rendering the details panels.
            # This prevents the progress display from lingering on screen.
            Add_File._stop_live_progress_for_terminal_render()

            subject = collected_payloads[0] if len(collected_payloads) == 1 else collected_payloads
            # Use helper to display items and make them @-selectable.
            from .._shared import display_and_persist_items

            display_and_persist_items(collected_payloads, title="Result", subject=subject)
            try:
                ctx.set_last_result_items_only(list(collected_payloads))
            except Exception:
                pass
        except Exception:
            pass

    # Persist relationships into backend DB/API.
    if pending_relationship_pairs:
        try:
            Add_File._apply_pending_relationships(
                pending_relationship_pairs,
                config,
                store_instance=storage_registry,
                deps=deps,
            )
        except Exception:
            pass

    if use_steps and steps_started:
        progress.step("finalized")
        # Clear the status so it doesn't linger in the UI.
        progress.clear_status()

    if successes > 0:
        return 0
    return 1
2025-12-11 12:47:30 -08:00
2025-12-16 01:45:01 -08:00
@staticmethod
2025-12-30 23:19:02 -08:00
def _try_emit_search_file_by_hashes (
2025-12-29 18:42:02 -08:00
* ,
2026-05-03 21:20:05 -07:00
instance : str ,
2025-12-29 18:42:02 -08:00
hash_values : List [ str ] ,
config : Dict [ str ,
2026-01-12 04:05:52 -08:00
Any ] ,
2026-05-23 13:49:47 -07:00
store_instance : Optional [ BackendRegistry ] = None ,
2025-12-29 17:05:03 -08:00
) - > Optional [ List [ Any ] ] :
2025-12-30 23:19:02 -08:00
""" Run search-file for a list of hashes and promote the table to a display overlay.
2025-12-16 01:45:01 -08:00
2025-12-30 23:19:02 -08:00
Returns the emitted search-file payload items on success, else None.
2025-12-16 01:45:01 -08:00
"""
hashes = [ h for h in ( hash_values or [ ] ) if isinstance ( h , str ) and len ( h ) == 64 ]
2026-05-03 21:20:05 -07:00
if not instance or not hashes :
2025-12-16 01:45:01 -08:00
return None
try :
2026-05-04 18:41:01 -07:00
from cmdlet . file . search import CMDLET as search_file_cmdlet
2025-12-16 01:45:01 -08:00
query = " hash: " + " , " . join ( hashes )
2026-05-03 21:20:05 -07:00
args = [ " -instance " , str ( instance ) , " -internal-refresh " , query ]
debug ( f ' [add-file] Refresh: search-file -instance { instance } " { query } " ' )
2025-12-16 01:45:01 -08:00
2025-12-30 23:19:02 -08:00
# Run search-file under a temporary stage context so its ctx.emit() calls
2025-12-16 01:45:01 -08:00
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx . get_stage_context ( )
2025-12-29 17:05:03 -08:00
temp_ctx = ctx . PipelineStageContext (
stage_index = 0 ,
total_stages = 1 ,
pipe_index = 0 ,
2025-12-29 18:42:02 -08:00
worker_id = getattr ( prev_ctx ,
" worker_id " ,
None ) ,
2025-12-29 17:05:03 -08:00
)
2025-12-16 01:45:01 -08:00
ctx . set_stage_context ( temp_ctx )
try :
2025-12-30 23:19:02 -08:00
code = search_file_cmdlet . run ( None , args , config )
2025-12-16 01:45:01 -08:00
emitted_items = list ( getattr ( temp_ctx , " emits " , [ ] ) or [ ] )
finally :
ctx . set_stage_context ( prev_ctx )
if code != 0 :
return None
2025-12-30 23:19:02 -08:00
# Promote the search-file result to a display overlay so the CLI prints it
2025-12-16 01:45:01 -08:00
# for action commands like add-file.
stage_ctx = ctx . get_stage_context ( )
2025-12-29 18:42:02 -08:00
is_last = ( stage_ctx
is None ) or bool ( getattr ( stage_ctx ,
" is_last_stage " ,
False ) )
2025-12-16 01:45:01 -08:00
if is_last :
try :
table = ctx . get_last_result_table ( )
items = ctx . get_last_result_items ( )
if table is not None and items :
2026-01-15 16:26:22 -08:00
# If we have a single item refresh, render it as a panel immediately
# and suppress the table output from the CLI runner.
if len ( items ) == 1 :
try :
from SYS . rich_display import render_item_details_panel
render_item_details_panel ( items [ 0 ] )
setattr ( table , " _rendered_by_cmdlet " , True )
except Exception as exc :
debug ( f " [add-file] Item details render failed: { exc } " )
2026-03-25 22:39:30 -07:00
publish_result_table (
ctx ,
2025-12-29 18:42:02 -08:00
table ,
items ,
subject = {
2026-05-03 21:20:05 -07:00
" store " : instance ,
2025-12-29 18:42:02 -08:00
" hash " : hashes
2026-03-25 22:39:30 -07:00
} ,
overlay = True ,
2025-12-29 17:05:03 -08:00
)
2025-12-16 01:45:01 -08:00
except Exception :
pass
return emitted_items
except Exception as exc :
2025-12-29 17:05:03 -08:00
debug (
2025-12-30 23:19:02 -08:00
f " [add-file] Failed to run search-file after add-file: { type ( exc ) . __name__ } : { exc } "
2025-12-29 17:05:03 -08:00
)
2025-12-16 01:45:01 -08:00
return None
@staticmethod
2025-12-29 18:42:02 -08:00
def _parse_relationship_tag_king_alts (
tag_value : str
) - > tuple [ Optional [ str ] ,
List [ str ] ] :
2025-12-16 01:45:01 -08:00
""" Parse a relationship tag into (king_hash, alt_hashes).
Supported formats:
- New: relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>
- Old: relationship: hash(king)<KING_HASH>,hash(alt)<ALT_HASH>...
relationship: hash(king)KING,hash(alt)ALT
For the local DB we treat the first hash listed as the king.
"""
if not isinstance ( tag_value , str ) :
return None , [ ]
raw = tag_value . strip ( )
if not raw :
return None , [ ]
# Normalize input: ensure we only look at the RHS after "relationship:"
rhs = raw
if " : " in raw :
prefix , rest = raw . split ( " : " , 1 )
if prefix . strip ( ) . lower ( ) == " relationship " :
rhs = rest . strip ( )
# Old typed format: hash(type)HEX
typed = re . findall ( r " hash \ (( \ w+) \ )<?([a-fA-F0-9] {64} )>? " , rhs )
if typed :
king : Optional [ str ] = None
alts : List [ str ] = [ ]
for rel_type , h in typed :
h_norm = str ( h ) . strip ( ) . lower ( )
if rel_type . strip ( ) . lower ( ) == " king " :
king = h_norm
2025-12-29 18:42:02 -08:00
elif rel_type . strip ( ) . lower ( ) in { " alt " ,
" related " } :
2025-12-16 01:45:01 -08:00
alts . append ( h_norm )
# If the tag omitted king but had hashes, fall back to first hash.
if not king :
all_hashes = [ str ( h ) . strip ( ) . lower ( ) for _ , h in typed ]
king = all_hashes [ 0 ] if all_hashes else None
alts = [ h for h in all_hashes [ 1 : ] if h ]
# Dedupe alts while preserving order
seen : set [ str ] = set ( )
2025-12-29 18:42:02 -08:00
alts = [
h for h in alts
if h and len ( h ) == 64 and not ( h in seen or seen . add ( h ) )
]
2025-12-16 01:45:01 -08:00
if king and len ( king ) == 64 :
return king , [ h for h in alts if h != king ]
return None , [ ]
# New format: a simple list of hashes, first is king.
hashes = re . findall ( r " \ b[a-fA-F0-9] {64} \ b " , rhs )
hashes = [ h . strip ( ) . lower ( ) for h in hashes if isinstance ( h , str ) ]
if not hashes :
return None , [ ]
king = hashes [ 0 ]
alts = hashes [ 1 : ]
seen2 : set [ str ] = set ( )
2025-12-29 18:42:02 -08:00
alts = [
h for h in alts if h and len ( h ) == 64 and not ( h in seen2 or seen2 . add ( h ) )
]
2025-12-16 01:45:01 -08:00
return king , [ h for h in alts if h != king ]
@staticmethod
2025-12-29 17:05:03 -08:00
def _parse_relationships_king_alts (
2025-12-29 18:42:02 -08:00
relationships : Dict [ str ,
Any ] ,
) - > tuple [ Optional [ str ] ,
List [ str ] ] :
2025-12-16 01:45:01 -08:00
""" Parse a PipeObject.relationships dict into (king_hash, alt_hashes).
Supported shapes:
- { " king " : [KING], " alt " : [ALT1, ALT2]}
- { " king " : KING, " alt " : ALT} (strings)
- Also treats " related " hashes as alts for persistence purposes.
"""
if not isinstance ( relationships , dict ) or not relationships :
return None , [ ]
def _first_hash ( val : Any ) - > Optional [ str ] :
if isinstance ( val , str ) :
h = val . strip ( ) . lower ( )
return h if len ( h ) == 64 else None
if isinstance ( val , list ) :
for item in val :
if isinstance ( item , str ) :
h = item . strip ( ) . lower ( )
if len ( h ) == 64 :
return h
return None
def _many_hashes ( val : Any ) - > List [ str ] :
out : List [ str ] = [ ]
if isinstance ( val , str ) :
h = val . strip ( ) . lower ( )
if len ( h ) == 64 :
out . append ( h )
elif isinstance ( val , list ) :
for item in val :
if isinstance ( item , str ) :
h = item . strip ( ) . lower ( )
if len ( h ) == 64 :
out . append ( h )
return out
king = _first_hash ( relationships . get ( " king " ) )
if not king :
return None , [ ]
alts = _many_hashes ( relationships . get ( " alt " ) )
alts . extend ( _many_hashes ( relationships . get ( " related " ) ) )
seen : set [ str ] = set ( )
alts = [ h for h in alts if h and h != king and not ( h in seen or seen . add ( h ) ) ]
return king , alts
@staticmethod
2025-12-29 17:05:03 -08:00
def _apply_pending_relationships (
2025-12-29 18:42:02 -08:00
pending : Dict [ str ,
set [ tuple [ str ,
str ] ] ] ,
config : Dict [ str ,
2026-01-12 04:05:52 -08:00
Any ] ,
2026-05-23 13:49:47 -07:00
store_instance : Optional [ BackendRegistry ] = None ,
2026-05-03 17:29:32 -07:00
deps : Optional [ _CommandDependencies ] = None ,
2025-12-29 17:05:03 -08:00
) - > None :
2026-01-02 02:28:59 -08:00
""" Persist relationships to backends that support relationships.
2025-12-16 01:45:01 -08:00
2026-01-02 02:28:59 -08:00
This delegates to an optional backend method: `set_relationship(alt, king, kind)`.
2025-12-16 01:45:01 -08:00
"""
if not pending :
return
2026-05-03 17:29:32 -07:00
if deps is None :
deps = _CommandDependencies ( config )
2025-12-16 01:45:01 -08:00
try :
2026-05-23 13:49:47 -07:00
backend_registry = store_instance if store_instance is not None else deps . get_backend_registry ( )
2025-12-16 01:45:01 -08:00
except Exception :
return
for backend_name , pairs in pending . items ( ) :
if not pairs :
continue
try :
2026-05-23 13:49:47 -07:00
backend = backend_registry [ str ( backend_name ) ]
2025-12-16 01:45:01 -08:00
except Exception :
continue
2026-05-04 18:41:01 -07:00
if not bool ( getattr ( backend , " supports_relationship_association " , False ) ) :
continue
2026-01-02 02:28:59 -08:00
setter = getattr ( backend , " set_relationship " , None )
if not callable ( setter ) :
2025-12-16 01:45:01 -08:00
continue
2026-01-02 02:28:59 -08:00
processed_pairs : set [ tuple [ str , str ] ] = set ( )
for alt_hash , king_hash in sorted ( pairs ) :
if not alt_hash or not king_hash or alt_hash == king_hash :
continue
if ( alt_hash , king_hash ) in processed_pairs :
continue
alt_norm = str ( alt_hash ) . strip ( ) . lower ( )
king_norm = str ( king_hash ) . strip ( ) . lower ( )
if len ( alt_norm ) != 64 or len ( king_norm ) != 64 :
continue
try :
setter ( alt_norm , king_norm , " alt " )
processed_pairs . add ( ( alt_hash , king_hash ) )
except Exception :
2025-12-16 01:45:01 -08:00
continue
2026-01-04 02:23:50 -08:00
@staticmethod
def _maybe_download_backend_file (
backend : Any ,
file_hash : str ,
pipe_obj : models . PipeObject ,
2026-05-24 12:32:57 -07:00
* ,
output_dir : Optional [ Path ] = None ,
2026-01-04 02:23:50 -08:00
) - > Tuple [ Optional [ Path ] , Optional [ Path ] ] :
""" Best-effort fetch of a backend file when get_file returns a URL.
Returns (downloaded_path, temp_dir_to_cleanup).
"""
downloader = getattr ( backend , " download_to_temp " , None )
if not callable ( downloader ) :
return None , None
tmp_dir : Optional [ Path ] = None
try :
2026-01-14 18:23:20 -08:00
# Extract suffix from pipe_obj path to avoid .tmp rejections
suffix = None
if pipe_obj . path :
try :
suffix = Path ( pipe_obj . path ) . suffix
except Exception :
pass
# Extract suffix from metadata if available (fallback)
if not suffix :
metadata = getattr ( pipe_obj , " metadata " , { } )
if isinstance ( metadata , dict ) :
suffix = metadata . get ( " ext " )
2026-01-14 21:53:07 -08:00
2026-05-24 12:32:57 -07:00
download_root = output_dir
if download_root is None :
tmp_dir = Path ( tempfile . mkdtemp ( prefix = " add-file-src- " ) )
download_root = tmp_dir
if download_root is None :
return None , None
# Introspect downloader to pass supported args.
2026-01-14 18:23:20 -08:00
import inspect
2026-01-14 21:53:07 -08:00
2026-01-14 18:23:20 -08:00
sig = inspect . signature ( downloader )
2026-05-24 12:32:57 -07:00
kwargs = { " temp_root " : download_root }
2026-01-14 18:23:20 -08:00
if " suffix " in sig . parameters :
2026-01-14 21:53:07 -08:00
kwargs [ " suffix " ] = suffix
2026-05-24 12:32:57 -07:00
pipeline_progress = PipelineProgress ( ctx )
transfer_label = " peer transfer "
try :
transfer_label = str ( getattr ( pipe_obj , " title " , " " ) or " " ) . strip ( ) or transfer_label
except Exception :
transfer_label = " peer transfer "
if " pipeline_progress " in sig . parameters :
kwargs [ " pipeline_progress " ] = pipeline_progress
if " transfer_label " in sig . parameters :
kwargs [ " transfer_label " ] = transfer_label
if " progress_callback " in sig . parameters :
2026-01-14 21:53:07 -08:00
def _cb ( done , total ) :
2026-05-24 12:32:57 -07:00
try :
total_val = int ( total ) if total is not None else None
except Exception :
total_val = None
try :
if int ( done or 0 ) < = 0 :
pipeline_progress . begin_transfer (
label = transfer_label ,
total = total_val ,
)
except Exception :
pass
try :
pipeline_progress . update_transfer (
label = transfer_label ,
completed = int ( done or 0 ) ,
total = total_val ,
)
except Exception :
pass
2026-01-14 21:53:07 -08:00
kwargs [ " progress_callback " ] = _cb
downloaded = downloader ( str ( file_hash ) , * * kwargs )
2026-01-04 02:23:50 -08:00
if isinstance ( downloaded , Path ) and downloaded . exists ( ) :
2026-05-24 12:32:57 -07:00
if output_dir is not None :
pipe_obj . is_temp = False
if isinstance ( pipe_obj . extra , dict ) :
pipe_obj . extra [ " _direct_export_download " ] = True
else :
pipe_obj . extra = { " _direct_export_download " : True }
return downloaded , None
2026-01-04 02:23:50 -08:00
pipe_obj . is_temp = True
return downloaded , tmp_dir
except Exception :
pass
if tmp_dir is not None :
try :
shutil . rmtree ( tmp_dir , ignore_errors = True )
except Exception :
pass
return None , None
2026-04-17 16:17:16 -07:00
@staticmethod
def _download_remote_backend_url (
remote_url : str ,
pipe_obj : models . PipeObject ,
* ,
file_hash : Optional [ str ] = None ,
output_dir : Optional [ Path ] = None ,
) - > Tuple [ Optional [ Path ] , Optional [ Path ] ] :
""" Best-effort fetch of a remote backend URL.
Returns (downloaded_path, temp_dir_to_cleanup).
When ``output_dir`` is provided, the file is downloaded directly there and no
temp cleanup path is returned.
"""
url_text = str ( remote_url or " " ) . strip ( )
if not url_text :
return None , None
if not url_text . lower ( ) . startswith ( _REMOTE_URL_PREFIXES ) :
return None , None
2026-05-04 15:58:24 -07:00
# This helper performs generic HTTP downloads only.
# Non-HTTP schemes (e.g. hydrus://, tidal:) should be handled by
# plugin-specific resolvers via _maybe_download_plugin_result.
if not url_text . lower ( ) . startswith ( ( " http:// " , " https:// " ) ) :
return None , None
2026-04-17 16:17:16 -07:00
tmp_dir : Optional [ Path ] = None
try :
download_root = output_dir
if download_root is None :
tmp_dir = Path ( tempfile . mkdtemp ( prefix = " add-file-src- " ) )
download_root = tmp_dir
suggested_name = Add_File . _build_provider_filename (
pipe_obj ,
fallback_hash = file_hash ,
source_url = url_text ,
)
pipeline_progress = PipelineProgress ( ctx )
2026-05-24 12:32:57 -07:00
try :
destination_label = str ( download_root ) if download_root is not None else " temporary workspace "
pipeline_progress . set_status ( f " downloading { suggested_name } to { destination_label } " )
except Exception :
pass
2026-04-17 16:17:16 -07:00
downloaded = _download_direct_file (
url_text ,
download_root ,
quiet = False ,
suggested_filename = suggested_name ,
pipeline_progress = pipeline_progress ,
)
downloaded_path = getattr ( downloaded , " path " , None )
if isinstance ( downloaded_path , Path ) and downloaded_path . exists ( ) :
if output_dir is not None :
pipe_obj . is_temp = False
if isinstance ( pipe_obj . extra , dict ) :
pipe_obj . extra [ " _direct_export_download " ] = True
else :
pipe_obj . extra = { " _direct_export_download " : True }
return downloaded_path , None
pipe_obj . is_temp = True
return downloaded_path , tmp_dir
except Exception :
pass
2026-05-24 12:32:57 -07:00
finally :
try :
PipelineProgress ( ctx ) . clear_status ( )
except Exception :
pass
2026-04-17 16:17:16 -07:00
if tmp_dir is not None :
try :
shutil . rmtree ( tmp_dir , ignore_errors = True )
except Exception :
pass
return None , None
2026-01-24 01:38:12 -08:00
@staticmethod
def _build_provider_filename(
    pipe_obj: models.PipeObject,
    fallback_hash: Optional[str] = None,
    source_url: Optional[str] = None,
) -> str:
    """Derive a sanitized download file name for ``pipe_obj``.

    The base name is the first non-blank value among ``pipe_obj.title``,
    ``extra["name"]``/``extra["title"]`` and ``metadata["title"]``/
    ``metadata["name"]``; failing that, the first 8 characters of
    ``fallback_hash``; failing that, ``"download"``. The base is passed
    through ``sanitize_filename``.

    The extension comes from ``metadata["ext"]``/``metadata["extension"]``,
    then ``extra["ext"]``, then the path suffix of ``source_url``. It is
    appended (with a leading dot) unless the name already ends with it,
    compared case-insensitively.
    """
    extra = getattr(pipe_obj, "extra", {})
    metadata = getattr(pipe_obj, "metadata", {})
    has_extra = isinstance(extra, dict)
    has_metadata = isinstance(metadata, dict)

    # Collect title candidates in priority order.
    candidates: List[str] = []
    title_value = getattr(pipe_obj, "title", "")
    if title_value:
        candidates.append(str(title_value))
    if has_extra:
        extra_name = extra.get("name") or extra.get("title")
        if extra_name:
            candidates.append(str(extra_name))
    if has_metadata:
        meta_name = metadata.get("title") or metadata.get("name")
        if meta_name:
            candidates.append(str(meta_name))

    stripped = (candidate.strip() for candidate in candidates if candidate)
    base = next((value for value in stripped if value), "")
    if not base and fallback_hash:
        base = fallback_hash[:8]
    safe_name = sanitize_filename(base or "download")

    # Resolve the extension: metadata, then extra, then the URL path.
    ext = ""
    if has_metadata:
        ext = metadata.get("ext") or metadata.get("extension") or ""
    if not ext and has_extra:
        ext = extra.get("ext") or ""
    if not ext and source_url:
        try:
            ext = Path(urlparse(source_url).path).suffix.lstrip(".")
        except Exception:
            ext = ""

    if ext:
        ext_text = str(ext)
        if not ext_text.startswith("."):
            ext_text = "." + ext_text.lstrip(".")
        if not safe_name.lower().endswith(ext_text.lower()):
            safe_name = f"{safe_name}{ext_text}"

    return safe_name or "download"
2026-01-23 21:32:34 -08:00
@staticmethod
2026-05-03 21:20:05 -07:00
def _resolve_backend_by_name ( instance : Any , backend_name : str ) - > Optional [ Any ] :
if not instance or not backend_name :
2026-01-23 21:32:34 -08:00
return None
try :
2026-05-03 21:20:05 -07:00
return instance [ backend_name ]
2026-01-23 21:32:34 -08:00
except Exception :
pass
target = str ( backend_name or " " ) . strip ( ) . lower ( )
if not target :
return None
try :
2026-05-03 21:20:05 -07:00
for candidate in instance . list_backends ( ) :
2026-01-23 21:32:34 -08:00
if isinstance ( candidate , str ) and candidate . strip ( ) . lower ( ) == target :
try :
2026-05-03 21:20:05 -07:00
return instance [ candidate ]
2026-01-23 21:32:34 -08:00
except Exception :
continue
except Exception :
pass
return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _resolve_source(
    result: Any,
    source_arg: Optional[str],
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    export_destination: Optional[Path] = None,
    store_instance: Optional[Any] = None,
    deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
    """Resolve the source file path from the positional source arg or pipeline result.

    Resolution order:
        1a. An existing local file at ``result["path"]`` (dict results only).
        1b. ``hash`` + ``store`` from the result, fetched from that backend
            (local path, backend download, then generic HTTP download).
        2.  Generic coercion: ``source_arg``, then ``pipe_obj.path``, then the
            result itself. Remote sources try the plugin resolver first and
            the generic HTTP download second.
        3.  Plugin-specific download of the pipeline result.

    Args:
        result: Pipeline result (dict, object, or list of either).
        source_arg: Explicit positional source, if any.
        pipe_obj: Pipe object; ``pipe_obj.path`` is updated on success.
        config: Configuration used to build dependencies when needed.
        export_destination: Directory for direct downloads, if any.
        store_instance: Backend registry to use instead of ``deps``.
        deps: Dependency provider; built from ``config`` when omitted.

    Returns:
        ``(media_path, file_hash, temp_dir_to_cleanup)``; all None when the
        source cannot be resolved.
    """
    # PRIORITY 1a: Prefer an explicit local path when it exists.
    # This avoids unnecessary backend.get_file(hash) probes (and remote fallbacks)
    # for freshly downloaded temp files that are not yet present in the source store.
    if isinstance(result, dict):
        r_path = result.get("path")
        r_hash = result.get("hash")
        if r_path:
            try:
                p = coerce_to_path(r_path)
                if p.exists() and p.is_file():
                    pipe_obj.path = str(p)
                    return p, str(r_hash) if r_hash else None, None
            except Exception:
                pass

    # PRIORITY 1b: Try hash+store from result (fetch from backend)
    r_hash = get_field(result, "hash") or get_field(result, "file_hash")
    r_store = get_field(result, "store")
    if r_hash and r_store:
        try:
            if deps is None:
                deps = _CommandDependencies(config)
            backend_registry = store_instance or deps.get_backend_registry()
            backend = Add_File._resolve_backend_by_name(backend_registry, r_store)
            if backend is not None:
                mp = backend.get_file(r_hash)
                if isinstance(mp, Path) and mp.exists():
                    pipe_obj.path = str(mp)
                    return mp, str(r_hash), None
                if isinstance(mp, str) and mp.strip():
                    # The string may be a local path or a URL. Probing a URL
                    # as a filesystem path can raise OSError (e.g. an overly
                    # long component); that must not abort the remote
                    # fallbacks below, so the whole probe is guarded.
                    mp_path: Optional[Path] = None
                    try:
                        probe = Path(str(mp))
                        if probe.exists() and probe.is_file():
                            mp_path = probe
                    except Exception:
                        mp_path = None
                    if mp_path is not None:
                        pipe_obj.path = str(mp_path)
                        return mp_path, str(r_hash), None

                    dl_path, tmp_dir = Add_File._maybe_download_backend_file(
                        backend,
                        str(r_hash),
                        pipe_obj,
                        output_dir=export_destination,
                    )
                    if dl_path and dl_path.exists():
                        pipe_obj.path = str(dl_path)
                        return dl_path, str(r_hash), tmp_dir

                    dl_path, tmp_dir = Add_File._download_remote_backend_url(
                        str(mp),
                        pipe_obj,
                        file_hash=str(r_hash),
                        output_dir=export_destination,
                    )
                    if dl_path and dl_path.exists():
                        pipe_obj.path = str(dl_path)
                        return dl_path, str(r_hash), tmp_dir
        except Exception as exc:
            debug(f"[add-file] _resolve_source backend fetch failed for {r_store}/{r_hash}: {exc}")

    # PRIORITY 2: Generic Coercion (Path arg > PipeObject > Result)
    # The raw text is kept next to the Path: pathlib collapses repeated
    # slashes, so str(Path("https://host/x")) becomes "https:/host/x" and
    # would no longer look like (or download as) an HTTP URL.
    candidate: Optional[Path] = None
    candidate_text = ""
    if source_arg:
        candidate_text = str(source_arg)
        candidate = Path(source_arg)
    elif pipe_obj.path:
        candidate_text = str(pipe_obj.path)
        candidate = Path(pipe_obj.path)

    if not candidate:
        # Unwrap list if needed
        obj = result[0] if isinstance(result, list) and result else result
        if obj:
            try:
                candidate = coerce_to_path(obj)
            except ValueError:
                pass

    if candidate:
        # Fall back to the Path text when no raw source string is available
        # (candidate produced by coerce_to_path).
        source_text = candidate_text or str(candidate)
        if source_text.lower().startswith(_REMOTE_URL_PREFIXES):
            # For remote sources, prefer plugin-specific resolvers first
            # (e.g. hydrus://), then generic HTTP fallback.
            downloaded_path, hash_hint, tmp_dir = Add_File._maybe_download_plugin_result(
                result,
                pipe_obj,
                config,
                deps=deps,
            )
            if downloaded_path:
                pipe_obj.path = str(downloaded_path)
                return downloaded_path, hash_hint, tmp_dir

            dl_path, tmp_dir = Add_File._download_remote_backend_url(
                source_text,
                pipe_obj,
                file_hash=get_field(result, "hash") or get_field(result, "file_hash"),
                output_dir=export_destination,
            )
            if dl_path:
                pipe_obj.path = str(dl_path)
                hash_hint = get_field(result, "hash") or get_field(result, "file_hash")
                return dl_path, hash_hint, tmp_dir

            log("add-file could not auto-fetch remote source. Use download-file first.", file=sys.stderr)
            return None, None, None

        pipe_obj.path = str(candidate)
        # Retain hash from input if available to avoid re-hashing
        hash_hint = get_field(result, "hash") or get_field(result, "file_hash") or getattr(pipe_obj, "hash", None)
        return candidate, hash_hint, None

    # PRIORITY 3: let the originating plugin download the result.
    downloaded_path, hash_hint, tmp_dir = Add_File._maybe_download_plugin_result(
        result,
        pipe_obj,
        config,
        deps=deps,
    )
    if downloaded_path:
        pipe_obj.path = str(downloaded_path)
        return downloaded_path, hash_hint, tmp_dir

    debug(f"No resolution path matched. result type={type(result).__name__}")
    log("File path could not be resolved")
    return None, None, None
2025-12-07 00:21:30 -08:00
2026-01-31 23:22:30 -08:00
@staticmethod
def _normalize_provider_key ( value : Optional [ Any ] ) - > Optional [ str ] :
if value is None :
return None
try :
normalized = str ( value ) . strip ( ) . lower ( )
except Exception :
return None
if not normalized :
return None
if " . " in normalized :
normalized = normalized . split ( " . " , 1 ) [ 0 ]
return normalized
2026-05-24 12:32:57 -07:00
@staticmethod
def validate_preflight_args(
    args: Sequence[str],
    config: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Check add-file arguments before the pipeline runs.

    Args:
        args: Raw add-file argument list.
        config: Configuration dict; anything that is not a dict is treated
            as empty.

    Returns:
        A ``"Pipeline error: ..."`` message describing the first problem
        found (legacy path flag, unparsable arguments, unknown storage
        backend, unknown or non-upload plugin, unconfigured local
        destination), or None when the arguments look usable.
    """
    cfg = config if isinstance(config, dict) else {}

    if Add_File._uses_legacy_path_flag(args):
        return f"Pipeline error: {Add_File._legacy_path_flag_message()}"

    try:
        parsed = parse_cmdlet_args(args, CMDLET)
    except Exception as exc:
        return f"Pipeline error: invalid add-file arguments: {exc}"

    deps = _CommandDependencies(cfg)
    storage_registry = deps.get_backend_registry()

    # "-instance" serves both as storage location and as plugin instance.
    location = parsed.get("instance")
    plugin_instance = parsed.get("instance")
    plugin_name = parsed.get("plugin")

    # Does the location name a configured storage backend?
    location_is_backend = False
    if location:
        try:
            lookup_registry = storage_registry or deps.get_backend_registry()
            found = Add_File._resolve_backend_by_name(
                lookup_registry,
                str(location),
            )
            location_is_backend = found is not None
        except Exception:
            location_is_backend = False

    # A bare location that is not a backend must resolve as a local export.
    if location and not plugin_name and not location_is_backend:
        _local_name, local_path = Add_File._resolve_local_export_plugin_target(
            location,
            cfg,
            deps=deps,
            require_explicit=True,
        )
        if local_path:
            return None
        return (
            f"Pipeline error: storage backend '{location}' not found. "
            "Use -plugin local -instance <name|path> for local export or configure that store backend."
        )

    plugin_key = Add_File._normalize_provider_key(plugin_name)
    if not plugin_key:
        return None

    if deps.get_plugin_with_capability(plugin_key, "upload") is None:
        # Distinguish "exists but cannot upload" from "does not exist".
        if deps.get_plugin(plugin_key) is None:
            return f"Pipeline error: unknown upload plugin '{plugin_name}'."
        if plugin_key == "loc":
            return (
                "Pipeline error: plugin 'loc' does not support add-file/upload. "
                "Use -plugin local -instance <name|path> for local export."
            )
        return f"Pipeline error: plugin '{plugin_key}' does not support add-file/upload."

    if plugin_key == "local":
        target = plugin_instance or location
        requested_local = str(target or "").strip() or "<default>"
        _local_name, local_path = Add_File._resolve_local_export_plugin_target(
            target,
            cfg,
            deps=deps,
            require_explicit=bool(target),
        )
        if not local_path:
            return (
                f"Pipeline error: local destination '{requested_local}' is not configured. "
                "Use -plugin local -instance <name|path>."
            )

    return None
2026-04-30 18:56:22 -07:00
@staticmethod
def _resolve_plugin_storage_backend(
    plugin_name: Optional[Any],
    instance_name: Optional[Any],
    config: Dict[str, Any],
    *,
    store_instance: Optional[Any] = None,
    deps: Optional[_CommandDependencies] = None,
) -> Optional[str]:
    """Ask an upload plugin which storage backend name it resolves to.

    Args:
        plugin_name: Plugin identifier; normalized before lookup.
        instance_name: Explicit instance requested by the user, if any.
        config: Configuration used for dependencies and the registry.
        store_instance: Registry passed to the plugin; a new
            ``BackendRegistry(config)`` is built when omitted.
        deps: Dependency provider; built from ``config`` when omitted.

    Returns:
        The resolved backend name, or None when the plugin is unknown, has
        no callable ``resolve_backend``, resolves no backend or name, or its
        optional ``is_backend`` check rejects the result.
    """
    plugin_key = Add_File._normalize_provider_key(plugin_name)
    if not plugin_key:
        return None

    if deps is None:
        deps = _CommandDependencies(config)

    upload_plugin = deps.get_plugin_with_capability(plugin_key, "upload")
    if upload_plugin is None:
        return None

    resolver = getattr(upload_plugin, "resolve_backend", None)
    if not callable(resolver):
        return None

    explicit_instance = str(instance_name or "").strip() or None

    # NOTE(review): sibling helpers obtain the registry through
    # deps.get_backend_registry(); confirm building a fresh BackendRegistry
    # here is intentional.
    try:
        if store_instance is not None:
            backend_registry = store_instance
        else:
            backend_registry = BackendRegistry(config)
    except Exception:
        backend_registry = None

    try:
        resolved_name, backend = resolver(
            explicit_instance,
            storage=backend_registry,
            require_explicit=bool(explicit_instance),
        )
    except TypeError:
        # Resolver without the keyword arguments: retry positionally.
        try:
            resolved_name, backend = resolver(explicit_instance)
        except Exception:
            return None
    except Exception:
        return None

    if backend is None:
        return None

    resolved_text = str(resolved_name or explicit_instance or "").strip()
    if not resolved_text:
        return None

    # Optional plugin-side confirmation that the backend belongs to it.
    checker = getattr(upload_plugin, "is_backend", None)
    if callable(checker):
        try:
            accepted = checker(backend, resolved_text)
        except Exception:
            return None
        if not accepted:
            return None

    return resolved_text
2026-05-14 17:15:13 -07:00
@staticmethod
def _resolve_local_export_plugin_target (
requested : Optional [ Any ] ,
config : Dict [ str , Any ] ,
* ,
deps : Optional [ _CommandDependencies ] = None ,
require_explicit : bool = False ,
) - > tuple [ Optional [ str ] , Optional [ str ] ] :
if deps is None :
deps = _CommandDependencies ( config )
file_provider = deps . get_plugin_with_capability ( " local " , " upload " )
if file_provider is None :
return None , None
resolver = getattr ( file_provider , " resolve_destination " , None )
if not callable ( resolver ) :
return None , None
requested_text = str ( requested or " " ) . strip ( ) or None
try :
resolved_name , settings = resolver (
requested_text ,
require_explicit = require_explicit ,
)
except TypeError :
try :
resolved_name , settings = resolver ( requested_text )
except Exception :
return None , None
except Exception :
return None , None
path_value = str ( ( settings or { } ) . get ( " path " ) or " " ) . strip ( )
if not path_value :
return None , None
resolved_text = str ( resolved_name or requested_text or " " ) . strip ( ) or None
return resolved_text , path_value
2026-01-31 23:22:30 -08:00
@staticmethod
def _maybe_download_plugin_result(
    result: Any,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
    """Delegate download of a piped result to the plugin that produced it.

    The plugin key is the first non-empty normalized value among
    ``pipe_obj.provider`` and the result's ``plugin``, ``provider`` and
    ``table`` fields, in that order.

    Returns:
        Whatever the plugin's ``resolve_pipe_result_download`` returns, or
        ``(None, None, None)`` when no key or plugin is found or the helper
        raises (the failure is written to the debug log).
    """
    nothing = (None, None, None)

    # All sources are read up front; only normalization is lazy.
    sources = (
        pipe_obj.provider,
        get_field(result, "plugin"),
        get_field(result, "provider"),
        get_field(result, "table"),
    )
    plugin_key = next(
        (key for key in map(Add_File._normalize_provider_key, sources) if key),
        None,
    )
    if not plugin_key:
        return nothing

    dependencies = deps if deps is not None else _CommandDependencies(config)
    plugin = dependencies.get_plugin(plugin_key)
    if plugin is None:
        return nothing

    try:
        return plugin.resolve_pipe_result_download(result, pipe_obj)
    except Exception as exc:
        debug(f"[add-file] Plugin '{plugin_key}' download helper failed: {exc}")
    return nothing
2026-01-31 23:22:30 -08:00
2026-01-23 21:32:34 -08:00
@staticmethod
def _download_piped_source(
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    store_instance: Optional[Any],
    deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
    """Materialize the file behind a piped ``(store, hash)`` reference.

    Resolution order, once the backend named by ``pipe_obj.store`` is found:

    1. ``backend.get_file(hash)`` returns an existing ``Path``: use it as is.
    2. It returns a non-blank string: try ``_maybe_download_backend_file``.
    3. If that yields nothing and the string is an http(s) URL: download it
       into a fresh temporary directory.

    Args:
        pipe_obj: Piped item; its ``path`` (and ``is_temp`` for direct
            downloads) is updated on success.
        config: Configuration used to build ``deps`` when it is not supplied.
        store_instance: Backend registry to use; falls back to
            ``deps.get_backend_registry()`` when falsy.
        deps: Optional pre-built dependency container.

    Returns:
        ``(local_path, hash, temp_dir)`` on success, where ``temp_dir`` is the
        directory the caller should remove afterwards (or None), otherwise
        ``(None, None, None)``.
    """
    r_hash = str(
        getattr(pipe_obj, "hash", None) or getattr(pipe_obj, "file_hash", None) or ""
    ).strip()
    r_store = str(getattr(pipe_obj, "store", None) or "").strip()
    if not (r_hash and r_store):
        return None, None, None

    if deps is None:
        deps = _CommandDependencies(config)

    backend_registry = store_instance or deps.get_backend_registry()
    backend = (
        Add_File._resolve_backend_by_name(backend_registry, r_store)
        if backend_registry is not None
        else None
    )
    if backend is None:
        return None, None, None

    try:
        source = backend.get_file(r_hash.lower())
        if isinstance(source, Path) and source.exists():
            pipe_obj.path = str(source)
            return source, r_hash, None

        if isinstance(source, str) and source.strip():
            dl_path, tmp_dir = Add_File._maybe_download_backend_file(
                backend, r_hash, pipe_obj
            )
            if dl_path and dl_path.exists():
                return dl_path, r_hash, tmp_dir

            source_url = source.strip()
            if source_url.lower().startswith(("http://", "https://")):
                download_dir = Path(tempfile.mkdtemp(prefix="add-file-src-"))
                handed_off = False
                try:
                    filename = Add_File._build_provider_filename(
                        pipe_obj,
                        r_hash,
                        source_url,
                    )
                    downloaded = _download_direct_file(
                        source_url,
                        download_dir,
                        quiet=True,
                        suggested_filename=filename,
                    )
                    downloaded_path = downloaded.path
                    if downloaded_path and downloaded_path.exists():
                        pipe_obj.is_temp = True
                        pipe_obj.path = str(downloaded_path)
                        handed_off = True
                        return downloaded_path, r_hash, download_dir
                except Exception as exc:
                    debug(f"[add-file] Provider download failed: {exc}")
                finally:
                    # The directory is ours until it is returned to the caller.
                    # Remove it on every other exit, including the case where
                    # the download "succeeds" without producing a file, which
                    # previously leaked the directory.
                    if not handed_off:
                        shutil.rmtree(download_dir, ignore_errors=True)
    except Exception as exc:
        # Still best-effort, but no longer silent.
        debug(f"[add-file] Could not resolve piped source from store '{r_store}': {exc}")
    return None, None, None
2025-12-27 06:05:07 -08:00
@staticmethod
2026-01-18 13:10:31 -08:00
def _scan_directory_for_files ( directory : Path , compute_hash : bool = True ) - > List [ Dict [ str , Any ] ] :
2025-12-27 06:05:07 -08:00
""" Scan a directory for supported media files and return list of file info dicts.
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
Each dict contains:
- path: Path object
- name: filename
2026-01-18 13:10:31 -08:00
- hash: sha256 hash (or None if compute_hash=False)
2025-12-27 06:05:07 -08:00
- size: file size in bytes
- ext: file extension
"""
if not directory . exists ( ) or not directory . is_dir ( ) :
return [ ]
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
files_info : List [ Dict [ str , Any ] ] = [ ]
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
try :
for item in directory . iterdir ( ) :
if not item . is_file ( ) :
continue
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
ext = item . suffix . lower ( )
if ext not in SUPPORTED_MEDIA_EXTENSIONS :
continue
2025-12-29 17:05:03 -08:00
2026-01-18 13:10:31 -08:00
file_hash = None
# Compute hash if requested (computing can be expensive for large dirs)
if compute_hash :
try :
file_hash = sha256_file ( item )
except Exception as exc :
debug ( f " Failed to hash { item } : { exc } " )
# If hashing is required, skip this file; otherwise include without hash
continue
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
# Get file size
try :
size = item . stat ( ) . st_size
except Exception :
size = 0
2025-12-29 17:05:03 -08:00
files_info . append (
{
" path " : item ,
" name " : item . name ,
" hash " : file_hash ,
" size " : size ,
" ext " : ext ,
}
)
2025-12-27 06:05:07 -08:00
except Exception as exc :
debug ( f " Error scanning directory { directory } : { exc } " )
2025-12-29 17:05:03 -08:00
2025-12-27 06:05:07 -08:00
return files_info
2026-01-09 01:22:06 -08:00
@staticmethod
def _validate_source ( media_path : Optional [ Path ] , allow_all_extensions : bool = False ) - > bool :
""" Validate that the source file exists and is supported.
Args:
media_path: Path to the file to validate
2026-05-24 12:32:57 -07:00
allow_all_extensions: If True, skip file type filtering for non-backend exports.
If False, only allow SUPPORTED_MEDIA_EXTENSIONS for backend ingest.
2026-01-09 01:22:06 -08:00
"""
2025-12-11 12:47:30 -08:00
if media_path is None :
return False
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
if not media_path . exists ( ) or not media_path . is_file ( ) :
log ( f " File not found: { media_path } " )
return False
2026-05-24 12:32:57 -07:00
# Validate file type only when ingesting into a storage backend.
2026-01-09 01:22:06 -08:00
if not allow_all_extensions :
file_extension = media_path . suffix . lower ( )
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS :
log ( f " ❌ Unsupported file type: { file_extension } " , file = sys . stderr )
return False
2025-12-11 12:47:30 -08:00
return True
2026-01-12 04:05:52 -08:00
@staticmethod
def _is_probable_url ( s : Any ) - > bool :
""" Check if a string looks like a URL/magnet/identifier (vs a tag/title). """
if not isinstance ( s , str ) :
return False
val = s . strip ( ) . lower ( )
if not val :
return False
# Obvious schemes
2026-02-25 17:35:38 -08:00
if val . startswith ( _REMOTE_URL_PREFIXES ) :
2026-01-12 04:05:52 -08:00
return True
# Domain-like patterns or local file paths (but we want URLs here)
if " :// " in val :
return True
# Hydrus hash-like search queries are NOT urls
if val . startswith ( " hash: " ) :
return False
return False
2025-12-11 12:47:30 -08:00
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
    """Collect URL candidates for an item and keep only the URL-looking ones.

    Candidates are gathered, in order, from ``pipe_obj.url``,
    ``pipe_obj.source_url``, ``pipe_obj.extra["url"]`` (list or str) and
    ``extract_url_from_result(result)``. They are then passed through
    ``normalize_urls`` and filtered with ``_is_probable_url`` so tags or
    titles do not leak into the URL list.
    """
    from SYS.metadata import normalize_urls

    found: List[str] = []

    # 1. Explicit top-level fields on the PipeObject come first.
    for direct in (pipe_obj.url, pipe_obj.source_url):
        if direct:
            found.append(direct)

    # 2. The "url" entry of the extra mapping.
    extra = pipe_obj.extra
    if isinstance(extra, dict):
        extra_url = extra.get("url")
        if isinstance(extra_url, list):
            found += [str(entry) for entry in extra_url if entry]
        elif isinstance(extra_url, str):
            found.append(extra_url)

    # 3. Whatever the raw result (dict or another PipeObject) carries.
    from_result = extract_url_from_result(result)
    if from_result:
        found.extend(from_result)

    # 4. Normalize, then keep only values that look like URLs.
    return list(filter(Add_File._is_probable_url, normalize_urls(found)))
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
2025-12-29 18:42:02 -08:00
def _get_relationships ( result : Any ,
pipe_obj : models . PipeObject ) - > Optional [ Dict [ str ,
Any ] ] :
2025-12-11 12:47:30 -08:00
try :
rels = pipe_obj . get_relationships ( )
if rels :
return rels
except Exception :
pass
if isinstance ( result , dict ) and result . get ( " relationships " ) :
return result . get ( " relationships " )
try :
return extract_relationships ( result )
except Exception :
return None
@staticmethod
def _get_duration ( result : Any , pipe_obj : models . PipeObject ) - > Optional [ float ] :
2025-12-29 18:42:02 -08:00
2025-12-14 00:53:52 -08:00
def _parse_duration ( value : Any ) - > Optional [ float ] :
if value is None :
return None
if isinstance ( value , ( int , float ) ) :
return float ( value ) if value > 0 else None
if isinstance ( value , str ) :
s = value . strip ( )
if not s :
return None
try :
candidate = float ( s )
return candidate if candidate > 0 else None
except ValueError :
pass
if " : " in s :
parts = [ p . strip ( ) for p in s . split ( " : " ) if p . strip ( ) ]
2025-12-29 18:42:02 -08:00
if len ( parts ) in { 2 ,
3 } and all ( p . isdigit ( ) for p in parts ) :
2025-12-14 00:53:52 -08:00
nums = [ int ( p ) for p in parts ]
if len ( nums ) == 2 :
minutes , seconds = nums
return float ( minutes * 60 + seconds )
hours , minutes , seconds = nums
return float ( hours * 3600 + minutes * 60 + seconds )
return None
parsed = _parse_duration ( getattr ( pipe_obj , " duration " , None ) )
if parsed is not None :
return parsed
2025-12-11 12:47:30 -08:00
try :
2025-12-14 00:53:52 -08:00
return _parse_duration ( extract_duration ( result ) )
2025-12-11 12:47:30 -08:00
except Exception :
return None
2025-12-16 23:23:43 -08:00
@staticmethod
2025-12-29 18:42:02 -08:00
def _get_note_text ( result : Any ,
pipe_obj : models . PipeObject ,
note_name : str ) - > Optional [ str ] :
2025-12-16 23:23:43 -08:00
""" Extract a named note text from a piped item.
Supports:
- pipe_obj.extra[ " notes " ][note_name]
- result[ " notes " ][note_name] for dict results
- pipe_obj.extra[note_name] / result[note_name] as fallback
"""
def _normalize ( val : Any ) - > Optional [ str ] :
if val is None :
return None
if isinstance ( val , bytes ) :
try :
val = val . decode ( " utf-8 " , errors = " ignore " )
except Exception :
val = str ( val )
if isinstance ( val , str ) :
text = val . strip ( )
return text if text else None
try :
text = str ( val ) . strip ( )
return text if text else None
except Exception :
return None
note_key = str ( note_name or " " ) . strip ( )
if not note_key :
return None
# Prefer notes dict on PipeObject.extra (common for cmdlet-emitted dicts)
try :
if isinstance ( pipe_obj . extra , dict ) :
notes_val = pipe_obj . extra . get ( " notes " )
if isinstance ( notes_val , dict ) and note_key in notes_val :
return _normalize ( notes_val . get ( note_key ) )
if note_key in pipe_obj . extra :
return _normalize ( pipe_obj . extra . get ( note_key ) )
except Exception :
pass
# Fallback to raw result dict
if isinstance ( result , dict ) :
try :
notes_val = result . get ( " notes " )
if isinstance ( notes_val , dict ) and note_key in notes_val :
return _normalize ( notes_val . get ( note_key ) )
if note_key in result :
return _normalize ( result . get ( note_key ) )
except Exception :
pass
return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _update_pipe_object_destination (
pipe_obj : models . PipeObject ,
* ,
2025-12-11 19:04:02 -08:00
hash_value : str ,
2025-12-11 12:47:30 -08:00
store : str ,
2026-04-28 22:20:54 -07:00
provider : Optional [ str ] = None ,
2025-12-11 19:04:02 -08:00
path : Optional [ str ] ,
2025-12-11 23:21:45 -08:00
tag : List [ str ] ,
2025-12-11 12:47:30 -08:00
title : Optional [ str ] ,
2025-12-29 18:42:02 -08:00
extra_updates : Optional [ Dict [ str ,
Any ] ] = None ,
2025-12-11 12:47:30 -08:00
) - > None :
2025-12-11 19:04:02 -08:00
pipe_obj . hash = hash_value
2025-12-11 12:47:30 -08:00
pipe_obj . store = store
2026-04-28 22:20:54 -07:00
pipe_obj . provider = provider
2026-04-16 17:18:50 -07:00
pipe_obj . is_temp = False
2025-12-11 19:04:02 -08:00
pipe_obj . path = path
2025-12-11 23:21:45 -08:00
pipe_obj . tag = tag
2025-12-11 12:47:30 -08:00
if title :
pipe_obj . title = title
if isinstance ( pipe_obj . extra , dict ) :
pipe_obj . extra . update ( extra_updates or { } )
else :
pipe_obj . extra = dict ( extra_updates or { } )
@staticmethod
def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
    """Emit ``pipe_obj`` as a dict and, on the final stage, display it.

    The dict form is always emitted through the pipeline context and the
    current stage table is cleared. When there is no stage context or the
    stage reports ``is_last_stage``, live progress is stopped and the item is
    shown via ``display_and_persist_items``; any error in that display step
    is ignored.
    """
    item = pipe_obj.to_dict()
    ctx.emit(item)
    ctx.set_current_stage_table(None)

    stage = ctx.get_stage_context()
    on_final_stage = stage is None or bool(getattr(stage, "is_last_stage", False))
    if on_final_stage:
        try:
            Add_File._stop_live_progress_for_terminal_render()

            from .._shared import display_and_persist_items

            display_and_persist_items([item], title="Result", subject=item)
        except Exception:
            pass
2026-05-24 12:32:57 -07:00
@staticmethod
def _stop_live_progress_for_terminal_render() -> None:
    """Shut down the live progress display, if the context has one.

    Three independent best-effort steps are attempted, each ignoring its own
    errors: mark the current pipe finished (when the stage context exposes an
    integer ``pipe_index``), stop the progress object, and clear it from the
    context when ``set_live_progress`` exists.
    """
    try:
        progress = ctx.get_live_progress()
    except Exception:
        return
    if progress is None:
        return

    def _finish_current_pipe() -> None:
        stage = ctx.get_stage_context()
        index = getattr(stage, "pipe_index", None)
        if isinstance(index, int):
            progress.finish_pipe(int(index), force_complete=True)

    def _stop() -> None:
        progress.stop()

    def _detach() -> None:
        if hasattr(ctx, "set_live_progress"):
            ctx.set_live_progress(None)

    for step in (_finish_current_pipe, _stop, _detach):
        try:
            step()
        except Exception:
            pass
2025-12-13 12:09:50 -08:00
@staticmethod
def _emit_storage_result(
    payload: Dict[str, Any],
    *,
    overlay: bool = True,
    emit: bool = True,
) -> None:
    """Emit a storage-style result payload and optionally publish it as a table.

    - With ``emit`` set, the dict is emitted downstream through the pipeline
      context.
    - With ``overlay`` set and this being the last stage (or no stage context
      at all), a one-row "Result" table is published as an overlay so ``@1``
      refers to this add-file result without overwriting search history.
    - If publishing the table fails, the payload is still recorded as the
      last result items so ``@`` selection keeps working.
    """
    if emit:
        # For downstream commands.
        ctx.emit(payload)

    stage = ctx.get_stage_context()
    on_final_stage = stage is None or bool(getattr(stage, "is_last_stage", False))
    if not (on_final_stage and overlay):
        return

    try:
        from SYS.result_table import Table

        result_table = Table("Result")
        result_table.add_result(payload)
        publish_result_table(ctx, result_table, [payload], subject=payload, overlay=True)
    except Exception:
        try:
            ctx.set_last_result_items_only([payload])
        except Exception:
            pass
2025-12-14 00:53:52 -08:00
@staticmethod
def _try_emit_search_file_by_hash(
    *,
    instance: str,
    hash_value: str,
    config: Dict[str, Any],
) -> Optional[List[Any]]:
    """Run search-file for one hash so the final table/payload is consistent.

    `add-file` is treated as an action command by the CLI, which only prints
    tables for it when a display overlay exists. After search-file has run,
    its result table is therefore promoted to the display overlay (when this
    is the last stage) so the canonical store table is what the user sees and
    can select from.

    Returns:
        The items search-file emitted, or None when it returns a non-zero
        code or anything raises (the error is written to the debug log).
    """
    try:
        from cmdlet.file.search import CMDLET as search_file_cmdlet

        search_args = ["-instance", str(instance), f"hash:{str(hash_value)}"]

        # search-file runs under a throw-away stage context so its ctx.emit()
        # calls do not interfere with the outer add-file stage.
        outer_stage = ctx.get_stage_context()
        scratch_stage = ctx.PipelineStageContext(
            stage_index=0,
            total_stages=1,
            pipe_index=0,
            worker_id=getattr(outer_stage, "worker_id", None),
        )
        ctx.set_stage_context(scratch_stage)
        try:
            exit_code = search_file_cmdlet.run(None, search_args, config)
            captured = list(getattr(scratch_stage, "emits", []) or [])
        finally:
            ctx.set_stage_context(outer_stage)

        if exit_code != 0:
            return None

        current_stage = ctx.get_stage_context()
        on_final_stage = current_stage is None or bool(
            getattr(current_stage, "is_last_stage", False)
        )
        if on_final_stage:
            try:
                # Both getters are still called although their values are
                # unused: if either raises, the overlay below is skipped.
                ctx.get_last_result_table()
                ctx.get_last_result_items()
                overlay_existing_result_table(
                    ctx,
                    subject={"store": instance, "hash": hash_value},
                )
            except Exception:
                pass

        return captured
    except Exception as exc:
        debug(
            f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
        )
        return None
2025-12-14 00:53:52 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _prepare_metadata(
    result: Any,
    media_path: Path,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
) -> Tuple[List[str], List[str], Optional[str], Optional[str]]:
    """Prepare tags, url, and title for the file.

    Tags and URLs from the piped item are merged with those of the file's
    sidecar bundle. A single ``title:`` tag is rebuilt from the preferred
    title (``pipe_obj.title``, else the first ``title:`` tag, else
    ``extract_title_from_result``), with underscores turned into spaces.
    Legacy ``relationship:`` tags are moved into ``pipe_obj.relationships``
    (only when that is empty) and removed from the tag list.

    Side effects: ``pipe_obj.tag`` is replaced; ``pipe_obj.title`` and
    ``pipe_obj.hash`` are filled when empty; ``pipe_obj.extra["url"]`` is
    overwritten when ``extra`` is a dict.

    Returns:
        (tags, url, preferred_title, file_hash)
    """

    def _is_title_tag(tag: Any) -> bool:
        return str(tag).strip().lower().startswith("title:")

    def _title_value(tag: Any) -> str:
        # str() here mirrors the guard above. Previously the guard used
        # str(tag) while the split ran on the raw object, so a non-str tag
        # whose text starts with "title:" raised AttributeError.
        return str(tag).split(":", 1)[1].replace("_", " ").strip()

    def _is_relationship_tag(tag: Any) -> bool:
        return isinstance(tag, str) and tag.strip().lower().startswith("relationship:")

    def normalize_title_tag(tag: Any) -> Any:
        if _is_title_tag(tag):
            return f"title:{_title_value(tag)}"
        return tag

    tags_from_result = list(pipe_obj.tag or [])
    if not tags_from_result:
        try:
            tags_from_result = list(extract_tag_from_result(result) or [])
        except Exception:
            tags_from_result = []

    url_from_result = Add_File._get_url(result, pipe_obj)

    preferred_title = pipe_obj.title
    if not preferred_title:
        for t in tags_from_result:
            if _is_title_tag(t):
                candidate = _title_value(t)
                if candidate:
                    preferred_title = candidate
                    break
    if not preferred_title:
        preferred_title = extract_title_from_result(result)
    if preferred_title:
        preferred_title = preferred_title.replace("_", " ").strip()

    store = getattr(pipe_obj, "store", None)
    _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
        media_path, store, config
    )

    tags_from_result_no_title = [t for t in tags_from_result if not _is_title_tag(t)]
    sidecar_tags = collapse_namespace_tag(
        [normalize_title_tag(t) for t in sidecar_tags],
        "title",
        prefer="last",
    )
    sidecar_tags_filtered = [t for t in sidecar_tags if not _is_title_tag(t)]

    merged_tags = merge_sequences(
        tags_from_result_no_title,
        sidecar_tags_filtered,
        case_sensitive=True,
    )
    if preferred_title:
        merged_tags.append(f"title:{preferred_title}")

    merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
    # Final safety filter: ensures no tags/titles leaked into the URL list.
    merged_url = [u for u in merged_url if Add_File._is_probable_url(u)]

    file_hash = Add_File._resolve_file_hash(
        result,
        media_path,
        pipe_obj,
        sidecar_hash,
    )

    # Relationships must not be stored as tags. If relationship tags exist
    # (legacy sidecar format), capture them into PipeObject.relationships and
    # strip them from the final tag list.
    relationship_tags = [t for t in merged_tags if _is_relationship_tag(t)]
    if relationship_tags:
        try:
            has_relationships = (
                isinstance(getattr(pipe_obj, "relationships", None), dict)
                and bool(pipe_obj.relationships)
            )
            if not has_relationships:
                king: Optional[str] = None
                alts: List[str] = []
                for rel_tag in relationship_tags:
                    k, a = Add_File._parse_relationship_tag_king_alts(rel_tag)
                    if k and not king:
                        king = k
                    if a:
                        alts.extend(a)
                if king:
                    # Keep 64-character alternates only, excluding the king
                    # itself, de-duplicated in first-seen order.
                    seen_alt: set = set()
                    alts = [
                        h for h in alts
                        if h and h != king and len(h) == 64
                        and not (h in seen_alt or seen_alt.add(h))
                    ]
                    payload: Dict[str, Any] = {"king": [king]}
                    if alts:
                        payload["alt"] = alts
                    pipe_obj.relationships = payload
        except Exception:
            pass
        merged_tags = [t for t in merged_tags if not _is_relationship_tag(t)]

    # Persist back to PipeObject.
    pipe_obj.tag = merged_tags
    if preferred_title and not pipe_obj.title:
        pipe_obj.title = preferred_title
    if file_hash and not pipe_obj.hash:
        pipe_obj.hash = file_hash
    if isinstance(pipe_obj.extra, dict):
        # Overwrite (not setdefault) so URLs matched from sidecars or source
        # stores are tracked.
        pipe_obj.extra["url"] = merged_url

    return merged_tags, merged_url, preferred_title, file_hash
2026-05-04 18:41:01 -07:00
@staticmethod
def _normalize_hash_candidate ( value : Any ) - > str :
text = str ( value or " " ) . strip ( ) . lower ( )
if len ( text ) != 64 :
return " "
if any ( ch not in " 0123456789abcdef " for ch in text ) :
return " "
return text
@staticmethod
def _find_existing_hash_by_urls (
backend : Any ,
urls : Sequence [ str ] ,
) - > Optional [ str ] :
""" Best-effort duplicate detection by URL before ingesting file bytes. """
url_candidates : List [ str ] = [ ]
for raw in urls or [ ] :
text = str ( raw or " " ) . strip ( )
if not text or not Add_File . _is_probable_url ( text ) :
continue
if text not in url_candidates :
url_candidates . append ( text )
if not url_candidates :
return None
lookup_exact = getattr ( backend , " find_hashes_by_url " , None )
if callable ( lookup_exact ) :
for candidate_url in url_candidates :
try :
hashes = lookup_exact ( candidate_url ) or [ ]
except Exception :
continue
if not isinstance ( hashes , ( list , tuple , set ) ) :
continue
for item in hashes :
normalized = Add_File . _normalize_hash_candidate ( item )
if normalized :
return normalized
searcher = getattr ( backend , " search " , None )
if callable ( searcher ) :
for candidate_url in url_candidates :
try :
hits = searcher ( f " url: { candidate_url } " , limit = 1 , minimal = True ) or [ ]
except Exception :
continue
if not isinstance ( hits , list ) or not hits :
continue
hit = hits [ 0 ]
for key in ( " hash " , " file_hash " , " sha256 " ) :
normalized = Add_File . _normalize_hash_candidate ( get_field ( hit , key ) )
if normalized :
return normalized
return None
2025-12-11 12:47:30 -08:00
@staticmethod
def _emit_plugin_upload_payload(
    upload_payload: Dict[str, Any],
    plugin_name: str,
    instance_name: Optional[str],
    pipe_obj: models.PipeObject,
    media_path: Path,
    delete_after: bool,
) -> int:
    """Apply a plugin's dict-shaped upload result to ``pipe_obj`` and emit it.

    Values in the payload win over what ``pipe_obj`` already carries, which in
    turn wins over defaults derived from ``media_path``. The payload's
    ``extra`` dict, the plugin/instance names and any ``url`` value(s) are
    folded into the pipe object's ``extra``.

    Returns:
        Always 0, after emitting the object and running post-success cleanup.
    """
    data = dict(upload_payload or {})

    payload_extra = data.get("extra")
    merged_extra: Dict[str, Any] = dict(payload_extra) if isinstance(payload_extra, dict) else {}
    if plugin_name:
        merged_extra.setdefault("plugin", plugin_name)
    if instance_name:
        merged_extra.setdefault("instance", instance_name)

    payload_urls = data.get("url")
    if isinstance(payload_urls, str):
        single = payload_urls.strip()
        merged_extra["url"] = [single] if single else []
    elif isinstance(payload_urls, (list, tuple, set)):
        stripped = (str(entry).strip() for entry in payload_urls)
        merged_extra["url"] = [entry for entry in stripped if entry]

    payload_relationships = data.get("relationships")
    if payload_relationships:
        try:
            pipe_obj.relationships = payload_relationships
        except Exception:
            pass

    payload_tags = data.get("tag")
    if isinstance(payload_tags, list):
        final_tags = [str(entry) for entry in payload_tags]
    else:
        final_tags = list(pipe_obj.tag or [])

    final_title = (
        str(data.get("title") or pipe_obj.title or media_path.name).strip()
        or media_path.name
    )
    final_path = str(data.get("path") or pipe_obj.path or media_path).strip()
    final_hash = (
        str(
            data.get("hash")
            or data.get("file_hash")
            or getattr(pipe_obj, "hash", None)
            or "unknown"
        ).strip()
        or "unknown"
    )
    final_store = str(data.get("store") or "").strip()

    final_provider = data.get("provider")
    if final_provider is None and plugin_name:
        final_provider = plugin_name

    Add_File._update_pipe_object_destination(
        pipe_obj,
        hash_value=final_hash,
        store=final_store,
        provider=str(final_provider) if final_provider else None,
        path=final_path,
        tag=final_tags,
        title=final_title,
        extra_updates=merged_extra,
    )
    Add_File._emit_pipe_object(pipe_obj)
    Add_File._cleanup_after_success(media_path, delete_source=delete_after)
    return 0
2025-12-11 12:47:30 -08:00
@staticmethod
def _handle_plugin_upload(
    media_path: Path,
    plugin_name: str,
    instance_name: Optional[str],
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    delete_after: bool,
) -> int:
    """Handle uploading via an upload plugin (e.g. 0x0).

    For the ``local`` plugin, prepared metadata (title, tags, urls, hash,
    relationships, the direct-export flag and a progress handle) is passed to
    ``upload``; other plugins only receive ``pipe_obj`` and ``instance``.

    A dict returned by the plugin is handed to ``_emit_plugin_upload_payload``;
    any other value is treated as the hosted URL and recorded on ``pipe_obj``.

    Returns:
        0 on success; 1 when the plugin is unavailable or the upload raises.
    """
    from PluginCore.registry import (
        get_plugin_with_capability,
        list_plugin_names_with_capability,
        list_plugins_with_capability,
    )

    try:
        file_provider = get_plugin_with_capability(plugin_name, "upload", config)
        if not file_provider:
            available_map = list_plugins_with_capability("upload", config)
            known_upload_plugins = set(list_plugin_names_with_capability("upload"))
            available_uploads = [
                name
                for name, enabled in available_map.items()
                if enabled and name in known_upload_plugins
            ]
            if str(plugin_name or "").strip().lower() in known_upload_plugins:
                # Known plugin that could not be loaded: show its configuration.
                show_plugin_config_panel([plugin_name])
            else:
                log(
                    f"Upload plugin '{plugin_name}' is not available or does not support upload",
                    file=sys.stderr,
                )
                if available_uploads:
                    show_available_plugins_panel(sorted(available_uploads))
            return 1

        upload_kwargs: Dict[str, Any] = {
            "pipe_obj": pipe_obj,
            "instance": instance_name,
        }
        pipeline_progress = PipelineProgress(ctx)
        normalized_plugin_name = Add_File._normalize_provider_key(plugin_name)
        f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)

        if normalized_plugin_name == "local":
            result = None
            tags, urls, title, f_hash = Add_File._prepare_metadata(
                result, media_path, pipe_obj, config
            )
            relationships = Add_File._get_relationships(result, pipe_obj)
            direct_export_download = False
            try:
                if isinstance(pipe_obj.extra, dict):
                    # One-shot flag: consumed (popped) so it is not re-emitted.
                    direct_export_download = bool(
                        pipe_obj.extra.pop("_direct_export_download", False)
                    )
            except Exception:
                direct_export_download = False
            upload_kwargs.update(
                {
                    "title": title,
                    "tags": tags,
                    "urls": urls,
                    "hash_value": f_hash,
                    "relationships": relationships,
                    "direct_export_download": direct_export_download,
                    "pipeline_progress": pipeline_progress,
                }
            )

        upload_result = file_provider.upload(
            str(media_path),
            **upload_kwargs,
        )
    except Exception as exc:
        log(f"Upload failed: {exc}", file=sys.stderr)
        return 1

    if isinstance(upload_result, dict):
        return Add_File._emit_plugin_upload_payload(
            upload_result,
            plugin_name,
            instance_name,
            pipe_obj,
            media_path,
            delete_after,
        )

    hoster_url = str(upload_result or "").strip()

    # Update PipeObject and emit.
    extra_updates: Dict[str, Any] = {
        "plugin": plugin_name,
        "instance": instance_name,
        "plugin_url": hoster_url,
    }
    if isinstance(pipe_obj.extra, dict):
        # Also track the hoster URL as a url for downstream steps.
        known = pipe_obj.extra.get("url") or []
        if isinstance(known, str):
            # A bare string is a single URL; list() on it would split it into
            # individual characters.
            known = [known]
        existing_known = list(known)
        if hoster_url and hoster_url not in existing_known:
            existing_known.append(hoster_url)
        extra_updates["url"] = existing_known

    file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
    Add_File._update_pipe_object_destination(
        pipe_obj,
        hash_value=f_hash or "unknown",
        store="",
        provider=plugin_name or None,
        path=file_path,
        tag=pipe_obj.tag,
        title=pipe_obj.title or (media_path.name if media_path else None),
        extra_updates=extra_updates,
    )
    Add_File._emit_pipe_object(pipe_obj)

    Add_File._cleanup_after_success(media_path, delete_source=delete_after)
    return 0
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _handle_storage_backend(
    result: Any,
    media_path: Path,
    backend_name: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
    delete_after: bool,
    *,
    collect_payloads: Optional[List[Dict[str, Any]]] = None,
    collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
    defer_url_association: bool = False,
    pending_url_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None,
    defer_tag_association: bool = False,
    pending_tag_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None,
    suppress_last_stage_overlay: bool = False,
    auto_search_file: bool = True,
    store_instance: Optional[BackendRegistry] = None,
) -> int:
    """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.).

    Args:
        result: Upstream pipeline item; when it is a dict its ``store`` and
            ``hash`` keys identify the source store.
        media_path: Local file to ingest.
        backend_name: Name of the destination store backend.
        pipe_obj: Pipe object updated in place with the destination details.
        config: Application configuration mapping.
        delete_after: Whether the source file should be deleted on success.
        collect_payloads: Optional list that receives the emitted payload.
        collect_relationship_pairs: Optional per-backend set that receives
            ``(alt_hash, king_hash)`` pairs for later persistence.
        defer_url_association: Queue URL associations instead of applying them.
        pending_url_associations: Queue used when URL association is deferred.
        defer_tag_association: Queue tag associations instead of applying them.
        pending_tag_associations: Queue used when tag association is deferred.
        suppress_last_stage_overlay: Disable the summary overlay.
        auto_search_file: Re-emit the stored item through the search-file
            lookup so downstream stages receive the canonical row.
        store_instance: Existing backend registry to reuse; a new one is
            built from ``config`` when omitted.

    Returns:
        ``0`` on success, ``1`` on failure (the error is logged to stderr).
    """
    pipeline_progress = PipelineProgress(ctx)

    def _set_status(text: str) -> None:
        # Progress reporting is cosmetic; it must never fail the ingest.
        try:
            pipeline_progress.set_status(f"{backend_name}: {text}")
        except Exception:
            pass

    def _clear_status() -> None:
        try:
            pipeline_progress.clear_status()
        except Exception:
            pass

    delete_after_effective = bool(delete_after)

    if not delete_after_effective:
        # When download-media is piped into add-file, the downloaded artifact is a temp file.
        # After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
        try:
            if (
                str(backend_name or "").strip().lower() != "temp"
                and getattr(pipe_obj, "is_temp", False)
                and getattr(pipe_obj, "action", None) == "cmdlet:download-media"
            ):
                from SYS.config import resolve_output_dir

                temp_dir = resolve_output_dir(config)
                try:
                    if media_path.resolve().is_relative_to(temp_dir.expanduser().resolve()):
                        delete_after_effective = True
                        debug(f"[add-file] Auto-delete temp source after ingest: {media_path}")
                except Exception:
                    # If path resolution fails, fall back to non-destructive behavior
                    pass
        except Exception:
            pass

    try:
        backend_registry = store_instance if store_instance is not None else BackendRegistry(config)
        backend, backend_registry, backend_exc = sh.get_preferred_store_backend(
            config,
            backend_name,
            store_registry=backend_registry,
            suppress_debug=True,
        )
        if backend is None:
            raise backend_exc or KeyError(f"Unknown store backend: {backend_name}")

        # Use backend properties to drive metadata deferral behavior.
        is_remote_backend = getattr(backend, "is_remote", False)
        prefer_defer_tags = getattr(backend, "prefer_defer_tags", False)
        supports_url_association = bool(getattr(backend, "supports_url_association", False))
        supports_note_association = bool(getattr(backend, "supports_note_association", False))
        supports_relationship_association = bool(
            getattr(backend, "supports_relationship_association", False)
        )
        # ``is_folder_backend`` used to be read further down without ever being
        # assigned; the NameError was swallowed by the surrounding handlers, so
        # URL association and the metadata lookup silently never ran.
        # NOTE(review): assumes the local folder store class is named "Folder"
        # (as the "Folder.add_file" remark below suggests) - TODO confirm.
        is_folder_backend = type(backend).__name__.lower() == "folder"

        # Prepare metadata from pipe_obj and sidecars
        tags, url, title, f_hash = Add_File._prepare_metadata(
            result, media_path, pipe_obj, config
        )

        # If we're moving/copying from one store to another, also copy the source store's
        # existing associated URLs so they aren't lost.
        try:
            from SYS.metadata import normalize_urls

            source_store = None
            source_hash = None
            if isinstance(result, dict):
                source_store = result.get("store")
                source_hash = result.get("hash")
            if not source_store:
                source_store = getattr(pipe_obj, "store", None)
            if not source_hash:
                source_hash = getattr(pipe_obj, "hash", None)
            if (not source_hash) and isinstance(pipe_obj.extra, dict):
                source_hash = pipe_obj.extra.get("hash")
            source_store = str(source_store or "").strip()
            source_hash = str(source_hash or "").strip().lower()

            if (
                source_store
                and source_hash
                and len(source_hash) == 64
                and source_store.lower() != str(backend_name or "").strip().lower()
            ):
                # The lookup previously went through an undefined ``store`` name,
                # which always raised and left the source URLs uncopied. Resolve
                # the source backend through the shared registry helper instead.
                source_backend = None
                try:
                    source_backend, _source_registry, _source_exc = sh.get_store_backend(
                        config,
                        source_store,
                        store_registry=backend_registry,
                    )
                except Exception:
                    source_backend = None

                if source_backend is not None:
                    try:
                        src_urls = normalize_urls(source_backend.get_url(source_hash) or [])
                    except Exception:
                        src_urls = []
                    try:
                        dst_urls = normalize_urls(url or [])
                    except Exception:
                        dst_urls = []

                    # Destination URLs first, then source URLs, de-duplicated in order.
                    merged: list[str] = []
                    seen: set[str] = set()
                    for u in list(dst_urls or []) + list(src_urls or []):
                        if not u or u in seen:
                            continue
                        seen.add(u)
                        merged.append(u)
                    url = merged
        except Exception:
            pass

        # Collect relationship pairs for post-ingest DB/API persistence.
        if collect_relationship_pairs is not None and supports_relationship_association:
            rels = Add_File._get_relationships(result, pipe_obj)
            if isinstance(rels, dict) and rels:
                king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
                if king_hash and alt_hashes:
                    bucket = collect_relationship_pairs.setdefault(str(backend_name), set())
                    for alt_hash in alt_hashes:
                        if alt_hash and alt_hash != king_hash:
                            bucket.add((alt_hash, king_hash))

        # Relationships must never be stored as tags.
        if isinstance(tags, list) and tags:
            tags = [
                t
                for t in tags
                if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
            ]

        # Auto-tag (best-effort) BEFORE uploading so tags land with the stored file.
        try:
            tags = _maybe_apply_florencevision_tags(
                media_path, list(tags or []), config, pipe_obj=pipe_obj
            )
            pipe_obj.tag = list(tags or [])
        except Exception as exc:
            # strict mode raises from helper; treat here as a hard failure
            log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr)
            return 1

        # Backends that prefer deferred tagging get an empty tag list at upload
        # time; the tags are applied (or queued) after the file is stored.
        upload_tags = tags
        if prefer_defer_tags and upload_tags:
            upload_tags = []

        try:
            debug_panel(
                "add-file store",
                [
                    ("backend", backend_name),
                    ("path", media_path),
                    ("title", title),
                    ("hash_hint", f_hash[:12] if f_hash else "N/A"),
                    ("defer_tags", bool(prefer_defer_tags and tags)),
                ],
                border_style="yellow",
            )
        except Exception:
            pass

        duplicate_hash = Add_File._find_existing_hash_by_urls(backend, url)
        if duplicate_hash:
            debug(
                f"[add-file] URL duplicate detected in '{backend_name}', skipping upload and reusing hash {duplicate_hash[:12]}..."
            )
            file_identifier = duplicate_hash
        else:
            # Call backend's add_file with full metadata.
            # Backend returns hash as identifier. If we already know the hash from _resolve_source
            # (which came from download-file emit), pass it to skip re-hashing large files.
            file_identifier = backend.add_file(
                media_path,
                title=title,
                tag=upload_tags,
                url=[] if ((defer_url_association and url) or (not supports_url_association)) else url,
                file_hash=f_hash,
                pipeline_progress=pipeline_progress,
                transfer_label=title or media_path.name,
            )

        stored_path: Optional[str] = None
        # IMPORTANT: avoid calling get_file() for remote backends by default to avoid
        # unintended network activity or credential exposure in result payloads.
        try:
            if not is_remote_backend:
                # For local backends, resolving the path is cheap and useful.
                maybe_path = backend.get_file(file_identifier)
                if isinstance(maybe_path, Path):
                    stored_path = str(maybe_path)
                elif isinstance(maybe_path, str) and maybe_path:
                    stored_path = maybe_path
        except Exception:
            stored_path = None

        # Compute canonical hash value for downstream use (defensive against non-string returns).
        if isinstance(file_identifier, str) and len(file_identifier) == 64:
            chosen_hash = file_identifier
        else:
            chosen_hash = f_hash or (
                str(file_identifier) if file_identifier is not None else "unknown"
            )

        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=chosen_hash,
            store=backend_name,
            path=stored_path,
            tag=tags,
            title=title or pipe_obj.title or media_path.name,
            extra_updates={
                "url": url,
            },
        )

        # Emit a search-file-like payload for consistent tables and natural piping.
        # Keep hash/store for downstream commands (get-tag, download-file, etc.).
        resolved_hash = chosen_hash

        if prefer_defer_tags and tags:
            # Support deferring tag application for batching bulk operations
            if defer_tag_association and pending_tag_associations is not None:
                try:
                    pending_tag_associations.setdefault(str(backend_name), []).append(
                        (str(resolved_hash), list(tags))
                    )
                except Exception:
                    pass
            else:
                try:
                    adder = getattr(backend, "add_tag", None)
                    if callable(adder):
                        _set_status("applying deferred tags")
                        adder(resolved_hash, list(tags))
                except Exception as exc:
                    log(
                        f"[add-file] Post-upload tagging failed for {backend_name}: {exc}",
                        file=sys.stderr,
                    )

        # If we have url(s), ensure they get associated with the destination file.
        # This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
        if url and supports_url_association:
            if defer_url_association and pending_url_associations is not None:
                try:
                    pending_url_associations.setdefault(str(backend_name), []).append(
                        (str(resolved_hash), list(url))
                    )
                except Exception:
                    pass
            else:
                try:
                    # Folder.add_file already persists URLs, avoid extra DB traffic here.
                    if not is_folder_backend:
                        _set_status("associating urls")
                        backend.add_url(resolved_hash, list(url))
                except Exception:
                    pass

        # If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
        # persist it automatically like add-note would.
        def _write_note(note_name: str, note_text: Optional[str]) -> None:
            if not note_text or not supports_note_association:
                return
            try:
                setter = getattr(backend, "set_note", None)
                if callable(setter):
                    _set_status(f"writing {note_name} note")
                    setter(resolved_hash, note_name, note_text)
            except Exception as exc:
                debug_panel(
                    "add-file note write failed",
                    [
                        ("store", backend_name),
                        ("hash", resolved_hash),
                        ("note", note_name),
                        ("error", exc),
                    ],
                    border_style="yellow",
                )

        _write_note("sub", Add_File._get_note_text(result, pipe_obj, "sub"))
        _write_note("lyric", Add_File._get_note_text(result, pipe_obj, "lyric"))
        _write_note("chapters", Add_File._get_note_text(result, pipe_obj, "chapters"))
        _write_note("caption", Add_File._get_note_text(result, pipe_obj, "caption"))

        meta: Dict[str, Any] = {}
        try:
            if not is_folder_backend:
                _set_status("loading stored metadata")
                meta = backend.get_metadata(resolved_hash) or {}
        except Exception:
            meta = {}

        # Determine size bytes
        size_bytes: Optional[int] = None
        for key in ("size_bytes", "size", "filesize", "file_size"):
            try:
                raw_size = meta.get(key)
                if raw_size is not None:
                    size_bytes = int(raw_size)
                    break
            except Exception:
                pass
        if size_bytes is None:
            try:
                size_bytes = int(media_path.stat().st_size)
            except Exception:
                size_bytes = None

        # Determine title/ext
        title_out = (
            meta.get("title") or title or pipe_obj.title or media_path.stem or media_path.name
        )
        ext_out = meta.get("ext") or media_path.suffix.lstrip(".")

        payload: Dict[str, Any] = {
            "title": title_out,
            "ext": str(ext_out or ""),
            "size_bytes": size_bytes,
            "store": backend_name,
            "hash": resolved_hash,
            # Preserve extra fields for downstream commands (kept hidden by default table rules)
            "path": stored_path,
            "tag": list(tags or []),
            "url": list(url or []),
        }

        if collect_payloads is not None:
            try:
                collect_payloads.append(payload)
            except Exception:
                pass

        # Keep the add-file 1-row summary overlay (when last stage), then emit the
        # canonical search-file payload/table for piping/selection consistency.
        if auto_search_file and resolved_hash and resolved_hash != "unknown":
            # Show the add-file summary (overlay only) but let search-file provide the downstream payload.
            Add_File._emit_storage_result(
                payload,
                overlay=not suppress_last_stage_overlay,
                emit=False,
            )

            refreshed_items = Add_File._try_emit_search_file_by_hash(
                instance=backend_name,
                hash_value=resolved_hash,
                config=config,
            )
            if refreshed_items:
                # Re-emit the canonical store rows so downstream stages receive them.
                for emitted in refreshed_items:
                    ctx.emit(emitted)
            else:
                # Fall back to emitting the add-file payload so downstream stages still receive an item.
                ctx.emit(payload)
        else:
            Add_File._emit_storage_result(
                payload,
                overlay=not suppress_last_stage_overlay,
                emit=True,
            )

        Add_File._cleanup_after_success(
            media_path,
            delete_source=delete_after_effective,
        )
        _clear_status()
        return 0

    except Exception as exc:
        _clear_status()
        log(
            f"❌ Failed to add file to backend '{backend_name}': {exc}",
            file=sys.stderr,
        )
        import traceback

        traceback.print_exc(file=sys.stderr)
        return 1
# --- Helpers ---
2025-12-20 23:57:44 -08:00
@staticmethod
2025-12-29 17:05:03 -08:00
def _apply_pending_url_associations (
2025-12-29 18:42:02 -08:00
pending : Dict [ str ,
List [ tuple [ str ,
List [ str ] ] ] ] ,
config : Dict [ str ,
2026-01-12 04:05:52 -08:00
Any ] ,
2026-05-23 13:49:47 -07:00
store_instance : Optional [ BackendRegistry ] = None ,
2025-12-29 17:05:03 -08:00
) - > None :
2025-12-20 23:57:44 -08:00
""" Apply deferred URL associations in bulk, grouped per backend. """
try :
2026-05-23 13:49:47 -07:00
backend_registry = store_instance if store_instance is not None else BackendRegistry ( config )
2025-12-20 23:57:44 -08:00
except Exception :
return
for backend_name , pairs in ( pending or { } ) . items ( ) :
if not pairs :
continue
try :
2026-05-23 13:49:47 -07:00
backend , backend_registry , _exc = sh . get_store_backend (
2026-03-25 22:39:30 -07:00
config ,
backend_name ,
2026-05-23 13:49:47 -07:00
store_registry = backend_registry ,
2026-03-25 22:39:30 -07:00
)
if backend is None :
continue
2026-05-04 18:41:01 -07:00
if not bool ( getattr ( backend , " supports_url_association " , False ) ) :
continue
2026-03-25 22:39:30 -07:00
items = sh . coalesce_hash_value_pairs ( pairs )
if not items :
continue
bulk = getattr ( backend , " add_url_bulk " , None )
if callable ( bulk ) :
try :
bulk ( items )
continue
except Exception :
pass
single = getattr ( backend , " add_url " , None )
if callable ( single ) :
for h , u in items :
try :
single ( h , u )
except Exception :
continue
2025-12-20 23:57:44 -08:00
except Exception :
continue
2026-01-19 03:14:30 -08:00
@staticmethod
def _apply_pending_tag_associations(
    pending: Dict[str, List[tuple[str, List[str]]]],
    config: Dict[str, Any],
    store_instance: Optional[BackendRegistry] = None,
) -> None:
    """Flush queued tag associations, grouped per backend.

    The batching itself is delegated to ``sh.run_store_hash_value_batches``,
    which is told to use ``add_tags_bulk`` for bulk writes and ``add_tag`` for
    single writes, without passing ``config`` to either. Returns silently when
    the backend registry cannot be built.
    """
    try:
        registry = BackendRegistry(config) if store_instance is None else store_instance
    except Exception:
        return

    batches = pending or {}
    sh.run_store_hash_value_batches(
        config,
        batches,
        bulk_method_name="add_tags_bulk",
        single_method_name="add_tag",
        store_registry=registry,
        pass_config_to_bulk=False,
        pass_config_to_single=False,
    )
2026-01-19 03:14:30 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _load_sidecar_bundle (
media_path : Path ,
2026-05-03 21:20:05 -07:00
instance : Optional [ str ] ,
2025-12-29 18:42:02 -08:00
config : Dict [ str ,
Any ] ,
) - > Tuple [ Optional [ Path ] ,
Optional [ str ] ,
List [ str ] ,
List [ str ] ] :
2025-12-11 12:47:30 -08:00
""" Load sidecar metadata. """
return None , None , [ ] , [ ]
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
@staticmethod
def _resolve_file_hash (
result : Any ,
media_path : Path ,
pipe_obj : models . PipeObject ,
fallback_hash : Optional [ str ] ,
) - > Optional [ str ] :
if pipe_obj . hash and pipe_obj . hash != " unknown " :
return pipe_obj . hash
if fallback_hash :
return fallback_hash
if isinstance ( result , dict ) :
2025-12-29 17:05:03 -08:00
candidate = result . get ( " hash " )
2025-12-11 12:47:30 -08:00
if candidate :
return str ( candidate )
try :
return sha256_file ( media_path )
except Exception :
return None
@staticmethod
def _resolve_media_kind(path: Path) -> str:
    """Return the media kind for ``path`` as reported by ``resolve_media_kind_by_extension``."""
    media_kind = resolve_media_kind_by_extension(path)
    return media_kind
2025-12-11 12:47:30 -08:00
@staticmethod
def _persist_local_metadata (
library_root : Path ,
dest_path : Path ,
tags : List [ str ] ,
url : List [ str ] ,
f_hash : Optional [ str ] ,
relationships : Any ,
duration : Any ,
media_kind : str ,
) :
2026-01-22 03:12:16 -08:00
pass
2025-12-11 12:47:30 -08:00
@staticmethod
def _copy_sidecars ( source_path : Path , target_path : Path ) :
possible_sidecars = [
source_path . with_suffix ( source_path . suffix + " .json " ) ,
source_path . with_name ( source_path . name + " .tag " ) ,
source_path . with_name ( source_path . name + " .metadata " ) ,
source_path . with_name ( source_path . name + " .notes " ) ,
]
for sc in possible_sidecars :
try :
if sc . exists ( ) :
suffix_part = sc . name . replace ( source_path . name , " " , 1 )
dest_sidecar = target_path . parent / f " { target_path . name } { suffix_part } "
dest_sidecar . parent . mkdir ( parents = True , exist_ok = True )
shutil . copy2 ( str ( sc ) , dest_sidecar )
except Exception :
pass
@staticmethod
def _cleanup_after_success ( media_path : Path , delete_source : bool ) :
2026-01-18 13:10:31 -08:00
# Determine whether this is a temporary merge/tracking file which should be
# deleted even when delete_source is False.
2025-12-11 12:47:30 -08:00
is_temp_merge = " (merged) " in media_path . name or " .dlhx_ " in media_path . name
2026-01-18 13:10:31 -08:00
# If neither explicit delete was requested nor this looks like a temp-merge,
# avoid deleting the source file.
if not delete_source and not is_temp_merge :
return
# Attempt deletion (best-effort)
try :
media_path . unlink ( )
Add_File . _cleanup_sidecar_files ( media_path )
except Exception as exc :
log ( f " ⚠️ Could not delete file: { exc } " , file = sys . stderr )
2025-12-11 12:47:30 -08:00
@staticmethod
def _cleanup_sidecar_files ( media_path : Path ) :
targets = [
2025-12-29 17:05:03 -08:00
media_path . parent / ( media_path . name + " .metadata " ) ,
media_path . parent / ( media_path . name + " .notes " ) ,
media_path . parent / ( media_path . name + " .tag " ) ,
2025-12-11 12:47:30 -08:00
]
for target in targets :
try :
if target . exists ( ) :
target . unlink ( )
except Exception :
pass
# Create and register the cmdlet.
# Module-level instance of Add_File, built once at import time.
# NOTE(review): presumably the cmdlet loader discovers it through the
# ``CMDLET`` name - confirm against the loader.
CMDLET = Add_File()