2026-01-12 04:05:52 -08:00
parent 6076ea307b
commit 9981424397
11 changed files with 646 additions and 682 deletions


@@ -28,6 +28,12 @@ extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag
resolve_target_dir = sh.resolve_target_dir
resolve_media_kind_by_extension = sh.resolve_media_kind_by_extension
coerce_to_path = sh.coerce_to_path
build_pipeline_preview = sh.build_pipeline_preview
get_field = sh.get_field
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from SYS.metadata import write_metadata
@@ -198,6 +204,12 @@ class Add_File(Cmdlet):
parsed = parse_cmdlet_args(args, self)
progress = PipelineProgress(ctx)
# Initialize Store for backend resolution
try:
storage_registry = Store(config)
except Exception:
storage_registry = None
path_arg = parsed.get("path")
location = parsed.get("store")
provider_name = parsed.get("provider")
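
Note on the new storage_registry: previously each helper constructed its own Store(config), so backend resolution ran repeatedly per item. This commit builds the registry once and threads it through every call site via store_instance=. A minimal sketch of the fallback pattern those call sites share (Store and list_backends are taken from this diff; lookup_backend is a hypothetical name used only for illustration):

    from typing import Any, Dict, Optional

    def lookup_backend(name: str, config: Dict[str, Any], store_instance: Optional["Store"] = None):
        # Hypothetical helper, not part of this commit: reuse a pre-built
        # Store when the caller supplies one, else construct a fresh registry.
        store = store_instance if store_instance is not None else Store(config)
        if name in store.list_backends():
            return store[name]
        return None
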
@@ -508,7 +520,7 @@ class Add_File(Cmdlet):
pipe_obj = coerce_to_pipe_object(item, path_arg)
try:
label = pipe_obj.title or pipe_obj.name
label = pipe_obj.title
if not label and pipe_obj.path:
try:
label = Path(str(pipe_obj.path)).name
@@ -527,7 +539,7 @@ class Add_File(Cmdlet):
delete_after_item = delete_after
try:
media_path, file_hash, temp_dir_to_cleanup = self._resolve_source(
item, path_arg, pipe_obj, config
item, path_arg, pipe_obj, config, store_instance=storage_registry
)
debug(
f"[add-file] RESOLVED source: path={media_path}, hash={file_hash[:12] if file_hash else 'N/A'}..."
@@ -575,7 +587,7 @@ class Add_File(Cmdlet):
if location:
try:
store = Store(config)
store = storage_registry or Store(config)
backends = store.list_backends()
if location in backends:
code = self._handle_storage_backend(
@@ -591,6 +603,7 @@ class Add_File(Cmdlet):
pending_url_associations=pending_url_associations,
suppress_last_stage_overlay=want_final_search_file,
auto_search_file=auto_search_file_after_add,
store_instance=storage_registry,
)
else:
code = self._handle_local_export(
@@ -638,7 +651,8 @@ class Add_File(Cmdlet):
try:
Add_File._apply_pending_url_associations(
pending_url_associations,
config
config,
store_instance=storage_registry
)
except Exception:
pass
@@ -660,6 +674,7 @@ class Add_File(Cmdlet):
store=str(location),
hash_values=hashes,
config=config,
store_instance=storage_registry,
)
if not refreshed_items:
# Fallback: at least show the add-file payloads as a display overlay
@@ -681,7 +696,8 @@ class Add_File(Cmdlet):
try:
Add_File._apply_pending_relationships(
pending_relationship_pairs,
config
config,
store_instance=storage_registry
)
except Exception:
pass
@@ -699,7 +715,8 @@ class Add_File(Cmdlet):
store: str,
hash_values: List[str],
config: Dict[str,
Any]
Any],
store_instance: Optional[Store] = None,
) -> Optional[List[Any]]:
"""Run search-file for a list of hashes and promote the table to a display overlay.
@@ -894,7 +911,8 @@ class Add_File(Cmdlet):
set[tuple[str,
str]]],
config: Dict[str,
Any]
Any],
store_instance: Optional[Store] = None,
) -> None:
"""Persist relationships to backends that support relationships.
@@ -904,7 +922,7 @@ class Add_File(Cmdlet):
return
try:
store = Store(config)
store = store_instance if store_instance is not None else Store(config)
except Exception:
return
@@ -976,6 +994,7 @@ class Add_File(Cmdlet):
pipe_obj: models.PipeObject,
config: Dict[str,
Any],
store_instance: Optional[Any] = None,
) -> Tuple[Optional[Path],
Optional[str],
Optional[Path]]:
@@ -983,162 +1002,79 @@ class Add_File(Cmdlet):
Returns (media_path, file_hash, temp_dir_to_cleanup).
"""
# PRIORITY 1a: Try hash+path from directory scan result (has 'path' and 'hash' keys)
# PRIORITY 1a: Try hash+path from directory scan result
if isinstance(result, dict):
result_path = result.get("path")
result_hash = result.get("hash")
# Check if this looks like a directory scan result (has path and hash but no 'store' key)
result_store = result.get("store")
if result_path and result_hash and not result_store:
r_path = result.get("path")
r_hash = result.get("hash")
r_store = result.get("store")
# If we have path+hash but no store, it's likely a dir scan result
if r_path and r_hash and not r_store:
try:
media_path = (
Path(result_path) if not isinstance(result_path,
Path) else result_path
)
if media_path.exists() and media_path.is_file():
debug(
f"[add-file] Using path+hash from directory scan: {media_path}"
)
pipe_obj.path = str(media_path)
return media_path, str(result_hash), None
except Exception as exc:
debug(f"[add-file] Failed to use directory scan result: {exc}")
p = coerce_to_path(r_path)
if p.exists() and p.is_file():
pipe_obj.path = str(p)
return p, str(r_hash), None
except Exception:
pass
# PRIORITY 1b: Try hash+store from result dict (most reliable for @N selections)
# PRIORITY 1b: Try hash+store from result dict (fetch from backend)
if isinstance(result, dict):
result_hash = result.get("hash")
result_store = result.get("store")
if result_hash and result_store:
debug(
f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}"
)
r_hash = result.get("hash")
r_store = result.get("store")
if r_hash and r_store:
try:
store = Store(config)
if result_store in store.list_backends():
backend = store[result_store]
media_path = backend.get_file(result_hash)
if isinstance(media_path, Path) and media_path.exists():
pipe_obj.path = str(media_path)
return media_path, str(result_hash), None
if isinstance(media_path, str) and media_path.strip():
downloaded, tmp_dir = Add_File._maybe_download_backend_file(
backend,
str(result_hash),
pipe_obj,
store = store_instance
if not store:
store = Store(config)
if r_store in store.list_backends():
backend = store[r_store]
# Try direct access (Path)
mp = backend.get_file(r_hash)
if isinstance(mp, Path) and mp.exists():
pipe_obj.path = str(mp)
return mp, str(r_hash), None
# Try download to temp
if isinstance(mp, str) and mp.strip():
dl_path, tmp_dir = Add_File._maybe_download_backend_file(
backend, str(r_hash), pipe_obj
)
if isinstance(downloaded, Path) and downloaded.exists():
pipe_obj.path = str(downloaded)
return downloaded, str(result_hash), tmp_dir
except Exception as exc:
debug(f"[add-file] Failed to retrieve via hash+store: {exc}")
if dl_path and dl_path.exists():
pipe_obj.path = str(dl_path)
return dl_path, str(r_hash), tmp_dir
except Exception:
pass
# PRIORITY 2: Try explicit path argument
# PRIORITY 2: Generic Coercion (Path arg > PipeObject > Result)
candidate: Optional[Path] = None
if path_arg:
media_path = Path(path_arg)
pipe_obj.path = str(media_path)
debug(f"[add-file] Using explicit path argument: {media_path}")
return media_path, None, None
candidate = Path(path_arg)
elif pipe_obj.path:
candidate = Path(pipe_obj.path)
if not candidate:
# Unwrap list if needed
obj = result[0] if isinstance(result, list) and result else result
if obj:
try:
candidate = coerce_to_path(obj)
except ValueError:
pass
# PRIORITY 3: Try from pipe_obj.path (check file first before URL)
pipe_path = getattr(pipe_obj, "path", None)
if pipe_path:
pipe_path_str = str(pipe_path)
debug(f"Resolved pipe_path: {pipe_path_str}")
if pipe_path_str.lower().startswith(("http://",
"https://",
"magnet:",
"torrent:",
"tidal:",
"hydrus:")):
log(
"add-file ingests local files only. Use download-file first.",
file=sys.stderr,
)
return None, None, None
return Path(pipe_path_str), None, None
if candidate:
s = str(candidate).lower()
if s.startswith(("http://", "https://", "magnet:", "torrent:", "tidal:", "hydrus:")):
log("add-file ingests local files only. Use download-file first.", file=sys.stderr)
return None, None, None
pipe_obj.path = str(candidate)
# Retain hash from input if available to avoid re-hashing
hash_hint = get_field(result, "hash") or get_field(result, "file_hash") or getattr(pipe_obj, "hash", None)
return candidate, hash_hint, None
# Try from result (if it's a string path or URL)
if isinstance(result, str):
debug(f"Checking result string: {result}")
# Check if result is a URL before treating as file path
if result.lower().startswith(("http://",
"https://",
"magnet:",
"torrent:",
"tidal:",
"hydrus:")):
log(
"add-file ingests local files only. Use download-file first.",
file=sys.stderr,
)
return None, None, None
media_path = Path(result)
pipe_obj.path = str(media_path)
return media_path, None, None
# Try from result if it's a list (pipeline emits multiple results)
if isinstance(result, list) and result:
first_item = result[0]
# If the first item is a string, it's either a URL or a file path
if isinstance(first_item, str):
debug(f"Checking result list[0]: {first_item}")
if first_item.lower().startswith(("http://",
"https://",
"magnet:",
"torrent:",
"tidal:",
"hydrus:")):
log(
"add-file ingests local files only. Use download-file first.",
file=sys.stderr,
)
return None, None, None
media_path = Path(first_item)
pipe_obj.path = str(media_path)
return media_path, None, None
# If the first item is a dict, interpret it as a PipeObject-style result
if isinstance(first_item, dict):
# Look for path or path-like keys
path_candidate = (
first_item.get("path") or first_item.get("filepath")
or first_item.get("file")
)
# If the dict includes a 'paths' list (multi-part/section download), prefer the first file
paths_val = first_item.get("paths")
if not path_candidate and isinstance(paths_val,
(list,
tuple)) and paths_val:
path_candidate = paths_val[0]
if path_candidate:
debug(f"Resolved path from result dict: {path_candidate}")
try:
media_path = Path(path_candidate)
pipe_obj.path = str(media_path)
return media_path, first_item.get("hash"), None
except Exception:
return None, first_item.get("hash"), None
# If first item is a PipeObject object
try:
# models.PipeObject is an actual class; check attribute presence
from SYS import models as _models
if isinstance(first_item, _models.PipeObject):
path_candidate = getattr(first_item, "path", None)
if path_candidate:
debug(f"Resolved path from PipeObject: {path_candidate}")
media_path = Path(path_candidate)
pipe_obj.path = str(media_path)
return media_path, getattr(first_item, "hash", None), None
except Exception:
pass
debug(
f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}"
)
debug(f"No resolution path matched. result type={type(result).__name__}")
log("File path could not be resolved")
return None, None, None
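
Taken together, the rewrite turns _resolve_source into a three-rung priority ladder. A condensed sketch of the new control flow, assuming the helper names (coerce_to_path, get_field, Store) shown in this diff; error handling and the backend download branch are abbreviated:

    URL_SCHEMES = ("http://", "https://", "magnet:", "torrent:", "tidal:", "hydrus:")

    def resolve_source_sketch(result, path_arg, pipe_obj, config, store_instance=None):
        # Sketch only; mirrors the structure of _resolve_source above.
        # 1a. Directory-scan dicts carry path+hash but no store: use the local file.
        if isinstance(result, dict):
            r_path, r_hash, r_store = result.get("path"), result.get("hash"), result.get("store")
            if r_path and r_hash and not r_store:
                p = coerce_to_path(r_path)
                if p.is_file():
                    return p, str(r_hash), None
            # 1b. hash+store: fetch from (or download out of) the named backend.
            if r_hash and r_store:
                store = store_instance or Store(config)
                ...  # backend.get_file / _maybe_download_backend_file, as above
        # 2. Generic coercion: explicit path arg > pipe_obj.path > first result item.
        candidate = Path(path_arg) if path_arg else (Path(pipe_obj.path) if pipe_obj.path else None)
        if candidate is None:
            obj = result[0] if isinstance(result, list) and result else result
            if obj is not None:
                try:
                    candidate = coerce_to_path(obj)
                except ValueError:
                    candidate = None
        if candidate is None:
            return None, None, None
        if str(candidate).lower().startswith(URL_SCHEMES):
            return None, None, None  # URLs are rejected: add-file ingests local files only
        hash_hint = get_field(result, "hash") or getattr(pipe_obj, "hash", None)
        return candidate, hash_hint, None
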
@@ -1207,18 +1143,6 @@ class Add_File(Cmdlet):
if media_path is None:
return False
target_str = str(media_path)
# add-file does not accept URL inputs.
if target_str.lower().startswith(("http://",
"https://",
"magnet:",
"torrent:",
"tidal:",
"hydrus:")):
log("add-file ingests local files only.", file=sys.stderr)
return False
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}")
return False
@@ -1232,34 +1156,54 @@ class Add_File(Cmdlet):
return True
@staticmethod
def _is_probable_url(s: Any) -> bool:
"""Check if a string looks like a URL/magnet/identifier (vs a tag/title)."""
if not isinstance(s, str):
return False
val = s.strip().lower()
if not val:
return False
# Obvious schemes
if val.startswith(("http://", "https://", "magnet:", "torrent:", "tidal:", "hydrus:")):
return True
# Any explicit scheme separator ("://", including file://) counts as a URL
if "://" in val:
return True
# Hydrus hash-like search queries are NOT URLs
if val.startswith("hash:"):
return False
return False
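
A few illustrative inputs for the new filter (the strings are made up for this example):

    assert Add_File._is_probable_url("https://example.com/a.mp4")   # explicit scheme
    assert Add_File._is_probable_url("sftp://host/share/file")      # caught by the "://" check
    assert not Add_File._is_probable_url("hash:deadbeef")           # Hydrus query, not a URL
    assert not Add_File._is_probable_url("title:Some Song")         # tag leakage the filter blocks
    assert not Add_File._is_probable_url(42)                        # non-strings are rejected
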
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
"""Extract valid URLs from pipe object or result dict."""
from SYS.metadata import normalize_urls
# Prefer explicit PipeObject.url if present
urls: List[str] = []
try:
urls = normalize_urls(getattr(pipe_obj, "url", None))
except Exception:
urls = []
candidates: List[str] = []
# 1. Prefer explicit PipeObject top-level field
if pipe_obj.url:
candidates.append(pipe_obj.url)
if pipe_obj.source_url:
candidates.append(pipe_obj.source_url)
# Then check extra.url
if not urls:
try:
if isinstance(pipe_obj.extra, dict):
urls = normalize_urls(pipe_obj.extra.get("url"))
except Exception:
pass
# 2. Check extra and metadata fields
if isinstance(pipe_obj.extra, dict):
u = pipe_obj.extra.get("url")
if isinstance(u, list):
candidates.extend(str(x) for x in u if x)
elif isinstance(u, str):
candidates.append(u)
# Then check result dict
if not urls and isinstance(result, dict):
urls = normalize_urls(result.get("url"))
# 3. Check result (which might be a dict or another PipeObject)
raw_from_result = extract_url_from_result(result)
if raw_from_result:
candidates.extend(raw_from_result)
# Finally, try extractor helper
if not urls:
urls = normalize_urls(extract_url_from_result(result))
return urls
# 4. Normalize and filter: MUST look like a URL to avoid tag leakage
normalized = normalize_urls(candidates)
return [u for u in normalized if Add_File._is_probable_url(u)]
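
The old version short-circuited at the first non-empty source; the new one gathers every candidate first, then normalizes and filters once. In miniature (assuming normalize_urls canonicalizes and deduplicates, which this diff implies but does not show):

    candidates = ["https://example.com/track", "artist:someone", "https://example.com/track"]
    normalized = normalize_urls(candidates)  # SYS.metadata helper
    urls = [u for u in normalized if Add_File._is_probable_url(u)]
    # -> ["https://example.com/track"]; the bare tag never reaches the URL list
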
@staticmethod
def _get_relationships(result: Any,
@@ -1588,6 +1532,8 @@ class Add_File(Cmdlet):
merged_tags.append(f"title:{preferred_title}")
merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
# Final safety filter: ensure no tags/titles leak into the URL list
merged_url = [u for u in merged_url if Add_File._is_probable_url(u)]
file_hash = Add_File._resolve_file_hash(
result,
@@ -1645,7 +1591,8 @@ class Add_File(Cmdlet):
if file_hash and not pipe_obj.hash:
pipe_obj.hash = file_hash
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra.setdefault("url", merged_url)
# Update (don't setdefault) to ensure URLs matched from sidecars or source stores are tracked
pipe_obj.extra["url"] = merged_url
return merged_tags, merged_url, preferred_title, file_hash
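
Why plain assignment instead of setdefault: setdefault is a no-op whenever the key already exists, so a stale URL list in extra survived the merge. A two-line illustration:

    extra = {"url": ["https://old.example/a"]}
    extra.setdefault("url", ["https://new.example/b"])  # no-op: key exists, stale list kept
    extra["url"] = ["https://new.example/b"]            # assignment: the merged list always wins
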
@staticmethod
@@ -1830,11 +1777,13 @@ class Add_File(Cmdlet):
List[str]]]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_file: bool = True,
store_instance: Optional[Store] = None,
) -> int:
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
##log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
delete_after_effective = bool(delete_after)
# ... (lines omitted)
if not delete_after_effective:
# When download-media is piped into add-file, the downloaded artifact is a temp file.
# After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
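
The cleanup rule the comment above describes could be approximated as follows (a sketch only; the commit's actual detection lines are elided from this hunk):

    import tempfile
    from pathlib import Path

    if not delete_after_effective:
        tmp_root = Path(tempfile.gettempdir()).resolve()
        if tmp_root in Path(media_path).resolve().parents:
            # The artifact is a temp download: remove it once persisted to the
            # backend so no duplicate copy lingers on disk.
            delete_after_effective = True
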
@@ -1863,7 +1812,7 @@ class Add_File(Cmdlet):
pass
try:
store = Store(config)
store = store_instance if store_instance is not None else Store(config)
backend = store[backend_name]
hydrus_like_backend = False
@@ -2202,12 +2151,13 @@ class Add_File(Cmdlet):
List[tuple[str,
List[str]]]],
config: Dict[str,
Any]
Any],
store_instance: Optional[Store] = None,
) -> None:
"""Apply deferred URL associations in bulk, grouped per backend."""
try:
store = Store(config)
store = store_instance if store_instance is not None else Store(config)
except Exception:
return
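
Shape of the bulk application, per the signature above (pending appears to map backend name -> [(file_hash, [urls])]; the association call itself is not shown in this hunk, so it stays abstract):

    def apply_pending_sketch(pending, config, store_instance=None):
        # Sketch only; mirrors _apply_pending_url_associations above.
        try:
            store = store_instance if store_instance is not None else Store(config)
        except Exception:
            return  # best-effort: no registry, no associations
        for backend_name, pairs in pending.items():
            try:
                backend = store[backend_name]
            except Exception:
                continue  # unknown backend: skip its batch
            for file_hash, urls in pairs:
                ...  # associate urls with file_hash on this backend
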
@@ -2329,51 +2279,7 @@ class Add_File(Cmdlet):
@staticmethod
def _resolve_media_kind(path: Path) -> str:
# Delegate to the shared extension-based resolver
suffix = path.suffix.lower()
if suffix in {".mp3",
".flac",
".wav",
".m4a",
".aac",
".ogg",
".opus",
".wma",
".mka"}:
return "audio"
if suffix in {
".mp4",
".mkv",
".webm",
".mov",
".avi",
".flv",
".mpg",
".mpeg",
".ts",
".m4v",
".wmv",
}:
return "video"
if suffix in {".jpg",
".jpeg",
".png",
".gif",
".webp",
".bmp",
".tiff"}:
return "image"
if suffix in {".pdf",
".epub",
".txt",
".mobi",
".azw3",
".cbz",
".cbr",
".doc",
".docx"}:
return "document"
return "other"
return resolve_media_kind_by_extension(path)
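
For reference, a sketch of the shared helper consistent with the mapping deleted above (the real resolve_media_kind_by_extension lives in the sh helpers and may differ in detail):

    from pathlib import Path

    _KIND_BY_EXT = {
        "audio": {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka"},
        "video": {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv"},
        "image": {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"},
        "document": {".pdf", ".epub", ".txt", ".mobi", ".azw3", ".cbz", ".cbr", ".doc", ".docx"},
    }

    def resolve_media_kind_by_extension(path: Path) -> str:
        suffix = path.suffix.lower()
        for kind, exts in _KIND_BY_EXT.items():
            if suffix in exts:
                return kind
        return "other"
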
@staticmethod
def _persist_local_metadata(