commit 3bbaa28fb4
parent a97657a757
Date: 2025-12-30 23:19:02 -08:00
17 changed files with 1735 additions and 558 deletions

@@ -38,6 +38,95 @@ from SYS.metadata import write_metadata
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
+
+
+def _maybe_apply_florencevision_tags(
+    media_path: Path,
+    tags: List[str],
+    config: Dict[str, Any],
+    pipe_obj: Optional[models.PipeObject] = None,
+) -> List[str]:
+    """Optionally auto-tag images using the FlorenceVision tool.
+
+    Controlled via config:
+
+        [tool=florencevision]
+        enabled=true
+        strict=false
+
+    If strict=false (the default), failures log a warning and return the original tags.
+    If strict=true, failures raise and abort the ingest.
+    """
+    strict = False  # bound before the try block so the except handler can always read it
+    try:
+        tool_block = (config or {}).get("tool")
+        fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None
+        enabled = False
+        if isinstance(fv_block, dict):
+            enabled = bool(fv_block.get("enabled"))
+            strict = bool(fv_block.get("strict"))
+        if not enabled:
+            return tags
+
+        from tool.florencevision import FlorenceVisionTool
+
+        # Special case: if this file was produced by the `screen-shot` cmdlet,
+        # OCR is more useful than caption/detection for tagging screenshots.
+        cfg_for_tool: Dict[str, Any] = config
+        try:
+            action = str(getattr(pipe_obj, "action", "") or "") if pipe_obj is not None else ""
+            cmdlet_name = ""
+            if action.lower().startswith("cmdlet:"):
+                cmdlet_name = action.split(":", 1)[1].strip().lower()
+            if cmdlet_name in {"screen-shot", "screen_shot", "screenshot"}:
+                tool_block2 = dict((config or {}).get("tool") or {})
+                fv_block2 = dict(tool_block2.get("florencevision") or {})
+                fv_block2["task"] = "ocr"
+                tool_block2["florencevision"] = fv_block2
+                cfg_for_tool = dict(config or {})
+                cfg_for_tool["tool"] = tool_block2
+        except Exception:
+            cfg_for_tool = config
+
+        fv = FlorenceVisionTool(cfg_for_tool)
+        if not fv.enabled() or not fv.applicable_path(media_path):
+            return tags
+        auto_tags = fv.tags_for_file(media_path)
+
+        # Capture the caption (if any) into PipeObject notes for downstream persistence.
+        try:
+            caption_text = getattr(fv, "last_caption", None)
+            if caption_text and pipe_obj is not None:
+                if not isinstance(pipe_obj.extra, dict):
+                    pipe_obj.extra = {}
+                notes = pipe_obj.extra.get("notes")
+                if not isinstance(notes, dict):
+                    notes = {}
+                notes.setdefault("caption", caption_text)
+                pipe_obj.extra["notes"] = notes
+        except Exception:
+            pass
+
+        if not auto_tags:
+            return tags
+        merged = merge_sequences(tags or [], auto_tags, case_sensitive=False)
+        debug(f"[add-file] FlorenceVision added {len(auto_tags)} tag(s)")
+        return merged
+    except Exception as exc:
+        # Re-read strictness from config in case the failure happened before it was parsed.
+        strict2 = False
+        try:
+            tool_block = (config or {}).get("tool")
+            fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None
+            strict2 = bool(fv_block.get("strict")) if isinstance(fv_block, dict) else False
+        except Exception:
+            strict2 = False
+        if strict or strict2:
+            raise
+        log(f"[add-file] Warning: FlorenceVision tagging failed: {exc}", file=sys.stderr)
+        return tags
class Add_File(Cmdlet):
    """Add file into the DB"""
@@ -349,14 +438,14 @@ class Add_File(Cmdlet):
        successes = 0
        failures = 0

-        # When add-file -store is the last stage, always show a final search-store table.
+        # When add-file -store is the last stage, always show a final search-file table.
        # This is especially important for multi-item ingests (e.g., multi-clip downloads)
        # so the user always gets a selectable ResultTable.
-        want_final_search_store = (
+        want_final_search_file = (
            bool(is_last_stage) and bool(is_storage_backend_location)
            and bool(location)
        )
-        auto_search_store_after_add = False
+        auto_search_file_after_add = False

        # When ingesting multiple items into a backend store, defer URL association and
        # apply it once at the end (bulk) to avoid per-item URL API calls.
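Sketched as a hypothetical session (cmdlet and store names illustrative): a multi-clip ingest like `download-clips <url> | add-file -store local` now always ends with one canonical search-file table, so `@1`, `@2`, ... select from that table rather than from per-item summaries.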
@@ -879,9 +968,9 @@ class Add_File(Cmdlet):
                    pending_url_associations=pending_url_associations,
-                    suppress_last_stage_overlay=want_final_search_store,
-                    auto_search_store=auto_search_store_after_add,
+                    suppress_last_stage_overlay=want_final_search_file,
+                    auto_search_file=auto_search_file_after_add,
                )
            else:
                code = self._handle_local_export(
else:
code = self._handle_local_export(
@@ -1005,8 +1094,8 @@ class Add_File(Cmdlet):
                    collect_relationship_pairs=pending_relationship_pairs,
                    defer_url_association=defer_url_association,
                    pending_url_associations=pending_url_associations,
-                    suppress_last_stage_overlay=want_final_search_store,
-                    auto_search_store=auto_search_store_after_add,
+                    suppress_last_stage_overlay=want_final_search_file,
+                    auto_search_file=auto_search_file_after_add,
                )
            else:
                code = self._handle_local_export(
@@ -1053,7 +1142,7 @@ class Add_File(Cmdlet):
        # Always end add-file -store (when last stage) by showing the canonical store table.
        # This keeps output consistent and ensures @N selection works for multi-item ingests.
-        if want_final_search_store and collected_payloads:
+        if want_final_search_file and collected_payloads:
            try:
                hashes: List[str] = []
                for payload in collected_payloads:
@@ -1064,7 +1153,7 @@ class Add_File(Cmdlet):
                seen: set[str] = set()
                # Order-preserving dedup: seen.add() returns None, so each hash
                # passes the filter only on its first occurrence.
                hashes = [h for h in hashes if not (h in seen or seen.add(h))]
-                refreshed_items = Add_File._try_emit_search_store_by_hashes(
+                refreshed_items = Add_File._try_emit_search_file_by_hashes(
                    store=str(location),
                    hash_values=hashes,
                    config=config,
@@ -1102,29 +1191,29 @@ class Add_File(Cmdlet):
            return 1

    @staticmethod
-    def _try_emit_search_store_by_hashes(
+    def _try_emit_search_file_by_hashes(
        *,
        store: str,
        hash_values: List[str],
        config: Dict[str, Any]
    ) -> Optional[List[Any]]:
-        """Run search-store for a list of hashes and promote the table to a display overlay.
+        """Run search-file for a list of hashes and promote the table to a display overlay.

-        Returns the emitted search-store payload items on success, else None.
+        Returns the emitted search-file payload items on success, else None.
        """
        hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64]
        if not store or not hashes:
            return None
        try:
-            from cmdlet.search_store import CMDLET as search_store_cmdlet
+            from cmdlet.search_file import CMDLET as search_file_cmdlet

            query = "hash:" + ",".join(hashes)
            args = ["-store", str(store), query]
-            debug(f'[add-file] Refresh: search-store -store {store} "{query}"')
+            debug(f'[add-file] Refresh: search-file -store {store} "{query}"')

-            # Run search-store under a temporary stage context so its ctx.emit() calls
+            # Run search-file under a temporary stage context so its ctx.emit() calls
            # don't interfere with the outer add-file pipeline stage.
            prev_ctx = ctx.get_stage_context()
            temp_ctx = ctx.PipelineStageContext(
@@ -1137,7 +1226,7 @@ class Add_File(Cmdlet):
            )
            ctx.set_stage_context(temp_ctx)
            try:
-                code = search_store_cmdlet.run(None, args, config)
+                code = search_file_cmdlet.run(None, args, config)
                emitted_items = list(getattr(temp_ctx, "emits", []) or [])
            finally:
                ctx.set_stage_context(prev_ctx)
@@ -1145,7 +1234,7 @@ class Add_File(Cmdlet):
            if code != 0:
                return None

-            # Promote the search-store result to a display overlay so the CLI prints it
+            # Promote the search-file result to a display overlay so the CLI prints it
            # for action commands like add-file.
            stage_ctx = ctx.get_stage_context()
            is_last = (stage_ctx
@@ -1171,7 +1260,7 @@ class Add_File(Cmdlet):
            return emitted_items
        except Exception as exc:
            debug(
-                f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}"
+                f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
            )
            return None
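The save/swap/restore dance above could be factored into a context manager; a minimal sketch against the same `ctx` API (the helper name is hypothetical):

    from contextlib import contextmanager

    @contextmanager
    def _swapped_stage_context(new_ctx):
        # Capture nested ctx.emit() calls in new_ctx, then restore the outer
        # stage context even if the nested cmdlet raises.
        prev = ctx.get_stage_context()
        ctx.set_stage_context(new_ctx)
        try:
            yield new_ctx
        finally:
            ctx.set_stage_context(prev)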
@@ -2109,7 +2198,7 @@ class Add_File(Cmdlet):
"""Emit a storage-style result payload.
- Always emits the dict downstream (when in a pipeline).
- If this is the last stage (or not in a pipeline), prints a search-store-like table
- If this is the last stage (or not in a pipeline), prints a search-file-like table
and sets an overlay table/items for @N selection.
"""
# Emit for downstream commands (no-op if not in a pipeline)
@@ -2139,28 +2228,28 @@ class Add_File(Cmdlet):
            pass

    @staticmethod
-    def _try_emit_search_store_by_hash(
+    def _try_emit_search_file_by_hash(
        *,
        store: str,
        hash_value: str,
        config: Dict[str, Any]
    ) -> Optional[List[Any]]:
-        """Run search-store for a single hash so the final table/payload is consistent.
+        """Run search-file for a single hash so the final table/payload is consistent.

        Important: `add-file` is treated as an action command by the CLI, so the CLI only
-        prints tables for it when a display overlay exists. After running search-store,
+        prints tables for it when a display overlay exists. After running search-file,
        this copies the resulting table into the display overlay (when this is the last
        stage) so the canonical store table is what the user sees and can select from.

-        Returns the emitted search-store payload items on success, else None.
+        Returns the emitted search-file payload items on success, else None.
        """
        try:
-            from cmdlet.search_store import CMDLET as search_store_cmdlet
+            from cmdlet.search_file import CMDLET as search_file_cmdlet

            args = ["-store", str(store), f"hash:{str(hash_value)}"]

-            # Run search-store under a temporary stage context so its ctx.emit() calls
+            # Run search-file under a temporary stage context so its ctx.emit() calls
            # don't interfere with the outer add-file pipeline stage.
            prev_ctx = ctx.get_stage_context()
            temp_ctx = ctx.PipelineStageContext(
@@ -2173,14 +2262,14 @@ class Add_File(Cmdlet):
            )
            ctx.set_stage_context(temp_ctx)
            try:
-                code = search_store_cmdlet.run(None, args, config)
+                code = search_file_cmdlet.run(None, args, config)
                emitted_items = list(getattr(temp_ctx, "emits", []) or [])
            finally:
                ctx.set_stage_context(prev_ctx)
            if code != 0:
                return None

-            # Promote the search-store result to a display overlay so the CLI prints it
+            # Promote the search-file result to a display overlay so the CLI prints it
            # for action commands like add-file.
            stage_ctx = ctx.get_stage_context()
            is_last = (stage_ctx
@@ -2206,7 +2295,7 @@ class Add_File(Cmdlet):
            return emitted_items
        except Exception as exc:
            debug(
-                f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}"
+                f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
            )
            return None
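Both helpers build the same query grammar that search-file parses: a single hash as `hash:<64-hex-sha256>`, a batch as a comma-joined list such as `hash:<h1>,<h2>,<h3>` (values elided).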
@@ -3097,7 +3186,7 @@ class Add_File(Cmdlet):
        List[tuple[str, List[str]]]]] = None,
        suppress_last_stage_overlay: bool = False,
-        auto_search_store: bool = True,
+        auto_search_file: bool = True,
    ) -> int:
        """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
        ##log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
@@ -3217,6 +3306,15 @@ class Add_File(Cmdlet):
            )
        ]

+        # Auto-tag (best-effort) BEFORE uploading so tags land with the stored file.
+        try:
+            tags = _maybe_apply_florencevision_tags(media_path, list(tags or []), config, pipe_obj=pipe_obj)
+            pipe_obj.tag = list(tags or [])
+        except Exception as exc:
+            # Strict mode re-raises inside the helper; treat any exception here as a hard failure.
+            log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr)
+            return 1

        # Call the backend's add_file with full metadata.
        # The backend returns the hash as the identifier.
        file_identifier = backend.add_file(
@@ -3254,7 +3352,7 @@ class Add_File(Cmdlet):
            },
        )

-        # Emit a search-store-like payload for consistent tables and natural piping.
+        # Emit a search-file-like payload for consistent tables and natural piping.
        # Keep hash/store for downstream commands (get-tag, get-file, etc.).
        resolved_hash = (
            file_identifier if len(file_identifier) == 64 else
@@ -3299,6 +3397,15 @@ class Add_File(Cmdlet):
        except Exception:
            pass

+        # Best-effort: persist the captured caption as a note. The probe is
+        # duck-typed, so backends without note support are skipped silently.
+        caption_note = Add_File._get_note_text(result, pipe_obj, "caption")
+        if caption_note:
+            try:
+                setter = getattr(backend, "set_note", None)
+                if callable(setter):
+                    setter(resolved_hash, "caption", caption_note)
+            except Exception:
+                pass
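        # A conforming backend only needs the probed method itself; a
        # hypothetical sketch:
        #
        #     class ExampleBackend:
        #         def set_note(self, file_hash: str, name: str, text: str) -> None:
        #             ...  # persist the note alongside the stored file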
        meta: Dict[str, Any] = {}
        try:
@@ -3350,16 +3457,16 @@ class Add_File(Cmdlet):
            pass

        # Keep the add-file 1-row summary overlay (when last stage), then emit the
-        # canonical search-store payload/table for piping/selection consistency.
-        if auto_search_store and resolved_hash and resolved_hash != "unknown":
-            # Show the add-file summary (overlay only) but let search-store provide the downstream payload.
+        # canonical search-file payload/table for piping/selection consistency.
+        if auto_search_file and resolved_hash and resolved_hash != "unknown":
+            # Show the add-file summary (overlay only) but let search-file provide the downstream payload.
            Add_File._emit_storage_result(
                payload,
                overlay=not suppress_last_stage_overlay,
                emit=False
            )
-            refreshed_items = Add_File._try_emit_search_store_by_hash(
+            refreshed_items = Add_File._try_emit_search_file_by_hash(
                store=backend_name,
                hash_value=resolved_hash,
                config=config,