dfdfdf
cmdlet/__init__.py (new file, 93 lines)
@@ -0,0 +1,93 @@
from __future__ import annotations

from typing import Any, Callable, Dict, Iterable, Sequence
from importlib import import_module as _import_module

# A cmdlet is a callable taking (result, args, config) -> int
Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]

# Registry of command-name -> cmdlet function
REGISTRY: Dict[str, Cmdlet] = {}


def _normalize_cmd_name(name: str) -> str:
    return str(name or "").replace('_', '-').lower().strip()


def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
    """Register a callable under one or more command names.

    This is the single registration mechanism used by both:
    - legacy function cmdlets (decorator form)
    - class-based cmdlets (Cmdlet.register())
    """
    for name in names:
        key = _normalize_cmd_name(name)
        if key:
            REGISTRY[key] = fn
    return fn


def register(names: Iterable[str]):
    """Decorator to register a function under one or more command names.

    Usage:
        @register(["add-tags"])
        def _run(result, args, config) -> int: ...
    """
    def _wrap(fn: Cmdlet) -> Cmdlet:
        return register_callable(names, fn)
    return _wrap


def get(cmd_name: str) -> Cmdlet | None:
    return REGISTRY.get(_normalize_cmd_name(cmd_name))


# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py).
# Cmdlets self-register when instantiated via their __init__ method.
import os

cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
    if not (
        filename.endswith(".py")
        and not filename.startswith("_")
        and filename != "__init__.py"
    ):
        continue

    mod_name = filename[:-3]

    # Enforce PowerShell-style two-word cmdlet naming (e.g., add_file, get_file).
    # Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe).
    if "_" not in mod_name:
        continue

    try:
        _import_module(f".{mod_name}", __name__)
    except Exception as e:
        import sys
        print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
        continue

# Import and register native commands that are not considered cmdlets
try:
    from cmdnat import register_native_commands as _register_native_commands
    _register_native_commands(REGISTRY)
except Exception:
    # Native commands are optional; ignore if unavailable
    pass

# Import root-level modules that also register cmdlets
for _root_mod in ("select_cmdlet",):
    try:
        _import_module(_root_mod)
    except Exception:
        # Allow missing optional modules
        continue

# Also import helper modules that register cmdlets
try:
    import API.alldebrid as _alldebrid
except Exception:
    pass
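
A minimal dispatch sketch (illustrative only; the "my-cmdlet" name and the empty config dict are hypothetical and not part of this commit): a module registers a function with the decorator above, and a caller resolves it through get() using any normalized spelling of the name.

    import cmdlet

    @cmdlet.register(["my-cmdlet"])          # hypothetical example cmdlet
    def _run(result, args, config) -> int:
        # Echo what was piped in and report success
        print(result, list(args))
        return 0

    fn = cmdlet.get("My_Cmdlet")             # normalization maps "My_Cmdlet" -> "my-cmdlet"
    exit_code = fn(None, [], {}) if fn else 1
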
cmdlet/_shared.py (new file, 1708 lines)
File diff suppressed because it is too large
cmdlet/add_file.py (new file, 955 lines)
@@ -0,0 +1,955 @@
from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, Tuple, List, Union
from pathlib import Path
import sys
import shutil

import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from Store import Store
from ._shared import (
    Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
    extract_tag_from_result, extract_title_from_result, extract_url_from_result,
    merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object
)
from ._shared import collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata

# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS


class Add_File(Cmdlet):
    """Add a file into the DB."""

    def __init__(self) -> None:
        """Initialize the add-file cmdlet."""
        super().__init__(
            name="add-file",
            summary="Upload a media file to a specified location (Hydrus, file provider, or local directory).",
            usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
            arg=[
                SharedArgs.PATH,
                SharedArgs.STORE,
                SharedArgs.HASH,
                CmdletArg(name="provider", type="string", required=False, description="File hosting provider (e.g., 0x0)", alias="prov"),
                CmdletArg(name="delete", type="flag", required=False, description="Delete file after successful upload", alias="del"),
            ],
            detail=[
                "- Storage location options (use -storage):",
                "  hydrus: Upload to Hydrus database with metadata tagging",
                "  local: Copy file to local directory",
                "  <path>: Copy file to specified directory",
                "- File provider options (use -provider):",
                "  0x0: Upload to 0x0.st for temporary hosting",
            ],
            exec=self.run,
        )
        self.register()
    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution entry point."""
        # Parse arguments
        parsed = parse_cmdlet_args(args, self)

        # Initialize state
        path_arg = parsed.get("path")
        location = parsed.get("store")  # Fixed: was "storage", should be "store"
        provider_name = parsed.get("provider")
        delete_after = parsed.get("delete", False)

        # Coerce result to PipeObject; if result is a list, prefer the first element
        effective_result = result
        if isinstance(result, list) and result:
            first_item = result[0]
            # Prefer the first item if it is a dict
            if isinstance(first_item, dict):
                effective_result = first_item
        pipe_obj = coerce_to_pipe_object(effective_result, path_arg)

        # Debug: Log input result details
        debug(f"[add-file] INPUT result type={type(result).__name__}")
        if isinstance(result, list):
            debug(f"[add-file] INPUT result is list with {len(result)} items")
            if result and isinstance(result[0], dict):
                first = result[0]
                hash_val = first.get('hash')
                hash_str = hash_val[:12] + "..." if hash_val else "N/A"
                debug(f"[add-file] First item details: title={first.get('title')}, hash={hash_str}, store={first.get('store', 'N/A')}")
        elif isinstance(result, dict):
            hash_val = result.get('hash')
            hash_str = hash_val[:12] + "..." if hash_val else "N/A"
            debug(f"[add-file] INPUT result is dict: title={result.get('title')}, hash={hash_str}, store={result.get('store', 'N/A')}")

        # Debug: Log parsed arguments
        debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")

        # Resolve source - returns (media_path_or_url, file_hash)
        media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config)
        debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
        if not media_path_or_url:
            debug("[add-file] ERROR: Could not resolve source file/URL")
            return 1

        # Update pipe_obj with resolved path
        pipe_obj.path = str(media_path_or_url)

        # Check if it's a URL before validating as file
        if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
            debug(f"Detected URL target, delegating to download-data: {media_path_or_url}")
            return self._delegate_to_download_data(result, media_path_or_url, location, provider_name, args, config)

        # Convert to Path and validate
        media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url

        # Validate source
        if not self._validate_source(media_path):
            debug(f"[add-file] ERROR: Source validation failed for {media_path}")
            return 1

        # Debug: Log execution path decision
        debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}")
        debug(f"  media_path={media_path}, exists={media_path.exists()}")

        # Execute transfer based on destination (using Store registry)
        if provider_name:
            debug("[add-file] ROUTE: file provider upload")
            return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after)
        elif location:
            # Check if location is a registered backend name
            try:
                store = Store(config)
                backends = store.list_backends()

                if location in backends:
                    debug(f"[add-file] ROUTE: storage backend '{location}'")
                    return self._handle_storage_backend(media_path, location, pipe_obj, config, delete_after)
                else:
                    # Treat as local export path
                    debug(f"[add-file] ROUTE: local export to path '{location}'")
                    return self._handle_local_export(media_path, location, pipe_obj, config, delete_after)
            except Exception as exc:
                debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
                log(f"Invalid location: {location}", file=sys.stderr)
                return 1
        else:
            debug("[add-file] ERROR: No location or provider specified")
            log("No storage location or provider specified", file=sys.stderr)
            return 1
    @staticmethod
    def _resolve_source(
        result: Any,
        path_arg: Optional[str],
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
    ) -> Tuple[Optional[Path | str], Optional[str]]:
        """Resolve the source file path from args or pipeline result.

        PRIORITY: hash+store pattern is preferred over path-based resolution.
        This ensures consistency when @N selections pass hash+store identifiers.

        Returns (media_path_or_url, file_hash)
        where media_path_or_url can be a Path object or a URL string.
        """
        # PRIORITY 1: Try hash+store from result dict (most reliable for @N selections)
        if isinstance(result, dict):
            result_hash = result.get("hash")
            result_store = result.get("store")
            if result_hash and result_store:
                debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}")
                # Use get_file to retrieve from the specific store
                try:
                    store = Store(config)
                    if result_store in store.list_backends():
                        backend = store[result_store]
                        media_path = backend.get_file(result_hash)
                        if isinstance(media_path, Path) and media_path.exists():
                            pipe_obj.path = str(media_path)
                            debug(f"[add-file] Retrieved file from {result_store}: {media_path}")
                            return media_path, result_hash

                        if isinstance(media_path, str) and media_path.lower().startswith(("http://", "https://")):
                            pipe_obj.path = media_path
                            debug(f"[add-file] Retrieved URL from {result_store}: {media_path}")
                            return media_path, result_hash
                except Exception as exc:
                    debug(f"[add-file] Failed to retrieve via hash+store: {exc}")

        # PRIORITY 2: Try explicit path argument
        if path_arg:
            media_path = Path(path_arg)
            pipe_obj.path = str(media_path)
            debug(f"[add-file] Using explicit path argument: {media_path}")
            return media_path, None

        # PRIORITY 3: Try from pipe_obj.path (check file first before URL)
        pipe_path = getattr(pipe_obj, "path", None)
        if pipe_path:
            pipe_path_str = str(pipe_path)
            debug(f"Resolved pipe_path: {pipe_path_str}")
            if pipe_path_str.startswith("hydrus:"):
                file_hash = pipe_path_str.split(":", 1)[1]
                media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
                return media_path, file_hash if success else None
            # Check if pipe_path is a URL - skip to URL handling below
            if not pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                media_path = Path(pipe_path_str)
                return media_path, None

        # PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
        pipe_url = getattr(pipe_obj, "url", None)
        if pipe_url and isinstance(pipe_url, str):
            # Check if it's a URL
            if pipe_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                debug(f"Detected URL in pipe_obj.url: {pipe_url}")
                return pipe_url, None

        # Try from hydrus hash in pipe_obj.extra or hash
        hydrus_hash = None
        if isinstance(pipe_obj.extra, dict):
            hydrus_hash = pipe_obj.extra.get("hydrus_hash") or pipe_obj.extra.get("hash")
        hydrus_hash = hydrus_hash or pipe_obj.hash

        if hydrus_hash and hydrus_hash != "unknown":
            media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config)
            return media_path, str(hydrus_hash) if success else None

        # Try from result (if it's a string path or URL)
        if isinstance(result, str):
            debug(f"Checking result string: {result}")
            # Check if result is a URL before treating as file path
            if result.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                debug(f"Detected URL in result string: {result}")
                return result, None  # Return URL string directly
            media_path = Path(result)
            pipe_obj.path = str(media_path)
            return media_path, None

        # Try from result if it's a list (pipeline emits multiple results)
        if isinstance(result, list) and result:
            first_item = result[0]
            # If the first item is a string, it's either a URL or a file path
            if isinstance(first_item, str):
                debug(f"Checking result list[0]: {first_item}")
                if first_item.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                    debug(f"Detected URL in result list: {first_item}")
                    return first_item, None  # Return URL string directly
                media_path = Path(first_item)
                pipe_obj.path = str(media_path)
                return media_path, None

            # If the first item is a dict, interpret it as a PipeObject-style result
            if isinstance(first_item, dict):
                # Look for path or path-like keys
                path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
                # If the dict includes a 'paths' list (multi-part/section download), prefer the first file
                if not path_candidate and isinstance(first_item.get("paths"), (list, tuple)) and first_item.get("paths"):
                    path_candidate = first_item.get("paths")[0]
                if path_candidate:
                    debug(f"Resolved path from result dict: {path_candidate}")
                    try:
                        media_path = Path(path_candidate)
                        pipe_obj.path = str(media_path)
                        return media_path, first_item.get("hash")
                    except Exception:
                        # Fallback to returning string if not a path
                        return str(path_candidate), first_item.get("hash")

            # If first item is a PipeObject object
            try:
                # models.PipeObject is an actual class; check attribute presence
                import models as _models
                if isinstance(first_item, _models.PipeObject):
                    path_candidate = getattr(first_item, "path", None)
                    if path_candidate:
                        debug(f"Resolved path from PipeObject: {path_candidate}")
                        media_path = Path(path_candidate)
                        pipe_obj.path = str(media_path)
                        return media_path, getattr(first_item, "hash", None)
            except Exception:
                pass

        debug(f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}")
        log("File path could not be resolved")
        return None, None

    @staticmethod
    def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]:
        """Fetch the physical path of a file from Hydrus using its hash."""
        if not file_hash:
            return None, False

        try:
            client = hydrus_wrapper.get_client(config)
            if not client:
                log("❌ Hydrus client not available", file=sys.stderr)
                return None, False

            response = client.get_file_path(file_hash)
            file_path_str = response.get("path")
            if not file_path_str:
                log("❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
                return None, False

            media_path = Path(file_path_str)
            if not media_path.exists():
                log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
                return None, False

            log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
            return media_path, True
        except Exception as exc:
            log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
            return None, False
    @staticmethod
    def _validate_source(media_path: Optional[Path]) -> bool:
        """Validate that the source file exists and is supported."""
        if media_path is None:
            return False

        target_str = str(media_path)

        # If it's a URL target, we skip file existence checks
        if target_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
            return True

        if not media_path.exists() or not media_path.is_file():
            log(f"File not found: {media_path}")
            return False

        # Validate file type
        file_extension = media_path.suffix.lower()
        if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
            log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
            return False

        return True

    @staticmethod
    def _is_url_target(media_path: Optional[Path]) -> bool:
        """Check if the target is a URL that needs downloading."""
        if media_path and str(media_path).lower().startswith(("http://", "https://")):
            return True
        return False

    def _delegate_to_download_data(
        self,
        result: Any,
        url_str: str,
        location: Optional[str],
        provider_name: Optional[str],
        args: Sequence[str],
        config: Dict[str, Any],
    ) -> int:
        """Delegate URL handling to the download-media cmdlet."""
        log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)
        # Reuse the globally-registered cmdlet instance to avoid duplicate registration
        from cmdlet.download_media import CMDLET as dl_cmdlet
        dl_args = list(args) if args else []

        # Add the URL to the argument list for download-media
        dl_args.insert(0, url_str)

        # If result has selection_args (like -item from @N selection), include them
        if isinstance(result, dict) and "_selection_args" in result:
            selection_args = result["_selection_args"]
            if selection_args:
                dl_args.extend(selection_args)
        elif hasattr(result, 'extra') and isinstance(result.extra, dict) and "_selection_args" in result.extra:
            selection_args = result.extra["_selection_args"]
            if selection_args:
                dl_args.extend(selection_args)

        # download-media doesn't support a -storage flag.
        # It downloads to the configured directory, then add-file will handle storage.
        # Note: Provider uploads (0x0) are not supported via this path.

        # Call download-media with the URL in args
        return dl_cmdlet.run(None, dl_args, config)

    @staticmethod
    def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
        url: List[str] = []
        try:
            if isinstance(pipe_obj.extra, dict):
                url = list(pipe_obj.extra.get("url") or [])
        except Exception:
            pass

        if not url and isinstance(result, dict):
            url = list(result.get("url") or [])
        if not url:
            url = list(extract_url_from_result(result) or [])
        return url

    @staticmethod
    def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
        try:
            rels = pipe_obj.get_relationships()
            if rels:
                return rels
        except Exception:
            pass
        if isinstance(result, dict) and result.get("relationships"):
            return result.get("relationships")
        try:
            return extract_relationships(result)
        except Exception:
            return None

    @staticmethod
    def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
        if getattr(pipe_obj, "duration", None) is not None:
            return pipe_obj.duration
        try:
            return extract_duration(result)
        except Exception:
            return None

    @staticmethod
    def _update_pipe_object_destination(
        pipe_obj: models.PipeObject,
        *,
        hash_value: str,
        store: str,
        path: Optional[str],
        tag: List[str],
        title: Optional[str],
        extra_updates: Optional[Dict[str, Any]] = None,
    ) -> None:
        pipe_obj.hash = hash_value
        pipe_obj.store = store
        pipe_obj.path = path
        pipe_obj.tag = tag
        if title:
            pipe_obj.title = title
        if isinstance(pipe_obj.extra, dict):
            pipe_obj.extra.update(extra_updates or {})
        else:
            pipe_obj.extra = dict(extra_updates or {})

    @staticmethod
    def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
        from result_table import format_result
        log(format_result(pipe_obj, title="Result"), file=sys.stderr)
        ctx.emit(pipe_obj.to_dict())
        ctx.set_current_stage_table(None)
    @staticmethod
    def _prepare_metadata(
        result: Any,
        media_path: Path,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
    ) -> Tuple[List[str], List[str], Optional[str], Optional[str]]:
        """
        Prepare tags, url, and title for the file.
        Returns (tags, url, preferred_title, file_hash)
        """
        tags_from_result = list(pipe_obj.tag or [])
        if not tags_from_result:
            try:
                tags_from_result = list(extract_tag_from_result(result) or [])
            except Exception:
                tags_from_result = []

        url_from_result = Add_File._get_url(result, pipe_obj)

        preferred_title = pipe_obj.title
        if not preferred_title:
            for t in tags_from_result:
                if str(t).strip().lower().startswith("title:"):
                    candidate = t.split(":", 1)[1].strip().replace("_", " ").strip()
                    if candidate:
                        preferred_title = candidate
                        break
        if not preferred_title:
            preferred_title = extract_title_from_result(result)
            if preferred_title:
                preferred_title = preferred_title.replace("_", " ").strip()

        store = getattr(pipe_obj, "store", None)
        _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
            media_path, store, config
        )

        def normalize_title_tag(tag: str) -> str:
            if str(tag).strip().lower().startswith("title:"):
                parts = tag.split(":", 1)
                if len(parts) == 2:
                    value = parts[1].replace("_", " ").strip()
                    return f"title:{value}"
            return tag

        tags_from_result_no_title = [t for t in tags_from_result if not str(t).strip().lower().startswith("title:")]
        sidecar_tags = collapse_namespace_tag([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last")
        sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]

        merged_tags = merge_sequences(tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True)

        if preferred_title:
            merged_tags.append(f"title:{preferred_title}")

        merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)

        file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)

        # Persist back to PipeObject
        pipe_obj.tag = merged_tags
        if preferred_title and not pipe_obj.title:
            pipe_obj.title = preferred_title
        if file_hash and not pipe_obj.hash:
            pipe_obj.hash = file_hash
        if isinstance(pipe_obj.extra, dict):
            pipe_obj.extra.setdefault("url", merged_url)
        return merged_tags, merged_url, preferred_title, file_hash

    @staticmethod
    def _handle_local_export(
        media_path: Path,
        location: str,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
        delete_after: bool,
    ) -> int:
        """Handle exporting to a specific local path (Copy)."""
        try:
            destination_root = Path(location)
        except Exception as exc:
            log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
            return 1

        log(f"Exporting to local path: {destination_root}", file=sys.stderr)

        result = None
        tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)

        # Determine Filename (Title-based)
        title_value = title
        if not title_value:
            # Try to find title in tags
            title_tag = next((t for t in tags if str(t).strip().lower().startswith("title:")), None)
            if title_tag:
                title_value = title_tag.split(":", 1)[1].strip()

        if not title_value:
            title_value = media_path.stem.replace("_", " ").strip()

        safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
        base_name = safe_title or media_path.stem
        new_name = base_name + media_path.suffix

        destination_root.mkdir(parents=True, exist_ok=True)
        target_path = destination_root / new_name

        if target_path.exists():
            target_path = unique_path(target_path)

        # COPY Operation (Safe Export)
        try:
            shutil.copy2(str(media_path), target_path)
        except Exception as exc:
            log(f"❌ Failed to export file: {exc}", file=sys.stderr)
            return 1

        # Copy Sidecars
        Add_File._copy_sidecars(media_path, target_path)

        # Ensure hash for exported copy
        if not f_hash:
            try:
                f_hash = sha256_file(target_path)
            except Exception:
                f_hash = None

        # Write Metadata Sidecars (since it's an export)
        relationships = Add_File._get_relationships(result, pipe_obj)
        try:
            write_sidecar(target_path, tags, url, f_hash)
            write_metadata(target_path, hash_value=f_hash, url=url, relationships=relationships or [])
        except Exception:
            pass

        # Update PipeObject and emit
        extra_updates = {
            "url": url,
            "export_path": str(destination_root),
        }
        if relationships:
            extra_updates["relationships"] = relationships

        chosen_title = title or title_value or pipe_obj.title or target_path.name

        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=f_hash or "unknown",
            store="local",
            path=str(target_path),
            tag=tags,
            title=chosen_title,
            extra_updates=extra_updates,
        )
        Add_File._emit_pipe_object(pipe_obj)

        # Cleanup
        # Only delete if explicitly requested!
        Add_File._cleanup_after_success(media_path, delete_source=delete_after)

        return 0
    @staticmethod
    def _download_soulseek_file(
        result: Any,
        config: Dict[str, Any]
    ) -> Optional[Path]:
        """
        Download a file from a Soulseek peer.

        Extracts username and filename from the soulseek result metadata and initiates the download.
        """
        try:
            import asyncio
            from ProviderCore.registry import download_soulseek_file
            from pathlib import Path

            # Extract metadata from result
            full_metadata = {}
            if isinstance(result, dict):
                full_metadata = result.get("full_metadata", {})
            elif hasattr(result, "extra") and isinstance(result.extra, dict) and "full_metadata" in result.extra:
                full_metadata = result.extra.get("full_metadata", {})
            elif hasattr(result, "full_metadata"):
                # Direct attribute access (fallback)
                val = getattr(result, "full_metadata", {})
                if isinstance(val, dict):
                    full_metadata = val

            username = full_metadata.get("username")
            filename = full_metadata.get("filename")

            if not username or not filename:
                debug(f"[add-file] ERROR: Missing soulseek metadata (username={username}, filename={filename}) in result (type={type(result).__name__})")
                if hasattr(result, "extra"):
                    debug(f"[add-file] Result extra keys: {list(result.extra.keys())}")
                return None

            debug(f"[add-file] Starting soulseek download: {username} -> {filename}")

            # Determine output directory (prefer downloads folder in config)
            output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads")
            output_dir.mkdir(parents=True, exist_ok=True)

            # Run async download in event loop
            try:
                loop = asyncio.get_event_loop()
                if loop.is_closed():
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
            except RuntimeError:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)

            downloaded_path = loop.run_until_complete(
                download_soulseek_file(
                    username=username,
                    filename=filename,
                    output_dir=output_dir,
                    timeout=1200  # 20 minutes
                )
            )

            return downloaded_path

        except Exception as e:
            log(f"[add-file] Soulseek download error: {type(e).__name__}: {e}", file=sys.stderr)
            debug(f"[add-file] Soulseek download traceback: {e}")
            return None

    @staticmethod
    def _handle_provider_upload(
        media_path: Path,
        provider_name: str,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
        delete_after: bool,
    ) -> int:
        """Handle uploading to a file provider (e.g. 0x0)."""
        from ProviderCore.registry import get_file_provider

        log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)

        try:
            file_provider = get_file_provider(provider_name, config)
            if not file_provider:
                log(f"File provider '{provider_name}' not available", file=sys.stderr)
                return 1

            hoster_url = file_provider.upload(str(media_path))
            log(f"File uploaded: {hoster_url}", file=sys.stderr)

            # Associate URL with Hydrus if possible
            f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
            if f_hash:
                try:
                    client = hydrus_wrapper.get_client(config)
                    if client:
                        client.associate_url(f_hash, hoster_url)
                except Exception:
                    pass

        except Exception as exc:
            log(f"Upload failed: {exc}", file=sys.stderr)
            return 1

        # Update PipeObject and emit
        extra_updates: Dict[str, Any] = {
            "provider": provider_name,
            "provider_url": hoster_url,
        }
        if isinstance(pipe_obj.extra, dict):
            # Also track hoster URL as a url for downstream steps
            existing_known = list(pipe_obj.extra.get("url") or [])
            if hoster_url and hoster_url not in existing_known:
                existing_known.append(hoster_url)
            extra_updates["url"] = existing_known

        file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=f_hash or "unknown",
            store=provider_name or "provider",
            path=file_path,
            tag=pipe_obj.tag,
            title=pipe_obj.title or (media_path.name if media_path else None),
            extra_updates=extra_updates,
        )
        Add_File._emit_pipe_object(pipe_obj)

        Add_File._cleanup_after_success(media_path, delete_source=delete_after)
        return 0
    @staticmethod
    def _handle_storage_backend(
        media_path: Path,
        backend_name: str,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
        delete_after: bool,
    ) -> int:
        """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
        log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)

        try:
            store = Store(config)
            backend = store[backend_name]

            # Prepare metadata from pipe_obj and sidecars
            tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config)

            # Call backend's add_file with full metadata
            # Backend returns hash as identifier
            file_identifier = backend.add_file(
                media_path,
                title=title,
                tags=tags,
                url=url
            )
            log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)

            stored_path: Optional[str] = None
            try:
                maybe_path = backend.get_file(file_identifier)
                if isinstance(maybe_path, Path):
                    stored_path = str(maybe_path)
                elif isinstance(maybe_path, str) and maybe_path:
                    # Some backends may return a browser URL
                    stored_path = maybe_path
            except Exception:
                stored_path = None

            Add_File._update_pipe_object_destination(
                pipe_obj,
                hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown",
                store=backend_name,
                path=stored_path,
                tag=tags,
                title=title or pipe_obj.title or media_path.name,
                extra_updates={
                    "url": url,
                },
            )
            Add_File._emit_pipe_object(pipe_obj)

            Add_File._cleanup_after_success(media_path, delete_source=delete_after)
            return 0

        except Exception as exc:
            log(f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return 1

    # --- Helpers ---

    @staticmethod
    def _load_sidecar_bundle(
        media_path: Path,
        store: Optional[str],
        config: Dict[str, Any],
    ) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]:
        """Load sidecar metadata."""
        if store and store.lower() == "local":
            try:
                from config import get_local_storage_path
                db_root = get_local_storage_path(config)
                if db_root:
                    with API_folder_store(Path(db_root)) as db:
                        file_hash = db.get_file_hash(media_path)
                        if file_hash:
                            tags = db.get_tags(file_hash) or []
                            metadata = db.get_metadata(file_hash) or {}
                            url = metadata.get("url") or []
                            f_hash = metadata.get("hash") or file_hash
                            if tags or url or f_hash:
                                return None, f_hash, tags, url
            except Exception:
                pass

        try:
            sidecar_path = find_sidecar(media_path)
            if sidecar_path and sidecar_path.exists():
                h, t, u = read_sidecar(sidecar_path)
                return sidecar_path, h, t or [], u or []
        except Exception:
            pass
        return None, None, [], []

    @staticmethod
    def _resolve_file_hash(
        result: Any,
        media_path: Path,
        pipe_obj: models.PipeObject,
        fallback_hash: Optional[str],
    ) -> Optional[str]:
        if pipe_obj.hash and pipe_obj.hash != "unknown":
            return pipe_obj.hash
        if fallback_hash:
            return fallback_hash

        if isinstance(result, dict):
            candidate = result.get('hash')
            if candidate:
                return str(candidate)

        try:
            return sha256_file(media_path)
        except Exception:
            return None

    @staticmethod
    def _resolve_media_kind(path: Path) -> str:
        # Reusing logic
        suffix = path.suffix.lower()
        if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
            return 'audio'
        if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
            return 'video'
        if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
            return 'image'
        if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
            return 'document'
        return 'other'
    @staticmethod
    def _persist_local_metadata(
        library_root: Path,
        dest_path: Path,
        tags: List[str],
        url: List[str],
        f_hash: Optional[str],
        relationships: Any,
        duration: Any,
        media_kind: str,
    ):
        payload = {
            'hash': f_hash,
            'url': url,
            'relationships': relationships or [],
            'duration': duration,
            'size': None,
            'ext': dest_path.suffix.lower(),
            'media_type': media_kind,
            'media_kind': media_kind,
        }
        try:
            payload['size'] = dest_path.stat().st_size
        except OSError:
            payload['size'] = None

        with API_folder_store(library_root) as db:
            try:
                db.save_file_info(dest_path, payload, tags)
            except Exception as exc:
                log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr)

    @staticmethod
    def _copy_sidecars(source_path: Path, target_path: Path):
        possible_sidecars = [
            source_path.with_suffix(source_path.suffix + ".json"),
            source_path.with_name(source_path.name + ".tag"),
            source_path.with_name(source_path.name + ".metadata"),
            source_path.with_name(source_path.name + ".notes"),
        ]
        for sc in possible_sidecars:
            try:
                if sc.exists():
                    suffix_part = sc.name.replace(source_path.name, "", 1)
                    dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
                    dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(str(sc), dest_sidecar)
            except Exception:
                pass

    @staticmethod
    def _cleanup_after_success(media_path: Path, delete_source: bool):
        # Temp merge files should always be deleted, even when -delete was not requested
        is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name

        if not delete_source and not is_temp_merge:
            return

        log("Deleting source file...", file=sys.stderr)
        try:
            media_path.unlink()
            Add_File._cleanup_sidecar_files(media_path)
        except Exception as exc:
            log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)

    @staticmethod
    def _cleanup_sidecar_files(media_path: Path):
        targets = [
            media_path.parent / (media_path.name + '.metadata'),
            media_path.parent / (media_path.name + '.notes'),
            media_path.parent / (media_path.name + '.tag'),
        ]
        for target in targets:
            try:
                if target.exists():
                    target.unlink()
            except Exception:
                pass


# Create and register the cmdlet
CMDLET = Add_File()
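
A hedged invocation sketch for the class-based form (the file path, store name, and empty config dict are placeholders, and it assumes the registered object is callable with the (result, args, config) signature declared by the Cmdlet type alias in __init__.py; note the parser reads the "store" key via SharedArgs.STORE even though the usage string above still shows -storage):

    import cmdlet

    add_file = cmdlet.get("add-file")        # resolves the Add_File instance registered above
    if add_file is not None:
        # Copy a local file into a backend named "local"; -delete is omitted, so the source is kept
        rc = add_file(None, ["-path", "C:/media/example.mp3", "-store", "local"], {})
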
cmdlet/add_note.py (new file, 148 lines)
@@ -0,0 +1,148 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys

from SYS.logger import log

import pipeline as ctx
from ._shared import (
    Cmdlet,
    CmdletArg,
    SharedArgs,
    normalize_hash,
    parse_cmdlet_args,
    normalize_result_input,
    should_show_help,
)
from Store import Store
from SYS.utils import sha256_file


class Add_Note(Cmdlet):
    def __init__(self) -> None:
        super().__init__(
            name="add-note",
            summary="Add or set a named note on a file in a store.",
            usage="add-note -store <store> [-hash <sha256>] <name> <text...>",
            alias=["set-note", "add_note"],
            arg=[
                SharedArgs.STORE,
                SharedArgs.HASH,
                CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
                CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
            ],
            detail=[
                "- Notes are stored via the selected store backend.",
                "- For lyrics: store LRC text in a note named 'lyric'.",
            ],
            exec=self.run,
        )
        # Populate dynamic store choices for autocomplete
        try:
            SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
        except Exception:
            pass
        self.register()
    def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
        resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
        if resolved:
            return resolved

        if raw_path:
            try:
                p = Path(str(raw_path))
                stem = p.stem
                if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
                    return stem.lower()
                if p.exists() and p.is_file():
                    return sha256_file(p)
            except Exception:
                return None
        return None

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        parsed = parse_cmdlet_args(args, self)

        store_override = parsed.get("store")
        hash_override = parsed.get("hash")
        note_name = str(parsed.get("name") or "").strip()
        text_parts = parsed.get("text")

        if not note_name:
            log("[add_note] Error: Requires <name>", file=sys.stderr)
            return 1

        if isinstance(text_parts, list):
            note_text = " ".join([str(p) for p in text_parts]).strip()
        else:
            note_text = str(text_parts or "").strip()

        if not note_text:
            log("[add_note] Error: Empty note text", file=sys.stderr)
            return 1

        results = normalize_result_input(result)
        if not results:
            if store_override and normalize_hash(hash_override):
                results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
            else:
                log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
                return 1

        store_registry = Store(config)
        updated = 0

        for res in results:
            if not isinstance(res, dict):
                ctx.emit(res)
                continue

            store_name = str(store_override or res.get("store") or "").strip()
            raw_hash = res.get("hash")
            raw_path = res.get("path")

            if not store_name:
                log("[add_note] Error: Missing -store and item has no store field", file=sys.stderr)
                return 1

            resolved_hash = self._resolve_hash(
                raw_hash=str(raw_hash) if raw_hash else None,
                raw_path=str(raw_path) if raw_path else None,
                override_hash=str(hash_override) if hash_override else None,
            )
            if not resolved_hash:
                log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)
                ctx.emit(res)
                continue

            try:
                backend = store_registry[store_name]
            except Exception as exc:
                log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
                return 1

            ok = False
            try:
                ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config))
            except Exception as exc:
                log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
                ok = False

            if ok:
                updated += 1

            ctx.emit(res)

        log(f"[add_note] Updated {updated} item(s)", file=sys.stderr)
        return 0 if updated > 0 else 1


CMDLET = Add_Note()
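
A usage sketch for add-note without piped input (the store name, dummy digest, and empty config dict are placeholders; when nothing is piped, run() builds a single item from -store and -hash as shown above):

    import cmdlet

    add_note = cmdlet.get("add-note")
    if add_note is not None:
        # "0" * 64 stands in for a real sha256; the variadic <text...> words are joined with spaces
        # before backend.set_note() is called.
        rc = add_note(None, ["-store", "local", "-hash", "0" * 64, "lyric", "[00:01.00] example line"], {})
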
cmdlet/add_relationship.py (new file, 492 lines)
@@ -0,0 +1,492 @@
"""Add file relationships in Hydrus based on relationship tags in sidecar."""

from __future__ import annotations

from typing import Any, Dict, Optional, Sequence
import json
import re
from pathlib import Path
import sys

from SYS.logger import log

import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, should_show_help, get_field
from API.folder import read_sidecar, find_sidecar


CMDLET = Cmdlet(
    name="add-relationship",
    summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
    usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
    arg=[
        CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
        CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
        CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
    ],
    detail=[
        "- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
        "- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4",
        "- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>...')",
        "- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
        "- When using -king, all piped items become the specified relationship type to the king",
    ],
)
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
    """Normalize a hash hex string to lowercase 64-char format."""
    if not value or not isinstance(value, str):
        return None
    normalized = value.strip().lower()
    if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
        return normalized
    return None


def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
    """Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.

    Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
    """
    result: Dict[str, list[str]] = {}
    if not isinstance(tag_value, str):
        return result

    # Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
    pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
    matches = re.findall(pattern, tag_value)

    for rel_type, hash_value in matches:
        normalized = _normalise_hash_hex(hash_value)
        if normalized:
            if rel_type not in result:
                result[rel_type] = []
            result[rel_type].append(normalized)

    return result
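# Illustrative parse (hypothetical tag value; "a" * 64 and "b" * 64 stand in for real digests):
#     _extract_relationships_from_tag("relationship: hash(king)" + "a" * 64 + ",hash(alt)" + "b" * 64)
#     -> {"king": ["a" * 64], "alt": ["b" * 64]}
# Note that the regex matches bare 64-hex digests; the <HASH> in the docstring is placeholder
# notation, not literal angle brackets.
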
def _resolve_king_reference(king_arg: str) -> Optional[str]:
    """Resolve a king reference like '@4' to its actual hash or path.

    Supports:
    - Direct hash: '0123456789abcdef...' (64 chars)
    - Selection reference: '@4' (resolves from pipeline context)

    Returns:
    - For Hydrus items: normalized hash
    - For local storage items: file path
    - None if not found
    """
    if not king_arg:
        return None

    # Check if it's already a valid hash
    normalized = _normalise_hash_hex(king_arg)
    if normalized:
        return normalized

    # Try to resolve as @N selection from pipeline context
    if king_arg.startswith('@'):
        try:
            # Get the result items from the pipeline context
            from pipeline import get_last_result_items
            items = get_last_result_items()
            if not items:
                log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr)
                return None

            # Parse @N to get the index (1-based)
            index_str = king_arg[1:]  # Remove '@'
            index = int(index_str) - 1  # Convert to 0-based

            if 0 <= index < len(items):
                item = items[index]

                # Try to extract hash from the item (could be dict or object)
                item_hash = (
                    get_field(item, 'hash_hex')
                    or get_field(item, 'hash')
                    or get_field(item, 'file_hash')
                )

                if item_hash:
                    normalized = _normalise_hash_hex(item_hash)
                    if normalized:
                        return normalized

                # If no hash, try to get file path (for local storage)
                file_path = (
                    get_field(item, 'file_path')
                    or get_field(item, 'path')
                    or get_field(item, 'target')
                )

                if file_path:
                    return str(file_path)

                log(f"Item {king_arg} has no hash or path information", file=sys.stderr)
                return None
            else:
                log(f"Index {king_arg} out of range", file=sys.stderr)
                return None
        except (ValueError, IndexError) as e:
            log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr)
            return None

    return None
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
    """If the current subject matches the target, refresh relationships via get-relationship."""
    try:
        from cmdlet import get as get_cmdlet  # type: ignore
    except Exception:
        return

    get_relationship = None
    try:
        get_relationship = get_cmdlet("get-relationship")
    except Exception:
        get_relationship = None
    if not callable(get_relationship):
        return

    try:
        subject = ctx.get_last_result_subject()
        if subject is None:
            return

        def norm(val: Any) -> str:
            return str(val).lower()

        target_hashes = [norm(v) for v in [target_hash, other] if v]
        target_paths = [norm(v) for v in [target_path, other] if v]

        subj_hashes: list[str] = []
        subj_paths: list[str] = []
        if isinstance(subject, dict):
            subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
            subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v]
        else:
            subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
            subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)]

        is_match = False
        if target_hashes and any(h in subj_hashes for h in target_hashes):
            is_match = True
        if target_paths and any(p in subj_paths for p in target_paths):
            is_match = True
        if not is_match:
            return

        refresh_args: list[str] = []
        if target_hash:
            refresh_args.extend(["-hash", target_hash])
        get_relationship(subject, refresh_args, config)
    except Exception:
        pass
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Associate file relationships in Hydrus.

    Two modes of operation:
    1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
    2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"

    Returns 0 on success, non-zero on failure.
    """
    # Help
    if should_show_help(_args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    # Parse arguments using CMDLET spec
    parsed = parse_cmdlet_args(_args, CMDLET)
    arg_path: Optional[Path] = None
    king_arg = parsed.get("king")
    rel_type = parsed.get("type", "alt")

    raw_path = parsed.get("path")
    if raw_path:
        try:
            arg_path = Path(str(raw_path)).expanduser()
        except Exception:
            arg_path = Path(str(raw_path))

    # Handle @N selection, which creates a list.
    # Use normalize_result_input to handle both single items and lists.
    items_to_process = normalize_result_input(result)

    if not items_to_process and not arg_path:
        log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
        return 1

    # If no items from pipeline, just process the -path arg
    if not items_to_process and arg_path:
        items_to_process = [{"file_path": arg_path}]

    # Import local storage utilities
    from API.folder import LocalLibrarySearchOptimizer
    from config import get_local_storage_path

    local_storage_path = get_local_storage_path(config) if config else None

    # Check if any items have Hydrus hashes (file_hash or hash_hex fields)
    has_hydrus_hashes = any(
        (isinstance(item, dict) and (item.get('hash_hex') or item.get('hash')))
        or (hasattr(item, 'hash_hex') or hasattr(item, 'hash'))
        for item in items_to_process
    )

    # Only try to initialize Hydrus if we actually have Hydrus hashes to work with
    hydrus_client = None
    if has_hydrus_hashes:
        try:
            hydrus_client = hydrus_wrapper.get_client(config)
        except Exception as exc:
            log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr)

    # Use local storage if it's available and either Hydrus is not available or items are local files
    use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists()))

    # Resolve the king reference once (if provided)
    king_hash = None
    if king_arg:
        # Resolve the king reference (could be @4 or a direct hash)
        king_hash = _resolve_king_reference(king_arg)
        if not king_hash:
            log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr)
            return 1
        log(f"Using king hash: {king_hash}", file=sys.stderr)

    # Process each item in the list
    for item_idx, item in enumerate(items_to_process):
        # Extract hash and path from the current item
        file_hash = None
        file_path_from_result = None

        if isinstance(item, dict):
            file_hash = item.get("hash_hex") or item.get("hash")
            file_path_from_result = item.get("file_path") or item.get("path") or item.get("target")
        else:
            file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
            file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)

        # PIPELINE MODE with Hydrus: Track relationships using hash
        if file_hash and hydrus_client:
            file_hash = _normalise_hash_hex(file_hash)
            if not file_hash:
                log("Invalid file hash format", file=sys.stderr)
                return 1

            # If explicit -king provided, use it
            if king_hash:
                try:
                    hydrus_client.set_relationship(file_hash, king_hash, rel_type)
                    log(
                        f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
                        file=sys.stderr
                    )
                    _refresh_relationship_view_if_current(file_hash, file_path_from_result, king_hash, config)
                except Exception as exc:
                    log(f"Failed to set relationship: {exc}", file=sys.stderr)
                    return 1
            else:
                # Original behavior: no explicit king, first becomes king, rest become alts
                try:
                    existing_king = ctx.load_value("relationship_king")
                except Exception:
                    existing_king = None

                # If this is the first item, make it the king
                if not existing_king:
                    try:
                        ctx.store_value("relationship_king", file_hash)
                        log(f"Established king hash: {file_hash}", file=sys.stderr)
                        continue  # Move to next item
                    except Exception:
                        pass

                # If we already have a king and this is a different hash, link them
                if existing_king and existing_king != file_hash:
                    try:
                        hydrus_client.set_relationship(file_hash, existing_king, rel_type)
                        log(
                            f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
                            file=sys.stderr
                        )
                        _refresh_relationship_view_if_current(file_hash, file_path_from_result, existing_king, config)
                    except Exception as exc:
                        log(f"Failed to set relationship: {exc}", file=sys.stderr)
                        return 1

        # LOCAL STORAGE MODE: Handle relationships for local files
        elif use_local_storage and file_path_from_result:
            try:
                file_path_obj = Path(str(file_path_from_result))

                if not file_path_obj.exists():
                    log(f"File not found: {file_path_obj}", file=sys.stderr)
                    return 1

                if king_hash:
                    # king_hash is a file path from _resolve_king_reference (or a Hydrus hash)
                    king_file_path = Path(str(king_hash)) if king_hash else None
                    if king_file_path and king_file_path.exists():
                        with LocalLibrarySearchOptimizer(local_storage_path) as db:
                            db.set_relationship(file_path_obj, king_file_path, rel_type)
                            log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
                        _refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
                    else:
                        log(f"King file not found or invalid: {king_hash}", file=sys.stderr)
                        return 1
                else:
                    # Original behavior: first becomes king, rest become alts
                    try:
                        king_path = ctx.load_value("relationship_king_path")
                    except Exception:
                        king_path = None

                    if not king_path:
                        try:
                            ctx.store_value("relationship_king_path", str(file_path_obj))
                            log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
                            continue  # Move to next item
                        except Exception:
                            pass

                    if king_path and king_path != str(file_path_obj):
                        try:
                            with LocalLibrarySearchOptimizer(local_storage_path) as db:
                                db.set_relationship(file_path_obj, Path(king_path), rel_type)
                                log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
                            _refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
                        except Exception as exc:
                            log(f"Failed to set relationship: {exc}", file=sys.stderr)
                            return 1
            except Exception as exc:
                log(f"Local storage error: {exc}", file=sys.stderr)
                return 1

    return 0

    # FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
|
||||
|
||||
# Resolve media path from -path arg or result target
|
||||
target = getattr(result, "target", None) or getattr(result, "path", None)
|
||||
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
|
||||
if media_path is None:
|
||||
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Validate local file
|
||||
if str(media_path).lower().startswith(("http://", "https://")):
|
||||
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
|
||||
return 1
|
||||
if not media_path.exists() or not media_path.is_file():
|
||||
log(f"File not found: {media_path}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Build Hydrus client
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Read sidecar to find relationship tags
|
||||
sidecar_path = find_sidecar(media_path)
|
||||
if sidecar_path is None:
|
||||
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
_, tags, _ = read_sidecar(sidecar_path)
|
||||
except Exception as exc:
|
||||
log(f"Failed to read sidecar: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
|
||||
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
|
||||
|
||||
if not relationship_tags:
|
||||
log(f"No relationship tags found in sidecar", file=sys.stderr)
|
||||
return 0 # Not an error, just nothing to do
|
||||
|
||||
# Get the file hash from result (should have been set by add-file)
|
||||
file_hash = getattr(result, "hash_hex", None)
|
||||
if not file_hash:
|
||||
log("File hash not available (run add-file first)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_hash = _normalise_hash_hex(file_hash)
|
||||
if not file_hash:
|
||||
log("Invalid file hash format", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Parse relationships from tags and apply them
|
||||
success_count = 0
|
||||
error_count = 0
|
||||
|
||||
for rel_tag in relationship_tags:
|
||||
try:
|
||||
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
|
||||
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
|
||||
|
||||
# Parse relationships
|
||||
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
|
||||
|
||||
# Set the relationships in Hydrus
|
||||
for rel_type, related_hashes in rels.items():
|
||||
if not related_hashes:
|
||||
continue
|
||||
|
||||
for related_hash in related_hashes:
|
||||
# Don't set relationship between hash and itself
|
||||
if file_hash == related_hash:
|
||||
continue
|
||||
|
||||
try:
|
||||
client.set_relationship(file_hash, related_hash, rel_type)
|
||||
log(
|
||||
f"[add-relationship] Set {rel_type} relationship: "
|
||||
f"{file_hash} <-> {related_hash}",
|
||||
file=sys.stderr
|
||||
)
|
||||
success_count += 1
|
||||
except Exception as exc:
|
||||
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
|
||||
error_count += 1
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
|
||||
error_count += 1
|
||||
|
||||
if success_count > 0:
|
||||
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
|
||||
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
|
||||
return 0
|
||||
elif error_count == 0:
|
||||
log(f"No relationships to set", file=sys.stderr)
|
||||
return 0 # Success with nothing to do
|
||||
else:
|
||||
log(f"Failed with {error_count} error(s)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.alias = ["add-rel"]
|
||||
CMDLET.register()
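A rough usage sketch (not part of the commit): the registered callable can be fetched from the registry and driven directly. The hashes and the empty config below are placeholders; a real run needs a configured Hydrus client or a local storage path.

from cmdlet import get

add_relationship = get("add-relationship")
if callable(add_relationship):
    piped = [
        {"hash": "a" * 64},
        {"hash": "b" * 64},
    ]
    # With no -king argument the first item becomes the king and the second
    # is linked to it with the default relationship type ("alt").
    exit_code = add_relationship(piped, [], {})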
488
cmdlet/add_tag.py
Normal file
488
cmdlet/add_tag.py
Normal file
@@ -0,0 +1,488 @@
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Sequence, Optional
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from ._shared import normalize_result_input, filter_results_by_temp
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_tag_arguments,
|
||||
expand_tag_groups,
|
||||
parse_cmdlet_args,
|
||||
collapse_namespace_tag,
|
||||
should_show_help,
|
||||
get_field,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
def _extract_title_tag(tags: List[str]) -> Optional[str]:
|
||||
"""Return the value of the first title: tag if present."""
|
||||
for t in tags:
|
||||
if t.lower().startswith("title:"):
|
||||
value = t.split(":", 1)[1].strip()
|
||||
return value or None
|
||||
return None
|
||||
|
||||
|
||||
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
|
||||
"""Update result object/dict title fields and columns in-place."""
|
||||
if not title_value:
|
||||
return
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.title = title_value
|
||||
# Update columns if present (Title column assumed index 0)
|
||||
columns = getattr(res, "columns", None)
|
||||
if isinstance(columns, list) and columns:
|
||||
label, *_ = columns[0]
|
||||
if str(label).lower() == "title":
|
||||
columns[0] = (label, title_value)
|
||||
elif isinstance(res, dict):
|
||||
res["title"] = title_value
|
||||
cols = res.get("columns")
|
||||
if isinstance(cols, list):
|
||||
updated = []
|
||||
changed = False
|
||||
for col in cols:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, _val = col
|
||||
if str(label).lower() == "title":
|
||||
updated.append((label, title_value))
|
||||
changed = True
|
||||
else:
|
||||
updated.append(col)
|
||||
else:
|
||||
updated.append(col)
|
||||
if changed:
|
||||
res["columns"] = updated
|
||||
|
||||
|
||||
def _matches_target(
|
||||
item: Any,
|
||||
target_hash: Optional[str],
|
||||
target_path: Optional[str],
|
||||
target_store: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Determine whether a result item refers to the given target.
|
||||
|
||||
Important: hashes can collide across backends in this app's UX (same media in
|
||||
multiple stores). When target_store is provided, it must match too.
|
||||
"""
|
||||
|
||||
def norm(val: Any) -> Optional[str]:
|
||||
return str(val).lower() if val is not None else None
|
||||
|
||||
target_hash_l = target_hash.lower() if target_hash else None
|
||||
target_path_l = target_path.lower() if target_path else None
|
||||
target_store_l = target_store.lower() if target_store else None
|
||||
|
||||
if isinstance(item, dict):
|
||||
hashes = [norm(item.get("hash"))]
|
||||
paths = [norm(item.get("path"))]
|
||||
stores = [norm(item.get("store"))]
|
||||
else:
|
||||
hashes = [norm(get_field(item, "hash"))]
|
||||
paths = [norm(get_field(item, "path"))]
|
||||
stores = [norm(get_field(item, "store"))]
|
||||
|
||||
if target_store_l:
|
||||
if target_store_l not in stores:
|
||||
return False
|
||||
|
||||
if target_hash_l and target_hash_l in hashes:
|
||||
return True
|
||||
if target_path_l and target_path_l in paths:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _update_item_title_fields(item: Any, new_title: str) -> None:
|
||||
"""Mutate an item to reflect a new title in plain fields and columns."""
|
||||
if isinstance(item, models.PipeObject):
|
||||
item.title = new_title
|
||||
columns = getattr(item, "columns", None)
|
||||
if isinstance(columns, list) and columns:
|
||||
label, *_ = columns[0]
|
||||
if str(label).lower() == "title":
|
||||
columns[0] = (label, new_title)
|
||||
elif isinstance(item, dict):
|
||||
item["title"] = new_title
|
||||
cols = item.get("columns")
|
||||
if isinstance(cols, list):
|
||||
updated_cols = []
|
||||
changed = False
|
||||
for col in cols:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, _val = col
|
||||
if str(label).lower() == "title":
|
||||
updated_cols.append((label, new_title))
|
||||
changed = True
|
||||
else:
|
||||
updated_cols.append(col)
|
||||
else:
|
||||
updated_cols.append(col)
|
||||
if changed:
|
||||
item["columns"] = updated_cols
|
||||
|
||||
|
||||
def _refresh_result_table_title(
|
||||
new_title: str,
|
||||
target_hash: Optional[str],
|
||||
target_store: Optional[str],
|
||||
target_path: Optional[str],
|
||||
) -> None:
|
||||
"""Refresh the cached result table with an updated title and redisplay it."""
|
||||
try:
|
||||
last_table = ctx.get_last_result_table()
|
||||
items = ctx.get_last_result_items()
|
||||
if not last_table or not items:
|
||||
return
|
||||
|
||||
updated_items = []
|
||||
match_found = False
|
||||
for item in items:
|
||||
try:
|
||||
if _matches_target(item, target_hash, target_path, target_store):
|
||||
_update_item_title_fields(item, new_title)
|
||||
match_found = True
|
||||
except Exception:
|
||||
pass
|
||||
updated_items.append(item)
|
||||
if not match_found:
|
||||
return
|
||||
|
||||
new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
|
||||
|
||||
for item in updated_items:
|
||||
new_table.add_result(item)
|
||||
|
||||
# Keep the underlying history intact; update only the overlay so @.. can
|
||||
# clear the overlay then continue back to prior tables (e.g., the search list).
|
||||
ctx.set_last_result_table_overlay(new_table, updated_items)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
|
||||
try:
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
if not target_hash or not store_name:
|
||||
return
|
||||
|
||||
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
|
||||
|
||||
get_tag = None
|
||||
try:
|
||||
get_tag = get_cmdlet("get-tag")
|
||||
except Exception:
|
||||
get_tag = None
|
||||
if not callable(get_tag):
|
||||
return
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject and _matches_target(subject, target_hash, target_path, store_name):
|
||||
get_tag(subject, refresh_args, config)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
get_tag(res, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class Add_Tag(Cmdlet):
|
||||
"""Class-based add-tag cmdlet with Cmdlet metadata inheritance."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="add-tag",
|
||||
summary="Add tag to a file in a store.",
|
||||
usage="add-tag -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
|
||||
arg=[
|
||||
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
|
||||
],
|
||||
detail=[
|
||||
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
|
||||
"- Requires a store backend: use -store or pipe items that include store.",
|
||||
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
|
||||
"- Multiple tag can be comma-separated or space-separated.",
|
||||
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
|
||||
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
|
||||
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
|
||||
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
|
||||
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
|
||||
"- The source namespace must already exist in the file being tagged.",
|
||||
"- Target namespaces that already have a value are skipped (not overwritten).",
|
||||
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Add tag to a file with smart filtering for pipeline results."""
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Check for --all flag
|
||||
include_temp = parsed.get("all", False)
|
||||
|
||||
# Normalize input to list
|
||||
results = normalize_result_input(result)
|
||||
|
||||
# Filter by temp status (unless --all is set)
|
||||
if not include_temp:
|
||||
results = filter_results_by_temp(results, include_temp=False)
|
||||
|
||||
if not results:
|
||||
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get tag from arguments (or fallback to pipeline payload)
|
||||
raw_tag = parsed.get("tag", [])
|
||||
if isinstance(raw_tag, str):
|
||||
raw_tag = [raw_tag]
|
||||
|
||||
# Fallback: if no tag provided explicitly, try to pull from first result payload
|
||||
if not raw_tag and results:
|
||||
first = results[0]
|
||||
payload_tag = None
|
||||
|
||||
# Try multiple tag lookup strategies in order
|
||||
tag_lookups = [
|
||||
lambda x: getattr(x, "tag", None),
|
||||
lambda x: x.get("tag") if isinstance(x, dict) else None,
|
||||
]
|
||||
|
||||
for lookup in tag_lookups:
|
||||
try:
|
||||
payload_tag = lookup(first)
|
||||
if payload_tag:
|
||||
break
|
||||
except (AttributeError, TypeError, KeyError):
|
||||
continue
|
||||
|
||||
if payload_tag:
|
||||
if isinstance(payload_tag, str):
|
||||
raw_tag = [payload_tag]
|
||||
elif isinstance(payload_tag, list):
|
||||
raw_tag = payload_tag
|
||||
|
||||
# Handle -list argument (convert to {list} syntax)
|
||||
list_arg = parsed.get("list")
|
||||
if list_arg:
|
||||
for l in list_arg.split(','):
|
||||
l = l.strip()
|
||||
if l:
|
||||
raw_tag.append(f"{{{l}}}")
|
||||
|
||||
# Parse and expand tag
|
||||
tag_to_add = parse_tag_arguments(raw_tag)
|
||||
tag_to_add = expand_tag_groups(tag_to_add)
|
||||
|
||||
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
|
||||
extracted_hash = None
|
||||
filtered_tag: List[str] = []
|
||||
for tag in tag_to_add:
|
||||
if isinstance(tag, str) and tag.lower().startswith("hash:"):
|
||||
_, _, hash_val = tag.partition(":")
|
||||
if hash_val:
|
||||
extracted_hash = normalize_hash(hash_val.strip())
|
||||
continue
|
||||
filtered_tag.append(tag)
|
||||
tag_to_add = filtered_tag
|
||||
|
||||
if not tag_to_add:
|
||||
log("No tag provided to add", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get other flags (hash override can come from -hash or hash: token)
|
||||
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
|
||||
duplicate_arg = parsed.get("duplicate")
|
||||
|
||||
# tag ARE provided - apply them to each store-backed result
|
||||
total_added = 0
|
||||
total_modified = 0
|
||||
|
||||
store_override = parsed.get("store")
|
||||
|
||||
for res in results:
|
||||
store_name: Optional[str]
|
||||
raw_hash: Optional[str]
|
||||
raw_path: Optional[str]
|
||||
|
||||
if isinstance(res, models.PipeObject):
|
||||
store_name = store_override or res.store
|
||||
raw_hash = res.hash
|
||||
raw_path = res.path
|
||||
elif isinstance(res, dict):
|
||||
store_name = store_override or res.get("store")
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
else:
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
if not store_name:
|
||||
log("[add_tag] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
|
||||
if not resolved_hash and raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
resolved_hash = stem.lower()
|
||||
elif p.exists() and p.is_file():
|
||||
resolved_hash = sha256_file(p)
|
||||
except Exception:
|
||||
resolved_hash = None
|
||||
|
||||
if not resolved_hash:
|
||||
log("[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = Store(config)[str(store_name)]
|
||||
except Exception as exc:
|
||||
log(f"[add_tag] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
existing_tag, _src = backend.get_tag(resolved_hash, config=config)
|
||||
except Exception:
|
||||
existing_tag = []
|
||||
|
||||
existing_tag_list = [t for t in (existing_tag or []) if isinstance(t, str)]
|
||||
existing_lower = {t.lower() for t in existing_tag_list}
|
||||
original_title = _extract_title_tag(existing_tag_list)
|
||||
|
||||
# Per-item tag list (do not mutate shared list)
|
||||
item_tag_to_add = list(tag_to_add)
|
||||
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
|
||||
|
||||
# Handle -duplicate logic (copy existing tag to new namespaces)
|
||||
if duplicate_arg:
|
||||
parts = str(duplicate_arg).split(':')
|
||||
source_ns = ""
|
||||
targets: list[str] = []
|
||||
|
||||
if len(parts) > 1:
|
||||
source_ns = parts[0]
|
||||
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
|
||||
else:
|
||||
parts2 = str(duplicate_arg).split(',')
|
||||
if len(parts2) > 1:
|
||||
source_ns = parts2[0]
|
||||
targets = [t.strip() for t in parts2[1:] if t.strip()]
|
||||
|
||||
if source_ns and targets:
|
||||
source_prefix = source_ns.lower() + ":"
|
||||
for t in existing_tag_list:
|
||||
if not t.lower().startswith(source_prefix):
|
||||
continue
|
||||
value = t.split(":", 1)[1]
|
||||
for target_ns in targets:
|
||||
new_tag = f"{target_ns}:{value}"
|
||||
if new_tag.lower() not in existing_lower:
|
||||
item_tag_to_add.append(new_tag)
|
||||
|
||||
# Namespace replacement: delete old namespace:* when adding namespace:value
|
||||
removed_namespace_tag: list[str] = []
|
||||
for new_tag in item_tag_to_add:
|
||||
if not isinstance(new_tag, str) or ":" not in new_tag:
|
||||
continue
|
||||
ns = new_tag.split(":", 1)[0].strip()
|
||||
if not ns:
|
||||
continue
|
||||
ns_prefix = ns.lower() + ":"
|
||||
for t in existing_tag_list:
|
||||
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
|
||||
removed_namespace_tag.append(t)
|
||||
|
||||
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
|
||||
|
||||
actual_tag_to_add = [t for t in item_tag_to_add if isinstance(t, str) and t.lower() not in existing_lower]
|
||||
|
||||
changed = False
|
||||
if removed_namespace_tag:
|
||||
try:
|
||||
ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
|
||||
if ok_del:
|
||||
changed = True
|
||||
except Exception as exc:
|
||||
log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr)
|
||||
|
||||
if actual_tag_to_add:
|
||||
try:
|
||||
ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
|
||||
if ok_add:
|
||||
changed = True
|
||||
else:
|
||||
log("[add_tag] Warning: Store rejected tag update", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr)
|
||||
|
||||
if changed:
|
||||
total_added += len(actual_tag_to_add)
|
||||
total_modified += 1
|
||||
|
||||
try:
|
||||
refreshed_tag, _src2 = backend.get_tag(resolved_hash, config=config)
|
||||
refreshed_list = [t for t in (refreshed_tag or []) if isinstance(t, str)]
|
||||
except Exception:
|
||||
refreshed_list = existing_tag_list
|
||||
|
||||
# Update the result's tag using canonical field
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.tag = refreshed_list
|
||||
elif isinstance(res, dict):
|
||||
res["tag"] = refreshed_list
|
||||
|
||||
final_title = _extract_title_tag(refreshed_list)
|
||||
_apply_title_to_result(res, final_title)
|
||||
|
||||
if final_title and (not original_title or final_title.lower() != original_title.lower()):
|
||||
_refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path)
|
||||
|
||||
if changed:
|
||||
_refresh_tag_view(res, resolved_hash, str(store_name), raw_path, config)
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
log(
|
||||
f"[add_tag] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET = Add_Tag()
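Hedged example of driving the cmdlet instance directly. The "local" store name, the hash value, and the empty config are assumptions about the caller's setup, and this assumes parse_cmdlet_args maps bare tokens onto the variadic "tag" argument.

item = {
    "hash": "c" * 64,     # sha256 of the target file (placeholder)
    "store": "local",     # must match a backend exposed by Store(config)
    "title": "Example",
    "tag": [],
}
config: dict = {}         # a real config with store settings is required
rc = CMDLET.run(item, ["title:Example Title", "creator:someone"], config)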
456
cmdlet/add_tags.py
Normal file
456
cmdlet/add_tags.py
Normal file
@@ -0,0 +1,456 @@
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Sequence, Optional
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from ._shared import normalize_result_input, filter_results_by_temp
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_tag_arguments,
|
||||
expand_tag_groups,
|
||||
parse_cmdlet_args,
|
||||
collapse_namespace_tags,
|
||||
should_show_help,
|
||||
get_field,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
def _extract_title_tag(tags: List[str]) -> Optional[str]:
|
||||
"""Return the value of the first title: tag if present."""
|
||||
for tag in tags:
|
||||
if isinstance(tag, str) and tag.lower().startswith("title:"):
|
||||
value = tag.split(":", 1)[1].strip()
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
|
||||
"""Update result object/dict title fields and columns in-place."""
|
||||
if not title_value:
|
||||
return
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.title = title_value
|
||||
# Update columns if present (Title column assumed index 0)
|
||||
if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns:
|
||||
label, *_ = res.columns[0]
|
||||
if str(label).lower() == "title":
|
||||
res.columns[0] = (res.columns[0][0], title_value)
|
||||
elif isinstance(res, dict):
|
||||
res["title"] = title_value
|
||||
cols = res.get("columns")
|
||||
if isinstance(cols, list):
|
||||
updated = []
|
||||
changed = False
|
||||
for col in cols:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, val = col
|
||||
if str(label).lower() == "title":
|
||||
updated.append((label, title_value))
|
||||
changed = True
|
||||
else:
|
||||
updated.append(col)
|
||||
else:
|
||||
updated.append(col)
|
||||
if changed:
|
||||
res["columns"] = updated
|
||||
|
||||
|
||||
def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool:
|
||||
"""Determine whether a result item refers to the given hash/path target (canonical fields only)."""
|
||||
|
||||
def norm(val: Any) -> Optional[str]:
|
||||
return str(val).lower() if val is not None else None
|
||||
|
||||
target_hash_l = target_hash.lower() if target_hash else None
|
||||
target_path_l = target_path.lower() if target_path else None
|
||||
|
||||
if isinstance(item, dict):
|
||||
hashes = [norm(item.get("hash"))]
|
||||
paths = [norm(item.get("path"))]
|
||||
else:
|
||||
hashes = [norm(get_field(item, "hash"))]
|
||||
paths = [norm(get_field(item, "path"))]
|
||||
|
||||
if target_hash_l and target_hash_l in hashes:
|
||||
return True
|
||||
if target_path_l and target_path_l in paths:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _update_item_title_fields(item: Any, new_title: str) -> None:
|
||||
"""Mutate an item to reflect a new title in plain fields and columns."""
|
||||
if isinstance(item, models.PipeObject):
|
||||
item.title = new_title
|
||||
if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns:
|
||||
label, *_ = item.columns[0]
|
||||
if str(label).lower() == "title":
|
||||
item.columns[0] = (label, new_title)
|
||||
elif isinstance(item, dict):
|
||||
item["title"] = new_title
|
||||
cols = item.get("columns")
|
||||
if isinstance(cols, list):
|
||||
updated_cols = []
|
||||
changed = False
|
||||
for col in cols:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, val = col
|
||||
if str(label).lower() == "title":
|
||||
updated_cols.append((label, new_title))
|
||||
changed = True
|
||||
else:
|
||||
updated_cols.append(col)
|
||||
else:
|
||||
updated_cols.append(col)
|
||||
if changed:
|
||||
item["columns"] = updated_cols
|
||||
|
||||
|
||||
def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None:
|
||||
"""Refresh the cached result table with an updated title and redisplay it."""
|
||||
try:
|
||||
last_table = ctx.get_last_result_table()
|
||||
items = ctx.get_last_result_items()
|
||||
if not last_table or not items:
|
||||
return
|
||||
|
||||
updated_items = []
|
||||
match_found = False
|
||||
for item in items:
|
||||
try:
|
||||
if _matches_target(item, target_hash, target_path):
|
||||
_update_item_title_fields(item, new_title)
|
||||
match_found = True
|
||||
except Exception:
|
||||
pass
|
||||
updated_items.append(item)
|
||||
if not match_found:
|
||||
return
|
||||
|
||||
new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
|
||||
|
||||
for item in updated_items:
|
||||
new_table.add_result(item)
|
||||
|
||||
# Keep the underlying history intact; update only the overlay so @.. can
|
||||
# clear the overlay then continue back to prior tables (e.g., the search list).
|
||||
ctx.set_last_result_table_overlay(new_table, updated_items)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
|
||||
try:
|
||||
from cmdlet import get_tag as get_tag_cmd # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
if not target_hash or not store_name:
|
||||
return
|
||||
|
||||
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject and _matches_target(subject, target_hash, target_path):
|
||||
get_tag_cmd._run(subject, refresh_args, config)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
get_tag_cmd._run(res, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class Add_Tag(Cmdlet):
|
||||
"""Class-based add-tags cmdlet with Cmdlet metadata inheritance."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="add-tags",
|
||||
summary="Add tags to a file in a store.",
|
||||
usage="add-tags -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
|
||||
CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True),
|
||||
],
|
||||
detail=[
|
||||
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
|
||||
"- Requires a store backend: use -store or pipe items that include store.",
|
||||
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
|
||||
"- Multiple tags can be comma-separated or space-separated.",
|
||||
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
|
||||
"- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"",
|
||||
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
|
||||
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
|
||||
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
|
||||
"- The source namespace must already exist in the file being tagged.",
|
||||
"- Target namespaces that already have a value are skipped (not overwritten).",
|
||||
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Add tags to a file with smart filtering for pipeline results."""
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Check for --all flag
|
||||
include_temp = parsed.get("all", False)
|
||||
|
||||
# Normalize input to list
|
||||
results = normalize_result_input(result)
|
||||
|
||||
# Filter by temp status (unless --all is set)
|
||||
if not include_temp:
|
||||
results = filter_results_by_temp(results, include_temp=False)
|
||||
|
||||
if not results:
|
||||
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get tags from arguments (or fallback to pipeline payload)
|
||||
raw_tags = parsed.get("tags", [])
|
||||
if isinstance(raw_tags, str):
|
||||
raw_tags = [raw_tags]
|
||||
|
||||
# Fallback: if no tags provided explicitly, try to pull from first result payload
|
||||
if not raw_tags and results:
|
||||
first = results[0]
|
||||
payload_tags = None
|
||||
|
||||
# Try multiple tag lookup strategies in order
|
||||
tag_lookups = [
|
||||
lambda x: getattr(x, "tags", None),
|
||||
lambda x: x.get("tags") if isinstance(x, dict) else None,
|
||||
]
|
||||
|
||||
for lookup in tag_lookups:
|
||||
try:
|
||||
payload_tags = lookup(first)
|
||||
if payload_tags:
|
||||
break
|
||||
except (AttributeError, TypeError, KeyError):
|
||||
continue
|
||||
|
||||
if payload_tags:
|
||||
if isinstance(payload_tags, str):
|
||||
raw_tags = [payload_tags]
|
||||
elif isinstance(payload_tags, list):
|
||||
raw_tags = payload_tags
|
||||
|
||||
# Handle -list argument (convert to {list} syntax)
|
||||
list_arg = parsed.get("list")
|
||||
if list_arg:
|
||||
for l in list_arg.split(','):
|
||||
l = l.strip()
|
||||
if l:
|
||||
raw_tags.append(f"{{{l}}}")
|
||||
|
||||
# Parse and expand tags
|
||||
tags_to_add = parse_tag_arguments(raw_tags)
|
||||
tags_to_add = expand_tag_groups(tags_to_add)
|
||||
|
||||
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
|
||||
extracted_hash = None
|
||||
filtered_tags: List[str] = []
|
||||
for tag in tags_to_add:
|
||||
if isinstance(tag, str) and tag.lower().startswith("hash:"):
|
||||
_, _, hash_val = tag.partition(":")
|
||||
if hash_val:
|
||||
extracted_hash = normalize_hash(hash_val.strip())
|
||||
continue
|
||||
filtered_tags.append(tag)
|
||||
tags_to_add = filtered_tags
|
||||
|
||||
if not tags_to_add:
|
||||
log("No tags provided to add", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get other flags (hash override can come from -hash or hash: token)
|
||||
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
|
||||
duplicate_arg = parsed.get("duplicate")
|
||||
|
||||
# Tags ARE provided - apply them to each store-backed result
|
||||
total_added = 0
|
||||
total_modified = 0
|
||||
|
||||
store_override = parsed.get("store")
|
||||
|
||||
for res in results:
|
||||
store_name: Optional[str]
|
||||
raw_hash: Optional[str]
|
||||
raw_path: Optional[str]
|
||||
|
||||
if isinstance(res, models.PipeObject):
|
||||
store_name = store_override or res.store
|
||||
raw_hash = res.hash
|
||||
raw_path = res.path
|
||||
elif isinstance(res, dict):
|
||||
store_name = store_override or res.get("store")
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
else:
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
if not store_name:
|
||||
log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
|
||||
if not resolved_hash and raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
resolved_hash = stem.lower()
|
||||
elif p.exists() and p.is_file():
|
||||
resolved_hash = sha256_file(p)
|
||||
except Exception:
|
||||
resolved_hash = None
|
||||
|
||||
if not resolved_hash:
|
||||
log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = Store(config)[str(store_name)]
|
||||
except Exception as exc:
|
||||
log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
existing_tags, _src = backend.get_tag(resolved_hash, config=config)
|
||||
except Exception:
|
||||
existing_tags = []
|
||||
|
||||
existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)]
|
||||
existing_lower = {t.lower() for t in existing_tags_list}
|
||||
original_title = _extract_title_tag(existing_tags_list)
|
||||
|
||||
# Per-item tag list (do not mutate shared list)
|
||||
item_tags_to_add = list(tags_to_add)
|
||||
item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last")
|
||||
|
||||
# Handle -duplicate logic (copy existing tags to new namespaces)
|
||||
if duplicate_arg:
|
||||
parts = str(duplicate_arg).split(':')
|
||||
source_ns = ""
|
||||
targets: list[str] = []
|
||||
|
||||
if len(parts) > 1:
|
||||
source_ns = parts[0]
|
||||
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
|
||||
else:
|
||||
parts2 = str(duplicate_arg).split(',')
|
||||
if len(parts2) > 1:
|
||||
source_ns = parts2[0]
|
||||
targets = [t.strip() for t in parts2[1:] if t.strip()]
|
||||
|
||||
if source_ns and targets:
|
||||
source_prefix = source_ns.lower() + ":"
|
||||
for t in existing_tags_list:
|
||||
if not t.lower().startswith(source_prefix):
|
||||
continue
|
||||
value = t.split(":", 1)[1]
|
||||
for target_ns in targets:
|
||||
new_tag = f"{target_ns}:{value}"
|
||||
if new_tag.lower() not in existing_lower:
|
||||
item_tags_to_add.append(new_tag)
|
||||
|
||||
# Namespace replacement: delete old namespace:* when adding namespace:value
|
||||
removed_namespace_tags: list[str] = []
|
||||
for new_tag in item_tags_to_add:
|
||||
if not isinstance(new_tag, str) or ":" not in new_tag:
|
||||
continue
|
||||
ns = new_tag.split(":", 1)[0].strip()
|
||||
if not ns:
|
||||
continue
|
||||
ns_prefix = ns.lower() + ":"
|
||||
for t in existing_tags_list:
|
||||
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
|
||||
removed_namespace_tags.append(t)
|
||||
|
||||
removed_namespace_tags = sorted({t for t in removed_namespace_tags})
|
||||
|
||||
actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower]
|
||||
|
||||
changed = False
|
||||
if removed_namespace_tags:
|
||||
try:
|
||||
backend.delete_tag(resolved_hash, removed_namespace_tags, config=config)
|
||||
changed = True
|
||||
except Exception as exc:
|
||||
log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr)
|
||||
|
||||
if actual_tags_to_add:
|
||||
try:
|
||||
backend.add_tag(resolved_hash, actual_tags_to_add, config=config)
|
||||
changed = True
|
||||
except Exception as exc:
|
||||
log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr)
|
||||
|
||||
if changed:
|
||||
total_added += len(actual_tags_to_add)
|
||||
total_modified += 1
|
||||
|
||||
try:
|
||||
refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config)
|
||||
refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)]
|
||||
except Exception:
|
||||
refreshed_list = existing_tags_list
|
||||
|
||||
# Update the result's tags using canonical field
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.tags = refreshed_list
|
||||
elif isinstance(res, dict):
|
||||
res["tags"] = refreshed_list
|
||||
|
||||
final_title = _extract_title_tag(refreshed_list)
|
||||
_apply_title_to_result(res, final_title)
|
||||
|
||||
if final_title and (not original_title or final_title.lower() != original_title.lower()):
|
||||
_refresh_result_table_title(final_title, resolved_hash, raw_path)
|
||||
|
||||
if changed:
|
||||
_refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config)
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
log(
|
||||
f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET = Add_Tag()
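A standalone sketch of the -duplicate expansion implemented in run() above, useful for seeing the two accepted formats side by side; it mirrors the logic but is not imported by the cmdlet.

def expand_duplicate(duplicate_arg: str, existing_tags: list[str]) -> list[str]:
    """Return new namespace:value tags copied from an existing namespace."""
    parts = duplicate_arg.split(":")
    if len(parts) > 1:
        # Explicit form: "title:album,artist"
        source_ns = parts[0]
        targets = [t.strip() for t in parts[1].split(",") if t.strip()]
    else:
        # Inferred form: "title,album,artist" (first is source, rest are targets)
        parts2 = duplicate_arg.split(",")
        source_ns = parts2[0]
        targets = [t.strip() for t in parts2[1:] if t.strip()]
    existing_lower = {t.lower() for t in existing_tags}
    prefix = source_ns.lower() + ":"
    out: list[str] = []
    for t in existing_tags:
        if t.lower().startswith(prefix):
            value = t.split(":", 1)[1]
            out.extend(
                f"{ns}:{value}" for ns in targets
                if f"{ns}:{value}".lower() not in existing_lower
            )
    return out

# expand_duplicate("title:album,artist", ["title:Blue Train"])
# -> ["album:Blue Train", "artist:Blue Train"]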
85
cmdlet/add_url.py
Normal file
85
cmdlet/add_url.py
Normal file
@@ -0,0 +1,85 @@
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import sys
|
||||
|
||||
from . import register
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||
from SYS.logger import log
|
||||
from Store import Store
|
||||
|
||||
|
||||
class Add_Url(Cmdlet):
|
||||
"""Add URL associations to files via hash+store."""
|
||||
|
||||
NAME = "add-url"
|
||||
SUMMARY = "Associate a URL with a file"
|
||||
USAGE = "@1 | add-url <url>"
|
||||
ARGS = [
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("url", required=True, description="URL to associate"),
|
||||
]
|
||||
DETAIL = [
|
||||
"- Associates URL with file identified by hash+store",
|
||||
"- Multiple url can be comma-separated",
|
||||
]
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Add URL to file via hash+store backend."""
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Extract hash and store from result or args
|
||||
file_hash = parsed.get("hash") or get_field(result, "hash")
|
||||
store_name = parsed.get("store") or get_field(result, "store")
|
||||
url_arg = parsed.get("url")
|
||||
|
||||
if not file_hash:
|
||||
log("Error: No file hash provided")
|
||||
return 1
|
||||
|
||||
if not store_name:
|
||||
log("Error: No store name provided")
|
||||
return 1
|
||||
|
||||
if not url_arg:
|
||||
log("Error: No URL provided")
|
||||
return 1
|
||||
|
||||
# Normalize hash
|
||||
file_hash = normalize_hash(file_hash)
|
||||
if not file_hash:
|
||||
log("Error: Invalid hash format")
|
||||
return 1
|
||||
|
||||
# Parse url (comma-separated)
|
||||
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
|
||||
if not urls:
|
||||
log("Error: No valid url provided")
|
||||
return 1
|
||||
|
||||
# Get backend and add url
|
||||
try:
|
||||
storage = Store(config)
|
||||
backend = storage[store_name]
|
||||
|
||||
backend.add_url(file_hash, urls)
|
||||
for u in urls:
|
||||
ctx.emit(f"Added URL: {u}")
|
||||
|
||||
return 0
|
||||
|
||||
except KeyError:
|
||||
log(f"Error: Storage backend '{store_name}' not configured")
|
||||
return 1
|
||||
except Exception as exc:
|
||||
log(f"Error adding URL: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet
|
||||
register(["add-url", "add_url"])(Add_Url)
215
cmdlet/catalog.py
Normal file
215
cmdlet/catalog.py
Normal file
@@ -0,0 +1,215 @@
from __future__ import annotations
|
||||
|
||||
from importlib import import_module
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
from cmdlet import REGISTRY
|
||||
except Exception:
|
||||
REGISTRY = {} # type: ignore
|
||||
|
||||
try:
|
||||
from cmdnat import register_native_commands as _register_native_commands
|
||||
except Exception:
|
||||
_register_native_commands = None
|
||||
|
||||
|
||||
def ensure_registry_loaded() -> None:
|
||||
"""Ensure native commands are registered into REGISTRY (idempotent)."""
|
||||
if _register_native_commands and REGISTRY is not None:
|
||||
try:
|
||||
_register_native_commands(REGISTRY)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _normalize_mod_name(mod_name: str) -> str:
|
||||
"""Normalize a command/module name for import resolution."""
|
||||
normalized = (mod_name or "").strip()
|
||||
if normalized.startswith('.'):
|
||||
normalized = normalized.lstrip('.')
|
||||
normalized = normalized.replace('-', '_')
|
||||
return normalized
|
||||
|
||||
|
||||
def import_cmd_module(mod_name: str):
|
||||
"""Import a cmdlet/native module from cmdnat or cmdlet packages."""
|
||||
normalized = _normalize_mod_name(mod_name)
|
||||
if not normalized:
|
||||
return None
|
||||
for package in ("cmdnat", "cmdlet", None):
|
||||
try:
|
||||
qualified = f"{package}.{normalized}" if package else normalized
|
||||
return import_module(qualified)
|
||||
except ModuleNotFoundError:
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_arg(arg: Any) -> Dict[str, Any]:
|
||||
"""Convert a CmdletArg/dict into a plain metadata dict."""
|
||||
if isinstance(arg, dict):
|
||||
name = arg.get("name", "")
|
||||
return {
|
||||
"name": str(name).lstrip("-"),
|
||||
"type": arg.get("type", "string"),
|
||||
"required": bool(arg.get("required", False)),
|
||||
"description": arg.get("description", ""),
|
||||
"choices": arg.get("choices", []) or [],
|
||||
"alias": arg.get("alias", ""),
|
||||
"variadic": arg.get("variadic", False),
|
||||
}
|
||||
|
||||
name = getattr(arg, "name", "") or ""
|
||||
return {
|
||||
"name": str(name).lstrip("-"),
|
||||
"type": getattr(arg, "type", "string"),
|
||||
"required": bool(getattr(arg, "required", False)),
|
||||
"description": getattr(arg, "description", ""),
|
||||
"choices": getattr(arg, "choices", []) or [],
|
||||
"alias": getattr(arg, "alias", ""),
|
||||
"variadic": getattr(arg, "variadic", False),
|
||||
}
|
||||
|
||||
|
||||
def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return normalized metadata for a cmdlet, if available (aliases supported)."""
|
||||
ensure_registry_loaded()
|
||||
normalized = cmd_name.replace("-", "_")
|
||||
mod = import_cmd_module(normalized)
|
||||
data = getattr(mod, "CMDLET", None) if mod else None
|
||||
|
||||
if data is None:
|
||||
try:
|
||||
reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower())
|
||||
if reg_fn:
|
||||
owner_mod = getattr(reg_fn, "__module__", "")
|
||||
if owner_mod:
|
||||
owner = import_module(owner_mod)
|
||||
data = getattr(owner, "CMDLET", None)
|
||||
except Exception:
|
||||
data = None
|
||||
|
||||
if not data:
|
||||
return None
|
||||
|
||||
if hasattr(data, "to_dict"):
|
||||
base = data.to_dict()
|
||||
elif isinstance(data, dict):
|
||||
base = data
|
||||
else:
|
||||
base = {}
|
||||
|
||||
name = getattr(data, "name", base.get("name", cmd_name)) or cmd_name
|
||||
aliases = getattr(data, "alias", base.get("alias", [])) or []
|
||||
usage = getattr(data, "usage", base.get("usage", ""))
|
||||
summary = getattr(data, "summary", base.get("summary", ""))
|
||||
details = getattr(data, "detail", base.get("detail", [])) or []
|
||||
args_list = getattr(data, "arg", base.get("arg", [])) or []
|
||||
args = [_normalize_arg(arg) for arg in args_list]
|
||||
|
||||
return {
|
||||
"name": str(name).replace("_", "-").lower(),
|
||||
"aliases": [str(a).replace("_", "-").lower() for a in aliases if a],
|
||||
"usage": usage,
|
||||
"summary": summary,
|
||||
"details": details,
|
||||
"args": args,
|
||||
"raw": data,
|
||||
}
|
||||
|
||||
|
||||
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
|
||||
"""Collect metadata for all registered cmdlet keyed by canonical name."""
|
||||
ensure_registry_loaded()
|
||||
entries: Dict[str, Dict[str, Any]] = {}
|
||||
for reg_name in (REGISTRY or {}).keys():
|
||||
meta = get_cmdlet_metadata(reg_name)
|
||||
canonical = str(reg_name).replace("_", "-").lower()
|
||||
|
||||
if meta:
|
||||
canonical = meta.get("name", canonical)
|
||||
aliases = meta.get("aliases", [])
|
||||
base = entries.get(
|
||||
canonical,
|
||||
{
|
||||
"name": canonical,
|
||||
"aliases": [],
|
||||
"usage": "",
|
||||
"summary": "",
|
||||
"details": [],
|
||||
"args": [],
|
||||
"raw": meta.get("raw"),
|
||||
},
|
||||
)
|
||||
merged_aliases = set(base.get("aliases", [])) | set(aliases)
|
||||
if canonical != reg_name:
|
||||
merged_aliases.add(reg_name)
|
||||
base["aliases"] = sorted(a for a in merged_aliases if a and a != canonical)
|
||||
if not base.get("usage") and meta.get("usage"):
|
||||
base["usage"] = meta["usage"]
|
||||
if not base.get("summary") and meta.get("summary"):
|
||||
base["summary"] = meta["summary"]
|
||||
if not base.get("details") and meta.get("details"):
|
||||
base["details"] = meta["details"]
|
||||
if not base.get("args") and meta.get("args"):
|
||||
base["args"] = meta["args"]
|
||||
if not base.get("raw"):
|
||||
base["raw"] = meta.get("raw")
|
||||
entries[canonical] = base
|
||||
else:
|
||||
entries.setdefault(
|
||||
canonical,
|
||||
{"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def list_cmdlet_names(include_aliases: bool = True) -> List[str]:
|
||||
"""Return sorted cmdlet names (optionally including aliases)."""
|
||||
ensure_registry_loaded()
|
||||
entries = list_cmdlet_metadata()
|
||||
names = set()
|
||||
for meta in entries.values():
|
||||
names.add(meta.get("name", ""))
|
||||
if include_aliases:
|
||||
for alias in meta.get("aliases", []):
|
||||
names.add(alias)
|
||||
return sorted(n for n in names if n)
|
||||
|
||||
|
||||
def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
|
||||
"""Return flag variants for cmdlet arguments (e.g., -name/--name)."""
|
||||
meta = get_cmdlet_metadata(cmd_name)
|
||||
if not meta:
|
||||
return []
|
||||
|
||||
# Preserve the order that arguments are defined on the cmdlet (arg=[...]) so
|
||||
# completions feel stable and predictable.
|
||||
flags: List[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
for arg in meta.get("args", []):
|
||||
name = str(arg.get("name") or "").strip().lstrip("-")
|
||||
if not name:
|
||||
continue
|
||||
for candidate in (f"-{name}", f"--{name}"):
|
||||
if candidate not in seen:
|
||||
flags.append(candidate)
|
||||
seen.add(candidate)
|
||||
|
||||
return flags
|
||||
|
||||
|
||||
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
|
||||
"""Return declared choices for a cmdlet argument."""
|
||||
meta = get_cmdlet_metadata(cmd_name)
|
||||
if not meta:
|
||||
return []
|
||||
target = arg_name.lstrip("-")
|
||||
for arg in meta.get("args", []):
|
||||
if arg.get("name") == target:
|
||||
return list(arg.get("choices", []) or [])
|
||||
return []
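A brief consumer-side sketch (e.g. for shell completion); the cmdlet and flag names printed depend on whatever happens to be registered at runtime.

from cmdlet.catalog import (
    list_cmdlet_names,
    get_cmdlet_arg_flags,
    get_cmdlet_arg_choices,
)

for name in list_cmdlet_names(include_aliases=False):
    print(name, get_cmdlet_arg_flags(name))

# Declared choices for a specific argument (empty list when none are declared):
print(get_cmdlet_arg_choices("add-tag", "-store"))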
154
cmdlet/check_file_status.py
Normal file
154
cmdlet/check_file_status.py
Normal file
@@ -0,0 +1,154 @@
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import json
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="check-file-status",
|
||||
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
|
||||
usage="check-file-status [-hash <sha256>]",
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
],
|
||||
detail=[
|
||||
"- Shows whether file is active in Hydrus or marked as deleted",
|
||||
"- Detects corrupted data (e.g., comma-separated url)",
|
||||
"- Displays file metadata and service locations",
|
||||
"- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
override_hash: str | None = None
|
||||
i = 0
|
||||
while i < len(args):
|
||||
token = args[i]
|
||||
low = str(token).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||
override_hash = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
i += 1
|
||||
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||
|
||||
if not hash_hex:
|
||||
log("No hash provided and no result selected", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
result_data = client.fetch_file_metadata(hashes=[hash_hex])
|
||||
if not result_data.get("metadata"):
|
||||
log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_info = result_data["metadata"][0]
|
||||
|
||||
# Status summary
|
||||
is_deleted = file_info.get("is_deleted", False)
|
||||
is_local = file_info.get("is_local", False)
|
||||
is_trashed = file_info.get("is_trashed", False)
|
||||
|
||||
status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
|
||||
log(f"File status: {status_str}", file=sys.stderr)
|
||||
|
||||
# File info
|
||||
log(f"\n📄 File Information:", file=sys.stderr)
|
||||
log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
|
||||
log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
|
||||
log(f" MIME: {file_info['mime']}", file=sys.stderr)
|
||||
log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)
|
||||
|
||||
# Service status
|
||||
file_services = file_info.get("file_services", {})
|
||||
current_services = file_services.get("current", {})
|
||||
deleted_services = file_services.get("deleted", {})
|
||||
|
||||
if current_services:
|
||||
log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
|
||||
for service_key, service_info in current_services.items():
|
||||
sname = service_info.get("name", "unknown")
|
||||
stype = service_info.get("type_pretty", "unknown")
|
||||
log(f" - {sname} ({stype})", file=sys.stderr)
|
||||
|
||||
if deleted_services:
|
||||
log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
|
||||
for service_key, service_info in deleted_services.items():
|
||||
sname = service_info.get("name", "unknown")
|
||||
stype = service_info.get("type_pretty", "unknown")
|
||||
time_deleted = service_info.get("time_deleted", "?")
|
||||
log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)
|
||||
|
||||
        # URL check
        urls = file_info.get("url", [])
        log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr)

        corrupted_count = 0
        for idx, url in enumerate(urls, 1):
            if "," in url:
                corrupted_count += 1
                log(f"  [{idx}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
            else:
                log(f"  [{idx}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
|
||||
|
||||
if corrupted_count > 0:
|
||||
log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)
|
||||
|
||||
# Tags
|
||||
tags_dict = file_info.get("tags", {})
|
||||
total_tags = 0
|
||||
for service_key, service_data in tags_dict.items():
|
||||
service_name = service_data.get("name", "unknown")
|
||||
display_tags = service_data.get("display_tags", {}).get("0", [])
|
||||
total_tags += len(display_tags)
|
||||
|
||||
if total_tags > 0:
|
||||
log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
|
||||
for service_key, service_data in tags_dict.items():
|
||||
display_tags = service_data.get("display_tags", {}).get("0", [])
|
||||
if display_tags:
|
||||
service_name = service_data.get("name", "unknown")
|
||||
log(f" {service_name}:", file=sys.stderr)
|
||||
for tag in display_tags[:5]: # Show first 5
|
||||
log(f" - {tag}", file=sys.stderr)
|
||||
if len(display_tags) > 5:
|
||||
log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)
|
||||
|
||||
log("\n", file=sys.stderr)
|
||||
return 0
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Error checking file status: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.alias = ["check-status", "file-status", "status"]
|
||||
CMDLET.register()
|
||||
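Illustrative sketch of driving the cmdlet above through the package registry, assuming the registered entry is callable with the (result, args, config) signature used throughout these modules; the 64-character hash and the empty config dict are placeholders, and a real call needs whatever Hydrus credentials hydrus_wrapper.get_client expects in config.

from cmdlet import get

check_file_status = get("check-file-status")
if check_file_status is not None:
    # Placeholder hash; use a real SHA256 known to the Hydrus client.
    exit_code = check_file_status(None, ["-hash", "0" * 64], {})
    print("check-file-status exit code:", exit_code)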
105
cmdlet/cleanup.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
|
||||
|
||||
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
|
||||
then emits the remaining non-temporary results for further pipeline stages.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import json
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help
|
||||
import models
|
||||
import pipeline as pipeline_context
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Remove temporary files from pipeline results.
|
||||
|
||||
Accepts:
|
||||
- Single result object with is_temp field
|
||||
- List of result objects to clean up
|
||||
|
||||
Process:
|
||||
- Filters results by is_temp=True
|
||||
- Deletes those files from disk
|
||||
- Emits only non-temporary results
|
||||
|
||||
Typical pipeline usage:
|
||||
download-data url | screen-shot | add-tag -store local "tag" --all | cleanup
|
||||
"""
|
||||
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Normalize input to list
|
||||
results = normalize_result_input(result)
|
||||
|
||||
if not results:
|
||||
log("[cleanup] No results to process", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Separate temporary and permanent results
|
||||
temp_results = pipeline_context.filter_results_by_temp(results, include_temp=True)
|
||||
perm_results = pipeline_context.filter_results_by_temp(results, include_temp=False)
|
||||
|
||||
# Delete temporary files
|
||||
deleted_count = 0
|
||||
for temp_result in temp_results:
|
||||
try:
|
||||
file_path = get_pipe_object_path(temp_result)
|
||||
|
||||
if file_path:
|
||||
path_obj = Path(file_path)
|
||||
if path_obj.exists():
|
||||
# Delete the file
|
||||
path_obj.unlink()
|
||||
log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
|
||||
deleted_count += 1
|
||||
|
||||
# Clean up any associated sidecar files
|
||||
for ext in ['.tag', '.metadata']:
|
||||
sidecar = path_obj.parent / (path_obj.name + ext)
|
||||
if sidecar.exists():
|
||||
try:
|
||||
sidecar.unlink()
|
||||
log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
|
||||
else:
|
||||
log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
|
||||
|
||||
# Log summary
|
||||
log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
|
||||
|
||||
# Emit permanent results for downstream processing
|
||||
for perm_result in perm_results:
|
||||
pipeline_context.emit(perm_result)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="cleanup",
|
||||
summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
|
||||
usage="cleanup",
|
||||
arg=[],
|
||||
detail=[
|
||||
"- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
|
||||
"- Deletes files marked with is_temp=True from disk",
|
||||
"- Also cleans up associated sidecar files (.tag, .metadata)",
|
||||
"- Emits only non-temporary results for further processing",
|
||||
"- Typical usage at end of pipeline: ... | add-tag -store local \"tag\" --all | cleanup",
|
||||
"- Exit code 0 if cleanup successful, 1 if no results to process",
|
||||
],
|
||||
exec=_run,
|
||||
).register()
|
||||
|
||||
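A minimal usage sketch for cleanup, assuming dict results that expose the path and is_temp fields the shared pipeline helpers look for; both file paths are placeholders.

from cmdlet import get

cleanup = get("cleanup")
results = [
    {"path": "/tmp/screenshot_0001.png", "is_temp": True},   # deleted from disk
    {"path": "/library/report.pdf", "is_temp": False},       # re-emitted downstream
]
if cleanup is not None:
    cleanup(results, [], {})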
242
cmdlet/delete_file.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from SYS.logger import debug, log
|
||||
from Store.Folder import Folder
|
||||
from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
import pipeline as ctx
|
||||
|
||||
|
||||
class Delete_File(Cmdlet):
|
||||
"""Class-based delete-file cmdlet with self-registration."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="delete-file",
|
||||
summary="Delete a file locally and/or from Hydrus, including database entries.",
|
||||
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
|
||||
alias=["del-file"],
|
||||
arg=[
|
||||
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
|
||||
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
|
||||
CmdletArg("reason", description="Optional reason for deletion (free text)."),
|
||||
],
|
||||
detail=[
|
||||
"Default removes both the local file and Hydrus file.",
|
||||
"Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
|
||||
"Database entries are automatically cleaned up for local files.",
|
||||
"Any remaining arguments are treated as the Hydrus reason text.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def _process_single_item(self, item: Any, override_hash: str | None, conserve: str | None,
|
||||
lib_root: str | None, reason: str, config: Dict[str, Any]) -> bool:
|
||||
"""Process deletion for a single item."""
|
||||
# Handle item as either dict or object
|
||||
if isinstance(item, dict):
|
||||
hash_hex_raw = item.get("hash_hex") or item.get("hash")
|
||||
target = item.get("target") or item.get("file_path") or item.get("path")
|
||||
else:
|
||||
hash_hex_raw = get_field(item, "hash_hex") or get_field(item, "hash")
|
||||
target = get_field(item, "target") or get_field(item, "file_path") or get_field(item, "path")
|
||||
|
||||
store = None
|
||||
if isinstance(item, dict):
|
||||
store = item.get("store")
|
||||
else:
|
||||
store = get_field(item, "store")
|
||||
|
||||
store_lower = str(store).lower() if store else ""
|
||||
is_hydrus_store = bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"})
|
||||
|
||||
# For Hydrus files, the target IS the hash
|
||||
if is_hydrus_store and not hash_hex_raw:
|
||||
hash_hex_raw = target
|
||||
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
|
||||
|
||||
local_deleted = False
|
||||
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
|
||||
|
||||
if conserve != "local" and local_target:
|
||||
path = Path(str(target))
|
||||
|
||||
# If lib_root is provided and this is from a folder store, use the Folder class
|
||||
if lib_root:
|
||||
try:
|
||||
folder = Folder(Path(lib_root), name=store or "local")
|
||||
if folder.delete_file(str(path)):
|
||||
local_deleted = True
|
||||
ctx.emit(f"Removed file: {path.name}")
|
||||
log(f"Deleted: {path.name}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
debug(f"Folder.delete_file failed: {exc}", file=sys.stderr)
|
||||
# Fallback to manual deletion
|
||||
try:
|
||||
if path.exists() and path.is_file():
|
||||
path.unlink()
|
||||
local_deleted = True
|
||||
ctx.emit(f"Removed local file: {path}")
|
||||
log(f"Deleted: {path.name}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"Local delete failed: {exc}", file=sys.stderr)
|
||||
else:
|
||||
# No lib_root, just delete the file
|
||||
try:
|
||||
if path.exists() and path.is_file():
|
||||
path.unlink()
|
||||
local_deleted = True
|
||||
ctx.emit(f"Removed local file: {path}")
|
||||
log(f"Deleted: {path.name}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"Local delete failed: {exc}", file=sys.stderr)
|
||||
|
||||
# Remove common sidecars regardless of file removal success
|
||||
for sidecar in (
|
||||
path.with_suffix(".tag"),
|
||||
path.with_suffix(".metadata"),
|
||||
path.with_suffix(".notes"),
|
||||
):
|
||||
try:
|
||||
if sidecar.exists() and sidecar.is_file():
|
||||
sidecar.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
hydrus_deleted = False
|
||||
should_try_hydrus = is_hydrus_store
|
||||
|
||||
# If conserve is set to hydrus, definitely don't delete
|
||||
if conserve == "hydrus":
|
||||
should_try_hydrus = False
|
||||
|
||||
if should_try_hydrus and hash_hex:
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
if not local_deleted:
|
||||
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||
return False
|
||||
else:
|
||||
if client is None:
|
||||
if not local_deleted:
|
||||
log("Hydrus client unavailable", file=sys.stderr)
|
||||
return False
|
||||
else:
|
||||
payload: Dict[str, Any] = {"hashes": [hash_hex]}
|
||||
if reason:
|
||||
payload["reason"] = reason
|
||||
try:
|
||||
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
|
||||
hydrus_deleted = True
|
||||
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||
debug(f"Deleted from Hydrus: {preview}…", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
# If it's not in Hydrus (e.g. 404 or similar), that's fine
|
||||
if not local_deleted:
|
||||
return False
|
||||
|
||||
if hydrus_deleted and hash_hex:
|
||||
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||
if reason:
|
||||
ctx.emit(f"Deleted {preview} (reason: {reason}).")
|
||||
else:
|
||||
ctx.emit(f"Deleted {preview}.")
|
||||
|
||||
if hydrus_deleted or local_deleted:
|
||||
return True
|
||||
|
||||
log("Selected result has neither Hydrus hash nor local file target")
|
||||
return False
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Execute delete-file command."""
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
override_hash: str | None = None
|
||||
conserve: str | None = None
|
||||
lib_root: str | None = None
|
||||
reason_tokens: list[str] = []
|
||||
i = 0
|
||||
|
||||
while i < len(args):
|
||||
token = args[i]
|
||||
low = str(token).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||
override_hash = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
|
||||
value = str(args[i + 1]).strip().lower()
|
||||
if value in {"local", "hydrus"}:
|
||||
conserve = value
|
||||
i += 2
|
||||
continue
|
||||
if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args):
|
||||
lib_root = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
reason_tokens.append(token)
|
||||
i += 1
|
||||
|
||||
# If no lib_root provided, try to get the first folder store from config
|
||||
if not lib_root:
|
||||
try:
|
||||
storage_config = config.get("storage", {})
|
||||
folder_config = storage_config.get("folder", {})
|
||||
if folder_config:
|
||||
# Get first folder store path
|
||||
for store_name, store_config in folder_config.items():
|
||||
if isinstance(store_config, dict):
|
||||
path = store_config.get("path")
|
||||
if path:
|
||||
lib_root = path
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
reason = " ".join(token for token in reason_tokens if str(token).strip()).strip()
|
||||
|
||||
items = []
|
||||
if isinstance(result, list):
|
||||
items = result
|
||||
elif result:
|
||||
items = [result]
|
||||
|
||||
if not items:
|
||||
log("No items to delete", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
success_count = 0
|
||||
for item in items:
|
||||
if self._process_single_item(item, override_hash, conserve, lib_root, reason, config):
|
||||
success_count += 1
|
||||
|
||||
if success_count > 0:
|
||||
# Clear cached tables/items so deleted entries are not redisplayed
|
||||
try:
|
||||
ctx.set_last_result_table_overlay(None, None, None)
|
||||
ctx.set_last_result_table(None, [])
|
||||
ctx.set_last_result_items_only([])
|
||||
ctx.set_current_stage_table(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return 0 if success_count > 0 else 1
|
||||
|
||||
|
||||
# Instantiate and register the cmdlet
|
||||
Delete_File()
|
||||
|
||||
|
||||
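A hedged sketch of delete-file on a single local result; the path, store name, and hash are placeholders, and -conserve hydrus keeps any Hydrus copy while removing the local file and its sidecars. The free-text tail becomes the deletion reason.

from cmdlet import get

delete_file = get("delete-file")
item = {"store": "local", "path": "/library/old_clip.mp4", "hash": "0" * 64}
if delete_file is not None:
    delete_file(item, ["-conserve", "hydrus", "superseded by re-encode"], {})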
140
cmdlet/delete_note.py
Normal file
@@ -0,0 +1,140 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_cmdlet_args,
|
||||
normalize_result_input,
|
||||
get_field,
|
||||
should_show_help,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
class Delete_Note(Cmdlet):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="delete-note",
|
||||
summary="Delete a named note from a file in a store.",
|
||||
usage="delete-note -store <store> [-hash <sha256>] <name>",
|
||||
alias=["del-note"],
|
||||
arg=[
|
||||
SharedArgs.STORE,
|
||||
SharedArgs.HASH,
|
||||
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
|
||||
],
|
||||
detail=[
|
||||
"- Deletes the named note from the selected store backend.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
try:
|
||||
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
|
||||
except Exception:
|
||||
pass
|
||||
self.register()
|
||||
|
||||
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
|
||||
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
|
||||
if resolved:
|
||||
return resolved
|
||||
if raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
return stem.lower()
|
||||
if p.exists() and p.is_file():
|
||||
return sha256_file(p)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
store_override = parsed.get("store")
|
||||
hash_override = parsed.get("hash")
|
||||
note_name_override = str(parsed.get("name") or "").strip()
|
||||
# Allow piping note rows from get-note: the selected item carries note_name.
|
||||
inferred_note_name = str(get_field(result, "note_name") or "").strip()
|
||||
if not note_name_override and not inferred_note_name:
|
||||
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
results = normalize_result_input(result)
|
||||
if not results:
|
||||
if store_override and normalize_hash(hash_override):
|
||||
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
|
||||
else:
|
||||
log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_registry = Store(config)
|
||||
deleted = 0
|
||||
|
||||
for res in results:
|
||||
if not isinstance(res, dict):
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
# Resolve which note name to delete for this item.
|
||||
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
|
||||
if not note_name:
|
||||
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_name = str(store_override or res.get("store") or "").strip()
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
|
||||
if not store_name:
|
||||
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = self._resolve_hash(
|
||||
raw_hash=str(raw_hash) if raw_hash else None,
|
||||
raw_path=str(raw_path) if raw_path else None,
|
||||
override_hash=str(hash_override) if hash_override else None,
|
||||
)
|
||||
if not resolved_hash:
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = store_registry[store_name]
|
||||
except Exception as exc:
|
||||
log(f"[delete_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
ok = False
|
||||
try:
|
||||
ok = bool(backend.delete_note(resolved_hash, note_name, config=config))
|
||||
except Exception as exc:
|
||||
log(f"[delete_note] Error: Failed to delete note: {exc}", file=sys.stderr)
|
||||
ok = False
|
||||
|
||||
if ok:
|
||||
deleted += 1
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
log(f"[delete_note] Deleted note on {deleted} item(s)", file=sys.stderr)
|
||||
return 0 if deleted > 0 else 1
|
||||
|
||||
|
||||
CMDLET = Delete_Note()
|
||||
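Sketch of deleting a note by name when nothing is piped, assuming a store named "local" is configured (config is left empty here as a placeholder) and using a placeholder hash; piping a note row from get-note supplies note_name instead of the positional argument.

from cmdlet import get

delete_note = get("delete-note")
if delete_note is not None:
    delete_note(None, ["-store", "local", "-hash", "0" * 64, "comment"], {})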
216
cmdlet/delete_relationship.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""Delete file relationships."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help
|
||||
from API.folder import LocalLibrarySearchOptimizer
|
||||
from config import get_local_storage_path
|
||||
|
||||
|
||||
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""If the current subject matches the target, refresh relationships via get-relationship."""
|
||||
try:
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject is None:
|
||||
return
|
||||
|
||||
def norm(val: Any) -> str:
|
||||
return str(val).lower()
|
||||
|
||||
target_hashes = [norm(v) for v in [target_hash, other] if v]
|
||||
target_paths = [norm(v) for v in [target_path, other] if v]
|
||||
|
||||
subj_hashes: list[str] = []
|
||||
subj_paths: list[str] = []
|
||||
for field in ("hydrus_hash", "hash", "hash_hex", "file_hash"):
|
||||
val = get_field(subject, field)
|
||||
if val:
|
||||
subj_hashes.append(norm(val))
|
||||
for field in ("file_path", "path", "target"):
|
||||
val = get_field(subject, field)
|
||||
if val:
|
||||
subj_paths.append(norm(val))
|
||||
|
||||
is_match = False
|
||||
if target_hashes and any(h in subj_hashes for h in target_hashes):
|
||||
is_match = True
|
||||
if target_paths and any(p in subj_paths for p in target_paths):
|
||||
is_match = True
|
||||
if not is_match:
|
||||
return
|
||||
|
||||
refresh_args: list[str] = []
|
||||
if target_hash:
|
||||
refresh_args.extend(["-hash", target_hash])
|
||||
|
||||
cmd = get_cmdlet("get-relationship")
|
||||
if not cmd:
|
||||
return
|
||||
cmd(subject, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Delete relationships from files.
|
||||
|
||||
Args:
|
||||
result: Input result(s) from previous cmdlet
|
||||
args: Command arguments
|
||||
config: CLI configuration
|
||||
|
||||
Returns:
|
||||
Exit code (0 = success)
|
||||
"""
|
||||
try:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
parsed_args = parse_cmdlet_args(args, CMDLET)
|
||||
delete_all_flag = parsed_args.get("all", False)
|
||||
rel_type_filter = parsed_args.get("type")
|
||||
|
||||
# Get storage path
|
||||
local_storage_path = get_local_storage_path(config)
|
||||
if not local_storage_path:
|
||||
log("Local storage path not configured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Normalize input
|
||||
results = normalize_result_input(result)
|
||||
|
||||
if not results:
|
||||
log("No results to process", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
deleted_count = 0
|
||||
|
||||
for single_result in results:
|
||||
try:
|
||||
# Get file path from result
|
||||
file_path_from_result = (
|
||||
get_field(single_result, "file_path")
|
||||
or get_field(single_result, "path")
|
||||
or get_field(single_result, "target")
|
||||
or (str(single_result) if not isinstance(single_result, dict) else None)
|
||||
)
|
||||
|
||||
if not file_path_from_result:
|
||||
log("Could not extract file path from result", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_path_obj = Path(str(file_path_from_result))
|
||||
|
||||
if not file_path_obj.exists():
|
||||
log(f"File not found: {file_path_obj}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
with LocalLibrarySearchOptimizer(local_storage_path) as db:
|
||||
file_id = db.db.get_file_id(file_path_obj)
|
||||
|
||||
if not file_id:
|
||||
log(f"File not in database: {file_path_obj.name}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
# Get current relationships
|
||||
cursor = db.db.connection.cursor()
|
||||
cursor.execute("""
|
||||
SELECT relationships FROM metadata WHERE file_id = ?
|
||||
""", (file_id,))
|
||||
|
||||
row = cursor.fetchone()
|
||||
if not row:
|
||||
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
relationships_str = row[0]
|
||||
if not relationships_str:
|
||||
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
try:
|
||||
relationships = json.loads(relationships_str)
|
||||
except json.JSONDecodeError:
|
||||
log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
if not isinstance(relationships, dict):
|
||||
relationships = {}
|
||||
|
||||
# Determine what to delete
|
||||
if delete_all_flag:
|
||||
# Delete all relationships
|
||||
deleted_types = list(relationships.keys())
|
||||
relationships = {}
|
||||
log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr)
|
||||
elif rel_type_filter:
|
||||
# Delete specific type
|
||||
if rel_type_filter in relationships:
|
||||
deleted_count_for_type = len(relationships[rel_type_filter])
|
||||
del relationships[rel_type_filter]
|
||||
log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr)
|
||||
else:
|
||||
log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr)
|
||||
continue
|
||||
else:
|
||||
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Save updated relationships
|
||||
cursor.execute("""
|
||||
INSERT INTO metadata (file_id, relationships)
|
||||
VALUES (?, ?)
|
||||
ON CONFLICT(file_id) DO UPDATE SET
|
||||
relationships = excluded.relationships,
|
||||
time_modified = CURRENT_TIMESTAMP
|
||||
""", (file_id, json.dumps(relationships) if relationships else None))
|
||||
|
||||
db.db.connection.commit()
|
||||
_refresh_relationship_view_if_current(None, str(file_path_obj), None, config)
|
||||
deleted_count += 1
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Error deleting relationship: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
|
||||
return 0
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Error in delete-relationship: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="delete-relationship",
|
||||
summary="Remove relationships from files.",
|
||||
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
|
||||
arg=[
|
||||
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
|
||||
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
|
||||
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
|
||||
],
|
||||
detail=[
|
||||
"- Delete all relationships: pipe files | delete-relationship --all",
|
||||
"- Delete specific type: pipe files | delete-relationship -type alt",
|
||||
"- Delete all from file: delete-relationship -path <file> --all",
|
||||
],
|
||||
)
|
||||
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
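Illustrative call for delete-relationship, assuming config carries the same storage.folder layout that delete-file reads for its lib-root fallback and that the file is already indexed in the local library database; the paths are placeholders.

from cmdlet import get

delete_relationship = get("delete-relationship")
config = {"storage": {"folder": {"local": {"path": "/library"}}}}  # assumed config shape
item = {"path": "/library/alt_take.mkv"}
if delete_relationship is not None:
    # Remove only the 'alt' relationships recorded for this file.
    delete_relationship(item, ["-type", "alt"], config)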
335
cmdlet/delete_tag.py
Normal file
@@ -0,0 +1,335 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
from pathlib import Path
|
||||
import json
|
||||
import sys
|
||||
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field
|
||||
from SYS.logger import debug, log
|
||||
from Store import Store
|
||||
|
||||
|
||||
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
|
||||
"""If the current subject matches the target, refresh tags via get-tag."""
|
||||
try:
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
get_tag = None
|
||||
try:
|
||||
get_tag = get_cmdlet("get-tag")
|
||||
except Exception:
|
||||
get_tag = None
|
||||
if not callable(get_tag):
|
||||
return
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject is None:
|
||||
return
|
||||
|
||||
def norm(val: Any) -> str:
|
||||
return str(val).lower()
|
||||
|
||||
target_hash = norm(file_hash) if file_hash else None
|
||||
target_path = norm(path) if path else None
|
||||
|
||||
subj_hashes: list[str] = []
|
||||
subj_paths: list[str] = []
|
||||
if isinstance(subject, dict):
|
||||
subj_hashes = [norm(v) for v in [subject.get("hash")] if v]
|
||||
subj_paths = [norm(v) for v in [subject.get("path"), subject.get("target")] if v]
|
||||
else:
|
||||
subj_hashes = [norm(get_field(subject, f)) for f in ("hash",) if get_field(subject, f)]
|
||||
subj_paths = [norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)]
|
||||
|
||||
is_match = False
|
||||
if target_hash and target_hash in subj_hashes:
|
||||
is_match = True
|
||||
if target_path and target_path in subj_paths:
|
||||
is_match = True
|
||||
if not is_match:
|
||||
return
|
||||
|
||||
refresh_args: list[str] = []
|
||||
if file_hash:
|
||||
refresh_args.extend(["-hash", file_hash])
|
||||
if store_name:
|
||||
refresh_args.extend(["-store", store_name])
|
||||
get_tag(subject, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="delete-tag",
|
||||
summary="Remove tags from a file in a store.",
|
||||
usage="delete-tag -store <store> [-hash <sha256>] <tag>[,<tag>...]",
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
|
||||
],
|
||||
detail=[
|
||||
"- Requires a Hydrus file (hash present) or explicit -hash override.",
|
||||
"- Multiple tags can be comma-separated or space-separated.",
|
||||
],
|
||||
)
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
|
||||
has_piped_tag = (result and hasattr(result, '__class__') and
|
||||
result.__class__.__name__ == 'TagItem' and
|
||||
hasattr(result, 'tag_name'))
|
||||
|
||||
# Check if we have a piped list of TagItems (from @N selection)
|
||||
has_piped_tag_list = (isinstance(result, list) and result and
|
||||
hasattr(result[0], '__class__') and
|
||||
result[0].__class__.__name__ == 'TagItem')
|
||||
|
||||
if not args and not has_piped_tag and not has_piped_tag_list:
|
||||
log("Requires at least one tag argument")
|
||||
return 1
|
||||
|
||||
# Parse -hash override and collect tags from remaining args
|
||||
override_hash: str | None = None
|
||||
override_store: str | None = None
|
||||
rest: list[str] = []
|
||||
i = 0
|
||||
while i < len(args):
|
||||
a = args[i]
|
||||
low = str(a).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||
override_hash = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
if low in {"-store", "--store", "store"} and i + 1 < len(args):
|
||||
override_store = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
rest.append(a)
|
||||
i += 1
|
||||
|
||||
# Check if first argument is @ syntax (result table selection)
|
||||
# @5 or @{2,5,8} to delete tags from ResultTable by index
|
||||
tags_from_at_syntax = []
|
||||
hash_from_at_syntax = None
|
||||
path_from_at_syntax = None
|
||||
store_from_at_syntax = None
|
||||
|
||||
if rest and str(rest[0]).startswith("@"):
|
||||
selector_arg = str(rest[0])
|
||||
pipe_selector = selector_arg[1:].strip()
|
||||
# Parse @N or @{N,M,K} syntax
|
||||
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
|
||||
# @{2,5,8}
|
||||
pipe_selector = pipe_selector[1:-1]
|
||||
try:
|
||||
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
|
||||
except ValueError:
|
||||
log("Invalid selection syntax. Use @2 or @{2,5,8}")
|
||||
return 1
|
||||
|
||||
# Get the last ResultTable from pipeline context
|
||||
try:
|
||||
last_table = ctx._LAST_RESULT_TABLE
|
||||
if last_table:
|
||||
# Extract tags from selected rows
|
||||
for idx in indices:
|
||||
if 1 <= idx <= len(last_table.rows):
|
||||
# Look for a TagItem in _LAST_RESULT_ITEMS by index
|
||||
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
|
||||
item = ctx._LAST_RESULT_ITEMS[idx - 1]
|
||||
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
|
||||
tag_name = get_field(item, 'tag_name')
|
||||
if tag_name:
|
||||
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
|
||||
tags_from_at_syntax.append(tag_name)
|
||||
# Also get hash from first item for consistency
|
||||
if not hash_from_at_syntax:
|
||||
hash_from_at_syntax = get_field(item, 'hash')
|
||||
if not path_from_at_syntax:
|
||||
path_from_at_syntax = get_field(item, 'path')
|
||||
if not store_from_at_syntax:
|
||||
store_from_at_syntax = get_field(item, 'store')
|
||||
|
||||
if not tags_from_at_syntax:
|
||||
log(f"No tags found at indices: {indices}")
|
||||
return 1
|
||||
else:
|
||||
log("No ResultTable in pipeline (use @ after running get-tag)")
|
||||
return 1
|
||||
except Exception as exc:
|
||||
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
|
||||
return 1
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
# If we have a list of TagItems, we want to process ALL of them if no args provided
|
||||
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
|
||||
# Also handles: delete-tag @1,2 (where we want to delete tags from multiple files)
|
||||
|
||||
# Normalize result to a list for processing
|
||||
items_to_process = []
|
||||
if isinstance(result, list):
|
||||
items_to_process = result
|
||||
elif result:
|
||||
items_to_process = [result]
|
||||
|
||||
# If we have TagItems and no args, we are deleting the tags themselves
|
||||
# If we have Files (or other objects) and args, we are deleting tags FROM those files
|
||||
|
||||
# Check if we are in "delete selected tags" mode (TagItems)
|
||||
is_tag_item_mode = (items_to_process and hasattr(items_to_process[0], '__class__') and
|
||||
items_to_process[0].__class__.__name__ == 'TagItem')
|
||||
|
||||
if is_tag_item_mode:
|
||||
# Collect all tags to delete from the TagItems
|
||||
# Group by hash/file_path to batch operations if needed, or just process one by one
|
||||
# For simplicity, we'll process one by one or group by file
|
||||
pass
|
||||
else:
|
||||
# "Delete tags from files" mode
|
||||
# We need args (tags to delete)
|
||||
if not args and not tags_from_at_syntax:
|
||||
log("Requires at least one tag argument when deleting from files")
|
||||
return 1
|
||||
|
||||
# Process each item
|
||||
success_count = 0
|
||||
|
||||
    # Tags selected via @ syntax (e.g. delete-tag @{1,2}) were already extracted above,
    # together with the hash/path/store of the file they belong to.
|
||||
|
||||
if tags_from_at_syntax:
|
||||
        # @ selection of tags: run the deletion once using the extracted tags and file info.
|
||||
|
||||
tags = tags_from_at_syntax
|
||||
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
|
||||
path = path_from_at_syntax
|
||||
store_name = override_store or store_from_at_syntax
|
||||
|
||||
if _process_deletion(tags, file_hash, path, store_name, config):
|
||||
success_count += 1
|
||||
|
||||
else:
|
||||
# Process items from pipe (or single result)
|
||||
# If args are provided, they are the tags to delete from EACH item
|
||||
# If items are TagItems and no args, the tag to delete is the item itself
|
||||
|
||||
tags_arg = parse_tag_arguments(rest)
|
||||
|
||||
for item in items_to_process:
|
||||
tags_to_delete = []
|
||||
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
|
||||
item_path = (
|
||||
get_field(item, "path")
|
||||
or get_field(item, "target")
|
||||
)
|
||||
item_store = override_store or get_field(item, "store")
|
||||
|
||||
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
|
||||
# It's a TagItem
|
||||
if tags_arg:
|
||||
                    # A piped TagItem together with explicit tag arguments: the arguments
                    # take precedence over the tag named by the item.
|
||||
tags_to_delete = tags_arg
|
||||
else:
|
||||
tag_name = get_field(item, 'tag_name')
|
||||
if tag_name:
|
||||
tags_to_delete = [tag_name]
|
||||
else:
|
||||
# It's a File or other object
|
||||
if tags_arg:
|
||||
tags_to_delete = tags_arg
|
||||
else:
|
||||
                    # No tags were provided for this non-TagItem result; nothing to delete.
|
||||
continue
|
||||
|
||||
if tags_to_delete:
|
||||
if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config):
|
||||
success_count += 1
|
||||
|
||||
if success_count > 0:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, store_name: str | None, config: Dict[str, Any]) -> bool:
|
||||
"""Helper to execute the deletion logic for a single target."""
|
||||
|
||||
if not tags:
|
||||
return False
|
||||
|
||||
if not store_name:
|
||||
log("Store is required (use -store or pipe a result with store)", file=sys.stderr)
|
||||
return False
|
||||
|
||||
resolved_hash = normalize_hash(file_hash) if file_hash else None
|
||||
if not resolved_hash and path:
|
||||
try:
|
||||
from SYS.utils import sha256_file
|
||||
resolved_hash = sha256_file(Path(path))
|
||||
except Exception:
|
||||
resolved_hash = None
|
||||
|
||||
if not resolved_hash:
|
||||
log("Item does not include a usable hash (and hash could not be derived from path)", file=sys.stderr)
|
||||
return False
|
||||
|
||||
def _fetch_existing_tags() -> list[str]:
|
||||
try:
|
||||
backend = Store(config)[store_name]
|
||||
existing, _src = backend.get_tag(resolved_hash, config=config)
|
||||
return list(existing or [])
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
# Safety: only block if this deletion would remove the final title tag
|
||||
title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")]
|
||||
if title_tags:
|
||||
existing_tags = _fetch_existing_tags()
|
||||
current_titles = [t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")]
|
||||
del_title_set = {t.lower() for t in title_tags}
|
||||
remaining_titles = [t for t in current_titles if t.lower() not in del_title_set]
|
||||
if current_titles and not remaining_titles:
|
||||
log("Cannot delete the last title: tag. Add a replacement title first (add-tags \"title:new title\").", file=sys.stderr)
|
||||
return False
|
||||
|
||||
try:
|
||||
backend = Store(config)[store_name]
|
||||
ok = backend.delete_tag(resolved_hash, list(tags), config=config)
|
||||
if ok:
|
||||
preview = resolved_hash[:12] + ('…' if len(resolved_hash) > 12 else '')
|
||||
debug(f"Removed {len(tags)} tag(s) from {preview} via store '{store_name}'.")
|
||||
_refresh_tag_view_if_current(resolved_hash, store_name, path, config)
|
||||
return True
|
||||
return False
|
||||
except Exception as exc:
|
||||
log(f"del-tag failed: {exc}")
|
||||
return False
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
|
||||
|
||||
|
||||
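A sketch of the "delete tags from a file" mode, assuming a configured store named "local" and a placeholder hash; tags may be comma- or space-separated, and the last remaining title: tag is protected.

from cmdlet import get

delete_tag = get("delete-tag")
file_item = {"store": "local", "hash": "0" * 64, "path": "/library/track.flac"}
if delete_tag is not None:
    # Remove two explicit tags from the piped file.
    delete_tag(file_item, ["genre:rock,source:rip"], {})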
82
cmdlet/delete_url.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import sys
|
||||
|
||||
from . import register
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||
from SYS.logger import log
|
||||
from Store import Store
|
||||
|
||||
|
||||
class Delete_Url(Cmdlet):
|
||||
"""Delete URL associations from files via hash+store."""
|
||||
|
||||
NAME = "delete-url"
|
||||
SUMMARY = "Remove a URL association from a file"
|
||||
USAGE = "@1 | delete-url <url>"
|
||||
ARGS = [
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("url", required=True, description="URL to remove"),
|
||||
]
|
||||
DETAIL = [
|
||||
"- Removes URL association from file identified by hash+store",
|
||||
"- Multiple url can be comma-separated",
|
||||
]
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Delete URL from file via hash+store backend."""
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Extract hash and store from result or args
|
||||
file_hash = parsed.get("hash") or get_field(result, "hash")
|
||||
store_name = parsed.get("store") or get_field(result, "store")
|
||||
url_arg = parsed.get("url")
|
||||
|
||||
if not file_hash:
|
||||
log("Error: No file hash provided")
|
||||
return 1
|
||||
|
||||
if not store_name:
|
||||
log("Error: No store name provided")
|
||||
return 1
|
||||
|
||||
if not url_arg:
|
||||
log("Error: No URL provided")
|
||||
return 1
|
||||
|
||||
# Normalize hash
|
||||
file_hash = normalize_hash(file_hash)
|
||||
if not file_hash:
|
||||
log("Error: Invalid hash format")
|
||||
return 1
|
||||
|
||||
        # Parse URLs (comma-separated)
        urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
        if not urls:
            log("Error: No valid URLs provided")
            return 1

        # Get the storage backend and delete the URLs
|
||||
try:
|
||||
storage = Store(config)
|
||||
backend = storage[store_name]
|
||||
|
||||
backend.delete_url(file_hash, urls)
|
||||
for u in urls:
|
||||
ctx.emit(f"Deleted URL: {u}")
|
||||
|
||||
return 0
|
||||
|
||||
except KeyError:
|
||||
log(f"Error: Storage backend '{store_name}' not configured")
|
||||
return 1
|
||||
except Exception as exc:
|
||||
log(f"Error deleting URL: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet
|
||||
register(["delete-url", "del-url", "delete_url"])(Delete_Url)
|
||||
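Unlike its siblings, delete-url registers the class object itself, so this sketch assumes the Cmdlet base class leaves the registered entry callable with the usual (result, args, config) signature; the hash, store, and URL are placeholders, and the URL is passed positionally as in the usage string.

from cmdlet import get

delete_url = get("delete-url")
item = {"store": "local", "hash": "0" * 64}
if callable(delete_url):
    delete_url(item, ["https://example.com/old-mirror"], {})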
301
cmdlet/download_file.py
Normal file
@@ -0,0 +1,301 @@
|
||||
"""Generic file downloader.
|
||||
|
||||
Supports:
|
||||
- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
|
||||
- Piped provider items (uses provider.download when available)
|
||||
|
||||
No streaming site logic; use download-media for yt-dlp/streaming.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
|
||||
from SYS.download import DownloadError, _download_direct_file
|
||||
from SYS.logger import log, debug
|
||||
import pipeline as pipeline_context
|
||||
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
parse_cmdlet_args,
|
||||
register_url_with_local_library,
|
||||
coerce_to_pipe_object,
|
||||
get_field,
|
||||
)
|
||||
|
||||
|
||||
class Download_File(Cmdlet):
|
||||
"""Class-based download-file cmdlet - direct HTTP downloads."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize download-file cmdlet."""
|
||||
super().__init__(
|
||||
name="download-file",
|
||||
summary="Download files via HTTP or provider handlers",
|
||||
usage="download-file <url> [options] OR @N | download-file [options]",
|
||||
alias=["dl-file", "download-http"],
|
||||
arg=[
|
||||
CmdletArg(name="output", type="string", alias="o", description="Output directory (overrides defaults)"),
|
||||
SharedArgs.URL,
|
||||
|
||||
],
|
||||
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Main execution method."""
|
||||
stage_ctx = pipeline_context.get_stage_context()
|
||||
in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1
|
||||
if in_pipeline and isinstance(config, dict):
|
||||
config["_quiet_background_output"] = True
|
||||
return self._run_impl(result, args, config)
|
||||
|
||||
def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Main download implementation for direct HTTP files."""
|
||||
try:
|
||||
debug("Starting download-file")
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Extract explicit URL args (if any)
|
||||
raw_url = parsed.get("url", [])
|
||||
if isinstance(raw_url, str):
|
||||
raw_url = [raw_url]
|
||||
|
||||
# If no URL args were provided, fall back to piped results (provider items)
|
||||
piped_items: List[Any] = []
|
||||
if not raw_url:
|
||||
if isinstance(result, list):
|
||||
piped_items = result
|
||||
elif result:
|
||||
piped_items = [result]
|
||||
|
||||
if not raw_url and not piped_items:
|
||||
log("No url or piped items to download", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get output directory
|
||||
final_output_dir = self._resolve_output_dir(parsed, config)
|
||||
if not final_output_dir:
|
||||
return 1
|
||||
|
||||
debug(f"Output directory: {final_output_dir}")
|
||||
|
||||
# Download each URL and/or provider item
|
||||
downloaded_count = 0
|
||||
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
||||
|
||||
# Provider lookup is optional; keep import local to avoid overhead if unused
|
||||
get_search_provider = None
|
||||
SearchResult = None
|
||||
try:
|
||||
from ProviderCore.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
|
||||
|
||||
get_search_provider = _get_search_provider
|
||||
SearchResult = _SearchResult
|
||||
except Exception:
|
||||
get_search_provider = None
|
||||
SearchResult = None
|
||||
|
||||
def _emit_local_file(downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]]) -> None:
|
||||
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
|
||||
hash_value = self._compute_file_hash(downloaded_path)
|
||||
tag: List[str] = []
|
||||
if tags_hint:
|
||||
tag.extend([str(t) for t in tags_hint if t])
|
||||
if not any(str(t).lower().startswith("title:") for t in tag):
|
||||
tag.insert(0, f"title:{title_val}")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"path": str(downloaded_path),
|
||||
"hash": hash_value,
|
||||
"title": title_val,
|
||||
"action": "cmdlet:download-file",
|
||||
"download_mode": "file",
|
||||
"store": "local",
|
||||
"media_kind": media_kind_hint or "file",
|
||||
"tag": tag,
|
||||
}
|
||||
if full_metadata:
|
||||
payload["full_metadata"] = full_metadata
|
||||
if source and str(source).startswith("http"):
|
||||
payload["url"] = source
|
||||
elif source:
|
||||
payload["source_url"] = source
|
||||
|
||||
pipeline_context.emit(payload)
|
||||
|
||||
            # Automatically register the URL with the local library
|
||||
if payload.get("url"):
|
||||
pipe_obj = coerce_to_pipe_object(payload)
|
||||
register_url_with_local_library(pipe_obj, config)
|
||||
|
||||
# 1) Explicit URL downloads
|
||||
for url in raw_url:
|
||||
try:
|
||||
debug(f"Processing URL: {url}")
|
||||
|
||||
result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode)
|
||||
file_path = None
|
||||
if hasattr(result_obj, "path"):
|
||||
file_path = getattr(result_obj, "path")
|
||||
elif isinstance(result_obj, dict):
|
||||
file_path = result_obj.get("path")
|
||||
if not file_path:
|
||||
file_path = str(result_obj)
|
||||
downloaded_path = Path(str(file_path))
|
||||
|
||||
_emit_local_file(
|
||||
downloaded_path=downloaded_path,
|
||||
source=url,
|
||||
title_hint=downloaded_path.stem,
|
||||
tags_hint=[f"title:{downloaded_path.stem}"],
|
||||
media_kind_hint="file",
|
||||
full_metadata=None,
|
||||
)
|
||||
downloaded_count += 1
|
||||
debug("✓ Downloaded and emitted")
|
||||
|
||||
except DownloadError as e:
|
||||
log(f"Download failed for {url}: {e}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Error processing {url}: {e}", file=sys.stderr)
|
||||
|
||||
# 2) Provider item downloads (piped results)
|
||||
for item in piped_items:
|
||||
try:
|
||||
table = get_field(item, "table")
|
||||
title = get_field(item, "title")
|
||||
target = get_field(item, "path") or get_field(item, "url")
|
||||
media_kind = get_field(item, "media_kind")
|
||||
tags_val = get_field(item, "tag")
|
||||
tags_list: Optional[List[str]]
|
||||
if isinstance(tags_val, list):
|
||||
tags_list = [str(t) for t in tags_val if t]
|
||||
else:
|
||||
tags_list = None
|
||||
|
||||
full_metadata = get_field(item, "full_metadata")
|
||||
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
|
||||
extra_md = item["extra"].get("full_metadata")
|
||||
if isinstance(extra_md, dict):
|
||||
full_metadata = extra_md
|
||||
|
||||
# If this looks like a provider item and providers are available, prefer provider.download()
|
||||
downloaded_path: Optional[Path] = None
|
||||
if table and get_search_provider and SearchResult:
|
||||
provider = get_search_provider(str(table), config)
|
||||
if provider is not None:
|
||||
sr = SearchResult(
|
||||
table=str(table),
|
||||
title=str(title or "Unknown"),
|
||||
path=str(target or ""),
|
||||
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
|
||||
)
|
||||
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
|
||||
downloaded_path = provider.download(sr, final_output_dir)
|
||||
|
||||
# Fallback: if we have a direct HTTP URL, download it directly
|
||||
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
|
||||
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
|
||||
result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
|
||||
file_path = None
|
||||
if hasattr(result_obj, "path"):
|
||||
file_path = getattr(result_obj, "path")
|
||||
elif isinstance(result_obj, dict):
|
||||
file_path = result_obj.get("path")
|
||||
if not file_path:
|
||||
file_path = str(result_obj)
|
||||
downloaded_path = Path(str(file_path))
|
||||
|
||||
if downloaded_path is None:
|
||||
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
_emit_local_file(
|
||||
downloaded_path=downloaded_path,
|
||||
source=str(target) if target else None,
|
||||
title_hint=str(title) if title else downloaded_path.stem,
|
||||
tags_hint=tags_list,
|
||||
media_kind_hint=str(media_kind) if media_kind else None,
|
||||
full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
|
||||
)
|
||||
downloaded_count += 1
|
||||
|
||||
except DownloadError as e:
|
||||
log(f"Download failed: {e}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Error downloading item: {e}", file=sys.stderr)
|
||||
|
||||
if downloaded_count > 0:
|
||||
debug(f"✓ Successfully processed {downloaded_count} file(s)")
|
||||
return 0
|
||||
|
||||
log("No downloads completed", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error in download-file: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
|
||||
"""Resolve the output directory from storage location or config."""
|
||||
output_dir_arg = parsed.get("output")
|
||||
if output_dir_arg:
|
||||
try:
|
||||
out_path = Path(str(output_dir_arg)).expanduser()
|
||||
out_path.mkdir(parents=True, exist_ok=True)
|
||||
return out_path
|
||||
except Exception as e:
|
||||
log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
storage_location = parsed.get("storage")
|
||||
|
||||
# Priority 1: --storage flag
|
||||
if storage_location:
|
||||
try:
|
||||
return SharedArgs.resolve_storage(storage_location)
|
||||
except Exception as e:
|
||||
log(f"Invalid storage location: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# Priority 2: Config outfile
|
||||
if config and config.get("outfile"):
|
||||
try:
|
||||
return Path(config["outfile"]).expanduser()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Priority 3: Default (home/Downloads)
|
||||
final_output_dir = Path.home() / "Downloads"
|
||||
debug(f"Using default directory: {final_output_dir}")
|
||||
|
||||
# Ensure directory exists
|
||||
try:
|
||||
final_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
except Exception as e:
|
||||
log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
return final_output_dir
|
||||
|
||||
def _compute_file_hash(self, filepath: Path) -> str:
|
||||
"""Compute SHA256 hash of a file."""
|
||||
import hashlib
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(filepath, "rb") as f:
|
||||
for byte_block in iter(lambda: f.read(4096), b""):
|
||||
sha256_hash.update(byte_block)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
|
||||
# Module-level singleton registration
|
||||
CMDLET = Download_File()
|
||||
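A sketch of the two input modes download-file accepts: an explicit URL passed as in the usage string, and a piped item whose target is a direct HTTP link. The URL and output directory are placeholders; with an empty config the default Downloads directory is used.

from cmdlet import get

download_file = get("download-file")
if download_file is not None:
    # Direct HTTP download into an explicit output directory.
    download_file(None, ["https://example.com/files/manual.pdf", "-output", "/tmp/dl"], {})

    # Piped item with a direct URL target; no provider handler is involved.
    item = {"title": "Manual", "path": "https://example.com/files/manual.pdf"}
    download_file(item, [], {})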
1483
cmdlet/download_media.py
Normal file
File diff suppressed because it is too large
127
cmdlet/download_torrent.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Download torrent/magnet links via AllDebrid in a dedicated cmdlet.
|
||||
|
||||
Features:
|
||||
- Accepts magnet links and .torrent files/URLs
|
||||
- Uses AllDebrid API for background downloads
|
||||
- Progress tracking and worker management
|
||||
- Self-registering class-based cmdlet
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
import uuid
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
|
||||
from SYS.logger import log
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
|
||||
class Download_Torrent(Cmdlet):
|
||||
"""Class-based download-torrent cmdlet with self-registration."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="download-torrent",
|
||||
summary="Download torrent/magnet links via AllDebrid",
|
||||
usage="download-torrent <magnet|.torrent> [options]",
|
||||
alias=["torrent", "magnet"],
|
||||
arg=[
|
||||
CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
|
||||
CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
|
||||
CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
|
||||
CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
|
||||
],
|
||||
detail=["Download torrents/magnets via AllDebrid API."],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
magnet_args = parsed.get("magnet", [])
|
||||
output_dir = Path(parsed.get("output") or Path.home() / "Downloads")
|
||||
wait_timeout = int(float(parsed.get("wait", 600)))
|
||||
background_mode = parsed.get("background", False)
|
||||
api_key = config.get("alldebrid_api_key")
|
||||
if not api_key:
|
||||
log("AllDebrid API key not configured", file=sys.stderr)
|
||||
return 1
|
||||
for magnet_url in magnet_args:
|
||||
if background_mode:
|
||||
self._start_background_worker(magnet_url, output_dir, config, api_key, wait_timeout)
|
||||
log(f"⧗ Torrent download queued in background: {magnet_url}")
|
||||
else:
|
||||
self._download_torrent_worker(str(uuid.uuid4()), magnet_url, output_dir, config, api_key, wait_timeout)
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def _download_torrent_worker(
|
||||
worker_id: str,
|
||||
magnet_url: str,
|
||||
output_dir: Path,
|
||||
config: Dict[str, Any],
|
||||
api_key: str,
|
||||
wait_timeout: int = 600,
|
||||
worker_manager: Optional[Any] = None,
|
||||
) -> None:
|
||||
try:
|
||||
from API.alldebrid import AllDebridClient
|
||||
client = AllDebridClient(api_key)
|
||||
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
|
||||
magnet_info = client.magnet_add(magnet_url)
|
||||
magnet_id = int(magnet_info.get('id', 0))
|
||||
if magnet_id <= 0:
|
||||
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
|
||||
return
|
||||
log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
|
||||
# Poll for ready status (simplified)
|
||||
import time
|
||||
elapsed = 0
|
||||
while elapsed < wait_timeout:
|
||||
status = client.magnet_status(magnet_id)
|
||||
if status.get('ready'):
|
||||
break
|
||||
time.sleep(5)
|
||||
elapsed += 5
|
||||
if elapsed >= wait_timeout:
|
||||
log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr)
|
||||
return
|
||||
files_result = client.magnet_links([magnet_id])
|
||||
magnet_files = files_result.get(str(magnet_id), {})
|
||||
files_array = magnet_files.get('files', [])
|
||||
if not files_array:
|
||||
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
|
||||
return
|
||||
for file_info in files_array:
|
||||
file_url = file_info.get('link')
|
||||
file_name = file_info.get('name')
|
||||
if file_url and file_name:
|
||||
Download_Torrent._download_file(file_url, output_dir / file_name)
|
||||
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
|
||||
except Exception as e:
|
||||
log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr)
|
||||
|
||||
@staticmethod
|
||||
def _download_file(url: str, dest: Path) -> None:
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(url, stream=True)
|
||||
with open(dest, 'wb') as f:
|
||||
for chunk in resp.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
except Exception as e:
|
||||
log(f"File download failed: {e}", file=sys.stderr)
|
||||
|
||||
def _start_background_worker(self, magnet_url, output_dir, config, api_key, wait_timeout):
|
||||
worker_id = f"torrent_{uuid.uuid4().hex[:6]}"
|
||||
thread = threading.Thread(
|
||||
target=self._download_torrent_worker,
|
||||
args=(worker_id, magnet_url, output_dir, config, api_key, wait_timeout),
|
||||
daemon=False,
|
||||
name=f"TorrentWorker_{worker_id}",
|
||||
)
|
||||
thread.start()
|
||||
|
||||
CMDLET = Download_Torrent()
|
||||
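
The worker above polls AllDebrid every five seconds until the magnet reports ready or the timeout elapses. A minimal sketch of that poll-with-timeout loop, with a stubbed readiness callable standing in for client.magnet_status; wait_until_ready and fake_status are illustrative names, not part of the cmdlet:

import time
from typing import Callable

def wait_until_ready(check_ready: Callable[[], bool], timeout_s: int = 600, interval_s: int = 5) -> bool:
    """Poll check_ready() every interval_s seconds until it returns True or timeout_s elapses."""
    elapsed = 0
    while elapsed < timeout_s:
        if check_ready():
            return True
        time.sleep(interval_s)
        elapsed += interval_s
    return False

# Example: a stub that becomes ready on the third poll
state = {"calls": 0}
def fake_status() -> bool:
    state["calls"] += 1
    return state["calls"] >= 3

print(wait_until_ready(fake_status, timeout_s=10, interval_s=1))  # True after ~2 seconds
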
204
cmdlet/get_file.py
Normal file
@@ -0,0 +1,204 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import shutil
|
||||
|
||||
from . import register
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||
from SYS.logger import log, debug
|
||||
from Store import Store
|
||||
from config import resolve_output_dir
|
||||
|
||||
|
||||
class Get_File(Cmdlet):
|
||||
"""Export files to local path via hash+store."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize get-file cmdlet."""
|
||||
super().__init__(
|
||||
name="get-file",
|
||||
summary="Export file to local path",
|
||||
usage="@1 | get-file -path C:\\Downloads",
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("-path", description="Output directory path (default: from config)"),
|
||||
CmdletArg("-name", description="Output filename (default: from metadata title)"),
|
||||
],
|
||||
detail=[
|
||||
"- Exports file from storage backend to local path",
|
||||
"- Uses hash+store to retrieve file",
|
||||
"- Preserves file extension and metadata",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Export file via hash+store backend."""
|
||||
debug(f"[get-file] run() called with result type: {type(result)}")
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
debug(f"[get-file] parsed args: {parsed}")
|
||||
|
||||
# Extract hash and store from result or args
|
||||
file_hash = parsed.get("hash") or get_field(result, "hash")
|
||||
store_name = parsed.get("store") or get_field(result, "store")
|
||||
output_path = parsed.get("path")
|
||||
output_name = parsed.get("name")
|
||||
|
||||
debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... store_name={store_name}")
|
||||
|
||||
if not file_hash:
|
||||
log("Error: No file hash provided")
|
||||
return 1
|
||||
|
||||
if not store_name:
|
||||
log("Error: No store name provided")
|
||||
return 1
|
||||
|
||||
# Normalize hash
|
||||
file_hash = normalize_hash(file_hash)
|
||||
if not file_hash:
|
||||
log("Error: Invalid hash format")
|
||||
return 1
|
||||
|
||||
debug(f"[get-file] Getting storage backend: {store_name}")
|
||||
|
||||
# Get storage backend
|
||||
store = Store(config)
|
||||
backend = store[store_name]
|
||||
debug(f"[get-file] Backend retrieved: {type(backend).__name__}")
|
||||
|
||||
# Get file metadata to determine name and extension
|
||||
debug(f"[get-file] Getting metadata for hash...")
|
||||
metadata = backend.get_metadata(file_hash)
|
||||
if not metadata:
|
||||
log(f"Error: File metadata not found for hash {file_hash[:12]}...")
|
||||
return 1
|
||||
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
|
||||
|
||||
# Determine output filename
|
||||
if output_name:
|
||||
filename = output_name
|
||||
else:
|
||||
# Use title from metadata, sanitize it
|
||||
title = metadata.get("title", "export")
|
||||
filename = self._sanitize_filename(title)
|
||||
|
||||
# Add extension if metadata has it
|
||||
ext = metadata.get("ext")
|
||||
if ext and not filename.endswith(ext):
|
||||
if not ext.startswith('.'):
|
||||
ext = '.' + ext
|
||||
filename += ext
|
||||
|
||||
# Determine output directory
|
||||
if output_path:
|
||||
output_dir = Path(output_path).expanduser()
|
||||
else:
|
||||
output_dir = resolve_output_dir(config)
|
||||
|
||||
debug(f"[get-file] Output dir: {output_dir}")
|
||||
|
||||
# Create output directory
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)")
|
||||
|
||||
# Get file from backend (may return Path or URL string depending on backend)
|
||||
source_path = backend.get_file(file_hash)
|
||||
|
||||
debug(f"[get-file] backend.get_file returned: {source_path}")
|
||||
|
||||
# Check if backend returned a URL (HydrusNetwork case)
|
||||
if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")):
|
||||
log(f"File opened in browser: {source_path}", file=sys.stderr)
|
||||
ctx.emit(f"Opened in browser: {source_path}")
|
||||
|
||||
# Emit result for pipeline
|
||||
ctx.emit({
|
||||
"hash": file_hash,
|
||||
"store": store_name,
|
||||
"url": source_path,
|
||||
"title": filename,
|
||||
})
|
||||
return 0
|
||||
|
||||
# Otherwise treat as file path (local/folder backends)
|
||||
if isinstance(source_path, str):
|
||||
source_path = Path(source_path)
|
||||
|
||||
# Determine output directory
|
||||
if output_path:
|
||||
output_dir = Path(output_path).expanduser()
|
||||
else:
|
||||
output_dir = resolve_output_dir(config)
|
||||
|
||||
debug(f"[get-file] Output dir: {output_dir}")
|
||||
|
||||
# Create output directory
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Build full output path
|
||||
dest_path = output_dir / filename
|
||||
|
||||
# Make path unique if file exists
|
||||
dest_path = self._unique_path(dest_path)
|
||||
|
||||
if not source_path or not source_path.exists():
|
||||
log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...")
|
||||
return 1
|
||||
|
||||
# Copy file to destination
|
||||
debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr)
|
||||
shutil.copy2(source_path, dest_path)
|
||||
|
||||
ctx.emit(f"Exported to: {dest_path}")
|
||||
log(f"Exported: {dest_path}", file=sys.stderr)
|
||||
|
||||
# Emit result for pipeline
|
||||
ctx.emit({
|
||||
"hash": file_hash,
|
||||
"store": store_name,
|
||||
"path": str(dest_path),
|
||||
"title": filename,
|
||||
})
|
||||
|
||||
debug(f"[get-file] Completed successfully")
|
||||
return 0
|
||||
|
||||
def _sanitize_filename(self, name: str) -> str:
|
||||
"""Sanitize filename by removing invalid characters."""
|
||||
allowed_chars = []
|
||||
for ch in str(name):
|
||||
if ch.isalnum() or ch in {'-', '_', ' ', '.'}:
|
||||
allowed_chars.append(ch)
|
||||
else:
|
||||
allowed_chars.append(' ')
|
||||
|
||||
# Collapse multiple spaces
|
||||
sanitized = ' '.join(''.join(allowed_chars).split())
|
||||
return sanitized or "export"
|
||||
|
||||
def _unique_path(self, path: Path) -> Path:
|
||||
"""Generate unique path by adding (1), (2), etc. if file exists."""
|
||||
if not path.exists():
|
||||
return path
|
||||
|
||||
stem = path.stem
|
||||
suffix = path.suffix
|
||||
parent = path.parent
|
||||
|
||||
counter = 1
|
||||
while True:
|
||||
new_path = parent / f"{stem} ({counter}){suffix}"
|
||||
if not new_path.exists():
|
||||
return new_path
|
||||
counter += 1
|
||||
|
||||
|
||||
# Instantiate and register cmdlet
|
||||
CMDLET = Get_File()
|
||||
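
The collision handling in _unique_path above appends " (1)", " (2)", … before the suffix until a free name is found. A standalone sketch of the same idea, standard library only; unique_path is an illustrative helper name, not the cmdlet's API:

import tempfile
from pathlib import Path

def unique_path(path: Path) -> Path:
    """Return path unchanged if free, otherwise 'stem (N)suffix' with the first free N."""
    if not path.exists():
        return path
    counter = 1
    while True:
        candidate = path.with_name(f"{path.stem} ({counter}){path.suffix}")
        if not candidate.exists():
            return candidate
        counter += 1

# Example: a second export of the same title gets " (1)" appended
with tempfile.TemporaryDirectory() as tmp:
    first = Path(tmp) / "export.mp4"
    first.touch()
    print(unique_path(first).name)  # export (1).mp4
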
229
cmdlet/get_metadata.py
Normal file
@@ -0,0 +1,229 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence, Optional
|
||||
import json
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
from pathlib import Path
|
||||
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
|
||||
import pipeline as ctx
|
||||
from result_table import ResultTable
|
||||
|
||||
|
||||
class Get_Metadata(Cmdlet):
|
||||
"""Class-based get-metadata cmdlet with self-registration."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize get-metadata cmdlet."""
|
||||
super().__init__(
|
||||
name="get-metadata",
|
||||
summary="Print metadata for files by hash and storage backend.",
|
||||
usage="get-metadata [-hash <sha256>] [-store <backend>]",
|
||||
alias=["meta"],
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
],
|
||||
detail=[
|
||||
"- Retrieves metadata from storage backend using file hash as identifier.",
|
||||
"- Shows hash, MIME type, size, duration/pages, known url, and import timestamp.",
|
||||
"- Hash and store are taken from piped result or can be overridden with -hash/-store flags.",
|
||||
"- All metadata is retrieved from the storage backend's database (single source of truth).",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
@staticmethod
|
||||
def _extract_imported_ts(meta: Dict[str, Any]) -> Optional[int]:
|
||||
"""Extract an imported timestamp from metadata if available."""
|
||||
if not isinstance(meta, dict):
|
||||
return None
|
||||
|
||||
# Prefer explicit time_imported if present
|
||||
explicit = meta.get("time_imported")
|
||||
if isinstance(explicit, (int, float)):
|
||||
return int(explicit)
|
||||
|
||||
# Try parsing string timestamps
|
||||
if isinstance(explicit, str):
|
||||
try:
|
||||
import datetime as _dt
|
||||
return int(_dt.datetime.fromisoformat(explicit).timestamp())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _format_imported(ts: Optional[int]) -> str:
|
||||
"""Format timestamp as readable string."""
|
||||
if not ts:
|
||||
return ""
|
||||
try:
|
||||
import datetime as _dt
|
||||
return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _build_table_row(title: str, store: str, path: str, mime: str, size_bytes: Optional[int],
|
||||
dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str],
|
||||
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
|
||||
"""Build a table row dict with metadata fields."""
|
||||
size_mb = None
|
||||
if isinstance(size_bytes, int):
|
||||
try:
|
||||
size_mb = int(size_bytes / (1024 * 1024))
|
||||
except Exception:
|
||||
size_mb = None
|
||||
|
||||
dur_int = int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None
|
||||
pages_int = int(pages) if isinstance(pages, (int, float)) else None
|
||||
imported_label = Get_Metadata._format_imported(imported_ts)
|
||||
|
||||
duration_label = "Duration(s)"
|
||||
duration_value = str(dur_int) if dur_int is not None else ""
|
||||
if mime and mime.lower().startswith("application/pdf"):
|
||||
duration_label = "Pages"
|
||||
duration_value = str(pages_int) if pages_int is not None else ""
|
||||
|
||||
columns = [
|
||||
("Title", title or ""),
|
||||
("Hash", hash_value or ""),
|
||||
("MIME", mime or ""),
|
||||
("Size(MB)", str(size_mb) if size_mb is not None else ""),
|
||||
(duration_label, duration_value),
|
||||
("Imported", imported_label),
|
||||
("Store", store or ""),
|
||||
]
|
||||
|
||||
return {
|
||||
"title": title or path,
|
||||
"path": path,
|
||||
"store": store,
|
||||
"mime": mime,
|
||||
"size_bytes": size_bytes,
|
||||
"duration_seconds": dur_int,
|
||||
"pages": pages_int,
|
||||
"imported_ts": imported_ts,
|
||||
"imported": imported_label,
|
||||
"hash": hash_value,
|
||||
"url": url,
|
||||
"columns": columns,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _add_table_body_row(table: ResultTable, row: Dict[str, Any]) -> None:
|
||||
"""Add a single row to the ResultTable using the prepared columns."""
|
||||
columns = row.get("columns") if isinstance(row, dict) else None
|
||||
lookup: Dict[str, Any] = {}
|
||||
if isinstance(columns, list):
|
||||
for col in columns:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, value = col
|
||||
lookup[str(label)] = value
|
||||
|
||||
row_obj = table.add_row()
|
||||
row_obj.add_column("Hash", lookup.get("Hash", ""))
|
||||
row_obj.add_column("MIME", lookup.get("MIME", ""))
|
||||
row_obj.add_column("Size(MB)", lookup.get("Size(MB)", ""))
|
||||
if "Duration(s)" in lookup:
|
||||
row_obj.add_column("Duration(s)", lookup.get("Duration(s)", ""))
|
||||
elif "Pages" in lookup:
|
||||
row_obj.add_column("Pages", lookup.get("Pages", ""))
|
||||
else:
|
||||
row_obj.add_column("Duration(s)", "")
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Main execution entry point."""
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Get hash and store from parsed args or result
|
||||
file_hash = parsed.get("hash") or get_field(result, "hash")
|
||||
storage_source = parsed.get("store") or get_field(result, "store")
|
||||
|
||||
if not file_hash:
|
||||
log("No hash available - use -hash to specify", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if not storage_source:
|
||||
log("No storage backend specified - use -store to specify", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Use storage backend to get metadata
|
||||
try:
|
||||
from Store import Store
|
||||
storage = Store(config)
|
||||
backend = storage[storage_source]
|
||||
|
||||
# Get metadata from backend
|
||||
metadata = backend.get_metadata(file_hash)
|
||||
|
||||
if not metadata:
|
||||
log(f"No metadata found for hash {file_hash[:8]}... in {storage_source}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Extract title from tags if available
|
||||
title = get_field(result, "title") or file_hash[:16]
|
||||
if not get_field(result, "title"):
|
||||
# Try to get title from tags
|
||||
try:
|
||||
tags, _ = backend.get_tag(file_hash)
|
||||
for tag in tags:
|
||||
if tag.lower().startswith("title:"):
|
||||
title = tag.split(":", 1)[1]
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Extract metadata fields
|
||||
mime_type = metadata.get("mime") or metadata.get("ext", "")
|
||||
file_size = metadata.get("size")
|
||||
duration_seconds = metadata.get("duration")
|
||||
pages = metadata.get("pages")
|
||||
url = metadata.get("url") or []
|
||||
imported_ts = self._extract_imported_ts(metadata)
|
||||
|
||||
# Normalize url
|
||||
if isinstance(url, str):
|
||||
try:
|
||||
url = json.loads(url)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
url = []
|
||||
if not isinstance(url, list):
|
||||
url = []
|
||||
|
||||
# Build display row
|
||||
row = self._build_table_row(
|
||||
title=title,
|
||||
store=storage_source,
|
||||
path=metadata.get("path", ""),
|
||||
mime=mime_type,
|
||||
size_bytes=file_size,
|
||||
dur_seconds=duration_seconds,
|
||||
imported_ts=imported_ts,
|
||||
url=url,
|
||||
hash_value=file_hash,
|
||||
pages=pages,
|
||||
)
|
||||
|
||||
table_title = title
|
||||
table = ResultTable(table_title).init_command("get-metadata", list(args))
|
||||
self._add_table_body_row(table, row)
|
||||
ctx.set_last_result_table_overlay(table, [row], row)
|
||||
ctx.emit(row)
|
||||
return 0
|
||||
|
||||
except KeyError:
|
||||
log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
|
||||
return 1
|
||||
except Exception as exc:
|
||||
log(f"Failed to get metadata: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
CMDLET = Get_Metadata()
|
||||
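
The row builder above converts raw byte sizes to whole megabytes and swaps the Duration(s) column for Pages when the MIME type is application/pdf. A minimal sketch of just that presentation logic, standard library only; size_and_duration is an illustrative name and does not touch ResultTable:

from typing import Optional, Tuple

def size_and_duration(mime: str, size_bytes: Optional[int],
                      duration_s: Optional[float], pages: Optional[int]) -> Tuple[str, str, str]:
    """Return (size in MB, column label, column value) the way the table row presents them."""
    size_mb = str(int(size_bytes / (1024 * 1024))) if isinstance(size_bytes, int) else ""
    if mime and mime.lower().startswith("application/pdf"):
        return size_mb, "Pages", str(int(pages)) if isinstance(pages, (int, float)) else ""
    return size_mb, "Duration(s)", str(int(duration_s)) if isinstance(duration_s, (int, float)) else ""

# Example: a 3 MiB PDF reports pages instead of duration
print(size_and_duration("application/pdf", 3 * 1024 * 1024, None, 12))  # ('3', 'Pages', '12')
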
143
cmdlet/get_note.py
Normal file
@@ -0,0 +1,143 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_cmdlet_args,
|
||||
normalize_result_input,
|
||||
should_show_help,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
class Get_Note(Cmdlet):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="get-note",
|
||||
summary="List notes on a file in a store.",
|
||||
usage="get-note -store <store> [-hash <sha256>]",
|
||||
alias=["get-notes", "get_note"],
|
||||
arg=[
|
||||
SharedArgs.STORE,
|
||||
SharedArgs.HASH,
|
||||
],
|
||||
detail=[
|
||||
"- Notes are retrieved via the selected store backend.",
|
||||
"- Lyrics are stored in a note named 'lyric'.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
try:
|
||||
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
|
||||
except Exception:
|
||||
pass
|
||||
self.register()
|
||||
|
||||
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
|
||||
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
|
||||
if resolved:
|
||||
return resolved
|
||||
if raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
return stem.lower()
|
||||
if p.exists() and p.is_file():
|
||||
return sha256_file(p)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
store_override = parsed.get("store")
|
||||
hash_override = parsed.get("hash")
|
||||
|
||||
results = normalize_result_input(result)
|
||||
if not results:
|
||||
if store_override and normalize_hash(hash_override):
|
||||
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
|
||||
else:
|
||||
log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_registry = Store(config)
|
||||
any_notes = False
|
||||
|
||||
for res in results:
|
||||
if not isinstance(res, dict):
|
||||
continue
|
||||
|
||||
store_name = str(store_override or res.get("store") or "").strip()
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
|
||||
if not store_name:
|
||||
log("[get_note] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = self._resolve_hash(
|
||||
raw_hash=str(raw_hash) if raw_hash else None,
|
||||
raw_path=str(raw_path) if raw_path else None,
|
||||
override_hash=str(hash_override) if hash_override else None,
|
||||
)
|
||||
if not resolved_hash:
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = store_registry[store_name]
|
||||
except Exception as exc:
|
||||
log(f"[get_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
notes = {}
|
||||
try:
|
||||
notes = backend.get_note(resolved_hash, config=config) or {}
|
||||
except Exception:
|
||||
notes = {}
|
||||
|
||||
if not notes:
|
||||
continue
|
||||
|
||||
any_notes = True
|
||||
# Emit each note as its own row so CLI renders a proper note table
|
||||
for k in sorted(notes.keys(), key=lambda x: str(x).lower()):
|
||||
v = notes.get(k)
|
||||
raw_text = str(v or "")
|
||||
preview = " ".join(raw_text.replace("\r", "").split("\n"))
|
||||
ctx.emit(
|
||||
{
|
||||
"store": store_name,
|
||||
"hash": resolved_hash,
|
||||
"note_name": str(k),
|
||||
"note_text": raw_text,
|
||||
"columns": [
|
||||
("Name", str(k)),
|
||||
("Text", preview.strip()),
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
if not any_notes:
|
||||
ctx.emit("No notes found.")
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET = Get_Note()
|
||||
|
||||
|
||||
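
The _resolve_hash fallback above treats a 64-character hex filename stem as the hash itself and otherwise hashes the file on disk. A standalone sketch of that fallback assuming only the standard library (hashlib is used directly here in place of the repo's sha256_file helper; hash_from_path is an illustrative name):

import hashlib
import tempfile
from pathlib import Path
from typing import Optional

def hash_from_path(raw_path: str) -> Optional[str]:
    """A 64-hex stem is taken as the hash; otherwise an existing file is hashed with SHA-256."""
    p = Path(raw_path)
    stem = p.stem.lower()
    if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem):
        return stem
    if p.is_file():
        h = hashlib.sha256()
        with open(p, "rb") as fh:
            for block in iter(lambda: fh.read(65536), b""):
                h.update(block)
        return h.hexdigest()
    return None

# Example: hashing a small temporary file
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as fh:
    fh.write(b"hello")
print(hash_from_path(fh.name))  # SHA-256 of b"hello"
Path(fh.name).unlink()
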
429
cmdlet/get_relationship.py
Normal file
@@ -0,0 +1,429 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence, List, Optional
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help
|
||||
from API.folder import API_folder_store
|
||||
from config import get_local_storage_path
|
||||
from result_table import ResultTable
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="get-relationship",
|
||||
summary="Print relationships for the selected file (Hydrus or Local).",
|
||||
usage="get-relationship [-hash <sha256>]",
|
||||
alias=[
|
||||
"get-rel",
|
||||
],
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
],
|
||||
detail=[
|
||||
"- Lists relationship data as returned by Hydrus or Local DB.",
|
||||
],
|
||||
)
|
||||
|
||||
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(_args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse -hash override
|
||||
override_hash: str | None = None
|
||||
args_list = list(_args)
|
||||
i = 0
|
||||
while i < len(args_list):
|
||||
a = args_list[i]
|
||||
low = str(a).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
|
||||
override_hash = str(args_list[i + 1]).strip()
|
||||
break
|
||||
i += 1
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
result = result[0]
|
||||
|
||||
# Initialize results collection
|
||||
found_relationships = [] # List of dicts: {hash, type, title, path, store}
|
||||
source_title = "Unknown"
|
||||
|
||||
def _add_relationship(entry: Dict[str, Any]) -> None:
|
||||
"""Add relationship if not already present by hash or path."""
|
||||
for existing in found_relationships:
|
||||
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
|
||||
return
|
||||
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
|
||||
return
|
||||
found_relationships.append(entry)
|
||||
|
||||
# Check for local file first
|
||||
file_path = None
|
||||
if isinstance(result, dict):
|
||||
file_path = result.get("file_path") or result.get("path")
|
||||
source_title = result.get("title") or result.get("name") or "Unknown"
|
||||
elif hasattr(result, "file_path"):
|
||||
file_path = result.file_path
|
||||
source_title = getattr(result, "title", "Unknown")
|
||||
|
||||
local_db_checked = False
|
||||
|
||||
if file_path and not override_hash:
|
||||
try:
|
||||
path_obj = Path(file_path)
|
||||
if not source_title or source_title == "Unknown":
|
||||
source_title = path_obj.name
|
||||
|
||||
print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
|
||||
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
|
||||
|
||||
if path_obj.exists():
|
||||
storage_path = get_local_storage_path(config)
|
||||
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
|
||||
if storage_path:
|
||||
with API_folder_store(storage_path) as db:
|
||||
file_hash = db.get_file_hash(path_obj)
|
||||
metadata = db.get_metadata(file_hash) if file_hash else None
|
||||
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
|
||||
if metadata and metadata.get("relationships"):
|
||||
local_db_checked = True
|
||||
rels = metadata["relationships"]
|
||||
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
|
||||
if isinstance(rels, dict):
|
||||
for rel_type, hashes in rels.items():
|
||||
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
|
||||
if hashes:
|
||||
for h in hashes:
|
||||
# h is now a file hash (not a path)
|
||||
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
|
||||
# Resolve hash to file path
|
||||
resolved_path = db.search_hash(h)
|
||||
title = h[:16] + "..."
|
||||
path = None
|
||||
if resolved_path and resolved_path.exists():
|
||||
path = str(resolved_path)
|
||||
# Try to get title from tags
|
||||
try:
|
||||
tags = db.get_tags(h)
|
||||
found_title = False
|
||||
for t in tags:
|
||||
if t.lower().startswith('title:'):
|
||||
title = t[6:].strip()
|
||||
found_title = True
|
||||
break
|
||||
if not found_title:
|
||||
title = resolved_path.stem
|
||||
except Exception:
|
||||
title = resolved_path.stem
|
||||
|
||||
entry_type = "king" if rel_type.lower() == "alt" else rel_type
|
||||
_add_relationship({
|
||||
"hash": h,
|
||||
"type": entry_type,
|
||||
"title": title,
|
||||
"path": path,
|
||||
"store": "local"
|
||||
})
|
||||
|
||||
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
|
||||
# then we should look up the king's other alts to show siblings.
|
||||
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
|
||||
# the king's direct relationships with its alts.
|
||||
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
|
||||
if rel_type.lower() == "alt" and path:
|
||||
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
|
||||
try:
|
||||
parent_path_obj = Path(path)
|
||||
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
|
||||
|
||||
# Also add the king/parent itself if not already in results
|
||||
existing_parent = None
|
||||
for r in found_relationships:
|
||||
if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
|
||||
existing_parent = r
|
||||
break
|
||||
if not existing_parent:
|
||||
parent_title = parent_path_obj.stem
|
||||
try:
|
||||
parent_hash = db.get_file_hash(parent_path_obj)
|
||||
if parent_hash:
|
||||
parent_tags = db.get_tags(parent_hash)
|
||||
for t in parent_tags:
|
||||
if t.lower().startswith('title:'):
|
||||
parent_title = t[6:].strip()
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print(f"[DEBUG] ➕ Adding king/parent to results: {parent_title}", file=sys.stderr)
|
||||
_add_relationship({
|
||||
"hash": str(path),
|
||||
"type": "king" if rel_type.lower() == "alt" else rel_type,
|
||||
"title": parent_title,
|
||||
"path": str(path),
|
||||
"store": "local"
|
||||
})
|
||||
else:
|
||||
# If already in results, ensure it's marked as king if appropriate
|
||||
if rel_type.lower() == "alt":
|
||||
existing_parent['type'] = "king"
|
||||
|
||||
# 1. Check forward relationships from parent (siblings)
|
||||
parent_hash = db.get_file_hash(parent_path_obj)
|
||||
parent_metadata = db.get_metadata(parent_hash) if parent_hash else None
|
||||
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
|
||||
if parent_metadata:
|
||||
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
|
||||
if parent_metadata and parent_metadata.get("relationships"):
|
||||
parent_rels = parent_metadata["relationships"]
|
||||
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
|
||||
if isinstance(parent_rels, dict):
|
||||
for child_type, child_hashes in parent_rels.items():
|
||||
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
|
||||
if child_hashes:
|
||||
for child_h in child_hashes:
|
||||
# child_h is now a HASH, not a path - resolve it
|
||||
child_path_obj = db.search_hash(child_h)
|
||||
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
|
||||
|
||||
if not child_path_obj:
|
||||
# Hash doesn't resolve - skip it
|
||||
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
# Check if already added (case-insensitive hash/path check)
|
||||
if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
|
||||
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
# Now child_path_obj is a Path, so we can get tags
|
||||
child_title = child_path_obj.stem
|
||||
try:
|
||||
child_hash = db.get_file_hash(child_path_obj)
|
||||
if child_hash:
|
||||
child_tags = db.get_tags(child_hash)
|
||||
for t in child_tags:
|
||||
if t.lower().startswith('title:'):
|
||||
child_title = t[6:].strip()
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print(f"[DEBUG] ➕ Adding sibling: {child_title}", file=sys.stderr)
|
||||
_add_relationship({
|
||||
"hash": child_h,
|
||||
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
|
||||
"title": child_title,
|
||||
"path": str(child_path_obj),
|
||||
"store": "local"
|
||||
})
|
||||
else:
|
||||
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
|
||||
|
||||
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
|
||||
# This handles the case where siblings point to parent but parent doesn't point to siblings
|
||||
reverse_children = db.find_files_pointing_to(parent_path_obj)
|
||||
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
|
||||
for child in reverse_children:
|
||||
child_path = child['path']
|
||||
child_type = child['type']
|
||||
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
|
||||
|
||||
# Skip if already added (check by path/hash, case-insensitive)
|
||||
if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
|
||||
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
child_path_obj = Path(child_path)
|
||||
child_title = child_path_obj.stem
|
||||
try:
|
||||
child_hash = db.get_file_hash(child_path_obj)
|
||||
if child_hash:
|
||||
child_tags = db.get_tags(child_hash)
|
||||
for t in child_tags:
|
||||
if t.lower().startswith('title:'):
|
||||
child_title = t[6:].strip()
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print(f"[DEBUG] ➕ Adding reverse sibling: {child_title}", file=sys.stderr)
|
||||
_add_relationship({
|
||||
"hash": child_path,
|
||||
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
|
||||
"title": child_title,
|
||||
"path": child_path,
|
||||
"store": "local"
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
|
||||
except Exception as e:
|
||||
log(f"Recursive lookup error: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
|
||||
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
|
||||
# We keep this disabled to avoid adding the same relationships twice.
|
||||
# If needed in future, can be re-enabled with better deduplication.
|
||||
# for rev in reverse_rels:
|
||||
# rev_path = rev['path']
|
||||
# rev_type = rev['type']
|
||||
#
|
||||
# if any(r['hash'] == rev_path for r in found_relationships): continue
|
||||
#
|
||||
# rev_path_obj = Path(rev_path)
|
||||
# rev_title = rev_path_obj.stem
|
||||
# try:
|
||||
# rev_tags = db.get_tags(rev_path_obj)
|
||||
# for t in rev_tags:
|
||||
# if t.lower().startswith('title:'):
|
||||
# rev_title = t[6:].strip(); break
|
||||
# except Exception: pass
|
||||
#
|
||||
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
|
||||
# # But we'll just list them with the relationship type they used
|
||||
# found_relationships.append({
|
||||
# "hash": rev_path,
|
||||
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
|
||||
# "title": rev_title,
|
||||
# "path": rev_path,
|
||||
# "store": "local"
|
||||
# })
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error checking local relationships: {e}", file=sys.stderr)
|
||||
|
||||
# If we found local relationships, we can stop or merge with Hydrus?
|
||||
# For now, if we found local ones, let's show them.
|
||||
# But if the file is also in Hydrus, we might want those too.
|
||||
# Let's try Hydrus if we have a hash.
|
||||
|
||||
hash_hex = get_hash_for_operation(override_hash, result)
|
||||
|
||||
if hash_hex and not local_db_checked:
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
if client:
|
||||
rel = client.get_file_relationships(hash_hex)
|
||||
if rel:
|
||||
file_rels = rel.get("file_relationships", {})
|
||||
this_file_rels = file_rels.get(hash_hex)
|
||||
|
||||
if this_file_rels:
|
||||
# Map Hydrus relationship IDs to names
|
||||
# 0: potential duplicates, 1: false positives, 2: false positives (alternates),
|
||||
# 3: duplicates, 4: alternatives, 8: king
|
||||
# This mapping is approximate based on Hydrus API docs/behavior
|
||||
rel_map = {
|
||||
"0": "potential duplicate",
|
||||
"1": "false positive",
|
||||
"2": "false positive",
|
||||
"3": "duplicate",
|
||||
"4": "alternative",
|
||||
"8": "king"
|
||||
}
|
||||
|
||||
for rel_type_id, hash_list in this_file_rels.items():
|
||||
# Skip metadata keys
|
||||
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
|
||||
continue
|
||||
|
||||
rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
|
||||
|
||||
if isinstance(hash_list, list):
|
||||
for rel_hash in hash_list:
|
||||
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
|
||||
# Check if we already have this hash from local DB
|
||||
if not any(r['hash'] == rel_hash for r in found_relationships):
|
||||
found_relationships.append({
|
||||
"hash": rel_hash,
|
||||
"type": rel_name,
|
||||
"title": rel_hash, # Can't resolve title easily without another API call
|
||||
"path": None,
|
||||
"store": "hydrus"
|
||||
})
|
||||
except Exception as exc:
|
||||
# Only log error if we didn't find local relationships either
|
||||
if not found_relationships:
|
||||
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
|
||||
|
||||
if not found_relationships:
|
||||
log("No relationships found.")
|
||||
return 0
|
||||
|
||||
# Display results
|
||||
table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])
|
||||
|
||||
# Sort by type then title
|
||||
# Custom sort order: King first, then Derivative, then others
|
||||
def type_sort_key(item):
|
||||
t = item['type'].lower()
|
||||
if t == 'king':
|
||||
return 0
|
||||
elif t == 'derivative':
|
||||
return 1
|
||||
elif t == 'alternative':
|
||||
return 2
|
||||
elif t == 'duplicate':
|
||||
return 3
|
||||
else:
|
||||
return 4
|
||||
|
||||
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
|
||||
|
||||
pipeline_results = []
|
||||
|
||||
for i, item in enumerate(found_relationships):
|
||||
row = table.add_row()
|
||||
row.add_column("Type", item['type'].title())
|
||||
row.add_column("Title", item['title'])
|
||||
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
|
||||
row.add_column("Store", item['store'])
|
||||
|
||||
# Create result object for pipeline
|
||||
res_obj = {
|
||||
"title": item['title'],
|
||||
"hash": item['hash'],
|
||||
"file_hash": item['hash'],
|
||||
"relationship_type": item['type'],
|
||||
"store": item['store']
|
||||
}
|
||||
if item['path']:
|
||||
res_obj["path"] = item['path']
|
||||
res_obj["file_path"] = item['path']
|
||||
res_obj["target"] = item['path']
|
||||
else:
|
||||
# If Hydrus, target is hash
|
||||
res_obj["target"] = item['hash']
|
||||
|
||||
pipeline_results.append(res_obj)
|
||||
|
||||
# Set selection args
|
||||
# If it has a path, we can use it directly. If hash, maybe get-file -hash?
|
||||
if item['path']:
|
||||
table.set_row_selection_args(i, [item['path']])
|
||||
else:
|
||||
table.set_row_selection_args(i, ["-hash", item['hash']])
|
||||
|
||||
ctx.set_last_result_table(table, pipeline_results)
|
||||
print(table)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
|
||||
|
||||
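
The display step above sorts relationships with a custom priority (king first, then derivative, alternative, duplicate, everything else) and breaks ties by title. A minimal sketch of that ordering, standard library only; sort_relationships and RANK are illustrative names:

from typing import Dict, List

RANK = {"king": 0, "derivative": 1, "alternative": 2, "duplicate": 3}

def sort_relationships(items: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Order king, derivative, alternative, duplicate, then everything else, tie-broken by title."""
    return sorted(items, key=lambda r: (RANK.get(r["type"].lower(), 4), r["title"]))

# Example
rows = [
    {"type": "duplicate", "title": "b"},
    {"type": "king", "title": "a"},
    {"type": "alt", "title": "c"},
]
print([r["type"] for r in sort_relationships(rows)])  # ['king', 'duplicate', 'alt']
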
1382
cmdlet/get_tag.py
Normal file
File diff suppressed because it is too large
80
cmdlet/get_url.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import sys
|
||||
|
||||
from . import register
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
|
||||
from SYS.logger import log
|
||||
from Store import Store
|
||||
|
||||
|
||||
class Get_Url(Cmdlet):
|
||||
"""Get url associated with files via hash+store."""
|
||||
|
||||
NAME = "get-url"
|
||||
SUMMARY = "List url associated with a file"
|
||||
USAGE = "@1 | get-url"
|
||||
ARGS = [
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
]
|
||||
DETAIL = [
|
||||
"- Lists all url associated with file identified by hash+store",
|
||||
]
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Get url for file via hash+store backend."""
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Extract hash and store from result or args
|
||||
file_hash = parsed.get("hash") or get_field(result, "hash")
|
||||
store_name = parsed.get("store") or get_field(result, "store")
|
||||
|
||||
if not file_hash:
|
||||
log("Error: No file hash provided")
|
||||
return 1
|
||||
|
||||
if not store_name:
|
||||
log("Error: No store name provided")
|
||||
return 1
|
||||
|
||||
# Normalize hash
|
||||
file_hash = normalize_hash(file_hash)
|
||||
if not file_hash:
|
||||
log("Error: Invalid hash format")
|
||||
return 1
|
||||
|
||||
# Get backend and retrieve url
|
||||
try:
|
||||
storage = Store(config)
|
||||
backend = storage[store_name]
|
||||
|
||||
urls = backend.get_url(file_hash)
|
||||
|
||||
if urls:
|
||||
for u in urls:
|
||||
# Emit rich object for pipeline compatibility
|
||||
ctx.emit({
|
||||
"url": u,
|
||||
"hash": file_hash,
|
||||
"store": store_name,
|
||||
})
|
||||
return 0
|
||||
else:
|
||||
ctx.emit("No url found")
|
||||
return 0
|
||||
|
||||
except KeyError:
|
||||
log(f"Error: Storage backend '{store_name}' not configured")
|
||||
return 1
|
||||
except Exception as exc:
|
||||
log(f"Error retrieving url: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet
|
||||
register(["get-url", "get_url"])(Get_Url)
|
||||
|
||||
|
||||
819
cmdlet/merge_file.py
Normal file
@@ -0,0 +1,819 @@
|
||||
"""Merge multiple files into a single output file."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence, List
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
import subprocess as _subprocess
|
||||
import shutil as _shutil
|
||||
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
create_pipe_object_result,
|
||||
get_field,
|
||||
get_pipe_object_hash,
|
||||
get_pipe_object_path,
|
||||
normalize_result_input,
|
||||
parse_cmdlet_args,
|
||||
should_show_help,
|
||||
)
|
||||
|
||||
import pipeline as ctx
|
||||
|
||||
try:
|
||||
from PyPDF2 import PdfWriter, PdfReader
|
||||
HAS_PYPDF2 = True
|
||||
except ImportError:
|
||||
HAS_PYPDF2 = False
|
||||
PdfWriter = None
|
||||
PdfReader = None
|
||||
|
||||
try:
|
||||
from metadata import (
|
||||
read_tags_from_file,
|
||||
write_tags_to_file,
|
||||
dedup_tags_by_namespace,
|
||||
write_metadata
|
||||
)
|
||||
HAS_METADATA_API = True
|
||||
except ImportError:
|
||||
HAS_METADATA_API = False
|
||||
|
||||
def read_tags_from_file(file_path: Path) -> List[str]:
|
||||
return []
|
||||
|
||||
def write_tags_to_file(
|
||||
file_path: Path,
|
||||
tags: List[str],
|
||||
source_hashes: Optional[List[str]] = None,
|
||||
url: Optional[List[str]] = None,
|
||||
append: bool = False,
|
||||
) -> bool:
|
||||
return False
|
||||
|
||||
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
|
||||
return tags
|
||||
|
||||
def write_metadata(*_args: Any, **_kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Merge multiple files into one."""
|
||||
|
||||
# Parse help
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
delete_after = parsed.get("delete", False)
|
||||
|
||||
output_override: Optional[Path] = None
|
||||
output_arg = parsed.get("output")
|
||||
if output_arg:
|
||||
try:
|
||||
output_override = Path(str(output_arg)).expanduser()
|
||||
except Exception:
|
||||
output_override = None
|
||||
|
||||
format_spec = parsed.get("format")
|
||||
if format_spec:
|
||||
format_spec = str(format_spec).lower().strip()
|
||||
|
||||
# Collect files from piped results
|
||||
# Use normalize_result_input to handle both single items and lists
|
||||
files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
|
||||
|
||||
if not files_to_merge:
|
||||
log("No files provided to merge", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if len(files_to_merge) < 2:
|
||||
# Only 1 file - pass it through unchanged
|
||||
# (merge only happens when multiple files are collected)
|
||||
item = files_to_merge[0]
|
||||
ctx.emit(item)
|
||||
return 0
|
||||
|
||||
# Extract file paths and metadata from result objects
|
||||
source_files: List[Path] = []
|
||||
source_hashes: List[str] = []
|
||||
source_url: List[str] = []
|
||||
source_tags: List[str] = [] # NEW: collect tags from source files
|
||||
source_relationships: List[str] = [] # NEW: collect relationships from source files
|
||||
|
||||
for item in files_to_merge:
|
||||
raw_path = get_pipe_object_path(item)
|
||||
target_path = None
|
||||
if isinstance(raw_path, Path):
|
||||
target_path = raw_path
|
||||
elif isinstance(raw_path, str) and raw_path.strip():
|
||||
candidate = Path(raw_path).expanduser()
|
||||
if candidate.exists():
|
||||
target_path = candidate
|
||||
|
||||
if target_path and target_path.exists():
|
||||
source_files.append(target_path)
|
||||
|
||||
# Track tags from the .tag sidecar for this source (if present)
|
||||
tags_file = target_path.with_suffix(target_path.suffix + '.tag')
|
||||
if tags_file.exists() and HAS_METADATA_API:
|
||||
try:
|
||||
source_tags.extend(read_tags_from_file(tags_file) or [])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Extract hash if available in item (as fallback)
|
||||
hash_value = get_pipe_object_hash(item)
|
||||
if hash_value and hash_value not in source_hashes:
|
||||
source_hashes.append(str(hash_value))
|
||||
|
||||
# Extract known url if available
|
||||
url = get_field(item, 'url', [])
|
||||
if isinstance(url, str):
|
||||
source_url.append(url)
|
||||
elif isinstance(url, list):
|
||||
source_url.extend(url)
|
||||
else:
|
||||
title = get_field(item, 'title', 'unknown') or get_field(item, 'id', 'unknown')
|
||||
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
|
||||
|
||||
if len(source_files) < 2:
|
||||
log("At least 2 valid files required to merge", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Detect file types
|
||||
file_types = set()
|
||||
for f in source_files:
|
||||
suffix = f.suffix.lower()
|
||||
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
|
||||
file_types.add('audio')
|
||||
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||
file_types.add('video')
|
||||
elif suffix in {'.pdf'}:
|
||||
file_types.add('pdf')
|
||||
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
|
||||
file_types.add('text')
|
||||
else:
|
||||
file_types.add('other')
|
||||
|
||||
if len(file_types) > 1 and 'other' not in file_types:
|
||||
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
|
||||
log(f"Can only merge files of the same type", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_kind = list(file_types)[0] if file_types else 'other'
|
||||
|
||||
# Determine output format
|
||||
output_format = format_spec or 'auto'
|
||||
if output_format == 'auto':
|
||||
if file_kind == 'audio':
|
||||
output_format = 'mka' # Default audio codec - mka supports chapters and stream copy
|
||||
elif file_kind == 'video':
|
||||
output_format = 'mp4' # Default video codec
|
||||
elif file_kind == 'pdf':
|
||||
output_format = 'pdf'
|
||||
else:
|
||||
output_format = 'txt'
|
||||
|
||||
# Determine output path
|
||||
if output_override:
|
||||
if output_override.is_dir():
|
||||
base_title = get_field(files_to_merge[0], 'title', 'merged')
|
||||
base_name = _sanitize_name(str(base_title or 'merged'))
|
||||
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
|
||||
else:
|
||||
output_path = output_override
|
||||
else:
|
||||
first_file = source_files[0]
|
||||
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
|
||||
|
||||
# Ensure output directory exists
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Perform merge based on file type
|
||||
if file_kind == 'audio':
|
||||
success = _merge_audio(source_files, output_path, output_format)
|
||||
elif file_kind == 'video':
|
||||
success = _merge_video(source_files, output_path, output_format)
|
||||
elif file_kind == 'pdf':
|
||||
success = _merge_pdf(source_files, output_path)
|
||||
elif file_kind == 'text':
|
||||
success = _merge_text(source_files, output_path)
|
||||
else:
|
||||
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if not success:
|
||||
log("Merge failed", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
|
||||
|
||||
merged_tags: List[str] = [f"title:{output_path.stem}"]
|
||||
|
||||
# Create .tag sidecar file for the merged output using unified API
|
||||
tags_path = output_path.with_suffix(output_path.suffix + '.tag')
|
||||
try:
|
||||
# Merge tags from source files using metadata API
|
||||
if source_tags and HAS_METADATA_API:
|
||||
# Use dedup function to normalize and deduplicate
|
||||
merged_source_tags = dedup_tags_by_namespace(source_tags)
|
||||
merged_tags.extend(merged_source_tags)
|
||||
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
|
||||
elif source_tags:
|
||||
# Fallback: simple deduplication if metadata API unavailable
|
||||
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
|
||||
|
||||
# Write merged tags to sidecar file
|
||||
if HAS_METADATA_API and write_tags_to_file:
|
||||
# Use unified API for file writing
|
||||
source_hashes_list = source_hashes if source_hashes else None
|
||||
source_url_list = source_url if source_url else None
|
||||
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_url_list)
|
||||
else:
|
||||
# Fallback: manual file writing
|
||||
tags_lines = []
|
||||
|
||||
# Add hash first (if available)
|
||||
if source_hashes:
|
||||
tags_lines.append(f"hash:{source_hashes[0]}")
|
||||
|
||||
# Add regular tags
|
||||
tags_lines.extend(merged_tags)
|
||||
|
||||
# Add known url
|
||||
if source_url:
|
||||
for url in source_url:
|
||||
tags_lines.append(f"url:{url}")
|
||||
|
||||
# Add relationships (if available)
|
||||
if source_relationships:
|
||||
for rel in source_relationships:
|
||||
tags_lines.append(f"relationship:{rel}")
|
||||
|
||||
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||
f.write('\n'.join(tags_lines) + '\n')
|
||||
|
||||
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
|
||||
|
||||
# Also create .metadata file using centralized function
|
||||
try:
|
||||
if HAS_METADATA_API and write_metadata:
|
||||
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
|
||||
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
|
||||
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
|
||||
|
||||
# Emit a PipeObject-compatible dict so the merged file can be piped to next command
|
||||
try:
|
||||
from SYS.utils import sha256_file
|
||||
merged_hash = sha256_file(output_path)
|
||||
merged_item = create_pipe_object_result(
|
||||
source="local",
|
||||
identifier=output_path.name,
|
||||
file_path=str(output_path),
|
||||
cmdlet_name="merge-file",
|
||||
title=output_path.stem,
|
||||
hash_value=merged_hash,
|
||||
tag=merged_tags,
|
||||
url=source_url,
|
||||
media_kind=file_kind,
|
||||
)
|
||||
# Clear previous results to ensure only the merged file is passed down
|
||||
ctx.clear_last_result()
|
||||
ctx.emit(merged_item)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
|
||||
# Still emit a string representation for feedback
|
||||
ctx.emit(f"Merged: {output_path}")
|
||||
|
||||
# Cleanup
|
||||
# - Delete source files only when -delete is set.
|
||||
if delete_after:
|
||||
for f in source_files:
|
||||
try:
|
||||
# Delete sidecar tags for the source (if any)
|
||||
tag_file = f.with_suffix(f.suffix + '.tag')
|
||||
if tag_file.exists():
|
||||
try:
|
||||
tag_file.unlink()
|
||||
log(f"Deleted: {tag_file.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {tag_file.name}: {e}", file=sys.stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
if f.exists():
|
||||
f.unlink()
|
||||
log(f"Deleted: {f.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _sanitize_name(text: str) -> str:
|
||||
"""Sanitize filename."""
|
||||
allowed = []
|
||||
for ch in text:
|
||||
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
|
||||
return (" ".join("".join(allowed).split()) or "merged").strip()
|
||||
|
||||
|
||||
def _ext_for_format(fmt: str) -> str:
|
||||
"""Get file extension for format."""
|
||||
format_map = {
|
||||
'mp3': 'mp3',
|
||||
'm4a': 'm4a',
|
||||
'm4b': 'm4b',
|
||||
'aac': 'aac',
|
||||
'opus': 'opus',
|
||||
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
|
||||
'mkv': 'mkv',
|
||||
'mp4': 'mp4',
|
||||
'webm': 'webm',
|
||||
'pdf': 'pdf',
|
||||
'txt': 'txt',
|
||||
'auto': 'mka', # Default - MKA for chapters
|
||||
}
|
||||
return format_map.get(fmt.lower(), 'mka')
|
||||
|
||||
|
||||
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
|
||||
"""Merge audio files with chapters based on file boundaries."""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ffmpeg_path = _shutil.which('ffmpeg')
|
||||
if not ffmpeg_path:
|
||||
log("ffmpeg not found in PATH", file=sys.stderr)
|
||||
return False
|
||||
|
||||
try:
|
||||
# Step 1: Get duration of each file to calculate chapter timestamps
|
||||
chapters = []
|
||||
current_time_ms = 0
|
||||
|
||||
log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Analyzing files for chapters")
|
||||
|
||||
for file_path in files:
|
||||
# Get duration using ffprobe
|
||||
try:
|
||||
ffprobe_cmd = [
|
||||
'ffprobe', '-v', 'error', '-show_entries',
|
||||
'format=duration', '-print_format',
|
||||
'default=noprint_wrappers=1:nokey=1', str(file_path)
|
||||
]
|
||||
|
||||
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
|
||||
if probe_result.returncode == 0 and probe_result.stdout.strip():
|
||||
try:
|
||||
duration_sec = float(probe_result.stdout.strip())
|
||||
except ValueError:
|
||||
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
|
||||
duration_sec = 0
|
||||
else:
|
||||
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
|
||||
duration_sec = 0
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
|
||||
duration_sec = 0
|
||||
|
||||
# Create chapter entry - use title: tag from metadata if available
|
||||
title = file_path.stem # Default to filename without extension
|
||||
if HAS_METADATA_API:
|
||||
try:
|
||||
# Try to read tags from .tag sidecar file
|
||||
tags_file = file_path.with_suffix(file_path.suffix + '.tag')
|
||||
if tags_file.exists():
|
||||
tags = read_tags_from_file(tags_file)
|
||||
if tags:
|
||||
# Look for title: tag
|
||||
for tag in tags:
|
||||
if isinstance(tag, str) and tag.lower().startswith('title:'):
|
||||
# Extract the title value after the colon
|
||||
title = tag.split(':', 1)[1].strip()
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
|
||||
pass # Fall back to filename
|
||||
|
||||
# Convert seconds to HH:MM:SS.mmm format
|
||||
hours = int(current_time_ms // 3600000)
|
||||
minutes = int((current_time_ms % 3600000) // 60000)
|
||||
seconds = int((current_time_ms % 60000) // 1000)
|
||||
millis = int(current_time_ms % 1000)
|
||||
|
||||
chapters.append({
|
||||
'time_ms': current_time_ms,
|
||||
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
|
||||
'title': title,
|
||||
'duration_sec': duration_sec
|
||||
})
|
||||
|
||||
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
|
||||
current_time_ms += int(duration_sec * 1000)
|
||||

        # Step 2: Create concat demuxer file
        concat_file = output.parent / f".concat_{output.stem}.txt"
        concat_lines = []
        for f in files:
            # Escape quotes in path
            safe_path = str(f).replace("'", "'\\''")
            concat_lines.append(f"file '{safe_path}'")

        concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')

        # Step 3: Create FFmpeg metadata file with chapters
        metadata_file = output.parent / f".metadata_{output.stem}.txt"
        metadata_lines = [';FFMETADATA1']

        for i, chapter in enumerate(chapters):
            # FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
            metadata_lines.append('[CHAPTER]')
            metadata_lines.append('TIMEBASE=1/1000')
            metadata_lines.append(f'START={chapter["time_ms"]}')
            # Calculate end time (start of next chapter or end of file)
            if i < len(chapters) - 1:
                metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
            else:
                metadata_lines.append(f'END={current_time_ms}')
            metadata_lines.append(f'title={chapter["title"]}')

        metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
        log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
        logger.info(f"[merge-file] Created {len(chapters)} chapters")
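        # Sketch of the generated helper files (illustrative filenames, not from the
        # original commit). For two inputs "01 - Intro.mp3" (120.0 s) and
        # "02 - Main.mp3" (300.5 s) with no title: sidecar tags:
        #
        #   .concat_<output>.txt:
        #     file '01 - Intro.mp3'
        #     file '02 - Main.mp3'
        #
        #   .metadata_<output>.txt:
        #     ;FFMETADATA1
        #     [CHAPTER]
        #     TIMEBASE=1/1000
        #     START=0
        #     END=120000
        #     title=01 - Intro
        #     [CHAPTER]
        #     TIMEBASE=1/1000
        #     START=120000
        #     END=420500
        #     title=02 - Main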
|
||||
# Step 4: Build FFmpeg command to merge and embed chapters
|
||||
# Strategy: First merge audio, then add metadata in separate pass
|
||||
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
|
||||
|
||||
# Add threading options for speed
|
||||
cmd.extend(['-threads', '0']) # Use all available threads
|
||||
|
||||
# Audio codec selection for first input
|
||||
if output_format == 'mp3':
|
||||
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
|
||||
elif output_format in {'m4a', 'm4b'}:
|
||||
# Use copy if possible (much faster), otherwise re-encode
|
||||
# Check if inputs are already AAC/M4A to avoid re-encoding
|
||||
# For now, default to copy if format matches, otherwise re-encode
|
||||
# But since we are merging potentially different codecs, re-encoding is safer
|
||||
# To speed up re-encoding, we can use a faster preset or hardware accel if available
|
||||
cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality
|
||||
elif output_format == 'aac':
|
||||
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
|
||||
elif output_format == 'opus':
|
||||
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
|
||||
elif output_format == 'mka':
|
||||
# FLAC is fast to encode but large. Copy is fastest if inputs are compatible.
|
||||
# If we want speed, copy is best. If we want compatibility, re-encode.
|
||||
# Let's try copy first if inputs are same format, but that's hard to detect here.
|
||||
# Defaulting to copy for MKA as it's a container that supports many codecs
|
||||
cmd.extend(['-c:a', 'copy'])
|
||||
else:
|
||||
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
|
||||
|
||||
# Add the output file
|
||||
cmd.append(str(output))
|
||||
|
||||
log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
|
||||
|
||||
# Run ffmpeg with progress monitoring
|
||||
try:
|
||||
from SYS.progress import print_progress, print_final_progress
|
||||
import re
|
||||
|
||||
process = _subprocess.Popen(
|
||||
cmd,
|
||||
stdout=_subprocess.PIPE,
|
||||
stderr=_subprocess.PIPE,
|
||||
text=True,
|
||||
encoding='utf-8',
|
||||
errors='replace'
|
||||
)
|
||||
|
||||
            # Monitor progress
            duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
            total_duration_sec = current_time_ms / 1000.0
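            # Illustrative example: an ffmpeg stderr progress line such as
            #   "size=    2048kB time=00:01:23.45 bitrate= 201.3kbits/s speed=30x"
            # matches duration_re with groups ('00', '01', '23', '45'), which the loop
            # below turns into 0*3600 + 1*60 + 23 + 45/100 = 83.45 seconds completed.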
|
||||
while True:
|
||||
# Read stderr line by line (ffmpeg writes progress to stderr)
|
||||
if process.stderr:
|
||||
line = process.stderr.readline()
|
||||
if not line and process.poll() is not None:
|
||||
break
|
||||
|
||||
if line:
|
||||
# Parse time=HH:MM:SS.mm
|
||||
match = duration_re.search(line)
|
||||
if match and total_duration_sec > 0:
|
||||
h, m, s, cs = map(int, match.groups())
|
||||
current_sec = h * 3600 + m * 60 + s + cs / 100.0
|
||||
|
||||
# Calculate speed/bitrate if available (optional)
|
||||
# For now just show percentage
|
||||
print_progress(
|
||||
output.name,
|
||||
int(current_sec * 1000), # Use ms as "bytes" for progress bar
|
||||
int(total_duration_sec * 1000),
|
||||
speed=0
|
||||
)
|
||||
else:
|
||||
break
|
||||
|
||||
# Wait for completion
|
||||
stdout, stderr = process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
log(f"FFmpeg error: {stderr}", file=sys.stderr)
|
||||
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
|
||||
|
||||
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] ffmpeg process error: {e}")
|
||||
raise
|
||||
|
||||
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
|
||||
|
||||
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
|
||||
if output_format == 'mka' or output.suffix.lower() == '.mka':
|
||||
# MKA/MKV format has native chapter support via FFMetadata
|
||||
# Re-mux the file with chapters embedded (copy streams, no re-encode)
|
||||
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
|
||||
|
||||
temp_output = output.parent / f".temp_{output.stem}.mka"
|
||||
|
||||
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
|
||||
mkvmerge_path = _shutil.which('mkvmerge')
|
||||
|
||||
if mkvmerge_path:
|
||||
# mkvmerge is the best tool for embedding chapters in Matroska files
|
||||
log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
|
||||
cmd2 = [
|
||||
mkvmerge_path, '-o', str(temp_output),
|
||||
'--chapters', str(metadata_file),
|
||||
str(output)
|
||||
]
|
||||
            else:
                # Fallback to ffmpeg with proper chapter embedding for Matroska
                log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
                # The FFMetadata file is passed as a second input and its chapters mapped in with -map_chapters
                cmd2 = [
                    ffmpeg_path, '-y',
                    '-i', str(output),         # Input: merged audio
                    '-i', str(metadata_file),  # Input: FFMetadata file
                    '-c:a', 'copy',            # Copy audio without re-encoding
                    '-threads', '0',           # Use all threads
                    '-map', '0',               # Map all from first input
                    '-map_chapters', '1',      # Map CHAPTERS from second input (FFMetadata)
                    str(temp_output)           # Output
                ]
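                # The fallback above amounts to roughly this command line (sketch with
                # hypothetical file names):
                #   ffmpeg -y -i merged.mka -i .metadata_merged.txt -c:a copy -threads 0 \
                #          -map 0 -map_chapters 1 .temp_merged.mka
                # i.e. streams are copied untouched and only the chapter entries from the
                # FFMetadata input are written into the temporary output.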
|
||||
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
|
||||
|
||||
try:
|
||||
# Run chapter embedding silently (progress handled by worker thread)
|
||||
_subprocess.run(
|
||||
cmd2,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
stdin=_subprocess.DEVNULL,
|
||||
timeout=600,
|
||||
check=False
|
||||
)
|
||||
|
||||
# Replace original with temp if successful
|
||||
if temp_output.exists() and temp_output.stat().st_size > 0:
|
||||
try:
|
||||
import shutil
|
||||
if output.exists():
|
||||
output.unlink()
|
||||
shutil.move(str(temp_output), str(output))
|
||||
log(f"✓ Chapters successfully embedded!", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Chapters embedded successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Could not replace file: {e}")
|
||||
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
|
||||
try:
|
||||
temp_output.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
logger.warning(f"[merge-file] Chapter embedding did not create output")
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
|
||||
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||
elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in ['.m4a', '.m4b', '.mp4']:
|
||||
# MP4/M4A format has native chapter support via iTunes metadata atoms
|
||||
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
|
||||
|
||||
temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
|
||||
|
||||
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
|
||||
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
|
||||
cmd2 = [
|
||||
ffmpeg_path, '-y',
|
||||
'-i', str(output), # Input: merged audio
|
||||
'-i', str(metadata_file), # Input: FFMetadata file
|
||||
'-c:a', 'copy', # Copy audio without re-encoding
|
||||
'-threads', '0', # Use all threads
|
||||
'-map', '0', # Map all from first input
|
||||
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
|
||||
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
|
||||
str(temp_output) # Output
|
||||
]
|
||||
|
||||
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
|
||||
|
||||
try:
|
||||
# Run MP4 chapter embedding silently (progress handled by worker thread)
|
||||
_subprocess.run(
|
||||
cmd2,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
stdin=_subprocess.DEVNULL,
|
||||
timeout=600,
|
||||
check=False
|
||||
)
|
||||
|
||||
# Replace original with temp if successful
|
||||
if temp_output.exists() and temp_output.stat().st_size > 0:
|
||||
try:
|
||||
import shutil
|
||||
if output.exists():
|
||||
output.unlink()
|
||||
shutil.move(str(temp_output), str(output))
|
||||
log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
|
||||
logger.info(f"[merge-file] MP4 chapters embedded successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Could not replace file: {e}")
|
||||
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
|
||||
try:
|
||||
temp_output.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
|
||||
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||
else:
|
||||
# For other formats, chapters would require external tools
|
||||
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
|
||||
log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
|
||||
|
||||
# Clean up temp files
|
||||
try:
|
||||
concat_file.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
metadata_file.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log(f"Audio merge error: {e}", file=sys.stderr)
|
||||
logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
    """Merge video files."""
    ffmpeg_path = _shutil.which('ffmpeg')
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    try:
        # Create concat demuxer file
        concat_file = output.parent / f".concat_{output.stem}.txt"
        concat_lines = []
        for f in files:
            safe_path = str(f).replace("'", "'\\''")
            concat_lines.append(f"file '{safe_path}'")

        concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')

        # Build FFmpeg command for video merge
        cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]

        # Video codec selection
        if output_format == 'mp4':
            cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
        elif output_format == 'mkv':
            cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
        else:
            cmd.extend(['-c', 'copy'])  # Copy without re-encoding

        cmd.append(str(output))

        log(f"Merging {len(files)} video files...", file=sys.stderr)
        result = _subprocess.run(cmd, capture_output=True, text=True)

        # Clean up concat file
        try:
            concat_file.unlink()
        except Exception:
            pass

        if result.returncode != 0:
            stderr = (result.stderr or '').strip()
            log(f"FFmpeg error: {stderr}", file=sys.stderr)
            return False

        return True

    except Exception as e:
        log(f"Video merge error: {e}", file=sys.stderr)
        return False


def _merge_text(files: List[Path], output: Path) -> bool:
    """Merge text files."""
    try:
        with open(output, 'w', encoding='utf-8') as outf:
            for i, f in enumerate(files):
                if i > 0:
                    outf.write('\n---\n')  # Separator between files
                try:
                    content = f.read_text(encoding='utf-8', errors='replace')
                    outf.write(content)
                except Exception as e:
                    log(f"Warning reading {f.name}: {e}", file=sys.stderr)

        return True

    except Exception as e:
        log(f"Text merge error: {e}", file=sys.stderr)
        return False


def _merge_pdf(files: List[Path], output: Path) -> bool:
    """Merge PDF files."""
    if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None):
        log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
        return False

    try:
        writer = PdfWriter()

        for f in files:
            try:
                reader = PdfReader(f)
                for page in reader.pages:
                    writer.add_page(page)
                log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
            except Exception as e:
                log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
                return False

        with open(output, 'wb') as outf:
            writer.write(outf)

        return True

    except Exception as e:
        log(f"PDF merge error: {e}", file=sys.stderr)
        return False

CMDLET = Cmdlet(
    name="merge-file",
    summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
    arg=[
        CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
        CmdletArg("-output", description="Override output file path."),
        CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
    ],
    detail=[
        "- Pipe multiple files: search-file query | [1,2,3] | merge-file",
        "- Audio files merge with minimal quality loss using specified codec.",
        "- Video files merge into MP4 or MKV containers.",
        "- PDF files merge into a single PDF document.",
        "- Text/document files are concatenated.",
        "- Output name derived from first file with ' (merged)' suffix.",
        "- -delete flag removes all source files after successful merge.",
    ],
)

CMDLET.exec = _run
CMDLET.register()
721
cmdlet/screen_shot.py
Normal file
721
cmdlet/screen_shot.py
Normal file
@@ -0,0 +1,721 @@
|
||||
"""Screen-shot cmdlet for capturing screenshots of url in a pipeline.
|
||||
|
||||
This cmdlet processes files through the pipeline and creates screenshots using
|
||||
Playwright, marking them as temporary artifacts for cleanup.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import sys
|
||||
import time
|
||||
import httpx
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
from urllib.parse import urlsplit, quote, urljoin
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from API.HTTP import HTTPClient
|
||||
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
|
||||
import pipeline as pipeline_context
|
||||
|
||||
# ============================================================================
|
||||
# CMDLET Metadata Declaration
|
||||
# ============================================================================
|
||||
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Playwright & Screenshot Dependencies
|
||||
# ============================================================================
|
||||
|
||||
try:
|
||||
from playwright.sync_api import (
|
||||
TimeoutError as PlaywrightTimeoutError,
|
||||
sync_playwright,
|
||||
)
|
||||
HAS_PLAYWRIGHT = True
|
||||
except Exception:
|
||||
HAS_PLAYWRIGHT = False
|
||||
PlaywrightTimeoutError = TimeoutError # type: ignore
|
||||
|
||||
def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore
|
||||
raise RuntimeError(
|
||||
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
|
||||
)
|
||||
|
||||
try:
|
||||
from config import resolve_output_dir
|
||||
except ImportError:
|
||||
try:
|
||||
_parent_dir = str(Path(__file__).parent.parent)
|
||||
if _parent_dir not in sys.path:
|
||||
sys.path.insert(0, _parent_dir)
|
||||
from config import resolve_output_dir
|
||||
except ImportError:
|
||||
resolve_output_dir = None
|
||||
|
||||
# ============================================================================
|
||||
# Screenshot Constants & Configuration
|
||||
# ============================================================================
|
||||
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1280, "height": 1200}
|
||||
ARCHIVE_TIMEOUT = 30.0
|
||||
|
||||
# Configurable selectors for specific websites
|
||||
SITE_SELECTORS: Dict[str, List[str]] = {
|
||||
"twitter.com": [
|
||||
"article[role='article']",
|
||||
"div[data-testid='tweet']",
|
||||
"div[data-testid='cellInnerDiv'] article",
|
||||
],
|
||||
"x.com": [
|
||||
"article[role='article']",
|
||||
"div[data-testid='tweet']",
|
||||
"div[data-testid='cellInnerDiv'] article",
|
||||
],
|
||||
"instagram.com": [
|
||||
"article[role='presentation']",
|
||||
"article[role='article']",
|
||||
"div[role='dialog'] article",
|
||||
"section main article",
|
||||
],
|
||||
"reddit.com": [
|
||||
"shreddit-post",
|
||||
"div[data-testid='post-container']",
|
||||
"div[data-click-id='background']",
|
||||
"article",
|
||||
],
|
||||
"rumble.com": [
|
||||
"rumble-player, iframe.rumble",
|
||||
"div.video-item--main",
|
||||
"main article",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
|
||||
class ScreenshotError(RuntimeError):
|
||||
"""Raised when screenshot capture or upload fails."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ScreenshotOptions:
|
||||
"""Options controlling screenshot capture and post-processing."""
|
||||
|
||||
output_dir: Path
|
||||
url: str = ""
|
||||
output_path: Optional[Path] = None
|
||||
full_page: bool = True
|
||||
headless: bool = True
|
||||
wait_after_load: float = 2.0
|
||||
wait_for_article: bool = False
|
||||
replace_video_posters: bool = True
|
||||
tag: Sequence[str] = ()
|
||||
archive: bool = False
|
||||
archive_timeout: float = ARCHIVE_TIMEOUT
|
||||
output_format: Optional[str] = None
|
||||
prefer_platform_target: bool = False
|
||||
target_selectors: Optional[Sequence[str]] = None
|
||||
selector_timeout_ms: int = 10_000
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ScreenshotResult:
|
||||
"""Details about the captured screenshot."""
|
||||
|
||||
path: Path
|
||||
tag_applied: List[str]
|
||||
archive_url: List[str]
|
||||
url: List[str]
|
||||
warnings: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helper Functions
|
||||
# ============================================================================
|
||||
|
||||
def _slugify_url(url: str) -> str:
    """Convert URL to filesystem-safe slug."""
    parsed = urlsplit(url)
    candidate = f"{parsed.netloc}{parsed.path}"
    if parsed.query:
        candidate += f"?{parsed.query}"
    slug = "".join(char if char.isalnum() else "-" for char in candidate.lower())
    slug = slug.strip("-") or "screenshot"
    return slug[:100]
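# Illustrative example (not part of the original commit):
#   _slugify_url("https://example.com/Posts/42?id=7") -> "example-com-posts-42-id-7"
# (netloc + path + query lowercased, every non-alphanumeric replaced by "-",
#  leading/trailing dashes stripped, and the result capped at 100 characters).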
|
||||
|
||||
def _normalise_format(fmt: Optional[str]) -> str:
|
||||
"""Normalize output format to valid values."""
|
||||
if not fmt:
|
||||
return "png"
|
||||
value = fmt.strip().lower()
|
||||
if value in {"jpg", "jpeg"}:
|
||||
return "jpeg"
|
||||
if value in {"png", "pdf"}:
|
||||
return value
|
||||
return "png"
|
||||
|
||||
|
||||
def _format_suffix(fmt: str) -> str:
|
||||
"""Get file suffix for format."""
|
||||
if fmt == "jpeg":
|
||||
return ".jpg"
|
||||
return f".{fmt}"
|
||||
|
||||
|
||||
def _selectors_for_url(url: str) -> List[str]:
    """Return a list of likely content selectors for known platforms."""
    u = url.lower()
    sels: List[str] = []

    for domain, selectors in SITE_SELECTORS.items():
        if domain in u:
            sels.extend(selectors)

    return sels or ["article"]
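# Illustrative example: for "https://x.com/user/status/123" the "x.com" entry of
# SITE_SELECTORS matches, so the function returns
#   ["article[role='article']", "div[data-testid='tweet']", "div[data-testid='cellInnerDiv'] article"];
# for a domain with no SITE_SELECTORS entry it falls back to ["article"].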
|
||||
|
||||
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
|
||||
"""Best-effort page tweaks for popular platforms before capture."""
|
||||
u = url.lower()
|
||||
|
||||
def _try_click_texts(texts: List[str], passes: int = 2, per_timeout: int = 700) -> int:
|
||||
clicks = 0
|
||||
for _ in range(max(1, passes)):
|
||||
for t in texts:
|
||||
try:
|
||||
page.locator(f"text=/{t}/i").first.click(timeout=per_timeout)
|
||||
clicks += 1
|
||||
except PlaywrightTimeoutError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
time.sleep(0.1)
|
||||
return clicks
|
||||
|
||||
# Dismiss common cookie/consent prompts
|
||||
_try_click_texts(["accept", "i agree", "agree", "got it", "allow all", "consent"])
|
||||
|
||||
# Platform-specific expansions
|
||||
if "reddit.com" in u:
|
||||
_try_click_texts(["see more", "read more", "show more", "more"])
|
||||
if ("twitter.com" in u) or ("x.com" in u):
|
||||
_try_click_texts(["show more", "more"])
|
||||
if "instagram.com" in u:
|
||||
_try_click_texts(["more", "see more"])
|
||||
if "tiktok.com" in u:
|
||||
_try_click_texts(["more", "see more"])
|
||||
if ("facebook.com" in u) or ("fb.watch" in u):
|
||||
_try_click_texts(["see more", "show more", "more"])
|
||||
if "rumble.com" in u:
|
||||
_try_click_texts(["accept", "agree", "close"])
|
||||
|
||||
|
||||
def _submit_wayback(url: str, timeout: float) -> Optional[str]:
|
||||
"""Submit URL to Internet Archive Wayback Machine."""
|
||||
encoded = quote(url, safe="/:?=&")
|
||||
with HTTPClient() as client:
|
||||
response = client.get(f"https://web.archive.org/save/{encoded}")
|
||||
response.raise_for_status()
|
||||
content_location = response.headers.get("Content-Location")
|
||||
if content_location:
|
||||
return urljoin("https://web.archive.org", content_location)
|
||||
return str(response.url)
|
||||
|
||||
|
||||
def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
|
||||
"""Submit URL to Archive.today."""
|
||||
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
|
||||
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
|
||||
response = client.get(f"https://archive.today/submit/?url={encoded}")
|
||||
response.raise_for_status()
|
||||
final = str(response.url)
|
||||
if final and ("archive.today" in final or "archive.ph" in final):
|
||||
return final
|
||||
return None
|
||||
|
||||
|
||||
def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
|
||||
"""Submit URL to Archive.ph."""
|
||||
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
|
||||
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
|
||||
response = client.get(f"https://archive.ph/submit/?url={encoded}")
|
||||
response.raise_for_status()
|
||||
final = str(response.url)
|
||||
if final and "archive.ph" in final:
|
||||
return final
|
||||
return None
|
||||
|
||||
|
||||
def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
|
||||
"""Submit URL to all available archive services."""
|
||||
archives: List[str] = []
|
||||
warnings: List[str] = []
|
||||
for submitter, label in (
|
||||
(_submit_wayback, "wayback"),
|
||||
(_submit_archive_today, "archive.today"),
|
||||
(_submit_archive_ph, "archive.ph"),
|
||||
):
|
||||
try:
|
||||
log(f"Archiving to {label}...", flush=True)
|
||||
archived = submitter(url, timeout)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
if exc.response.status_code == 429:
|
||||
warnings.append(f"archive {label} rate limited (HTTP 429)")
|
||||
log(f"{label}: Rate limited (HTTP 429)", flush=True)
|
||||
else:
|
||||
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
|
||||
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
|
||||
except httpx.RequestError as exc:
|
||||
warnings.append(f"archive {label} failed: {exc}")
|
||||
log(f"{label}: Connection error: {exc}", flush=True)
|
||||
except Exception as exc:
|
||||
warnings.append(f"archive {label} failed: {exc}")
|
||||
log(f"{label}: {exc}", flush=True)
|
||||
else:
|
||||
if archived:
|
||||
archives.append(archived)
|
||||
log(f"{label}: Success - {archived}", flush=True)
|
||||
else:
|
||||
log(f"{label}: No archive link returned", flush=True)
|
||||
return archives, warnings
|
||||
|
||||
|
||||
def _prepare_output_path(options: ScreenshotOptions) -> Path:
|
||||
"""Prepare and validate output path for screenshot."""
|
||||
ensure_directory(options.output_dir)
|
||||
explicit_format = _normalise_format(options.output_format) if options.output_format else None
|
||||
inferred_format: Optional[str] = None
|
||||
if options.output_path is not None:
|
||||
path = options.output_path
|
||||
if not path.is_absolute():
|
||||
path = options.output_dir / path
|
||||
suffix = path.suffix.lower()
|
||||
if suffix:
|
||||
inferred_format = _normalise_format(suffix[1:])
|
||||
else:
|
||||
stamp = time.strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"{_slugify_url(options.url)}_{stamp}"
|
||||
path = options.output_dir / filename
|
||||
final_format = explicit_format or inferred_format or "png"
|
||||
if not path.suffix:
|
||||
path = path.with_suffix(_format_suffix(final_format))
|
||||
else:
|
||||
current_suffix = path.suffix.lower()
|
||||
expected = _format_suffix(final_format)
|
||||
if current_suffix != expected:
|
||||
path = path.with_suffix(expected)
|
||||
options.output_format = final_format
|
||||
return unique_path(path)
|
||||
|
||||
|
||||
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
|
||||
"""Capture screenshot using Playwright."""
|
||||
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
|
||||
playwright = None
|
||||
browser = None
|
||||
context = None
|
||||
try:
|
||||
debug("Starting Playwright...", flush=True)
|
||||
playwright = sync_playwright().start()
|
||||
log("Launching Chromium browser...", flush=True)
|
||||
format_name = _normalise_format(options.output_format)
|
||||
headless = options.headless or format_name == "pdf"
|
||||
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
|
||||
|
||||
if format_name == "pdf" and not options.headless:
|
||||
warnings.append("pdf output requires headless Chromium; overriding headless mode")
|
||||
browser = playwright.chromium.launch(
|
||||
headless=headless,
|
||||
args=["--disable-blink-features=AutomationControlled"],
|
||||
)
|
||||
log("Creating browser context...", flush=True)
|
||||
context = browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport=DEFAULT_VIEWPORT,
|
||||
ignore_https_errors=True,
|
||||
)
|
||||
page = context.new_page()
|
||||
log(f"Navigating to {options.url}...", flush=True)
|
||||
try:
|
||||
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
|
||||
log("Page loaded successfully", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("navigation timeout; capturing current page state")
|
||||
log("Navigation timeout; proceeding with current state", flush=True)
|
||||
|
||||
# Skip article lookup by default (wait_for_article defaults to False)
|
||||
if options.wait_for_article:
|
||||
try:
|
||||
log("Waiting for article element...", flush=True)
|
||||
page.wait_for_selector("article", timeout=10_000)
|
||||
log("Article element found", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("<article> selector not found; capturing fallback")
|
||||
log("Article element not found; using fallback", flush=True)
|
||||
|
||||
if options.wait_after_load > 0:
|
||||
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
|
||||
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||
if options.replace_video_posters:
|
||||
log("Replacing video elements with posters...", flush=True)
|
||||
page.evaluate(
|
||||
"""
|
||||
document.querySelectorAll('video').forEach(v => {
|
||||
if (v.poster) {
|
||||
const img = document.createElement('img');
|
||||
img.src = v.poster;
|
||||
img.style.maxWidth = '100%';
|
||||
img.style.borderRadius = '12px';
|
||||
v.replaceWith(img);
|
||||
}
|
||||
});
|
||||
"""
|
||||
)
|
||||
# Attempt platform-specific target capture if requested (and not PDF)
|
||||
element_captured = False
|
||||
if options.prefer_platform_target and format_name != "pdf":
|
||||
log("Attempting platform-specific content capture...", flush=True)
|
||||
try:
|
||||
_platform_preprocess(options.url, page, warnings)
|
||||
except Exception as e:
|
||||
debug(f"[_capture] Platform preprocess failed: {e}")
|
||||
pass
|
||||
selectors = list(options.target_selectors or [])
|
||||
if not selectors:
|
||||
selectors = _selectors_for_url(options.url)
|
||||
|
||||
debug(f"[_capture] Trying selectors: {selectors}")
|
||||
for sel in selectors:
|
||||
try:
|
||||
log(f"Trying selector: {sel}", flush=True)
|
||||
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
|
||||
except PlaywrightTimeoutError:
|
||||
log(f"Selector not found: {sel}", flush=True)
|
||||
continue
|
||||
try:
|
||||
if el is not None:
|
||||
log(f"Found element with selector: {sel}", flush=True)
|
||||
try:
|
||||
el.scroll_into_view_if_needed(timeout=1000)
|
||||
except Exception:
|
||||
pass
|
||||
log(f"Capturing element to {destination}...", flush=True)
|
||||
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
|
||||
element_captured = True
|
||||
log("Element captured successfully", flush=True)
|
||||
break
|
||||
except Exception as exc:
|
||||
warnings.append(f"element capture failed for '{sel}': {exc}")
|
||||
log(f"Failed to capture element: {exc}", flush=True)
|
||||
# Fallback to default capture paths
|
||||
if element_captured:
|
||||
pass
|
||||
elif format_name == "pdf":
|
||||
log("Generating PDF...", flush=True)
|
||||
page.emulate_media(media="print")
|
||||
page.pdf(path=str(destination), print_background=True)
|
||||
log(f"PDF saved to {destination}", flush=True)
|
||||
else:
|
||||
log(f"Capturing full page to {destination}...", flush=True)
|
||||
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
|
||||
if format_name == "jpeg":
|
||||
screenshot_kwargs["type"] = "jpeg"
|
||||
screenshot_kwargs["quality"] = 90
|
||||
if options.full_page:
|
||||
page.screenshot(full_page=True, **screenshot_kwargs)
|
||||
else:
|
||||
article = page.query_selector("article")
|
||||
if article is not None:
|
||||
article_kwargs = dict(screenshot_kwargs)
|
||||
article_kwargs.pop("full_page", None)
|
||||
article.screenshot(**article_kwargs)
|
||||
else:
|
||||
page.screenshot(**screenshot_kwargs)
|
||||
log(f"Screenshot saved to {destination}", flush=True)
|
||||
except Exception as exc:
|
||||
debug(f"[_capture] Exception: {exc}")
|
||||
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
|
||||
finally:
|
||||
log("Cleaning up browser resources...", flush=True)
|
||||
with contextlib.suppress(Exception):
|
||||
if context is not None:
|
||||
context.close()
|
||||
with contextlib.suppress(Exception):
|
||||
if browser is not None:
|
||||
browser.close()
|
||||
with contextlib.suppress(Exception):
|
||||
if playwright is not None:
|
||||
playwright.stop()
|
||||
log("Cleanup complete", flush=True)
|
||||
|
||||
|
||||
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
|
||||
"""Capture a screenshot for the given options."""
|
||||
debug(f"[_capture_screenshot] Preparing capture for {options.url}")
|
||||
destination = _prepare_output_path(options)
|
||||
warnings: List[str] = []
|
||||
_capture(options, destination, warnings)
|
||||
|
||||
# Build URL list from captured url and any archives
|
||||
url: List[str] = [options.url] if options.url else []
|
||||
archive_url: List[str] = []
|
||||
if options.archive and options.url:
|
||||
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
|
||||
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
|
||||
archive_url.extend(archives)
|
||||
warnings.extend(archive_warnings)
|
||||
if archives:
|
||||
url = unique_preserve_order([*url, *archives])
|
||||
|
||||
applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip()))
|
||||
|
||||
return ScreenshotResult(
|
||||
path=destination,
|
||||
tag_applied=applied_tag,
|
||||
archive_url=archive_url,
|
||||
url=url,
|
||||
warnings=warnings,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Main Cmdlet Function
|
||||
# ============================================================================
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Take screenshots of URLs in the pipeline.

    Accepts:
    - Single result object (dict or PipeObject) with 'path' field
    - List of result objects to screenshot each
    - Direct URL as string

    Emits PipeObject-formatted results for each screenshot with:
    - action: 'cmdlet:screen-shot'
    - is_temp: True (screenshots are temporary artifacts)
    - parent_id: hash of the original file/URL

    Screenshots are created using Playwright and marked as temporary
    so they can be cleaned up later with the cleanup cmdlet.
    """
from ._shared import parse_cmdlet_args
|
||||
|
||||
debug(f"[_run] screen-shot invoked with args: {args}")
|
||||
|
||||
# Help check
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
if not HAS_PLAYWRIGHT:
|
||||
log(
|
||||
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
# ========================================================================
|
||||
# ARGUMENT PARSING
|
||||
# ========================================================================
|
||||
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
|
||||
format_value = parsed.get("format")
|
||||
storage_value = parsed.get("storage")
|
||||
selector_arg = parsed.get("selector")
|
||||
selectors = [selector_arg] if selector_arg else []
|
||||
archive_enabled = parsed.get("archive", False)
|
||||
|
||||
# Positional URL argument (if provided)
|
||||
url_arg = parsed.get("url")
|
||||
positional_url = [str(url_arg)] if url_arg else []
|
||||
|
||||
# ========================================================================
|
||||
# INPUT PROCESSING - Extract url from pipeline or command arguments
|
||||
# ========================================================================
|
||||
|
||||
piped_results = normalize_result_input(result)
|
||||
url_to_process = []
|
||||
|
||||
# Extract url from piped results
|
||||
if piped_results:
|
||||
for item in piped_results:
|
||||
url = (
|
||||
get_field(item, 'path')
|
||||
or get_field(item, 'url')
|
||||
or get_field(item, 'target')
|
||||
)
|
||||
|
||||
if url:
|
||||
url_to_process.append(str(url))
|
||||
|
||||
# Use positional arguments if no pipeline input
|
||||
if not url_to_process and positional_url:
|
||||
url_to_process = positional_url
|
||||
|
||||
if not url_to_process:
|
||||
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
debug(f"[_run] url to process: {url_to_process}")
|
||||
|
||||
# ========================================================================
|
||||
# OUTPUT DIRECTORY RESOLUTION - Priority chain
|
||||
# ========================================================================
|
||||
|
||||
screenshot_dir: Optional[Path] = None
|
||||
|
||||
# Primary: Use --storage if provided (highest priority)
|
||||
if storage_value:
|
||||
try:
|
||||
screenshot_dir = SharedArgs.resolve_storage(storage_value)
|
||||
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
|
||||
except ValueError as e:
|
||||
log(str(e), file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Secondary: Use config-based resolver ONLY if --storage not provided
|
||||
if screenshot_dir is None and resolve_output_dir is not None:
|
||||
try:
|
||||
screenshot_dir = resolve_output_dir(config)
|
||||
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
|
||||
if screenshot_dir is None and config and config.get("outfile"):
|
||||
try:
|
||||
screenshot_dir = Path(config["outfile"]).expanduser()
|
||||
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Default: User's Videos directory
|
||||
if screenshot_dir is None:
|
||||
screenshot_dir = Path.home() / "Videos"
|
||||
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
|
||||
|
||||
ensure_directory(screenshot_dir)
|
||||
|
||||
# ========================================================================
|
||||
# PREPARE SCREENSHOT OPTIONS
|
||||
# ========================================================================
|
||||
|
||||
format_name = _normalise_format(format_value)
|
||||
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
|
||||
target_selectors = filtered_selectors if filtered_selectors else None
|
||||
|
||||
all_emitted = []
|
||||
exit_code = 0
|
||||
# ========================================================================
|
||||
# PROCESS url AND CAPTURE SCREENSHOTS
|
||||
# ========================================================================
|
||||
|
||||
for url in url_to_process:
|
||||
# Validate URL format
|
||||
if not url.lower().startswith(("http://", "https://", "file://")):
|
||||
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
try:
|
||||
# Create screenshot with provided options
|
||||
options = ScreenshotOptions(
|
||||
url=url,
|
||||
output_dir=screenshot_dir,
|
||||
output_format=format_name,
|
||||
archive=archive_enabled,
|
||||
target_selectors=target_selectors,
|
||||
prefer_platform_target=False,
|
||||
wait_for_article=False,
|
||||
full_page=True,
|
||||
)
|
||||
|
||||
screenshot_result = _capture_screenshot(options)
|
||||
|
||||
# Log results and warnings
|
||||
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
|
||||
if screenshot_result.archive_url:
|
||||
log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True)
|
||||
for warning in screenshot_result.warnings:
|
||||
log(f"Warning: {warning}", flush=True)
|
||||
|
||||
# Compute hash of screenshot file
|
||||
screenshot_hash = None
|
||||
try:
|
||||
with open(screenshot_result.path, 'rb') as f:
|
||||
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Create PipeObject result - marked as TEMP since derivative artifact
|
||||
pipe_obj = create_pipe_object_result(
|
||||
source='screenshot',
|
||||
identifier=Path(screenshot_result.path).stem,
|
||||
file_path=str(screenshot_result.path),
|
||||
cmdlet_name='screen-shot',
|
||||
title=f"Screenshot: {Path(screenshot_result.path).name}",
|
||||
hash_value=screenshot_hash,
|
||||
is_temp=True,
|
||||
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
|
||||
extra={
|
||||
'source_url': url,
|
||||
'archive_url': screenshot_result.archive_url,
|
||||
'url': screenshot_result.url,
|
||||
'target': str(screenshot_result.path), # Explicit target for add-file
|
||||
}
|
||||
)
|
||||
|
||||
# Emit the result so downstream cmdlet (like add-file) can use it
|
||||
pipeline_context.emit(pipe_obj)
|
||||
all_emitted.append(pipe_obj)
|
||||
|
||||
except ScreenshotError as exc:
|
||||
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||
exit_code = 1
|
||||
except Exception as exc:
|
||||
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
exit_code = 1
|
||||
|
||||
if not all_emitted:
|
||||
log(f"No screenshots were successfully captured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Log completion message
|
||||
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
|
||||
|
||||
return exit_code
|
||||
CMDLET = Cmdlet(
    name="screen-shot",
    summary="Capture a website screenshot",
    usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
    alias=["screenshot", "ss"],
    arg=[
        SharedArgs.URL,
        CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
        CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
        # The following options are read by _run via parse_cmdlet_args:
        CmdletArg(name="storage", type="string", description="Storage backend/location for the screenshot output directory"),
        CmdletArg(name="archive", type="flag", description="Also submit the URL to web archive services (Wayback, archive.today, archive.ph)"),
    ],
    detail=[
        "- Screenshots are captured with Playwright and marked as temporary artifacts (is_temp=True).",
        "- Emitted results can be piped to downstream cmdlets such as add-file and cleaned up later with the cleanup cmdlet.",
        "- Use -format png|jpeg|pdf, -selector for element capture, -storage for the output location, and -archive to archive the URL.",
    ],
)

CMDLET.exec = _run
CMDLET.register()
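# Example invocations (sketch based on the usage string and pipeline docs above):
#   screen-shot https://example.com -format pdf
#   download-data https://example.com/article | screen-shot -selector "article" | add-file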
169
cmdlet/search_provider.py
Normal file
169
cmdlet/search_provider.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Sequence, Optional
|
||||
import sys
|
||||
import json
|
||||
import uuid
|
||||
import importlib
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from ProviderCore.registry import get_search_provider, list_search_providers
|
||||
|
||||
from ._shared import Cmdlet, CmdletArg, should_show_help
|
||||
import pipeline as ctx
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
from config import get_local_storage_path
|
||||
except Exception: # pragma: no cover
|
||||
get_local_storage_path = None # type: ignore
|
||||
|
||||
class Search_Provider(Cmdlet):
|
||||
"""Search external content providers."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
name="search-provider",
|
||||
summary="Search external providers (bandcamp, libgen, soulseek, youtube)",
|
||||
usage="search-provider <provider> <query> [-limit N]",
|
||||
arg=[
|
||||
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube"),
|
||||
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
|
||||
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
|
||||
],
|
||||
detail=[
|
||||
"Search external content providers:",
|
||||
"- bandcamp: Search for music albums/tracks",
|
||||
" Example: search-provider bandcamp \"artist:altrusian grace\"",
|
||||
"- libgen: Search Library Genesis for books",
|
||||
" Example: search-provider libgen \"python programming\"",
|
||||
"- soulseek: Search P2P network for music",
|
||||
" Example: search-provider soulseek \"pink floyd\"",
|
||||
"- youtube: Search YouTube for videos",
|
||||
" Example: search-provider youtube \"tutorial\"",
|
||||
"",
|
||||
"Query syntax:",
|
||||
"- bandcamp: Use 'artist:Name' to search by artist",
|
||||
"- libgen: Supports isbn:, author:, title: prefixes",
|
||||
"- soulseek: Plain text search",
|
||||
"- youtube: Plain text search",
|
||||
"",
|
||||
"Results can be piped to other cmdlet:",
|
||||
" search-provider bandcamp \"artist:grace\" | @1 | download-data",
|
||||
],
|
||||
exec=self.run
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Execute search-provider cmdlet."""
|
||||
if should_show_help(args):
|
||||
ctx.emit(self.__dict__)
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
if len(args) < 2:
|
||||
log("Error: search-provider requires <provider> and <query> arguments", file=sys.stderr)
|
||||
log(f"Usage: {self.usage}", file=sys.stderr)
|
||||
log("Available providers:", file=sys.stderr)
|
||||
providers = list_search_providers(config)
|
||||
for name, available in sorted(providers.items()):
|
||||
status = "✓" if available else "✗"
|
||||
log(f" {status} {name}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
provider_name = args[0]
|
||||
query = args[1]
|
||||
|
||||
# Parse optional limit
|
||||
limit = 50
|
||||
if len(args) >= 4 and args[2] in ("-limit", "--limit"):
|
||||
try:
|
||||
limit = int(args[3])
|
||||
except ValueError:
|
||||
log(f"Warning: Invalid limit value '{args[3]}', using default 50", file=sys.stderr)
|
||||
|
||||
debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}")
|
||||
|
||||
# Get provider
|
||||
provider = get_search_provider(provider_name, config)
|
||||
if not provider:
|
||||
log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
|
||||
log("Available providers:", file=sys.stderr)
|
||||
providers = list_search_providers(config)
|
||||
for name, available in sorted(providers.items()):
|
||||
if available:
|
||||
log(f" - {name}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
from API.folder import API_folder_store
|
||||
worker_id = str(uuid.uuid4())
|
||||
library_root = get_local_storage_path(config or {})
|
||||
if not library_root:
|
||||
log("No library root configured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Use context manager to ensure database is always closed
|
||||
with API_folder_store(library_root) as db:
|
||||
try:
|
||||
db.insert_worker(
|
||||
worker_id,
|
||||
"search-provider",
|
||||
title=f"Search: {query}",
|
||||
description=f"Provider: {provider_name}, Query: {query}",
|
||||
pipe=ctx.get_current_command_text()
|
||||
)
|
||||
|
||||
results_list = []
|
||||
import result_table
|
||||
importlib.reload(result_table)
|
||||
from result_table import ResultTable
|
||||
|
||||
table_title = f"Search: {query} [{provider_name}]"
|
||||
preserve_order = provider_name.lower() in ('youtube', 'openlibrary')
|
||||
table = ResultTable(table_title).set_preserve_order(preserve_order)
|
||||
table.set_table(provider_name)
|
||||
|
||||
debug(f"[search-provider] Calling {provider_name}.search()")
|
||||
results = provider.search(query, limit=limit)
|
||||
debug(f"[search-provider] Got {len(results)} results")
|
||||
|
||||
if not results:
|
||||
log(f"No results found for query: {query}", file=sys.stderr)
|
||||
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
return 0
|
||||
|
||||
# Emit results for pipeline
|
||||
for search_result in results:
|
||||
item_dict = search_result.to_dict() if hasattr(search_result, 'to_dict') else dict(search_result)
|
||||
|
||||
# Ensure table field is set (should be by provider, but just in case)
|
||||
if 'table' not in item_dict:
|
||||
item_dict['table'] = provider_name
|
||||
|
||||
table.add_result(search_result) # ResultTable handles SearchResult objects
|
||||
results_list.append(item_dict)
|
||||
ctx.emit(item_dict)
|
||||
|
||||
ctx.set_last_result_table(table, results_list)
|
||||
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
|
||||
log(f"Found {len(results)} result(s) from {provider_name}", file=sys.stderr)
|
||||
return 0
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error searching {provider_name}: {e}", file=sys.stderr)
|
||||
import traceback
|
||||
debug(traceback.format_exc())
|
||||
try:
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
except Exception:
|
||||
pass
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet instance
|
||||
Search_Provider_Instance = Search_Provider()
|
||||
345
cmdlet/search_store.py
Normal file
345
cmdlet/search_store.py
Normal file
@@ -0,0 +1,345 @@
|
||||
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence, List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
|
||||
from SYS.logger import log, debug
|
||||
|
||||
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help
|
||||
import pipeline as ctx
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
import mutagen # type: ignore
|
||||
except ImportError: # pragma: no cover
|
||||
mutagen = None # type: ignore
|
||||
|
||||
try:
|
||||
from config import get_hydrus_url, resolve_output_dir
|
||||
except Exception: # pragma: no cover
|
||||
get_hydrus_url = None # type: ignore
|
||||
resolve_output_dir = None # type: ignore
|
||||
|
||||
try:
|
||||
from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
|
||||
except ImportError: # pragma: no cover
|
||||
HydrusNetwork = None # type: ignore
|
||||
HydrusRequestError = RuntimeError # type: ignore
|
||||
|
||||
try:
|
||||
from SYS.utils import sha256_file
|
||||
except ImportError: # pragma: no cover
|
||||
sha256_file = None # type: ignore
|
||||
|
||||
try:
|
||||
from SYS.utils_constant import mime_maps
|
||||
except ImportError: # pragma: no cover
|
||||
mime_maps = {} # type: ignore
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SearchRecord:
|
||||
path: str
|
||||
size_bytes: int | None = None
|
||||
duration_seconds: str | None = None
|
||||
tag: str | None = None
|
||||
hash: str | None = None
|
||||
|
||||
def as_dict(self) -> dict[str, str]:
|
||||
payload: dict[str, str] = {"path": self.path}
|
||||
if self.size_bytes is not None:
|
||||
payload["size"] = str(self.size_bytes)
|
||||
if self.duration_seconds:
|
||||
payload["duration"] = self.duration_seconds
|
||||
if self.tag:
|
||||
payload["tag"] = self.tag
|
||||
if self.hash:
|
||||
payload["hash"] = self.hash
|
||||
return payload
|
||||
|
||||
|
||||
STORAGE_ORIGINS = {"local", "hydrus", "folder"}
|
||||
|
||||
|
||||
class Search_Store(Cmdlet):
|
||||
"""Class-based search-store cmdlet for searching storage backends."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="search-store",
|
||||
summary="Search storage backends (Folder, Hydrus) for files.",
|
||||
usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]",
|
||||
arg=[
|
||||
CmdletArg("query", description="Search query string"),
|
||||
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
|
||||
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
|
||||
CmdletArg("type", description="Filter by type: audio, video, image, document"),
|
||||
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
|
||||
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
|
||||
CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"),
|
||||
],
|
||||
detail=[
|
||||
"Search across storage backends: Folder stores and Hydrus instances",
|
||||
"Use -store to search a specific backend by name",
|
||||
"Filter results by: tag, size, type, duration",
|
||||
"Results include hash for downstream commands (get-file, add-tag, etc.)",
|
||||
"Examples:",
|
||||
"search-store foo # Search all storage backends",
|
||||
"search-store -store home '*' # Search 'home' Hydrus instance",
|
||||
"search-store -store test 'video' # Search 'test' folder store",
|
||||
"search-store song -type audio # Search for audio files",
|
||||
"search-store movie -tag action # Search with tag filter",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
    # --- Helper methods -------------------------------------------------
    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
        ext = str(ext_value or "").strip().lstrip(".")
        for sep in (" ", "|", "(", "[", "{", ",", ";"):
            if sep in ext:
                ext = ext.split(sep, 1)[0]
                break
        if "." in ext:
            ext = ext.split(".")[-1]
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]
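    # Illustrative examples (not from the original commit):
    #   _normalize_extension(".FLAC")         -> "FLAC"  (leading dot stripped)
    #   _normalize_extension("mp3 (320kbps)") -> "mp3"   (cut at the first separator)
    #   _normalize_extension("tar.gz")        -> "gz"    (keeps the last dotted component)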
|
||||
def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Ensure storage results have the necessary fields for result_table display."""
|
||||
store_value = str(payload.get("store") or "").lower()
|
||||
if store_value not in STORAGE_ORIGINS:
|
||||
return payload
|
||||
|
||||
# Ensure we have title field
|
||||
if "title" not in payload:
|
||||
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
|
||||
|
||||
# Ensure we have ext field
|
||||
if "ext" not in payload:
|
||||
title = str(payload.get("title", ""))
|
||||
path_obj = Path(title)
|
||||
if path_obj.suffix:
|
||||
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
|
||||
else:
|
||||
payload["ext"] = payload.get("ext", "")
|
||||
|
||||
# Ensure size_bytes is present for display (already set by search_file())
|
||||
# result_table will handle formatting it
|
||||
|
||||
# Don't create manual columns - let result_table handle display
|
||||
# This allows the table to respect max_columns and apply consistent formatting
|
||||
return payload
|
||||
|
||||
    # --- Execution ------------------------------------------------------
    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files."""
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        args_list = [str(arg) for arg in (args or [])]

        # Parse arguments
        query = ""
        tag_filters: List[str] = []
        size_filter: Optional[Tuple[str, int]] = None
        duration_filter: Optional[Tuple[str, float]] = None
        type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []

        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            if low in {"-store", "--store"} and i + 1 < len(args_list):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
                tag_filters.append(args_list[i + 1])
                i += 2
            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif low in {"-type", "--type"} and i + 1 < len(args_list):
                type_filter = args_list[i + 1].lower()
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1

        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
                query = re.sub(r"\s{2,}", " ", query)
                query = query.strip().strip(',')

        if store_filter and not storage_backend:
            storage_backend = store_filter

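        # Illustrative example (backend name is hypothetical): the query
        # "beethoven store:hydrus" is rewritten to query="beethoven" with
        # storage_backend="hydrus", i.e. an inline store: token acts like "-store hydrus".
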
        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1

        from API.folder import API_folder_store
        from config import get_local_storage_path
        import uuid
        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1

        # Use context manager to ensure database is always closed
        with API_folder_store(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-store",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text()
                )

                results_list = []
                import result_table
                import importlib
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query}"
                if storage_backend:
                    table_title += f" [{storage_backend}]"

                table = ResultTable(table_title)

                from Store import Store
                storage = Store(config=config or {})
                from Store._base import Store as BaseStore

                backend_to_search = storage_backend or None
                if backend_to_search:
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
                    if type(target_backend).search is BaseStore.search:
                        log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                        db.update_worker_status(worker_id, 'error')
                        return 1
                    results = target_backend.search(query, limit=limit)
                else:
                    from API.HydrusNetwork import is_hydrus_available
                    hydrus_available = is_hydrus_available(config or {})
                    from Store.HydrusNetwork import HydrusNetwork

                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        try:
                            backend = storage[backend_name]
                            if isinstance(backend, HydrusNetwork) and not hydrus_available:
                                continue
                            searched_backends.append(backend_name)

                            backend_results = backend.search(query, limit=limit - len(all_results))
                            if backend_results:
                                all_results.extend(backend_results)
                                if len(all_results) >= limit:
                                    break
                        except Exception as exc:
                            log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
                    results = all_results[:limit]

                def _format_storage_label(name: str) -> str:
                    clean = str(name or "").strip()
                    if not clean:
                        return "Unknown"
                    return clean.replace("_", " ").title()

                storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
                for item in results or []:
                    store = get_field(item, "store")
                    if not store:
                        continue
                    key = str(store).lower()
                    if key not in storage_counts:
                        storage_counts[key] = 0
                    storage_counts[key] += 1

                if storage_counts or query:
                    display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
                    summary_line = table.set_storage_summary(display_counts, query, inline=True)
                    if summary_line:
                        table.title = summary_line

                if results:
                    for item in results:
                        def _as_dict(obj: Any) -> Dict[str, Any]:
                            if isinstance(obj, dict):
                                return dict(obj)
                            if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
                                return obj.to_dict()  # type: ignore[arg-type]
                            return {"title": str(obj)}

                        item_dict = _as_dict(item)
                        if store_filter:
                            store_val = str(item_dict.get("store") or "").lower()
                            if store_filter != store_val:
                                continue
                        normalized = self._ensure_storage_columns(item_dict)

                        # Make hash/store available for downstream cmdlet without rerunning search
                        hash_val = normalized.get("hash")
                        store_val = normalized.get("store") or item_dict.get("store")
                        if hash_val and not normalized.get("hash"):
                            normalized["hash"] = hash_val
                        if store_val and not normalized.get("store"):
                            normalized["store"] = store_val

                        table.add_result(normalized)

                        results_list.append(normalized)
                        ctx.emit(normalized)

                    # Debug: Verify table rows match items list
                    debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
                    if len(table.rows) != len(results_list):
                        debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)

                    ctx.set_last_result_table(table, results_list)
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                else:
                    log("No results found", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))

                db.update_worker_status(worker_id, 'completed')
                return 0

            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception:
                    pass
                return 1


CMDLET = Search_Store()
297
cmdlet/trim_file.py
Normal file
@@ -0,0 +1,297 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional
from pathlib import Path
import sys
import json
import subprocess
import shutil
import re

from SYS.logger import log, debug
from SYS.utils import sha256_file
from ._shared import (
    Cmdlet,
    CmdletArg,
    parse_cmdlet_args,
    normalize_result_input,
    extract_tag_from_result,
    extract_title_from_result
)
import pipeline as ctx

CMDLET = Cmdlet(
    name="trim-file",
    summary="Trim a media file using ffmpeg.",
    usage="trim-file [-path <path>] -range <start-end> [-delete]",
    arg=[
        CmdletArg("-path", description="Path to the file (optional if piped)."),
        CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55' or '00:03:45-00:03:55')."),
        CmdletArg("-delete", type="flag", description="Delete the original file after trimming."),
    ],
    detail=[
        "Creates a new file with 'clip_' prefix in the filename/title.",
        "Inherits tag values from the source file.",
        "Adds a relationship to the source file (if hash is available).",
        "Output can be piped to add-file.",
    ]
)

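# Illustrative invocation (file name is hypothetical), matching the usage string above:
#   trim-file -path talk.mp4 -range 3:45-3:55 | add-file
# writes clip_talk.mp4 next to the source and pipes the clip result into add-file.
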
def _parse_time(time_str: str) -> float:
    """Convert time string (HH:MM:SS or MM:SS or SS) to seconds."""
    parts = time_str.strip().split(':')
    if len(parts) == 3:
        return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
    elif len(parts) == 2:
        return float(parts[0]) * 60 + float(parts[1])
    elif len(parts) == 1:
        return float(parts[0])
    else:
        raise ValueError(f"Invalid time format: {time_str}")

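# Worked examples for the parser above (illustrative):
#   _parse_time("00:03:45") == 225.0, _parse_time("3:45") == 225.0, _parse_time("90") == 90.0
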
def _trim_media(input_path: Path, output_path: Path, start_time: str, end_time: str) -> bool:
    """Trim media file using ffmpeg."""
    ffmpeg_path = shutil.which('ffmpeg')
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    # Seeking strategy: -ss before -i (input seeking) is fast but may be less accurate,
    # while -ss after -i (output seeking) is accurate but slow on long files. We use
    # input seeking. Because it resets timestamps to 0, an absolute -to end time would
    # be misinterpreted, so the command passes -t with the duration (end - start) instead.
    # Stream copy (-c copy) preserves quality; if a cut does not land on a keyframe,
    # re-encoding would be needed, but copy is the standard first choice for lossless trimming.
    try:
        s = _parse_time(start_time)
        e = _parse_time(end_time)
        duration = e - s
        if duration <= 0:
            log(f"Invalid range: start {start_time} >= end {end_time}", file=sys.stderr)
            return False

        cmd = [
            ffmpeg_path, '-y',
            '-ss', start_time,
            '-i', str(input_path),
            '-t', str(duration),
            '-c', 'copy',          # Stream copy for speed and quality preservation
            '-map_metadata', '0',  # Copy metadata
            str(output_path)
        ]

        debug(f"Running ffmpeg: {' '.join(cmd)}")
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            log(f"ffmpeg error: {result.stderr}", file=sys.stderr)
            return False

        return True
    except Exception as e:
        log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
        return False

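# For a 3:45-3:55 range, the helper above effectively runs (paths are hypothetical):
#   ffmpeg -y -ss 3:45 -i input.mp4 -t 10.0 -c copy -map_metadata 0 clip_input.mp4
# i.e. fast input seeking plus an explicit duration, so the timestamp reset caused by
# -ss before -i does not matter.
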
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Trim a media file."""
    # Parse arguments
    parsed = parse_cmdlet_args(args, CMDLET)

    range_arg = parsed.get("range")
    if not range_arg or '-' not in range_arg:
        log("Error: -range argument required (format: start-end)", file=sys.stderr)
        return 1

    start_str, end_str = range_arg.split('-', 1)

    delete_original = parsed.get("delete", False)
    path_arg = parsed.get("path")

    # Collect inputs
    inputs = normalize_result_input(result)

    # If path arg provided, add it to inputs
    if path_arg:
        inputs.append({"path": path_arg})

    if not inputs:
        log("No input files provided.", file=sys.stderr)
        return 1

    success_count = 0

    for item in inputs:
        # Resolve file path
        file_path = None
        if isinstance(item, dict):
            file_path = item.get("path") or item.get("target")
        elif hasattr(item, "path"):
            file_path = item.path
        elif isinstance(item, str):
            file_path = item

        if not file_path:
            continue

        path_obj = Path(file_path)
        if not path_obj.exists():
            log(f"File not found: {file_path}", file=sys.stderr)
            continue

        # Determine output path
        # Prepend clip_ to filename
        new_filename = f"clip_{path_obj.name}"
        output_path = path_obj.parent / new_filename

        # Trim
        log(f"Trimming {path_obj.name} ({start_str} to {end_str})...", file=sys.stderr)
        if _trim_media(path_obj, output_path, start_str, end_str):
            log(f"Created clip: {output_path}", file=sys.stderr)
            success_count += 1

            # Prepare result for pipeline

            # 1. Get source hash for relationship
            source_hash = None
            if isinstance(item, dict):
                source_hash = item.get("hash")
            elif hasattr(item, "hash"):
                source_hash = item.hash

            if not source_hash:
                try:
                    source_hash = sha256_file(path_obj)
                except Exception:
                    pass

            # 2. Get tag values
            tags = extract_tag_from_result(item)

            # 3. Get title and modify it
            title = extract_title_from_result(item)
            if not title:
                title = path_obj.stem

            new_title = f"clip_{title}"

            # Update title tag if present
            new_tags = []
            has_title_tag = False
            for t in tags:
                if t.lower().startswith("title:"):
                    new_tags.append(f"title:{new_title}")
                    has_title_tag = True
                else:
                    new_tags.append(t)

            if not has_title_tag:
                new_tags.append(f"title:{new_title}")

            # 4. Calculate clip hash and update original file's relationships
            clip_hash = None
            try:
                clip_hash = sha256_file(output_path)
            except Exception:
                pass

            if source_hash and clip_hash:
                # Update original file in local DB if possible
                try:
                    from config import get_local_storage_path
                    from API.folder import API_folder_store

                    storage_path = get_local_storage_path(config)
                    if storage_path:
                        with API_folder_store(storage_path) as db:
                            # Get original file metadata
                            # We need to find the original file by hash or path
                            # Try path first
                            orig_meta = db.get_metadata(path_obj)
                            if not orig_meta and source_hash:
                                # Try by hash
                                orig_path_resolved = db.search_hash(source_hash)
                                if orig_path_resolved:
                                    orig_meta = db.get_metadata(orig_path_resolved)

                            if orig_meta:
                                # Update relationships
                                rels = orig_meta.get("relationships", {})
                                if not isinstance(rels, dict):
                                    rels = {}

                                # Add clip as "derivative" (since original is the source)
                                if "derivative" not in rels:
                                    rels["derivative"] = []

                                if clip_hash not in rels["derivative"]:
                                    rels["derivative"].append(clip_hash)

                                # Save back to DB
                                # We need to preserve other metadata
                                orig_meta["relationships"] = rels

                                # Ensure hash is set in metadata if we have it
                                if source_hash and not orig_meta.get("hash"):
                                    orig_meta["hash"] = source_hash

                                # We need the path to save
                                save_path = Path(orig_meta.get("path") or path_obj)
                                db.save_metadata(save_path, orig_meta)
                                log(f"Updated relationship for original file: {save_path.name}", file=sys.stderr)
                except Exception as e:
                    log(f"Failed to update original file relationships: {e}", file=sys.stderr)

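            # Net effect (per the block above and the result below): the original file's
            # metadata gains relationships["derivative"] += [clip_hash], while the emitted
            # clip carries relationships["king"] = [source_hash].
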
            # 5. Construct result
            result_dict = {
                "path": str(output_path),
                "title": new_title,
                "tag": new_tags,
                "media_kind": "video",  # Assumption, or derive
                "hash": clip_hash,  # Pass calculated hash
                "relationships": {
                    # The source is the KING of this clip
                    "king": [source_hash] if source_hash else []
                }
            }

            # Emit result
            ctx.emit(result_dict)

            # Delete original if requested
            if delete_original:
                try:
                    path_obj.unlink()
                    log(f"Deleted original file: {path_obj}", file=sys.stderr)
                    # Also try to delete sidecars?
                    # Maybe leave that to user or cleanup cmdlet
                except Exception as e:
                    log(f"Failed to delete original: {e}", file=sys.stderr)

        else:
            log(f"Failed to trim {path_obj.name}", file=sys.stderr)

    return 0 if success_count > 0 else 1


# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()