nose
2025-12-12 21:55:38 -08:00
parent e2ffcab030
commit 85750247cc
78 changed files with 5726 additions and 6239 deletions

93  cmdlet/__init__.py  Normal file

@@ -0,0 +1,93 @@
from __future__ import annotations
from typing import Any, Callable, Dict, Iterable, Sequence
from importlib import import_module as _import_module
# A cmdlet is a callable taking (result, args, config) -> int
Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]
# Registry of command-name -> cmdlet function
REGISTRY: Dict[str, Cmdlet] = {}
def _normalize_cmd_name(name: str) -> str:
return str(name or "").replace('_', '-').lower().strip()
def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
"""Register a callable under one or more command names.
This is the single registration mechanism used by both:
- legacy function cmdlet (decorator form)
- class-based cmdlet (Cmdlet.register())
"""
for name in names:
key = _normalize_cmd_name(name)
if key:
REGISTRY[key] = fn
return fn
def register(names: Iterable[str]):
"""Decorator to register a function under one or more command names.
Usage:
@register(["add-tags"])
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
return register_callable(names, fn)
return _wrap
def get(cmd_name: str) -> Cmdlet | None:
return REGISTRY.get(_normalize_cmd_name(cmd_name))
# Dynamically import all cmdlet modules in this directory (ignore files starting with "_" and __init__.py).
# Cmdlets self-register when instantiated via their __init__ methods.
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if not (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
):
continue
mod_name = filename[:-3]
# Enforce PowerShell-style two-word cmdlet naming (e.g., add_file, get_file)
# Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe)
if "_" not in mod_name:
continue
try:
_import_module(f".{mod_name}", __name__)
except Exception as e:
import sys
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
continue
# Import and register native commands that are not considered cmdlets
try:
from cmdnat import register_native_commands as _register_native_commands
_register_native_commands(REGISTRY)
except Exception:
# Native commands are optional; ignore if unavailable
pass
# Import root-level modules that also register cmdlets
for _root_mod in ("select_cmdlet",):
try:
_import_module(_root_mod)
except Exception:
# Allow missing optional modules
continue
# Also import helper modules that register cmdlets
try:
import API.alldebrid as _alldebrid
except Exception:
pass
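# Illustrative usage sketch (not part of the registry itself): how a caller could look up
# and dispatch a registered cmdlet by name. The cmdlet name and arguments below are
# assumptions for demonstration; dispatch itself only relies on get() and REGISTRY above.
if __name__ == "__main__":
    # List what got registered, then show how a caller would dispatch by name.
    print(f"{len(REGISTRY)} cmdlet name(s) registered: {sorted(REGISTRY)}")
    _fn = get("Add_Tag")  # normalized to "add-tag" by _normalize_cmd_name
    if _fn is not None:
        # A cmdlet is called as fn(result, args, config) and returns an int exit code.
        _exit_code = _fn(None, ["--help"], {})
        print(f"'add-tag' exited with code {_exit_code}")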

1708  cmdlet/_shared.py  Normal file
File diff suppressed because it is too large

955  cmdlet/add_file.py  Normal file

@@ -0,0 +1,955 @@
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Tuple, List, Union
from pathlib import Path
import sys
import shutil
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from Store import Store
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
extract_tag_from_result, extract_title_from_result, extract_url_from_result,
merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object
)
from ._shared import collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata
# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
class Add_File(Cmdlet):
"""Add file into the DB"""
def __init__(self) -> None:
"""Initialize add-file cmdlet."""
super().__init__(
name="add-file",
summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
arg=[
SharedArgs.PATH,
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg(name="provider", type="string", required=False, description="File hosting provider (e.g., 0x0)", alias="prov"),
CmdletArg(name="delete", type="flag", required=False, description="Delete file after successful upload", alias="del"),
],
detail=[
"- Storage location options (use -storage):",
" hydrus: Upload to Hydrus database with metadata tagging",
" local: Copy file to local directory",
" <path>: Copy file to specified directory",
"- File provider options (use -provider):",
" 0x0: Upload to 0x0.st for temporary hosting",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution entry point."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Initialize state
path_arg = parsed.get("path")
location = parsed.get("store") # Fixed: was "storage", should be "store"
provider_name = parsed.get("provider")
delete_after = parsed.get("delete", False)
# Coerce result to PipeObject; if result is a list, prefer the first element
effective_result = result
if isinstance(result, list) and result:
first_item = result[0]
# Prefer first item if it's a dict or PipeObject
if isinstance(first_item, (dict, )):
effective_result = first_item
pipe_obj = coerce_to_pipe_object(effective_result, path_arg)
# Debug: Log input result details
debug(f"[add-file] INPUT result type={type(result).__name__}")
if isinstance(result, list):
debug(f"[add-file] INPUT result is list with {len(result)} items")
if result and isinstance(result[0], dict):
first = result[0]
hash_val = first.get('hash')
hash_str = hash_val[:12] + "..." if hash_val else "N/A"
debug(f"[add-file] First item details: title={first.get('title')}, hash={hash_str}, store={first.get('store', 'N/A')}")
elif isinstance(result, dict):
hash_val = result.get('hash')
hash_str = hash_val[:12] + "..." if hash_val else "N/A"
debug(f"[add-file] INPUT result is dict: title={result.get('title')}, hash={hash_str}, store={result.get('store', 'N/A')}")
# Debug: Log parsed arguments
debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")
# Resolve source - returns (media_path_or_url, file_hash)
media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config)
debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
if not media_path_or_url:
debug(f"[add-file] ERROR: Could not resolve source file/URL")
return 1
# Update pipe_obj with resolved path
pipe_obj.path = str(media_path_or_url)
# Check if it's a URL before validating as file
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL target, delegating to download-data: {media_path_or_url}")
return self._delegate_to_download_data(result, media_path_or_url, location, provider_name, args, config)
# Convert to Path and validate
media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url
# Validate source
if not self._validate_source(media_path):
debug(f"[add-file] ERROR: Source validation failed for {media_path}")
return 1
# Debug: Log execution path decision
debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}")
debug(f" media_path={media_path}, exists={media_path.exists()}")
# Execute transfer based on destination (using Store registry)
if provider_name:
debug(f"[add-file] ROUTE: file provider upload")
return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after)
elif location:
# Check if location is a registered backend name
try:
store = Store(config)
backends = store.list_backends()
if location in backends:
debug(f"[add-file] ROUTE: storage backend '{location}'")
return self._handle_storage_backend(media_path, location, pipe_obj, config, delete_after)
else:
# Treat as local export path
debug(f"[add-file] ROUTE: local export to path '{location}'")
return self._handle_local_export(media_path, location, pipe_obj, config, delete_after)
except Exception as exc:
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
log(f"Invalid location: {location}", file=sys.stderr)
return 1
else:
debug(f"[add-file] ERROR: No location or provider specified")
log(f"No storage location or provider specified", file=sys.stderr)
return 1
@staticmethod
def _resolve_source(
result: Any,
path_arg: Optional[str],
pipe_obj: models.PipeObject,
config: Dict[str, Any],
) -> Tuple[Optional[Path | str], Optional[str]]:
"""Resolve the source file path from args or pipeline result.
PRIORITY: hash+store pattern is preferred over path-based resolution.
This ensures consistency when @N selections pass hash+store identifiers.
Returns (media_path_or_url, file_hash)
where media_path_or_url can be a Path object or a URL string.
"""
# PRIORITY 1: Try hash+store from result dict (most reliable for @N selections)
if isinstance(result, dict):
result_hash = result.get("hash")
result_store = result.get("store")
if result_hash and result_store:
debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}")
# Use get_file to retrieve from the specific store
try:
store = Store(config)
if result_store in store.list_backends():
backend = store[result_store]
media_path = backend.get_file(result_hash)
if isinstance(media_path, Path) and media_path.exists():
pipe_obj.path = str(media_path)
debug(f"[add-file] Retrieved file from {result_store}: {media_path}")
return media_path, result_hash
if isinstance(media_path, str) and media_path.lower().startswith(("http://", "https://")):
pipe_obj.path = media_path
debug(f"[add-file] Retrieved URL from {result_store}: {media_path}")
return media_path, result_hash
except Exception as exc:
debug(f"[add-file] Failed to retrieve via hash+store: {exc}")
# PRIORITY 2: Try explicit path argument
if path_arg:
media_path = Path(path_arg)
pipe_obj.path = str(media_path)
debug(f"[add-file] Using explicit path argument: {media_path}")
return media_path, None
# PRIORITY 3: Try from pipe_obj.path (check file first before URL)
pipe_path = getattr(pipe_obj, "path", None)
if pipe_path:
pipe_path_str = str(pipe_path)
debug(f"Resolved pipe_path: {pipe_path_str}")
if pipe_path_str.startswith("hydrus:"):
file_hash = pipe_path_str.split(":", 1)[1]
media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
return media_path, file_hash if success else None
# Check if pipe_path is a URL - skip to URL handling below
if not pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
media_path = Path(pipe_path_str)
return media_path, None
# PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
pipe_url = getattr(pipe_obj, "url", None)
if pipe_url and isinstance(pipe_url, str):
# Check if it's a URL
if pipe_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL in pipe_obj.url: {pipe_url}")
return pipe_url, None
# Try from hydrus hash in pipe_obj.extra or hash
hydrus_hash = None
if isinstance(pipe_obj.extra, dict):
hydrus_hash = pipe_obj.extra.get("hydrus_hash") or pipe_obj.extra.get("hash")
hydrus_hash = hydrus_hash or pipe_obj.hash
if hydrus_hash and hydrus_hash != "unknown":
media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config)
return media_path, str(hydrus_hash) if success else None
# Try from result (if it's a string path or URL)
if isinstance(result, str):
debug(f"Checking result string: {result}")
# Check if result is a URL before treating as file path
if result.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL in result string: {result}")
return result, None # Return URL string directly
media_path = Path(result)
pipe_obj.path = str(media_path)
return media_path, None
# Try from result if it's a list (pipeline emits multiple results)
if isinstance(result, list) and result:
first_item = result[0]
# If the first item is a string, it's either a URL or a file path
if isinstance(first_item, str):
debug(f"Checking result list[0]: {first_item}")
if first_item.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL in result list: {first_item}")
return first_item, None # Return URL string directly
media_path = Path(first_item)
pipe_obj.path = str(media_path)
return media_path, None
# If the first item is a dict, interpret it as a PipeObject-style result
if isinstance(first_item, dict):
# Look for path or path-like keys
path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
# If the dict includes a 'paths' list (multi-part/section download), prefer the first file
if not path_candidate and isinstance(first_item.get("paths"), (list, tuple)) and first_item.get("paths"):
path_candidate = first_item.get("paths")[0]
if path_candidate:
debug(f"Resolved path from result dict: {path_candidate}")
try:
media_path = Path(path_candidate)
pipe_obj.path = str(media_path)
return media_path, first_item.get("hash")
except Exception:
# Fallback to returning string if not a path
return str(path_candidate), first_item.get("hash")
# If first item is a PipeObject object
try:
# models.PipeObject is an actual class; check attribute presence
import models as _models
if isinstance(first_item, _models.PipeObject):
path_candidate = getattr(first_item, "path", None)
if path_candidate:
debug(f"Resolved path from PipeObject: {path_candidate}")
media_path = Path(path_candidate)
pipe_obj.path = str(media_path)
return media_path, getattr(first_item, "hash", None)
except Exception:
pass
debug(f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}")
log("File path could not be resolved")
return None, None
@staticmethod
def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]:
"""Fetch the physical path of a file from Hydrus using its hash."""
if not file_hash:
return None, False
try:
client = hydrus_wrapper.get_client(config)
if not client:
log("❌ Hydrus client not available", file=sys.stderr)
return None, False
response = client.get_file_path(file_hash)
file_path_str = response.get("path")
if not file_path_str:
log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
return None, False
media_path = Path(file_path_str)
if not media_path.exists():
log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
return None, False
log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
return media_path, True
except Exception as exc:
log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
return None, False
@staticmethod
def _validate_source(media_path: Optional[Path]) -> bool:
"""Validate that the source file exists and is supported."""
if media_path is None:
return False
target_str = str(media_path)
# If it's a URL target, we skip file existence checks
if target_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
return True
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}")
return False
# Validate file type
file_extension = media_path.suffix.lower()
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
return False
return True
@staticmethod
def _is_url_target(media_path: Optional[Path]) -> bool:
"""Check if the target is a URL that needs downloading."""
if media_path and str(media_path).lower().startswith(("http://", "https://")):
return True
return False
def _delegate_to_download_data(
self,
result: Any,
url_str: str,
location: Optional[str],
provider_name: Optional[str],
args: Sequence[str],
config: Dict[str, Any],
) -> int:
"""Delegate URL handling to download-media cmdlet."""
log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)
# Reuse the globally-registered cmdlet instance to avoid duplicative registration
from cmdlet.download_media import CMDLET as dl_cmdlet
dl_args = list(args) if args else []
# Add the URL to the argument list for download-media
dl_args.insert(0, url_str)
# If result has selection_args (like -item from @N selection), include them
if isinstance(result, dict) and "_selection_args" in result:
selection_args = result["_selection_args"]
if selection_args:
dl_args.extend(selection_args)
elif hasattr(result, 'extra') and isinstance(result.extra, dict) and "_selection_args" in result.extra:
selection_args = result.extra["_selection_args"]
if selection_args:
dl_args.extend(selection_args)
# download-media doesn't support -storage flag
# It downloads to the configured directory, then add-file will handle storage
# Note: Provider uploads (0x0) are not supported via this path
# Call download-media with the URL in args
return dl_cmdlet.run(None, dl_args, config)
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
url: List[str] = []
try:
if isinstance(pipe_obj.extra, dict):
url = list(pipe_obj.extra.get("url") or [])
except Exception:
pass
if not url and isinstance(result, dict):
url = list(result.get("url") or [])
if not url:
url = list(extract_url_from_result(result) or [])
return url
@staticmethod
def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
try:
rels = pipe_obj.get_relationships()
if rels:
return rels
except Exception:
pass
if isinstance(result, dict) and result.get("relationships"):
return result.get("relationships")
try:
return extract_relationships(result)
except Exception:
return None
@staticmethod
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
if getattr(pipe_obj, "duration", None) is not None:
return pipe_obj.duration
try:
return extract_duration(result)
except Exception:
return None
@staticmethod
def _update_pipe_object_destination(
pipe_obj: models.PipeObject,
*,
hash_value: str,
store: str,
path: Optional[str],
tag: List[str],
title: Optional[str],
extra_updates: Optional[Dict[str, Any]] = None,
) -> None:
pipe_obj.hash = hash_value
pipe_obj.store = store
pipe_obj.path = path
pipe_obj.tag = tag
if title:
pipe_obj.title = title
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra.update(extra_updates or {})
else:
pipe_obj.extra = dict(extra_updates or {})
@staticmethod
def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
from result_table import format_result
log(format_result(pipe_obj, title="Result"), file=sys.stderr)
ctx.emit(pipe_obj.to_dict())
ctx.set_current_stage_table(None)
@staticmethod
def _prepare_metadata(
result: Any,
media_path: Path,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
) -> Tuple[List[str], List[str], Optional[str], Optional[str]]:
"""
Prepare tags, url, and title for the file.
Returns (tags, url, preferred_title, file_hash)
"""
tags_from_result = list(pipe_obj.tag or [])
if not tags_from_result:
try:
tags_from_result = list(extract_tag_from_result(result) or [])
except Exception:
tags_from_result = []
url_from_result = Add_File._get_url(result, pipe_obj)
preferred_title = pipe_obj.title
if not preferred_title:
for t in tags_from_result:
if str(t).strip().lower().startswith("title:"):
candidate = t.split(":", 1)[1].strip().replace("_", " ").strip()
if candidate:
preferred_title = candidate
break
if not preferred_title:
preferred_title = extract_title_from_result(result)
if preferred_title:
preferred_title = preferred_title.replace("_", " ").strip()
store = getattr(pipe_obj, "store", None)
_, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
media_path, store, config
)
def normalize_title_tag(tag: str) -> str:
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
tags_from_result_no_title = [t for t in tags_from_result if not str(t).strip().lower().startswith("title:")]
sidecar_tags = collapse_namespace_tag([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last")
sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
merged_tags = merge_sequences(tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True)
if preferred_title:
merged_tags.append(f"title:{preferred_title}")
merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)
# Persist back to PipeObject
pipe_obj.tag = merged_tags
if preferred_title and not pipe_obj.title:
pipe_obj.title = preferred_title
if file_hash and not pipe_obj.hash:
pipe_obj.hash = file_hash
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra.setdefault("url", merged_url)
return merged_tags, merged_url, preferred_title, file_hash
@staticmethod
def _handle_local_export(
media_path: Path,
location: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
) -> int:
"""Handle exporting to a specific local path (Copy)."""
try:
destination_root = Path(location)
except Exception as exc:
log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
return 1
log(f"Exporting to local path: {destination_root}", file=sys.stderr)
result = None
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
# Determine Filename (Title-based)
title_value = title
if not title_value:
# Try to find title in tags
title_tag = next((t for t in tags if str(t).strip().lower().startswith("title:")), None)
if title_tag:
title_value = title_tag.split(":", 1)[1].strip()
if not title_value:
title_value = media_path.stem.replace("_", " ").strip()
safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
base_name = safe_title or media_path.stem
new_name = base_name + media_path.suffix
destination_root.mkdir(parents=True, exist_ok=True)
target_path = destination_root / new_name
if target_path.exists():
target_path = unique_path(target_path)
# COPY Operation (Safe Export)
try:
shutil.copy2(str(media_path), target_path)
except Exception as exc:
log(f"❌ Failed to export file: {exc}", file=sys.stderr)
return 1
# Copy Sidecars
Add_File._copy_sidecars(media_path, target_path)
# Ensure hash for exported copy
if not f_hash:
try:
f_hash = sha256_file(target_path)
except Exception:
f_hash = None
# Write Metadata Sidecars (since it's an export)
relationships = Add_File._get_relationships(result, pipe_obj)
try:
write_sidecar(target_path, tags, url, f_hash)
write_metadata(target_path, hash_value=f_hash, url=url, relationships=relationships or [])
except Exception:
pass
# Update PipeObject and emit
extra_updates = {
"url": url,
"export_path": str(destination_root),
}
if relationships:
extra_updates["relationships"] = relationships
chosen_title = title or title_value or pipe_obj.title or target_path.name
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=f_hash or "unknown",
store="local",
path=str(target_path),
tag=tags,
title=chosen_title,
extra_updates=extra_updates,
)
Add_File._emit_pipe_object(pipe_obj)
# Cleanup
# Only delete if explicitly requested!
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
return 0
@staticmethod
def _download_soulseek_file(
result: Any,
config: Dict[str, Any]
) -> Optional[Path]:
"""
Download a file from Soulseek peer.
Extracts username and filename from soulseek result metadata and initiates download.
"""
try:
import asyncio
from ProviderCore.registry import download_soulseek_file
from pathlib import Path
# Extract metadata from result
full_metadata = {}
if isinstance(result, dict):
full_metadata = result.get("full_metadata", {})
elif hasattr(result, "extra") and isinstance(result.extra, dict) and "full_metadata" in result.extra:
full_metadata = result.extra.get("full_metadata", {})
elif hasattr(result, "full_metadata"):
# Direct attribute access (fallback)
val = getattr(result, "full_metadata", {})
if isinstance(val, dict):
full_metadata = val
username = full_metadata.get("username")
filename = full_metadata.get("filename")
if not username or not filename:
    debug(f"[add-file] ERROR: Missing soulseek metadata (username={username}, filename={filename}) in result (type={type(result).__name__})")
    if hasattr(result, "extra") and isinstance(result.extra, dict):
        debug(f"[add-file] Result extra keys: {list(result.extra.keys())}")
    return None
debug(f"[add-file] Starting soulseek download: {username} -> {filename}")
# Determine output directory (prefer downloads folder in config)
output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads")
output_dir.mkdir(parents=True, exist_ok=True)
# Run async download in event loop
try:
loop = asyncio.get_event_loop()
if loop.is_closed():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
downloaded_path = loop.run_until_complete(
download_soulseek_file(
username=username,
filename=filename,
output_dir=output_dir,
timeout=1200 # 20 minutes
)
)
return downloaded_path
except Exception as e:
log(f"[add-file] Soulseek download error: {type(e).__name__}: {e}", file=sys.stderr)
debug(f"[add-file] Soulseek download traceback: {e}")
return None
@staticmethod
def _handle_provider_upload(
media_path: Path,
provider_name: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
) -> int:
"""Handle uploading to a file provider (e.g. 0x0)."""
from ProviderCore.registry import get_file_provider
log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)
try:
file_provider = get_file_provider(provider_name, config)
if not file_provider:
log(f"File provider '{provider_name}' not available", file=sys.stderr)
return 1
hoster_url = file_provider.upload(str(media_path))
log(f"File uploaded: {hoster_url}", file=sys.stderr)
# Associate URL with Hydrus if possible
f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
if f_hash:
try:
client = hydrus_wrapper.get_client(config)
if client:
client.associate_url(f_hash, hoster_url)
except Exception:
pass
except Exception as exc:
log(f"Upload failed: {exc}", file=sys.stderr)
return 1
# Update PipeObject and emit
extra_updates: Dict[str, Any] = {
"provider": provider_name,
"provider_url": hoster_url,
}
if isinstance(pipe_obj.extra, dict):
# Also track hoster URL as a url for downstream steps
existing_known = list(pipe_obj.extra.get("url") or [])
if hoster_url and hoster_url not in existing_known:
existing_known.append(hoster_url)
extra_updates["url"] = existing_known
file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=f_hash or "unknown",
store=provider_name or "provider",
path=file_path,
tag=pipe_obj.tag,
title=pipe_obj.title or (media_path.name if media_path else None),
extra_updates=extra_updates,
)
Add_File._emit_pipe_object(pipe_obj)
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
return 0
@staticmethod
def _handle_storage_backend(
media_path: Path,
backend_name: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
) -> int:
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
try:
store = Store(config)
backend = store[backend_name]
# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config)
# Call backend's add_file with full metadata
# Backend returns hash as identifier
file_identifier = backend.add_file(
media_path,
title=title,
tags=tags,
url=url
)
log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
stored_path: Optional[str] = None
try:
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
# Some backends may return a browser URL
stored_path = maybe_path
except Exception:
stored_path = None
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown",
store=backend_name,
path=stored_path,
tag=tags,
title=title or pipe_obj.title or media_path.name,
extra_updates={
"url": url,
},
)
Add_File._emit_pipe_object(pipe_obj)
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
return 0
except Exception as exc:
log(f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
# --- Helpers ---
@staticmethod
def _load_sidecar_bundle(
media_path: Path,
store: Optional[str],
config: Dict[str, Any],
) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]:
"""Load sidecar metadata."""
if store and store.lower() == "local":
try:
from config import get_local_storage_path
db_root = get_local_storage_path(config)
if db_root:
with API_folder_store(Path(db_root)) as db:
file_hash = db.get_file_hash(media_path)
if file_hash:
tags = db.get_tags(file_hash) or []
metadata = db.get_metadata(file_hash) or {}
url = metadata.get("url") or []
f_hash = metadata.get("hash") or file_hash
if tags or url or f_hash:
return None, f_hash, tags, url
except Exception:
pass
try:
sidecar_path = find_sidecar(media_path)
if sidecar_path and sidecar_path.exists():
h, t, u = read_sidecar(sidecar_path)
return sidecar_path, h, t or [], u or []
except Exception:
pass
return None, None, [], []
@staticmethod
def _resolve_file_hash(
result: Any,
media_path: Path,
pipe_obj: models.PipeObject,
fallback_hash: Optional[str],
) -> Optional[str]:
if pipe_obj.hash and pipe_obj.hash != "unknown":
return pipe_obj.hash
if fallback_hash:
return fallback_hash
if isinstance(result, dict):
candidate = result.get('hash')
if candidate:
return str(candidate)
try:
return sha256_file(media_path)
except Exception:
return None
@staticmethod
def _resolve_media_kind(path: Path) -> str:
# Reusing logic
suffix = path.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
return 'audio'
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
return 'video'
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
return 'image'
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
return 'document'
return 'other'
@staticmethod
def _persist_local_metadata(
library_root: Path,
dest_path: Path,
tags: List[str],
url: List[str],
f_hash: Optional[str],
relationships: Any,
duration: Any,
media_kind: str,
):
payload = {
'hash': f_hash,
'url': url,
'relationships': relationships or [],
'duration': duration,
'size': None,
'ext': dest_path.suffix.lower(),
'media_type': media_kind,
'media_kind': media_kind,
}
try:
payload['size'] = dest_path.stat().st_size
except OSError:
payload['size'] = None
with API_folder_store(library_root) as db:
try:
db.save_file_info(dest_path, payload, tags)
except Exception as exc:
log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr)
@staticmethod
def _copy_sidecars(source_path: Path, target_path: Path):
possible_sidecars = [
source_path.with_suffix(source_path.suffix + ".json"),
source_path.with_name(source_path.name + ".tag"),
source_path.with_name(source_path.name + ".metadata"),
source_path.with_name(source_path.name + ".notes"),
]
for sc in possible_sidecars:
try:
if sc.exists():
suffix_part = sc.name.replace(source_path.name, "", 1)
dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(sc), dest_sidecar)
except Exception:
pass
@staticmethod
def _cleanup_after_success(media_path: Path, delete_source: bool):
# Temp merge artifacts should always be deleted; otherwise honor the delete flag
is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name
if delete_source or is_temp_merge:
log(f"Deleting source file...", file=sys.stderr)
try:
media_path.unlink()
Add_File._cleanup_sidecar_files(media_path)
except Exception as exc:
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
@staticmethod
def _cleanup_sidecar_files(media_path: Path):
targets = [
media_path.parent / (media_path.name + '.metadata'),
media_path.parent / (media_path.name + '.notes'),
media_path.parent / (media_path.name + '.tag'),
]
for target in targets:
try:
if target.exists():
target.unlink()
except Exception:
pass
# Create and register the cmdlet
CMDLET = Add_File()
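# Illustrative CLI invocations (hedged examples; the store/provider names and paths are
# placeholders and must exist in your configuration):
#   add-file -path ./song.mp3 -store hydrus
#   add-file -path ./clip.mp4 -store local -delete
#   add-file -path ./image.png -provider 0x0
#   @1 | add-file -store hydrus          # pipe a prior @N selection instead of -path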

148  cmdlet/add_note.py  Normal file

@@ -0,0 +1,148 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from Store import Store
from SYS.utils import sha256_file
class Add_Note(Cmdlet):
def __init__(self) -> None:
super().__init__(
name="add-note",
summary="Add or set a named note on a file in a store.",
usage="add-note -store <store> [-hash <sha256>] <name> <text...>",
alias=["set-note", "add_note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
],
detail=[
"- Notes are stored via the selected store backend.",
"- For lyrics: store LRC text in a note named 'lyric'.",
],
exec=self.run,
)
# Populate dynamic store choices for autocomplete
try:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
except Exception:
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
if raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
return stem.lower()
if p.exists() and p.is_file():
return sha256_file(p)
except Exception:
return None
return None
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
note_name = str(parsed.get("name") or "").strip()
text_parts = parsed.get("text")
if not note_name:
log("[add_note] Error: Requires <name>", file=sys.stderr)
return 1
if isinstance(text_parts, list):
note_text = " ".join([str(p) for p in text_parts]).strip()
else:
note_text = str(text_parts or "").strip()
if not note_text:
log("[add_note] Error: Empty note text", file=sys.stderr)
return 1
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
else:
log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
return 1
store_registry = Store(config)
updated = 0
for res in results:
if not isinstance(res, dict):
ctx.emit(res)
continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
if not store_name:
log("[add_note] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
)
if not resolved_hash:
log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)
ctx.emit(res)
continue
try:
backend = store_registry[store_name]
except Exception as exc:
log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
ok = False
try:
ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config))
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
ok = False
if ok:
updated += 1
ctx.emit(res)
log(f"[add_note] Updated {updated} item(s)", file=sys.stderr)
return 0 if updated > 0 else 1
CMDLET = Add_Note()
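# Illustrative CLI invocations (hedged examples; store name, hash, and note text are placeholders):
#   add-note -store local -hash <sha256> comment "great mix"
#   @1 | add-note lyric "[00:12.00] first LRC line"   # store LRC text under the 'lyric' note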

492  cmdlet/add_relationship.py  Normal file

@@ -0,0 +1,492 @@
"""Add file relationships in Hydrus based on relationship tags in sidecar."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence
import json
import re
from pathlib import Path
import sys
from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, should_show_help, get_field
from API.folder import read_sidecar, find_sidecar
CMDLET = Cmdlet(
name="add-relationship",
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
],
detail=[
"- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
"- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4",
"- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>...')",
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
"- When using -king, all piped items become the specified relationship type to the king",
],
)
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
"""Normalize a hash hex string to lowercase 64-char format."""
if not value or not isinstance(value, str):
return None
normalized = value.strip().lower()
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
return normalized
return None
def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
"""Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.
Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
"""
result: Dict[str, list[str]] = {}
if not isinstance(tag_value, str):
return result
# Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
matches = re.findall(pattern, tag_value)
for rel_type, hash_value in matches:
normalized = _normalise_hash_hex(hash_value)
if normalized:
if rel_type not in result:
result[rel_type] = []
result[rel_type].append(normalized)
return result
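# Comment-only sketch of the expected parse (AAAA/BBBB stand in for full 64-char hex
# hashes written directly after the closing parenthesis, per the regex above):
#   _extract_relationships_from_tag("relationship: hash(king)AAAA,hash(alt)BBBB")
#   -> {"king": ["aaaa"], "alt": ["bbbb"]}   # hashes are normalized to lowercase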
def _resolve_king_reference(king_arg: str) -> Optional[str]:
"""Resolve a king reference like '@4' to its actual hash or path.
Supports:
- Direct hash: '0123456789abcdef...' (64 chars)
- Selection reference: '@4' (resolves from pipeline context)
Returns:
- For Hydrus items: normalized hash
- For local storage items: file path
- None if not found
"""
if not king_arg:
return None
# Check if it's already a valid hash
normalized = _normalise_hash_hex(king_arg)
if normalized:
return normalized
# Try to resolve as @N selection from pipeline context
if king_arg.startswith('@'):
try:
# Get the result items from the pipeline context
from pipeline import get_last_result_items
items = get_last_result_items()
if not items:
log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr)
return None
# Parse @N to get the index (1-based)
index_str = king_arg[1:] # Remove '@'
index = int(index_str) - 1 # Convert to 0-based
if 0 <= index < len(items):
item = items[index]
# Try to extract hash from the item (could be dict or object)
item_hash = (
get_field(item, 'hash_hex')
or get_field(item, 'hash')
or get_field(item, 'file_hash')
)
if item_hash:
normalized = _normalise_hash_hex(item_hash)
if normalized:
return normalized
# If no hash, try to get file path (for local storage)
file_path = (
get_field(item, 'file_path')
or get_field(item, 'path')
or get_field(item, 'target')
)
if file_path:
return str(file_path)
log(f"Item {king_arg} has no hash or path information", file=sys.stderr)
return None
else:
log(f"Index {king_arg} out of range", file=sys.stderr)
return None
except (ValueError, IndexError) as e:
log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr)
return None
return None
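# Comment-only sketch of king resolution (values are placeholders):
#   _resolve_king_reference("<64-char hex>")  -> that hash, lowercased
#   _resolve_king_reference("@4")             -> hash (or local file path) of the 4th
#                                                item in the last result set, if any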
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
get_relationship = None
try:
get_relationship = get_cmdlet("get-relationship")
except Exception:
get_relationship = None
if not callable(get_relationship):
return
try:
subject = ctx.get_last_result_subject()
if subject is None:
return
def norm(val: Any) -> str:
return str(val).lower()
target_hashes = [norm(v) for v in [target_hash, other] if v]
target_paths = [norm(v) for v in [target_path, other] if v]
subj_hashes: list[str] = []
subj_paths: list[str] = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v]
else:
subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)]
is_match = False
if target_hashes and any(h in subj_hashes for h in target_hashes):
is_match = True
if target_paths and any(p in subj_paths for p in target_paths):
is_match = True
if not is_match:
return
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
get_relationship(subject, refresh_args, config)
except Exception:
pass
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
Two modes of operation:
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
Returns 0 on success, non-zero on failure.
"""
# Help
if should_show_help(_args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
king_arg = parsed.get("king")
rel_type = parsed.get("type", "alt")
raw_path = parsed.get("path")
if raw_path:
try:
arg_path = Path(str(raw_path)).expanduser()
except Exception:
arg_path = Path(str(raw_path))
# Handle @N selection which creates a list
# Use normalize_result_input to handle both single items and lists
items_to_process = normalize_result_input(result)
if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
return 1
# If no items from pipeline, just process the -path arg
if not items_to_process and arg_path:
items_to_process = [{"file_path": arg_path}]
# Import local storage utilities
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
local_storage_path = get_local_storage_path(config) if config else None
# Check if any items have Hydrus hashes (file_hash or hash_hex fields)
has_hydrus_hashes = any(
(isinstance(item, dict) and (item.get('hash_hex') or item.get('hash')))
or (hasattr(item, 'hash_hex') or hasattr(item, 'hash'))
for item in items_to_process
)
# Only try to initialize Hydrus if we actually have Hydrus hashes to work with
hydrus_client = None
if has_hydrus_hashes:
try:
hydrus_client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr)
# Use local storage if it's available and either Hydrus is not available or items are local files
use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists()))
# Resolve the king reference once (if provided)
king_hash = None
if king_arg:
# Resolve the king reference (could be @4 or a direct hash)
king_hash = _resolve_king_reference(king_arg)
if not king_hash:
log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr)
return 1
log(f"Using king hash: {king_hash}", file=sys.stderr)
# Process each item in the list
for item_idx, item in enumerate(items_to_process):
# Extract hash and path from current item
file_hash = None
file_path_from_result = None
if isinstance(item, dict):
file_hash = item.get("hash_hex") or item.get("hash")
file_path_from_result = item.get("file_path") or item.get("path") or item.get("target")
else:
file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)
# PIPELINE MODE with Hydrus: Track relationships using hash
if file_hash and hydrus_client:
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# If explicit -king provided, use it
if king_hash:
try:
hydrus_client.set_relationship(file_hash, king_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
_refresh_relationship_view_if_current(file_hash, file_path_from_result, king_hash, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
else:
# Original behavior: no explicit king, first becomes king, rest become alts
try:
existing_king = ctx.load_value("relationship_king")
except Exception:
existing_king = None
# If this is the first item, make it the king
if not existing_king:
try:
ctx.store_value("relationship_king", file_hash)
log(f"Established king hash: {file_hash}", file=sys.stderr)
continue # Move to next item
except Exception:
pass
# If we already have a king and this is a different hash, link them
if existing_king and existing_king != file_hash:
try:
hydrus_client.set_relationship(file_hash, existing_king, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
)
_refresh_relationship_view_if_current(file_hash, file_path_from_result, existing_king, config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
# LOCAL STORAGE MODE: Handle relationships for local files
elif use_local_storage and file_path_from_result:
try:
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
if king_hash:
# king_hash is a file path from _resolve_king_reference (or a Hydrus hash)
king_file_path = Path(str(king_hash)) if king_hash else None
if king_file_path and king_file_path.exists():
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, king_file_path, rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_file_path), config)
else:
log(f"King file not found or invalid: {king_hash}", file=sys.stderr)
return 1
else:
# Original behavior: first becomes king, rest become alts
try:
king_path = ctx.load_value("relationship_king_path")
except Exception:
king_path = None
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
continue # Move to next item
except Exception:
pass
if king_path and king_path != str(file_path_obj):
try:
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, Path(king_path), rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
_refresh_relationship_view_if_current(None, str(file_path_obj), str(king_path), config)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
return 0
# FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
# Resolve media path from -path arg or result target
target = getattr(result, "target", None) or getattr(result, "path", None)
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
if media_path is None:
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
return 1
# Validate local file
if str(media_path).lower().startswith(("http://", "https://")):
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
return 1
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}", file=sys.stderr)
return 1
# Build Hydrus client
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
# Read sidecar to find relationship tags
sidecar_path = find_sidecar(media_path)
if sidecar_path is None:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
return 1
try:
_, tags, _ = read_sidecar(sidecar_path)
except Exception as exc:
log(f"Failed to read sidecar: {exc}", file=sys.stderr)
return 1
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
if not relationship_tags:
log(f"No relationship tags found in sidecar", file=sys.stderr)
return 0 # Not an error, just nothing to do
# Get the file hash from result (should have been set by add-file)
file_hash = getattr(result, "hash_hex", None)
if not file_hash:
log("File hash not available (run add-file first)", file=sys.stderr)
return 1
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Parse relationships from tags and apply them
success_count = 0
error_count = 0
for rel_tag in relationship_tags:
try:
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
# Parse relationships
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
# Set the relationships in Hydrus
for rel_type, related_hashes in rels.items():
if not related_hashes:
continue
for related_hash in related_hashes:
# Don't set relationship between hash and itself
if file_hash == related_hash:
continue
try:
client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",
file=sys.stderr
)
success_count += 1
except Exception as exc:
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
error_count += 1
except Exception as exc:
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
error_count += 1
if success_count > 0:
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
return 0
elif error_count == 0:
log(f"No relationships to set", file=sys.stderr)
return 0 # Success with nothing to do
else:
log(f"Failed with {error_count} error(s)", file=sys.stderr)
return 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.alias = ["add-rel"]
CMDLET.register()
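# Illustrative pipeline usage (selection indices are examples; see the modes listed in
# the cmdlet detail above):
#   @1,@2,@3 | add-relationship              # first item becomes king, the rest become alts
#   @1-3 | add-relationship -king @4         # all piped items relate to item @4 as 'alt'
#   add-relationship -path ./file.mp4        # read relationships from the file's sidecar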

488  cmdlet/add_tag.py  Normal file

@@ -0,0 +1,488 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
from pathlib import Path
import sys
from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tag,
should_show_help,
get_field,
)
from Store import Store
from SYS.utils import sha256_file
def _extract_title_tag(tags: List[str]) -> Optional[str]:
"""Return the value of the first title: tag if present."""
for t in tags:
if t.lower().startswith("title:"):
value = t.split(":", 1)[1].strip()
return value or None
return None
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
return
if isinstance(res, models.PipeObject):
res.title = title_value
# Update columns if present (Title column assumed index 0)
columns = getattr(res, "columns", None)
if isinstance(columns, list) and columns:
label, *_ = columns[0]
if str(label).lower() == "title":
columns[0] = (label, title_value)
elif isinstance(res, dict):
res["title"] = title_value
cols = res.get("columns")
if isinstance(cols, list):
updated = []
changed = False
for col in cols:
if isinstance(col, tuple) and len(col) == 2:
label, _val = col
if str(label).lower() == "title":
updated.append((label, title_value))
changed = True
else:
updated.append(col)
else:
updated.append(col)
if changed:
res["columns"] = updated
def _matches_target(
item: Any,
target_hash: Optional[str],
target_path: Optional[str],
target_store: Optional[str] = None,
) -> bool:
"""Determine whether a result item refers to the given target.
Important: hashes can collide across backends in this app's UX (same media in
multiple stores). When target_store is provided, it must match too.
"""
def norm(val: Any) -> Optional[str]:
return str(val).lower() if val is not None else None
target_hash_l = target_hash.lower() if target_hash else None
target_path_l = target_path.lower() if target_path else None
target_store_l = target_store.lower() if target_store else None
if isinstance(item, dict):
hashes = [norm(item.get("hash"))]
paths = [norm(item.get("path"))]
stores = [norm(item.get("store"))]
else:
hashes = [norm(get_field(item, "hash"))]
paths = [norm(get_field(item, "path"))]
stores = [norm(get_field(item, "store"))]
if target_store_l:
if target_store_l not in stores:
return False
if target_hash_l and target_hash_l in hashes:
return True
if target_path_l and target_path_l in paths:
return True
return False
def _update_item_title_fields(item: Any, new_title: str) -> None:
"""Mutate an item to reflect a new title in plain fields and columns."""
if isinstance(item, models.PipeObject):
item.title = new_title
columns = getattr(item, "columns", None)
if isinstance(columns, list) and columns:
label, *_ = columns[0]
if str(label).lower() == "title":
columns[0] = (label, new_title)
elif isinstance(item, dict):
item["title"] = new_title
cols = item.get("columns")
if isinstance(cols, list):
updated_cols = []
changed = False
for col in cols:
if isinstance(col, tuple) and len(col) == 2:
label, _val = col
if str(label).lower() == "title":
updated_cols.append((label, new_title))
changed = True
else:
updated_cols.append(col)
else:
updated_cols.append(col)
if changed:
item["columns"] = updated_cols
def _refresh_result_table_title(
new_title: str,
target_hash: Optional[str],
target_store: Optional[str],
target_path: Optional[str],
) -> None:
"""Refresh the cached result table with an updated title and redisplay it."""
try:
last_table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if not last_table or not items:
return
updated_items = []
match_found = False
for item in items:
try:
if _matches_target(item, target_hash, target_path, target_store):
_update_item_title_fields(item, new_title)
match_found = True
except Exception:
pass
updated_items.append(item)
if not match_found:
return
new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
for item in updated_items:
new_table.add_result(item)
# Keep the underlying history intact; update only the overlay so @.. can
# clear the overlay then continue back to prior tables (e.g., the search list).
ctx.set_last_result_table_overlay(new_table, updated_items)
except Exception:
pass
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
if not target_hash or not store_name:
return
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
get_tag = None
try:
get_tag = get_cmdlet("get-tag")
except Exception:
get_tag = None
if not callable(get_tag):
return
try:
subject = ctx.get_last_result_subject()
if subject and _matches_target(subject, target_hash, target_path, store_name):
get_tag(subject, refresh_args, config)
return
except Exception:
pass
try:
get_tag(res, refresh_args, config)
except Exception:
pass
class Add_Tag(Cmdlet):
"""Class-based add-tag cmdlet with Cmdlet metadata inheritance."""
def __init__(self) -> None:
super().__init__(
name="add-tag",
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
],
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
"- Requires a store backend: use -store or pipe items that include store.",
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tag can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add tag to a file with smart filtering for pipeline results."""
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Check for --all flag
include_temp = parsed.get("all", False)
# Normalize input to list
results = normalize_result_input(result)
# Filter by temp status (unless --all is set)
if not include_temp:
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
return 1
# Get tag from arguments (or fallback to pipeline payload)
raw_tag = parsed.get("tag", [])
if isinstance(raw_tag, str):
raw_tag = [raw_tag]
# Fallback: if no tag provided explicitly, try to pull from first result payload
if not raw_tag and results:
first = results[0]
payload_tag = None
# Try multiple tag lookup strategies in order
tag_lookups = [
lambda x: getattr(x, "tag", None),
lambda x: x.get("tag") if isinstance(x, dict) else None,
]
for lookup in tag_lookups:
try:
payload_tag = lookup(first)
if payload_tag:
break
except (AttributeError, TypeError, KeyError):
continue
if payload_tag:
if isinstance(payload_tag, str):
raw_tag = [payload_tag]
elif isinstance(payload_tag, list):
raw_tag = payload_tag
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
l = l.strip()
if l:
raw_tag.append(f"{{{l}}}")
# Parse and expand tag
tag_to_add = parse_tag_arguments(raw_tag)
tag_to_add = expand_tag_groups(tag_to_add)
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
extracted_hash = None
filtered_tag: List[str] = []
for tag in tag_to_add:
if isinstance(tag, str) and tag.lower().startswith("hash:"):
_, _, hash_val = tag.partition(":")
if hash_val:
extracted_hash = normalize_hash(hash_val.strip())
continue
filtered_tag.append(tag)
tag_to_add = filtered_tag
if not tag_to_add:
log("No tag provided to add", file=sys.stderr)
return 1
# Get other flags (hash override can come from -hash or hash: token)
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
duplicate_arg = parsed.get("duplicate")
# Tags ARE provided - apply them to each store-backed result
total_added = 0
total_modified = 0
store_override = parsed.get("store")
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
raw_path: Optional[str]
if isinstance(res, models.PipeObject):
store_name = store_override or res.store
raw_hash = res.hash
raw_path = res.path
elif isinstance(res, dict):
store_name = store_override or res.get("store")
raw_hash = res.get("hash")
raw_path = res.get("path")
else:
ctx.emit(res)
continue
if not store_name:
log("[add_tag] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
resolved_hash = stem.lower()
elif p.exists() and p.is_file():
resolved_hash = sha256_file(p)
except Exception:
resolved_hash = None
if not resolved_hash:
log("[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
ctx.emit(res)
continue
try:
backend = Store(config)[str(store_name)]
except Exception as exc:
log(f"[add_tag] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
try:
existing_tag, _src = backend.get_tag(resolved_hash, config=config)
except Exception:
existing_tag = []
existing_tag_list = [t for t in (existing_tag or []) if isinstance(t, str)]
existing_lower = {t.lower() for t in existing_tag_list}
original_title = _extract_title_tag(existing_tag_list)
# Per-item tag list (do not mutate shared list)
item_tag_to_add = list(tag_to_add)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
# Handle -duplicate logic (copy existing tag to new namespaces)
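# Example (illustrative): -duplicate title:album,artist parses to
#   source_ns="title", targets=["album", "artist"]
# The inferred form -duplicate title,album,artist yields the same split; existing
# title:<value> tags are then copied to album:<value> and artist:<value> below.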
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tag_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
# Namespace replacement: delete old namespace:* when adding namespace:value
removed_namespace_tag: list[str] = []
for new_tag in item_tag_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
actual_tag_to_add = [t for t in item_tag_to_add if isinstance(t, str) and t.lower() not in existing_lower]
changed = False
if removed_namespace_tag:
try:
ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
if ok_del:
changed = True
except Exception as exc:
log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr)
if actual_tag_to_add:
try:
ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
if ok_add:
changed = True
else:
log("[add_tag] Warning: Store rejected tag update", file=sys.stderr)
except Exception as exc:
log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr)
if changed:
total_added += len(actual_tag_to_add)
total_modified += 1
try:
refreshed_tag, _src2 = backend.get_tag(resolved_hash, config=config)
refreshed_list = [t for t in (refreshed_tag or []) if isinstance(t, str)]
except Exception:
refreshed_list = existing_tag_list
# Update the result's tag using canonical field
if isinstance(res, models.PipeObject):
res.tag = refreshed_list
elif isinstance(res, dict):
res["tag"] = refreshed_list
final_title = _extract_title_tag(refreshed_list)
_apply_title_to_result(res, final_title)
if final_title and (not original_title or final_title.lower() != original_title.lower()):
_refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path)
if changed:
_refresh_tag_view(res, resolved_hash, str(store_name), raw_path, config)
ctx.emit(res)
log(
f"[add_tag] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
file=sys.stderr,
)
return 0
CMDLET = Add_Tag()
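# Illustrative invocations (assumed CLI syntax, based on the usage/detail strings above):
#   @1 | add-tag -store local "title:Better Name" {philosophy}
#   @1 | add-tag -store local -duplicate title:album,artist
#   add-tag -store local hash:<sha256> "creator:someone"
# The hash: token overrides -hash and is stripped from the tag list before tagging.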

456
cmdlet/add_tags.py Normal file
View File

@@ -0,0 +1,456 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
from pathlib import Path
import sys
from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tags,
should_show_help,
get_field,
)
from Store import Store
from SYS.utils import sha256_file
def _extract_title_tag(tags: List[str]) -> Optional[str]:
"""Return the value of the first title: tag if present."""
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith("title:"):
value = tag.split(":", 1)[1].strip()
if value:
return value
return None
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
return
if isinstance(res, models.PipeObject):
res.title = title_value
# Update columns if present (Title column assumed index 0)
if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns:
label, *_ = res.columns[0]
if str(label).lower() == "title":
res.columns[0] = (res.columns[0][0], title_value)
elif isinstance(res, dict):
res["title"] = title_value
cols = res.get("columns")
if isinstance(cols, list):
updated = []
changed = False
for col in cols:
if isinstance(col, tuple) and len(col) == 2:
label, val = col
if str(label).lower() == "title":
updated.append((label, title_value))
changed = True
else:
updated.append(col)
else:
updated.append(col)
if changed:
res["columns"] = updated
def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool:
"""Determine whether a result item refers to the given hash/path target (canonical fields only)."""
def norm(val: Any) -> Optional[str]:
return str(val).lower() if val is not None else None
target_hash_l = target_hash.lower() if target_hash else None
target_path_l = target_path.lower() if target_path else None
if isinstance(item, dict):
hashes = [norm(item.get("hash"))]
paths = [norm(item.get("path"))]
else:
hashes = [norm(get_field(item, "hash"))]
paths = [norm(get_field(item, "path"))]
if target_hash_l and target_hash_l in hashes:
return True
if target_path_l and target_path_l in paths:
return True
return False
def _update_item_title_fields(item: Any, new_title: str) -> None:
"""Mutate an item to reflect a new title in plain fields and columns."""
if isinstance(item, models.PipeObject):
item.title = new_title
if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns:
label, *_ = item.columns[0]
if str(label).lower() == "title":
item.columns[0] = (label, new_title)
elif isinstance(item, dict):
item["title"] = new_title
cols = item.get("columns")
if isinstance(cols, list):
updated_cols = []
changed = False
for col in cols:
if isinstance(col, tuple) and len(col) == 2:
label, val = col
if str(label).lower() == "title":
updated_cols.append((label, new_title))
changed = True
else:
updated_cols.append(col)
else:
updated_cols.append(col)
if changed:
item["columns"] = updated_cols
def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None:
"""Refresh the cached result table with an updated title and redisplay it."""
try:
last_table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if not last_table or not items:
return
updated_items = []
match_found = False
for item in items:
try:
if _matches_target(item, target_hash, target_path):
_update_item_title_fields(item, new_title)
match_found = True
except Exception:
pass
updated_items.append(item)
if not match_found:
return
from result_table import ResultTable # Local import to avoid circular dependency
new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
for item in updated_items:
new_table.add_result(item)
# Keep the underlying history intact; update only the overlay so @.. can
# clear the overlay then continue back to prior tables (e.g., the search list).
ctx.set_last_result_table_overlay(new_table, updated_items)
except Exception:
pass
def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlet import get_tag as get_tag_cmd # type: ignore
except Exception:
return
if not target_hash or not store_name:
return
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
try:
subject = ctx.get_last_result_subject()
if subject and _matches_target(subject, target_hash, target_path):
get_tag_cmd._run(subject, refresh_args, config)
return
except Exception:
pass
try:
get_tag_cmd._run(res, refresh_args, config)
except Exception:
pass
class Add_Tag(Cmdlet):
"""Class-based add-tags cmdlet with Cmdlet metadata inheritance."""
def __init__(self) -> None:
super().__init__(
name="add-tags",
summary="Add tags to a file in a store.",
usage="add-tags -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True),
],
detail=[
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
"- Requires a store backend: use -store or pipe items that include store.",
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tags can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"",
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add tags to a file with smart filtering for pipeline results."""
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Check for --all flag
include_temp = parsed.get("all", False)
# Normalize input to list
results = normalize_result_input(result)
# Filter by temp status (unless --all is set)
if not include_temp:
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
return 1
# Get tags from arguments (or fallback to pipeline payload)
raw_tags = parsed.get("tags", [])
if isinstance(raw_tags, str):
raw_tags = [raw_tags]
# Fallback: if no tags provided explicitly, try to pull from first result payload
if not raw_tags and results:
first = results[0]
payload_tags = None
# Try multiple tag lookup strategies in order
tag_lookups = [
lambda x: getattr(x, "tags", None),
lambda x: x.get("tags") if isinstance(x, dict) else None,
]
for lookup in tag_lookups:
try:
payload_tags = lookup(first)
if payload_tags:
break
except (AttributeError, TypeError, KeyError):
continue
if payload_tags:
if isinstance(payload_tags, str):
raw_tags = [payload_tags]
elif isinstance(payload_tags, list):
raw_tags = payload_tags
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
l = l.strip()
if l:
raw_tags.append(f"{{{l}}}")
# Parse and expand tags
tags_to_add = parse_tag_arguments(raw_tags)
tags_to_add = expand_tag_groups(tags_to_add)
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
extracted_hash = None
filtered_tags: List[str] = []
for tag in tags_to_add:
if isinstance(tag, str) and tag.lower().startswith("hash:"):
_, _, hash_val = tag.partition(":")
if hash_val:
extracted_hash = normalize_hash(hash_val.strip())
continue
filtered_tags.append(tag)
tags_to_add = filtered_tags
if not tags_to_add:
log("No tags provided to add", file=sys.stderr)
return 1
# Get other flags (hash override can come from -hash or hash: token)
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
duplicate_arg = parsed.get("duplicate")
# Tags ARE provided - apply them to each store-backed result
total_added = 0
total_modified = 0
store_override = parsed.get("store")
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
raw_path: Optional[str]
if isinstance(res, models.PipeObject):
store_name = store_override or res.store
raw_hash = res.hash
raw_path = res.path
elif isinstance(res, dict):
store_name = store_override or res.get("store")
raw_hash = res.get("hash")
raw_path = res.get("path")
else:
ctx.emit(res)
continue
if not store_name:
log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
resolved_hash = stem.lower()
elif p.exists() and p.is_file():
resolved_hash = sha256_file(p)
except Exception:
resolved_hash = None
if not resolved_hash:
log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
ctx.emit(res)
continue
try:
backend = Store(config)[str(store_name)]
except Exception as exc:
log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
try:
existing_tags, _src = backend.get_tag(resolved_hash, config=config)
except Exception:
existing_tags = []
existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)]
existing_lower = {t.lower() for t in existing_tags_list}
original_title = _extract_title_tag(existing_tags_list)
# Per-item tag list (do not mutate shared list)
item_tags_to_add = list(tags_to_add)
item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last")
# Handle -duplicate logic (copy existing tags to new namespaces)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tags_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tags_to_add.append(new_tag)
# Namespace replacement: delete old namespace:* when adding namespace:value
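# Example (illustrative): if the file already carries "title:Old Name" and the new
# tags include "title:New Name", the old title tag is queued for deletion below so
# only the new value remains in that namespace.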
removed_namespace_tags: list[str] = []
for new_tag in item_tags_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tags_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tags.append(t)
removed_namespace_tags = sorted({t for t in removed_namespace_tags})
actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower]
changed = False
if removed_namespace_tags:
try:
backend.delete_tag(resolved_hash, removed_namespace_tags, config=config)
changed = True
except Exception as exc:
log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr)
if actual_tags_to_add:
try:
backend.add_tag(resolved_hash, actual_tags_to_add, config=config)
changed = True
except Exception as exc:
log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr)
if changed:
total_added += len(actual_tags_to_add)
total_modified += 1
try:
refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config)
refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)]
except Exception:
refreshed_list = existing_tags_list
# Update the result's tags using canonical field
if isinstance(res, models.PipeObject):
res.tags = refreshed_list
elif isinstance(res, dict):
res["tags"] = refreshed_list
final_title = _extract_title_tag(refreshed_list)
_apply_title_to_result(res, final_title)
if final_title and (not original_title or final_title.lower() != original_title.lower()):
_refresh_result_table_title(final_title, resolved_hash, raw_path)
if changed:
_refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config)
ctx.emit(res)
log(
f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
file=sys.stderr,
)
return 0
CMDLET = Add_Tag()

85
cmdlet/add_url.py Normal file
View File

@@ -0,0 +1,85 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import sys
from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log
from Store import Store
class Add_Url(Cmdlet):
"""Add URL associations to files via hash+store."""
NAME = "add-url"
SUMMARY = "Associate a URL with a file"
USAGE = "@1 | add-url <url>"
ARGS = [
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to associate"),
]
DETAIL = [
"- Associates URL with file identified by hash+store",
"- Multiple url can be comma-separated",
]
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided")
return 1
if not store_name:
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
if not urls:
log("Error: No valid url provided")
return 1
# Get backend and add url
try:
storage = Store(config)
backend = storage[store_name]
backend.add_url(file_hash, urls)
for u in urls:
ctx.emit(f"Added URL: {u}")
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
except Exception as exc:
log(f"Error adding URL: {exc}", file=sys.stderr)
return 1
# Register cmdlet
register(["add-url", "add_url"])(Add_Url)

215
cmdlet/catalog.py Normal file
View File

@@ -0,0 +1,215 @@
from __future__ import annotations
from importlib import import_module
from typing import Any, Dict, List, Optional
try:
from cmdlet import REGISTRY
except Exception:
REGISTRY = {} # type: ignore
try:
from cmdnat import register_native_commands as _register_native_commands
except Exception:
_register_native_commands = None
def ensure_registry_loaded() -> None:
"""Ensure native commands are registered into REGISTRY (idempotent)."""
if _register_native_commands and REGISTRY is not None:
try:
_register_native_commands(REGISTRY)
except Exception:
pass
def _normalize_mod_name(mod_name: str) -> str:
"""Normalize a command/module name for import resolution."""
normalized = (mod_name or "").strip()
if normalized.startswith('.'):
normalized = normalized.lstrip('.')
normalized = normalized.replace('-', '_')
return normalized
def import_cmd_module(mod_name: str):
"""Import a cmdlet/native module from cmdnat or cmdlet packages."""
normalized = _normalize_mod_name(mod_name)
if not normalized:
return None
for package in ("cmdnat", "cmdlet", None):
try:
qualified = f"{package}.{normalized}" if package else normalized
return import_module(qualified)
except ModuleNotFoundError:
continue
except Exception:
continue
return None
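# Example (illustrative): import_cmd_module("add-tag") normalizes the name to
# "add_tag" and tries cmdnat.add_tag, then cmdlet.add_tag, then a top-level
# add_tag module, returning None if none of them import.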
def _normalize_arg(arg: Any) -> Dict[str, Any]:
"""Convert a CmdletArg/dict into a plain metadata dict."""
if isinstance(arg, dict):
name = arg.get("name", "")
return {
"name": str(name).lstrip("-"),
"type": arg.get("type", "string"),
"required": bool(arg.get("required", False)),
"description": arg.get("description", ""),
"choices": arg.get("choices", []) or [],
"alias": arg.get("alias", ""),
"variadic": arg.get("variadic", False),
}
name = getattr(arg, "name", "") or ""
return {
"name": str(name).lstrip("-"),
"type": getattr(arg, "type", "string"),
"required": bool(getattr(arg, "required", False)),
"description": getattr(arg, "description", ""),
"choices": getattr(arg, "choices", []) or [],
"alias": getattr(arg, "alias", ""),
"variadic": getattr(arg, "variadic", False),
}
def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
"""Return normalized metadata for a cmdlet, if available (aliases supported)."""
ensure_registry_loaded()
normalized = cmd_name.replace("-", "_")
mod = import_cmd_module(normalized)
data = getattr(mod, "CMDLET", None) if mod else None
if data is None:
try:
reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower())
if reg_fn:
owner_mod = getattr(reg_fn, "__module__", "")
if owner_mod:
owner = import_module(owner_mod)
data = getattr(owner, "CMDLET", None)
except Exception:
data = None
if not data:
return None
if hasattr(data, "to_dict"):
base = data.to_dict()
elif isinstance(data, dict):
base = data
else:
base = {}
name = getattr(data, "name", base.get("name", cmd_name)) or cmd_name
aliases = getattr(data, "alias", base.get("alias", [])) or []
usage = getattr(data, "usage", base.get("usage", ""))
summary = getattr(data, "summary", base.get("summary", ""))
details = getattr(data, "detail", base.get("detail", [])) or []
args_list = getattr(data, "arg", base.get("arg", [])) or []
args = [_normalize_arg(arg) for arg in args_list]
return {
"name": str(name).replace("_", "-").lower(),
"aliases": [str(a).replace("_", "-").lower() for a in aliases if a],
"usage": usage,
"summary": summary,
"details": details,
"args": args,
"raw": data,
}
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
"""Collect metadata for all registered cmdlet keyed by canonical name."""
ensure_registry_loaded()
entries: Dict[str, Dict[str, Any]] = {}
for reg_name in (REGISTRY or {}).keys():
meta = get_cmdlet_metadata(reg_name)
canonical = str(reg_name).replace("_", "-").lower()
if meta:
canonical = meta.get("name", canonical)
aliases = meta.get("aliases", [])
base = entries.get(
canonical,
{
"name": canonical,
"aliases": [],
"usage": "",
"summary": "",
"details": [],
"args": [],
"raw": meta.get("raw"),
},
)
merged_aliases = set(base.get("aliases", [])) | set(aliases)
if canonical != reg_name:
merged_aliases.add(reg_name)
base["aliases"] = sorted(a for a in merged_aliases if a and a != canonical)
if not base.get("usage") and meta.get("usage"):
base["usage"] = meta["usage"]
if not base.get("summary") and meta.get("summary"):
base["summary"] = meta["summary"]
if not base.get("details") and meta.get("details"):
base["details"] = meta["details"]
if not base.get("args") and meta.get("args"):
base["args"] = meta["args"]
if not base.get("raw"):
base["raw"] = meta.get("raw")
entries[canonical] = base
else:
entries.setdefault(
canonical,
{"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
)
return entries
def list_cmdlet_names(include_aliases: bool = True) -> List[str]:
"""Return sorted cmdlet names (optionally including aliases)."""
ensure_registry_loaded()
entries = list_cmdlet_metadata()
names = set()
for meta in entries.values():
names.add(meta.get("name", ""))
if include_aliases:
for alias in meta.get("aliases", []):
names.add(alias)
return sorted(n for n in names if n)
def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
"""Return flag variants for cmdlet arguments (e.g., -name/--name)."""
meta = get_cmdlet_metadata(cmd_name)
if not meta:
return []
# Preserve the order that arguments are defined on the cmdlet (arg=[...]) so
# completions feel stable and predictable.
flags: List[str] = []
seen: set[str] = set()
for arg in meta.get("args", []):
name = str(arg.get("name") or "").strip().lstrip("-")
if not name:
continue
for candidate in (f"-{name}", f"--{name}"):
if candidate not in seen:
flags.append(candidate)
seen.add(candidate)
return flags
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
"""Return declared choices for a cmdlet argument."""
meta = get_cmdlet_metadata(cmd_name)
if not meta:
return []
target = arg_name.lstrip("-")
for arg in meta.get("args", []):
if arg.get("name") == target:
return list(arg.get("choices", []) or [])
return []
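# Illustrative usage (assumed; actual output depends on the cmdlets registered at runtime):
#   meta = get_cmdlet_metadata("add-tag")        # {"name": "add-tag", "args": [...], ...} or None
#   flags = get_cmdlet_arg_flags("add-tag")      # e.g. ["-tag", "--tag", "-hash", "--hash", ...]
#   names = list_cmdlet_names(include_aliases=False)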

154
cmdlet/check_file_status.py Normal file
View File

@@ -0,0 +1,154 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from SYS.logger import log
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
CMDLET = Cmdlet(
name="check-file-status",
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
usage="check-file-status [-hash <sha256>]",
arg=[
SharedArgs.HASH,
],
detail=[
"- Shows whether file is active in Hydrus or marked as deleted",
"- Detects corrupted data (e.g., comma-separated url)",
"- Displays file metadata and service locations",
"- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
override_hash: str | None = None
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
i += 1
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("No hash provided and no result selected", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
result_data = client.fetch_file_metadata(hashes=[hash_hex])
if not result_data.get("metadata"):
log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
return 1
file_info = result_data["metadata"][0]
# Status summary
is_deleted = file_info.get("is_deleted", False)
is_local = file_info.get("is_local", False)
is_trashed = file_info.get("is_trashed", False)
status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
log(f"File status: {status_str}", file=sys.stderr)
# File info
log(f"\n📄 File Information:", file=sys.stderr)
log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
log(f" MIME: {file_info['mime']}", file=sys.stderr)
log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)
# Service status
file_services = file_info.get("file_services", {})
current_services = file_services.get("current", {})
deleted_services = file_services.get("deleted", {})
if current_services:
log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
for service_key, service_info in current_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
log(f" - {sname} ({stype})", file=sys.stderr)
if deleted_services:
log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
for service_key, service_info in deleted_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
time_deleted = service_info.get("time_deleted", "?")
log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)
# URL check
urls = file_info.get("url", [])
log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr)
corrupted_count = 0
for idx, url in enumerate(urls, 1):
if "," in url:
corrupted_count += 1
log(f" [{idx}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
else:
log(f" [{idx}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
if corrupted_count > 0:
log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)
# Tags
tags_dict = file_info.get("tags", {})
total_tags = 0
for service_key, service_data in tags_dict.items():
service_name = service_data.get("name", "unknown")
display_tags = service_data.get("display_tags", {}).get("0", [])
total_tags += len(display_tags)
if total_tags > 0:
log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
for service_key, service_data in tags_dict.items():
display_tags = service_data.get("display_tags", {}).get("0", [])
if display_tags:
service_name = service_data.get("name", "unknown")
log(f" {service_name}:", file=sys.stderr)
for tag in display_tags[:5]: # Show first 5
log(f" - {tag}", file=sys.stderr)
if len(display_tags) > 5:
log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)
log("\n", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error checking file status: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.alias = ["check-status", "file-status", "status"]
CMDLET.register()
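# Illustrative invocation (assumed CLI syntax from the usage string above; output goes to stderr):
#   check-file-status -hash <sha256>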

105
cmdlet/cleanup.py Normal file
View File

@@ -0,0 +1,105 @@
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
then emits the remaining non-temporary results for further pipeline stages.
"""
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import sys
import json
from SYS.logger import log
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help
import models
import pipeline as pipeline_context
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Remove temporary files from pipeline results.
Accepts:
- Single result object with is_temp field
- List of result objects to clean up
Process:
- Filters results by is_temp=True
- Deletes those files from disk
- Emits only non-temporary results
Typical pipeline usage:
download-data url | screen-shot | add-tag -store local "tag" --all | cleanup
"""
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Normalize input to list
results = normalize_result_input(result)
if not results:
log("[cleanup] No results to process", file=sys.stderr)
return 1
# Separate temporary and permanent results
temp_results = filter_results_by_temp(results, include_temp=True)
perm_results = filter_results_by_temp(results, include_temp=False)
# Delete temporary files
deleted_count = 0
for temp_result in temp_results:
try:
file_path = get_pipe_object_path(temp_result)
if file_path:
path_obj = Path(file_path)
if path_obj.exists():
# Delete the file
path_obj.unlink()
log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
deleted_count += 1
# Clean up any associated sidecar files
for ext in ['.tag', '.metadata']:
sidecar = path_obj.parent / (path_obj.name + ext)
if sidecar.exists():
try:
sidecar.unlink()
log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
else:
log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
# Log summary
log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
# Emit permanent results for downstream processing
for perm_result in perm_results:
pipeline_context.emit(perm_result)
return 0
CMDLET = Cmdlet(
name="cleanup",
summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
usage="cleanup",
arg=[],
detail=[
"- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
"- Deletes files marked with is_temp=True from disk",
"- Also cleans up associated sidecar files (.tag, .metadata)",
"- Emits only non-temporary results for further processing",
"- Typical usage at end of pipeline: ... | add-tag -store local \"tag\" --all | cleanup",
"- Exit code 0 if cleanup successful, 1 if no results to process",
],
exec=_run,
).register()

242
cmdlet/delete_file.py Normal file
View File

@@ -0,0 +1,242 @@
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
from __future__ import annotations
from typing import Any, Dict, Sequence
import sys
from pathlib import Path
from SYS.logger import debug, log
from Store.Folder import Folder
from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help
from API import HydrusNetwork as hydrus_wrapper
import pipeline as ctx
class Delete_File(Cmdlet):
"""Class-based delete-file cmdlet with self-registration."""
def __init__(self) -> None:
super().__init__(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
alias=["del-file"],
arg=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
detail=[
"Default removes both the local file and Hydrus file.",
"Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
"Database entries are automatically cleaned up for local files.",
"Any remaining arguments are treated as the Hydrus reason text.",
],
exec=self.run,
)
self.register()
def _process_single_item(self, item: Any, override_hash: str | None, conserve: str | None,
lib_root: str | None, reason: str, config: Dict[str, Any]) -> bool:
"""Process deletion for a single item."""
# Handle item as either dict or object
if isinstance(item, dict):
hash_hex_raw = item.get("hash_hex") or item.get("hash")
target = item.get("target") or item.get("file_path") or item.get("path")
else:
hash_hex_raw = get_field(item, "hash_hex") or get_field(item, "hash")
target = get_field(item, "target") or get_field(item, "file_path") or get_field(item, "path")
store = None
if isinstance(item, dict):
store = item.get("store")
else:
store = get_field(item, "store")
store_lower = str(store).lower() if store else ""
is_hydrus_store = bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"})
# For Hydrus files, the target IS the hash
if is_hydrus_store and not hash_hex_raw:
hash_hex_raw = target
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
if conserve != "local" and local_target:
path = Path(str(target))
# If lib_root is provided and this is from a folder store, use the Folder class
if lib_root:
try:
folder = Folder(Path(lib_root), name=store or "local")
if folder.delete_file(str(path)):
local_deleted = True
ctx.emit(f"Removed file: {path.name}")
log(f"Deleted: {path.name}", file=sys.stderr)
except Exception as exc:
debug(f"Folder.delete_file failed: {exc}", file=sys.stderr)
# Fallback to manual deletion
try:
if path.exists() and path.is_file():
path.unlink()
local_deleted = True
ctx.emit(f"Removed local file: {path}")
log(f"Deleted: {path.name}", file=sys.stderr)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
else:
# No lib_root, just delete the file
try:
if path.exists() and path.is_file():
path.unlink()
local_deleted = True
ctx.emit(f"Removed local file: {path}")
log(f"Deleted: {path.name}", file=sys.stderr)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
# Remove common sidecars regardless of file removal success
for sidecar in (
path.with_suffix(".tag"),
path.with_suffix(".metadata"),
path.with_suffix(".notes"),
):
try:
if sidecar.exists() and sidecar.is_file():
sidecar.unlink()
except Exception:
pass
hydrus_deleted = False
should_try_hydrus = is_hydrus_store
# If conserve is set to hydrus, definitely don't delete
if conserve == "hydrus":
should_try_hydrus = False
if should_try_hydrus and hash_hex:
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return False
else:
if client is None:
if not local_deleted:
log("Hydrus client unavailable", file=sys.stderr)
return False
else:
payload: Dict[str, Any] = {"hashes": [hash_hex]}
if reason:
payload["reason"] = reason
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
debug(f"Deleted from Hydrus: {preview}", file=sys.stderr)
except Exception as exc:
# If it's not in Hydrus (e.g. 404 or similar), that's fine
if not local_deleted:
return False
if hydrus_deleted and hash_hex:
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
if reason:
ctx.emit(f"Deleted {preview} (reason: {reason}).")
else:
ctx.emit(f"Deleted {preview}.")
if hydrus_deleted or local_deleted:
return True
log("Selected result has neither Hydrus hash nor local file target")
return False
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute delete-file command."""
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
# Parse arguments
override_hash: str | None = None
conserve: str | None = None
lib_root: str | None = None
reason_tokens: list[str] = []
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
value = str(args[i + 1]).strip().lower()
if value in {"local", "hydrus"}:
conserve = value
i += 2
continue
if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args):
lib_root = str(args[i + 1]).strip()
i += 2
continue
reason_tokens.append(token)
i += 1
# If no lib_root provided, try to get the first folder store from config
if not lib_root:
try:
storage_config = config.get("storage", {})
folder_config = storage_config.get("folder", {})
if folder_config:
# Get first folder store path
for store_name, store_config in folder_config.items():
if isinstance(store_config, dict):
path = store_config.get("path")
if path:
lib_root = path
break
except Exception:
pass
reason = " ".join(token for token in reason_tokens if str(token).strip()).strip()
items = []
if isinstance(result, list):
items = result
elif result:
items = [result]
if not items:
log("No items to delete", file=sys.stderr)
return 1
success_count = 0
for item in items:
if self._process_single_item(item, override_hash, conserve, lib_root, reason, config):
success_count += 1
if success_count > 0:
# Clear cached tables/items so deleted entries are not redisplayed
try:
ctx.set_last_result_table_overlay(None, None, None)
ctx.set_last_result_table(None, [])
ctx.set_last_result_items_only([])
ctx.set_current_stage_table(None)
except Exception:
pass
return 0 if success_count > 0 else 1
# Instantiate and register the cmdlet
Delete_File()
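# Illustrative invocations (assumed CLI syntax from the usage string above):
#   @1 | delete-file "accidental duplicate"   # remove both copies, reason text optional
#   @1 | delete-file -conserve local          # keep the local file, remove it from Hydrus
#   @1 | delete-file -conserve hydrus         # keep the Hydrus copy, remove the local file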

140
cmdlet/delete_note.py Normal file
View File

@@ -0,0 +1,140 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
get_field,
should_show_help,
)
from Store import Store
from SYS.utils import sha256_file
class Delete_Note(Cmdlet):
def __init__(self) -> None:
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-hash <sha256>] <name>",
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
],
detail=[
"- Deletes the named note from the selected store backend.",
],
exec=self.run,
)
try:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
except Exception:
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
if raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
return stem.lower()
if p.exists() and p.is_file():
return sha256_file(p)
except Exception:
return None
return None
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
note_name_override = str(parsed.get("name") or "").strip()
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
if not note_name_override and not inferred_note_name:
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
return 1
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
return 1
store_registry = Store(config)
deleted = 0
for res in results:
if not isinstance(res, dict):
ctx.emit(res)
continue
# Resolve which note name to delete for this item.
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
if not note_name:
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
return 1
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
if not store_name:
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
)
if not resolved_hash:
ctx.emit(res)
continue
try:
backend = store_registry[store_name]
except Exception as exc:
log(f"[delete_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
ok = False
try:
ok = bool(backend.delete_note(resolved_hash, note_name, config=config))
except Exception as exc:
log(f"[delete_note] Error: Failed to delete note: {exc}", file=sys.stderr)
ok = False
if ok:
deleted += 1
ctx.emit(res)
log(f"[delete_note] Deleted note on {deleted} item(s)", file=sys.stderr)
return 0 if deleted > 0 else 1
CMDLET = Delete_Note()
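# Illustrative usage (assumed; note rows piped from get-note carry note_name):
#   @1 | delete-note -store local my_note     # delete a note by name
#   @2 | delete-note                          # delete the note row selected from get-note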

216
cmdlet/delete_relationship.py Normal file
View File

@@ -0,0 +1,216 @@
"""Delete file relationships."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence
import json
from pathlib import Path
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
try:
subject = ctx.get_last_result_subject()
if subject is None:
return
def norm(val: Any) -> str:
return str(val).lower()
target_hashes = [norm(v) for v in [target_hash, other] if v]
target_paths = [norm(v) for v in [target_path, other] if v]
subj_hashes: list[str] = []
subj_paths: list[str] = []
for field in ("hydrus_hash", "hash", "hash_hex", "file_hash"):
val = get_field(subject, field)
if val:
subj_hashes.append(norm(val))
for field in ("file_path", "path", "target"):
val = get_field(subject, field)
if val:
subj_paths.append(norm(val))
is_match = False
if target_hashes and any(h in subj_hashes for h in target_hashes):
is_match = True
if target_paths and any(p in subj_paths for p in target_paths):
is_match = True
if not is_match:
return
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
cmd = get_cmdlet("get-relationship")
if not cmd:
return
cmd(subject, refresh_args, config)
except Exception:
pass
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete relationships from files.
Args:
result: Input result(s) from previous cmdlet
args: Command arguments
config: CLI configuration
Returns:
Exit code (0 = success)
"""
try:
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
parsed_args = parse_cmdlet_args(args, CMDLET)
delete_all_flag = parsed_args.get("all", False)
rel_type_filter = parsed_args.get("type")
# Get storage path
local_storage_path = get_local_storage_path(config)
if not local_storage_path:
log("Local storage path not configured", file=sys.stderr)
return 1
# Normalize input
results = normalize_result_input(result)
if not results:
log("No results to process", file=sys.stderr)
return 1
deleted_count = 0
for single_result in results:
try:
# Get file path from result
file_path_from_result = (
get_field(single_result, "file_path")
or get_field(single_result, "path")
or get_field(single_result, "target")
or (str(single_result) if not isinstance(single_result, dict) else None)
)
if not file_path_from_result:
log("Could not extract file path from result", file=sys.stderr)
return 1
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
with LocalLibrarySearchOptimizer(local_storage_path) as db:
file_id = db.db.get_file_id(file_path_obj)
if not file_id:
log(f"File not in database: {file_path_obj.name}", file=sys.stderr)
continue
# Get current relationships
cursor = db.db.connection.cursor()
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (file_id,))
row = cursor.fetchone()
if not row:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
relationships_str = row[0]
if not relationships_str:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
try:
relationships = json.loads(relationships_str)
except json.JSONDecodeError:
log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr)
continue
if not isinstance(relationships, dict):
relationships = {}
# Determine what to delete
if delete_all_flag:
# Delete all relationships
deleted_types = list(relationships.keys())
relationships = {}
log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr)
elif rel_type_filter:
# Delete specific type
if rel_type_filter in relationships:
deleted_count_for_type = len(relationships[rel_type_filter])
del relationships[rel_type_filter]
log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr)
else:
log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
return 1
# Save updated relationships
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (file_id, json.dumps(relationships) if relationships else None))
db.db.connection.commit()
_refresh_relationship_view_if_current(None, str(file_path_obj), None, config)
deleted_count += 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error in delete-relationship: {exc}", file=sys.stderr)
return 1
CMDLET = Cmdlet(
name="delete-relationship",
summary="Remove relationships from files.",
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
],
detail=[
"- Delete all relationships: pipe files | delete-relationship --all",
"- Delete specific type: pipe files | delete-relationship -type alt",
"- Delete all from file: delete-relationship -path <file> --all",
],
)
CMDLET.exec = _run
CMDLET.register()
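# A minimal usage sketch based on the usage/detail strings above (selection
# indices and the file path are hypothetical):
#   @1 | delete-relationship --all           # drop every relationship type from the selected file
#   @1-3 | delete-relationship -type alt     # drop only 'alt' relationships from files 1-3
#   delete-relationship -path <file> --all   # operate on an explicit local path instead of a piped result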

335
cmdlet/delete_tag.py Normal file
View File

@@ -0,0 +1,335 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import json
import sys
import models
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field
from SYS.logger import debug, log
from Store import Store
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh tags via get-tag."""
try:
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
get_tag = None
try:
get_tag = get_cmdlet("get-tag")
except Exception:
get_tag = None
if not callable(get_tag):
return
try:
subject = ctx.get_last_result_subject()
if subject is None:
return
def norm(val: Any) -> str:
return str(val).lower()
target_hash = norm(file_hash) if file_hash else None
target_path = norm(path) if path else None
subj_hashes: list[str] = []
subj_paths: list[str] = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hash")] if v]
subj_paths = [norm(v) for v in [subject.get("path"), subject.get("target")] if v]
else:
subj_hashes = [norm(get_field(subject, f)) for f in ("hash",) if get_field(subject, f)]
subj_paths = [norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)]
is_match = False
if target_hash and target_hash in subj_hashes:
is_match = True
if target_path and target_path in subj_paths:
is_match = True
if not is_match:
return
refresh_args: list[str] = []
if file_hash:
refresh_args.extend(["-hash", file_hash])
if store_name:
refresh_args.extend(["-store", store_name])
get_tag(subject, refresh_args, config)
except Exception:
pass
CMDLET = Cmdlet(
name="delete-tag",
summary="Remove tags from a file in a store.",
usage="delete-tag -store <store> [-hash <sha256>] <tag>[,<tag>...]",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
],
detail=[
"- Requires a Hydrus file (hash present) or explicit -hash override.",
"- Multiple tags can be comma-separated or space-separated.",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
has_piped_tag = (result and hasattr(result, '__class__') and
result.__class__.__name__ == 'TagItem' and
hasattr(result, 'tag_name'))
# Check if we have a piped list of TagItems (from @N selection)
has_piped_tag_list = (isinstance(result, list) and result and
hasattr(result[0], '__class__') and
result[0].__class__.__name__ == 'TagItem')
if not args and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Parse -hash override and collect tags from remaining args
override_hash: str | None = None
override_store: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args):
override_store = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
# Check if first argument is @ syntax (result table selection)
# @5 or @{2,5,8} to delete tags from ResultTable by index
tags_from_at_syntax = []
hash_from_at_syntax = None
path_from_at_syntax = None
store_from_at_syntax = None
if rest and str(rest[0]).startswith("@"):
selector_arg = str(rest[0])
pipe_selector = selector_arg[1:].strip()
# Parse @N or @{N,M,K} syntax
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
# @{2,5,8}
pipe_selector = pipe_selector[1:-1]
try:
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
except ValueError:
log("Invalid selection syntax. Use @2 or @{2,5,8}")
return 1
# Get the last ResultTable from pipeline context
try:
last_table = ctx._LAST_RESULT_TABLE
if last_table:
# Extract tags from selected rows
for idx in indices:
if 1 <= idx <= len(last_table.rows):
# Look for a TagItem in _LAST_RESULT_ITEMS by index
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
item = ctx._LAST_RESULT_ITEMS[idx - 1]
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
tag_name = get_field(item, 'tag_name')
if tag_name:
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
tags_from_at_syntax.append(tag_name)
# Also get hash from first item for consistency
if not hash_from_at_syntax:
hash_from_at_syntax = get_field(item, 'hash')
if not path_from_at_syntax:
path_from_at_syntax = get_field(item, 'path')
if not store_from_at_syntax:
store_from_at_syntax = get_field(item, 'store')
if not tags_from_at_syntax:
log(f"No tags found at indices: {indices}")
return 1
else:
log("No ResultTable in pipeline (use @ after running get-tag)")
return 1
except Exception as exc:
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
return 1
# @N selection expands to a list, so normalize the piped result to a list and
# process every item. This handles delete-tag @1 (a single TagItem) as well as
# delete-tag @1,2 (tags across multiple files).
# Normalize result to a list for processing
items_to_process = []
if isinstance(result, list):
items_to_process = result
elif result:
items_to_process = [result]
# If we have TagItems and no args, we are deleting the tags themselves
# If we have Files (or other objects) and args, we are deleting tags FROM those files
# Check if we are in "delete selected tags" mode (TagItems)
is_tag_item_mode = (items_to_process and hasattr(items_to_process[0], '__class__') and
items_to_process[0].__class__.__name__ == 'TagItem')
if is_tag_item_mode:
# Each TagItem already names the tag (and its file) to delete; the per-item
# loop below handles them one at a time.
pass
else:
# "Delete tags from files" mode
# We need args (tags to delete)
if not args and not tags_from_at_syntax:
log("Requires at least one tag argument when deleting from files")
return 1
# Process each item
success_count = 0
# If tags came from @ syntax (e.g. delete-tag @{1,2}), the selection logic above
# already extracted the tags plus the hash/path/store context, so the piped
# result is not consulted for tag selection here.
if tags_from_at_syntax:
# Special case: @ selection of tags. Run the deletion once using the
# tags and hash/path/store extracted above.
tags = tags_from_at_syntax
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
path = path_from_at_syntax
store_name = override_store or store_from_at_syntax
if _process_deletion(tags, file_hash, path, store_name, config):
success_count += 1
else:
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
tags_arg = parse_tag_arguments(rest)
for item in items_to_process:
tags_to_delete = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
)
item_store = override_store or get_field(item, "store")
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
# It's a TagItem
if tags_arg:
# Explicit tag arguments take precedence: delete those tags from the file
# referenced by this TagItem. Without arguments, fall back to the tag
# carried by the item itself (the usual piped-TagItem case).
tags_to_delete = tags_arg
else:
tag_name = get_field(item, 'tag_name')
if tag_name:
tags_to_delete = [tag_name]
else:
# It's a File or other object
if tags_arg:
tags_to_delete = tags_arg
else:
# No tags were provided for a plain file object; the missing-argument
# error was already reported before the loop, so just skip this item.
continue
if tags_to_delete:
if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config):
success_count += 1
if success_count > 0:
return 0
return 1
def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, store_name: str | None, config: Dict[str, Any]) -> bool:
"""Helper to execute the deletion logic for a single target."""
if not tags:
return False
if not store_name:
log("Store is required (use -store or pipe a result with store)", file=sys.stderr)
return False
resolved_hash = normalize_hash(file_hash) if file_hash else None
if not resolved_hash and path:
try:
from SYS.utils import sha256_file
resolved_hash = sha256_file(Path(path))
except Exception:
resolved_hash = None
if not resolved_hash:
log("Item does not include a usable hash (and hash could not be derived from path)", file=sys.stderr)
return False
def _fetch_existing_tags() -> list[str]:
try:
backend = Store(config)[store_name]
existing, _src = backend.get_tag(resolved_hash, config=config)
return list(existing or [])
except Exception:
return []
# Safety: only block if this deletion would remove the final title tag
title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")]
if title_tags:
existing_tags = _fetch_existing_tags()
current_titles = [t for t in existing_tags if isinstance(t, str) and t.lower().startswith("title:")]
del_title_set = {t.lower() for t in title_tags}
remaining_titles = [t for t in current_titles if t.lower() not in del_title_set]
if current_titles and not remaining_titles:
log("Cannot delete the last title: tag. Add a replacement title first (add-tags \"title:new title\").", file=sys.stderr)
return False
try:
backend = Store(config)[store_name]
ok = backend.delete_tag(resolved_hash, list(tags), config=config)
if ok:
preview = resolved_hash[:12] + ('...' if len(resolved_hash) > 12 else '')
debug(f"Removed {len(tags)} tag(s) from {preview} via store '{store_name}'.")
_refresh_tag_view_if_current(resolved_hash, store_name, path, config)
return True
return False
except Exception as exc:
log(f"del-tag failed: {exc}")
return False
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()
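# A minimal usage sketch based on the usage string and the @-selection handling
# above (the store name, hash, and tag values are hypothetical):
#   @1 | delete-tag "namespace:value"              # delete an explicit tag from a piped file
#   delete-tag -store local -hash <sha256> tag1,tag2
#   delete-tag @2                                  # delete the tag selected from a prior get-tag table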

82
cmdlet/delete_url.py Normal file
View File

@@ -0,0 +1,82 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import sys
from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log
from Store import Store
class Delete_Url(Cmdlet):
"""Delete URL associations from files via hash+store."""
NAME = "delete-url"
SUMMARY = "Remove a URL association from a file"
USAGE = "@1 | delete-url <url>"
ARGS = [
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to remove"),
]
DETAIL = [
"- Removes URL association from file identified by hash+store",
"- Multiple url can be comma-separated",
]
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete URL from file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided")
return 1
if not store_name:
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
if not urls:
log("Error: No valid url provided")
return 1
# Get backend and delete url
try:
storage = Store(config)
backend = storage[store_name]
backend.delete_url(file_hash, urls)
for u in urls:
ctx.emit(f"Deleted URL: {u}")
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
except Exception as exc:
log(f"Error deleting URL: {exc}", file=sys.stderr)
return 1
# Register cmdlet
register(["delete-url", "del-url", "delete_url"])(Delete_Url)

301
cmdlet/download_file.py Normal file
View File

@@ -0,0 +1,301 @@
"""Generic file downloader.
Supports:
- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
- Piped provider items (uses provider.download when available)
No streaming site logic; use download-media for yt-dlp/streaming.
"""
from __future__ import annotations
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from SYS.download import DownloadError, _download_direct_file
from SYS.logger import log, debug
import pipeline as pipeline_context
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
parse_cmdlet_args,
register_url_with_local_library,
coerce_to_pipe_object,
get_field,
)
class Download_File(Cmdlet):
"""Class-based download-file cmdlet - direct HTTP downloads."""
def __init__(self) -> None:
"""Initialize download-file cmdlet."""
super().__init__(
name="download-file",
summary="Download files via HTTP or provider handlers",
usage="download-file <url> [options] OR @N | download-file [options]",
alias=["dl-file", "download-http"],
arg=[
CmdletArg(name="output", type="string", alias="o", description="Output directory (overrides defaults)"),
SharedArgs.URL,
],
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution method."""
stage_ctx = pipeline_context.get_stage_context()
in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1
if in_pipeline and isinstance(config, dict):
config["_quiet_background_output"] = True
return self._run_impl(result, args, config)
def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main download implementation for direct HTTP files."""
try:
debug("Starting download-file")
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Extract explicit URL args (if any)
raw_url = parsed.get("url", [])
if isinstance(raw_url, str):
raw_url = [raw_url]
# If no URL args were provided, fall back to piped results (provider items)
piped_items: List[Any] = []
if not raw_url:
if isinstance(result, list):
piped_items = result
elif result:
piped_items = [result]
if not raw_url and not piped_items:
log("No url or piped items to download", file=sys.stderr)
return 1
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
if not final_output_dir:
return 1
debug(f"Output directory: {final_output_dir}")
# Download each URL and/or provider item
downloaded_count = 0
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
# Provider lookup is optional; keep import local to avoid overhead if unused
get_search_provider = None
SearchResult = None
try:
from ProviderCore.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
get_search_provider = _get_search_provider
SearchResult = _SearchResult
except Exception:
get_search_provider = None
SearchResult = None
def _emit_local_file(downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]]) -> None:
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
tag: List[str] = []
if tags_hint:
tag.extend([str(t) for t in tags_hint if t])
if not any(str(t).lower().startswith("title:") for t in tag):
tag.insert(0, f"title:{title_val}")
payload: Dict[str, Any] = {
"path": str(downloaded_path),
"hash": hash_value,
"title": title_val,
"action": "cmdlet:download-file",
"download_mode": "file",
"store": "local",
"media_kind": media_kind_hint or "file",
"tag": tag,
}
if full_metadata:
payload["full_metadata"] = full_metadata
if source and str(source).startswith("http"):
payload["url"] = source
elif source:
payload["source_url"] = source
pipeline_context.emit(payload)
# Automatically register url with local library
if payload.get("url"):
pipe_obj = coerce_to_pipe_object(payload)
register_url_with_local_library(pipe_obj, config)
# 1) Explicit URL downloads
for url in raw_url:
try:
debug(f"Processing URL: {url}")
result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode)
file_path = None
if hasattr(result_obj, "path"):
file_path = getattr(result_obj, "path")
elif isinstance(result_obj, dict):
file_path = result_obj.get("path")
if not file_path:
file_path = str(result_obj)
downloaded_path = Path(str(file_path))
_emit_local_file(
downloaded_path=downloaded_path,
source=url,
title_hint=downloaded_path.stem,
tags_hint=[f"title:{downloaded_path.stem}"],
media_kind_hint="file",
full_metadata=None,
)
downloaded_count += 1
debug("✓ Downloaded and emitted")
except DownloadError as e:
log(f"Download failed for {url}: {e}", file=sys.stderr)
except Exception as e:
log(f"Error processing {url}: {e}", file=sys.stderr)
# 2) Provider item downloads (piped results)
for item in piped_items:
try:
table = get_field(item, "table")
title = get_field(item, "title")
target = get_field(item, "path") or get_field(item, "url")
media_kind = get_field(item, "media_kind")
tags_val = get_field(item, "tag")
tags_list: Optional[List[str]]
if isinstance(tags_val, list):
tags_list = [str(t) for t in tags_val if t]
else:
tags_list = None
full_metadata = get_field(item, "full_metadata")
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
extra_md = item["extra"].get("full_metadata")
if isinstance(extra_md, dict):
full_metadata = extra_md
# If this looks like a provider item and providers are available, prefer provider.download()
downloaded_path: Optional[Path] = None
if table and get_search_provider and SearchResult:
provider = get_search_provider(str(table), config)
if provider is not None:
sr = SearchResult(
table=str(table),
title=str(title or "Unknown"),
path=str(target or ""),
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
)
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
downloaded_path = provider.download(sr, final_output_dir)
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
file_path = None
if hasattr(result_obj, "path"):
file_path = getattr(result_obj, "path")
elif isinstance(result_obj, dict):
file_path = result_obj.get("path")
if not file_path:
file_path = str(result_obj)
downloaded_path = Path(str(file_path))
if downloaded_path is None:
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
continue
_emit_local_file(
downloaded_path=downloaded_path,
source=str(target) if target else None,
title_hint=str(title) if title else downloaded_path.stem,
tags_hint=tags_list,
media_kind_hint=str(media_kind) if media_kind else None,
full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
)
downloaded_count += 1
except DownloadError as e:
log(f"Download failed: {e}", file=sys.stderr)
except Exception as e:
log(f"Error downloading item: {e}", file=sys.stderr)
if downloaded_count > 0:
debug(f"✓ Successfully processed {downloaded_count} file(s)")
return 0
log("No downloads completed", file=sys.stderr)
return 1
except Exception as e:
log(f"Error in download-file: {e}", file=sys.stderr)
return 1
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
"""Resolve the output directory from storage location or config."""
output_dir_arg = parsed.get("output")
if output_dir_arg:
try:
out_path = Path(str(output_dir_arg)).expanduser()
out_path.mkdir(parents=True, exist_ok=True)
return out_path
except Exception as e:
log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr)
return None
storage_location = parsed.get("storage")
# Priority 1: --storage flag
if storage_location:
try:
return SharedArgs.resolve_storage(storage_location)
except Exception as e:
log(f"Invalid storage location: {e}", file=sys.stderr)
return None
# Priority 2: Config outfile
if config and config.get("outfile"):
try:
return Path(config["outfile"]).expanduser()
except Exception:
pass
# Priority 3: Default (home/Downloads)
final_output_dir = Path.home() / "Downloads"
debug(f"Using default directory: {final_output_dir}")
# Ensure directory exists
try:
final_output_dir.mkdir(parents=True, exist_ok=True)
except Exception as e:
log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
return None
return final_output_dir
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
# Module-level singleton registration
CMDLET = Download_File()

1483
cmdlet/download_media.py Normal file

File diff suppressed because it is too large Load Diff

127
cmdlet/download_torrent.py Normal file
View File

@@ -0,0 +1,127 @@
"""Download torrent/magnet links via AllDebrid in a dedicated cmdlet.
Features:
- Accepts magnet links and .torrent files/url
- Uses AllDebrid API for background downloads
- Progress tracking and worker management
- Self-registering class-based cmdlet
"""
from __future__ import annotations
import sys
import uuid
import threading
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from SYS.logger import log
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
class Download_Torrent(Cmdlet):
"""Class-based download-torrent cmdlet with self-registration."""
def __init__(self) -> None:
super().__init__(
name="download-torrent",
summary="Download torrent/magnet links via AllDebrid",
usage="download-torrent <magnet|.torrent> [options]",
alias=["torrent", "magnet"],
arg=[
CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True),
CmdletArg(name="output", type="string", description="Output directory for downloaded files"),
CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),
CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"),
],
detail=["Download torrents/magnets via AllDebrid API."],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(args, self)
magnet_args = parsed.get("magnet", [])
output_dir = Path(parsed.get("output") or Path.home() / "Downloads")
wait_timeout = int(float(parsed.get("wait", 600)))
background_mode = parsed.get("background", False)
api_key = config.get("alldebrid_api_key")
if not api_key:
log("AllDebrid API key not configured", file=sys.stderr)
return 1
for magnet_url in magnet_args:
if background_mode:
self._start_background_worker(magnet_url, output_dir, config, api_key, wait_timeout)
log(f"⧗ Torrent download queued in background: {magnet_url}")
else:
self._download_torrent_worker(str(uuid.uuid4()), magnet_url, output_dir, config, api_key, wait_timeout)
return 0
@staticmethod
def _download_torrent_worker(
worker_id: str,
magnet_url: str,
output_dir: Path,
config: Dict[str, Any],
api_key: str,
wait_timeout: int = 600,
worker_manager: Optional[Any] = None,
) -> None:
try:
from API.alldebrid import AllDebridClient
client = AllDebridClient(api_key)
log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
magnet_info = client.magnet_add(magnet_url)
magnet_id = int(magnet_info.get('id', 0))
if magnet_id <= 0:
log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr)
return
log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
# Poll for ready status (simplified)
import time
elapsed = 0
while elapsed < wait_timeout:
status = client.magnet_status(magnet_id)
if status.get('ready'):
break
time.sleep(5)
elapsed += 5
if elapsed >= wait_timeout:
log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr)
return
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {})
files_array = magnet_files.get('files', [])
if not files_array:
log(f"[Worker {worker_id}] No files found", file=sys.stderr)
return
for file_info in files_array:
file_url = file_info.get('link')
file_name = file_info.get('name')
if file_url:
Download_Torrent._download_file(file_url, output_dir / file_name)
log(f"[Worker {worker_id}] ✓ Downloaded {file_name}")
except Exception as e:
log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr)
@staticmethod
def _download_file(url: str, dest: Path) -> None:
try:
import requests
dest.parent.mkdir(parents=True, exist_ok=True)
resp = requests.get(url, stream=True, timeout=60)
resp.raise_for_status()
with open(dest, 'wb') as f:
for chunk in resp.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
except Exception as e:
log(f"File download failed: {e}", file=sys.stderr)
def _start_background_worker(self, magnet_url, output_dir, config, api_key, wait_timeout):
worker_id = f"torrent_{uuid.uuid4().hex[:6]}"
thread = threading.Thread(
target=self._download_torrent_worker,
args=(worker_id, magnet_url, output_dir, config, api_key, wait_timeout),
daemon=False,
name=f"TorrentWorker_{worker_id}",
)
thread.start()
CMDLET = Download_Torrent()

204
cmdlet/get_file.py Normal file
View File

@@ -0,0 +1,204 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import sys
import shutil
from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log, debug
from Store import Store
from config import resolve_output_dir
class Get_File(Cmdlet):
"""Export files to local path via hash+store."""
def __init__(self) -> None:
"""Initialize get-file cmdlet."""
super().__init__(
name="get-file",
summary="Export file to local path",
usage="@1 | get-file -path C:\\Downloads",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("-path", description="Output directory path (default: from config)"),
CmdletArg("-name", description="Output filename (default: from metadata title)"),
],
detail=[
"- Exports file from storage backend to local path",
"- Uses hash+store to retrieve file",
"- Preserves file extension and metadata",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Export file via hash+store backend."""
debug(f"[get-file] run() called with result type: {type(result)}")
parsed = parse_cmdlet_args(args, self)
debug(f"[get-file] parsed args: {parsed}")
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... store_name={store_name}")
if not file_hash:
log("Error: No file hash provided")
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
debug(f"[get-file] Getting storage backend: {store_name}")
# Get storage backend
store = Store(config)
backend = store[store_name]
debug(f"[get-file] Backend retrieved: {type(backend).__name__}")
# Get file metadata to determine name and extension
debug(f"[get-file] Getting metadata for hash...")
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"Error: File metadata not found for hash {file_hash[:12]}...")
return 1
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
# Determine output filename
if output_name:
filename = output_name
else:
# Use title from metadata, sanitize it
title = metadata.get("title", "export")
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith('.'):
ext = '.' + ext
filename += ext
# Determine output directory
if output_path:
output_dir = Path(output_path).expanduser()
else:
output_dir = resolve_output_dir(config)
debug(f"[get-file] Output dir: {output_dir}")
# Create output directory
output_dir.mkdir(parents=True, exist_ok=True)
debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)")
# Get file from backend (may return Path or URL string depending on backend)
source_path = backend.get_file(file_hash)
debug(f"[get-file] backend.get_file returned: {source_path}")
# Check if backend returned a URL (HydrusNetwork case)
if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")):
log(f"File opened in browser: {source_path}", file=sys.stderr)
ctx.emit(f"Opened in browser: {source_path}")
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"url": source_path,
"title": filename,
})
return 0
# Otherwise treat as file path (local/folder backends)
if isinstance(source_path, str):
source_path = Path(source_path)
# Output directory was already resolved and created above
# Build full output path
dest_path = output_dir / filename
# Make path unique if file exists
dest_path = self._unique_path(dest_path)
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...")
return 1
# Copy file to destination
debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr)
shutil.copy2(source_path, dest_path)
ctx.emit(f"Exported to: {dest_path}")
log(f"Exported: {dest_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({
"hash": file_hash,
"store": store_name,
"path": str(dest_path),
"title": filename,
})
debug(f"[get-file] Completed successfully")
return 0
def _sanitize_filename(self, name: str) -> str:
"""Sanitize filename by removing invalid characters."""
allowed_chars = []
for ch in str(name):
if ch.isalnum() or ch in {'-', '_', ' ', '.'}:
allowed_chars.append(ch)
else:
allowed_chars.append(' ')
# Collapse multiple spaces
sanitized = ' '.join(''.join(allowed_chars).split())
return sanitized or "export"
def _unique_path(self, path: Path) -> Path:
"""Generate unique path by adding (1), (2), etc. if file exists."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
new_path = parent / f"{stem} ({counter}){suffix}"
if not new_path.exists():
return new_path
counter += 1
# Instantiate and register cmdlet
CMDLET = Get_File()
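# A minimal usage sketch based on the usage string above (the output directory,
# hash, and filename are hypothetical):
#   @1 | get-file -path C:\Downloads
#   get-file -store local -hash <sha256> -name "My Export"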

229
cmdlet/get_metadata.py Normal file
View File

@@ -0,0 +1,229 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
import json
import sys
from SYS.logger import log
from pathlib import Path
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
import pipeline as ctx
from result_table import ResultTable
class Get_Metadata(Cmdlet):
"""Class-based get-metadata cmdlet with self-registration."""
def __init__(self) -> None:
"""Initialize get-metadata cmdlet."""
super().__init__(
name="get-metadata",
summary="Print metadata for files by hash and storage backend.",
usage="get-metadata [-hash <sha256>] [-store <backend>]",
alias=["meta"],
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
],
detail=[
"- Retrieves metadata from storage backend using file hash as identifier.",
"- Shows hash, MIME type, size, duration/pages, known url, and import timestamp.",
"- Hash and store are taken from piped result or can be overridden with -hash/-store flags.",
"- All metadata is retrieved from the storage backend's database (single source of truth).",
],
exec=self.run,
)
self.register()
@staticmethod
def _extract_imported_ts(meta: Dict[str, Any]) -> Optional[int]:
"""Extract an imported timestamp from metadata if available."""
if not isinstance(meta, dict):
return None
# Prefer explicit time_imported if present
explicit = meta.get("time_imported")
if isinstance(explicit, (int, float)):
return int(explicit)
# Try parsing string timestamps
if isinstance(explicit, str):
try:
import datetime as _dt
return int(_dt.datetime.fromisoformat(explicit).timestamp())
except Exception:
pass
return None
@staticmethod
def _format_imported(ts: Optional[int]) -> str:
"""Format timestamp as readable string."""
if not ts:
return ""
try:
import datetime as _dt
return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return ""
@staticmethod
def _build_table_row(title: str, store: str, path: str, mime: str, size_bytes: Optional[int],
dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str],
hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]:
"""Build a table row dict with metadata fields."""
size_mb = None
if isinstance(size_bytes, int):
try:
size_mb = int(size_bytes / (1024 * 1024))
except Exception:
size_mb = None
dur_int = int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None
pages_int = int(pages) if isinstance(pages, (int, float)) else None
imported_label = Get_Metadata._format_imported(imported_ts)
duration_label = "Duration(s)"
duration_value = str(dur_int) if dur_int is not None else ""
if mime and mime.lower().startswith("application/pdf"):
duration_label = "Pages"
duration_value = str(pages_int) if pages_int is not None else ""
columns = [
("Title", title or ""),
("Hash", hash_value or ""),
("MIME", mime or ""),
("Size(MB)", str(size_mb) if size_mb is not None else ""),
(duration_label, duration_value),
("Imported", imported_label),
("Store", store or ""),
]
return {
"title": title or path,
"path": path,
"store": store,
"mime": mime,
"size_bytes": size_bytes,
"duration_seconds": dur_int,
"pages": pages_int,
"imported_ts": imported_ts,
"imported": imported_label,
"hash": hash_value,
"url": url,
"columns": columns,
}
@staticmethod
def _add_table_body_row(table: ResultTable, row: Dict[str, Any]) -> None:
"""Add a single row to the ResultTable using the prepared columns."""
columns = row.get("columns") if isinstance(row, dict) else None
lookup: Dict[str, Any] = {}
if isinstance(columns, list):
for col in columns:
if isinstance(col, tuple) and len(col) == 2:
label, value = col
lookup[str(label)] = value
row_obj = table.add_row()
row_obj.add_column("Hash", lookup.get("Hash", ""))
row_obj.add_column("MIME", lookup.get("MIME", ""))
row_obj.add_column("Size(MB)", lookup.get("Size(MB)", ""))
if "Duration(s)" in lookup:
row_obj.add_column("Duration(s)", lookup.get("Duration(s)", ""))
elif "Pages" in lookup:
row_obj.add_column("Pages", lookup.get("Pages", ""))
else:
row_obj.add_column("Duration(s)", "")
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution entry point."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Get hash and store from parsed args or result
file_hash = parsed.get("hash") or get_field(result, "hash")
storage_source = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available - use -hash to specify", file=sys.stderr)
return 1
if not storage_source:
log("No storage backend specified - use -store to specify", file=sys.stderr)
return 1
# Use storage backend to get metadata
try:
from Store import Store
storage = Store(config)
backend = storage[storage_source]
# Get metadata from backend
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"No metadata found for hash {file_hash[:8]}... in {storage_source}", file=sys.stderr)
return 1
# Extract title from tags if available
title = get_field(result, "title") or file_hash[:16]
if not get_field(result, "title"):
# Try to get title from tags
try:
tags, _ = backend.get_tag(file_hash)
for tag in tags:
if tag.lower().startswith("title:"):
title = tag.split(":", 1)[1]
break
except Exception:
pass
# Extract metadata fields
mime_type = metadata.get("mime") or metadata.get("ext", "")
file_size = metadata.get("size")
duration_seconds = metadata.get("duration")
pages = metadata.get("pages")
url = metadata.get("url") or []
imported_ts = self._extract_imported_ts(metadata)
# Normalize url
if isinstance(url, str):
try:
url = json.loads(url)
except (json.JSONDecodeError, TypeError):
url = []
if not isinstance(url, list):
url = []
# Build display row
row = self._build_table_row(
title=title,
store=storage_source,
path=metadata.get("path", ""),
mime=mime_type,
size_bytes=file_size,
dur_seconds=duration_seconds,
imported_ts=imported_ts,
url=url,
hash_value=file_hash,
pages=pages,
)
table_title = title
table = ResultTable(table_title).init_command("get-metadata", list(args))
self._add_table_body_row(table, row)
ctx.set_last_result_table_overlay(table, [row], row)
ctx.emit(row)
return 0
except KeyError:
log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
return 1
except Exception as exc:
log(f"Failed to get metadata: {exc}", file=sys.stderr)
return 1
CMDLET = Get_Metadata()
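# A minimal usage sketch based on the cmdlet definition above (store name and
# hash are hypothetical):
#   @1 | get-metadata                        # hash/store taken from the piped result
#   get-metadata -hash <sha256> -store local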

143
cmdlet/get_note.py Normal file
View File

@@ -0,0 +1,143 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from Store import Store
from SYS.utils import sha256_file
class Get_Note(Cmdlet):
def __init__(self) -> None:
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-hash <sha256>]",
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
],
detail=[
"- Notes are retrieved via the selected store backend.",
"- Lyrics are stored in a note named 'lyric'.",
],
exec=self.run,
)
try:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
except Exception:
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
if raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
return stem.lower()
if p.exists() and p.is_file():
return sha256_file(p)
except Exception:
return None
return None
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
return 1
store_registry = Store(config)
any_notes = False
for res in results:
if not isinstance(res, dict):
continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
if not store_name:
log("[get_note] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
)
if not resolved_hash:
continue
try:
backend = store_registry[store_name]
except Exception as exc:
log(f"[get_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
notes = {}
try:
notes = backend.get_note(resolved_hash, config=config) or {}
except Exception:
notes = {}
if not notes:
continue
any_notes = True
# Emit each note as its own row so CLI renders a proper note table
for k in sorted(notes.keys(), key=lambda x: str(x).lower()):
v = notes.get(k)
raw_text = str(v or "")
preview = " ".join(raw_text.replace("\r", "").split("\n"))
ctx.emit(
{
"store": store_name,
"hash": resolved_hash,
"note_name": str(k),
"note_text": raw_text,
"columns": [
("Name", str(k)),
("Text", preview.strip()),
],
}
)
if not any_notes:
ctx.emit("No notes found.")
return 0
CMDLET = Get_Note()
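# A minimal usage sketch based on the usage string above (store name and hash
# are hypothetical):
#   @1 | get-note
#   get-note -store local -hash <sha256>     # lyrics, if any, appear as a note named 'lyric'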

429
cmdlet/get_relationship.py Normal file
View File

@@ -0,0 +1,429 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from pathlib import Path
from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help
from API.folder import API_folder_store
from config import get_local_storage_path
from result_table import ResultTable
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-hash <sha256>]",
alias=[
"get-rel",
],
arg=[
SharedArgs.HASH,
],
detail=[
"- Lists relationship data as returned by Hydrus or Local DB.",
],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(_args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, store}
source_title = "Unknown"
def _add_relationship(entry: Dict[str, Any]) -> None:
"""Add relationship if not already present by hash or path."""
for existing in found_relationships:
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
return
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
return
found_relationships.append(entry)
# Check for local file first
file_path = None
if isinstance(result, dict):
file_path = result.get("file_path") or result.get("path")
source_title = result.get("title") or result.get("name") or "Unknown"
elif hasattr(result, "file_path"):
file_path = result.file_path
source_title = getattr(result, "title", "Unknown")
local_db_checked = False
if file_path and not override_hash:
try:
path_obj = Path(file_path)
if not source_title or source_title == "Unknown":
source_title = path_obj.name
print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
if path_obj.exists():
storage_path = get_local_storage_path(config)
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with API_folder_store(storage_path) as db:
file_hash = db.get_file_hash(path_obj)
metadata = db.get_metadata(file_hash) if file_hash else None
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
# h is now a file hash (not a path)
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
# Resolve hash to file path
resolved_path = db.search_hash(h)
title = h[:16] + "..."
path = None
if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
tags = db.get_tags(h)
found_title = False
for t in tags:
if t.lower().startswith('title:'):
title = t[6:].strip()
found_title = True
break
if not found_title:
title = resolved_path.stem
except Exception:
title = resolved_path.stem
entry_type = "king" if rel_type.lower() == "alt" else rel_type
_add_relationship({
"hash": h,
"type": entry_type,
"title": title,
"path": path,
"store": "local"
})
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
# Also add the king/parent itself if not already in results
existing_parent = None
for r in found_relationships:
if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
existing_parent = r
break
if not existing_parent:
parent_title = parent_path_obj.stem
try:
parent_hash = db.get_file_hash(parent_path_obj)
if parent_hash:
parent_tags = db.get_tags(parent_hash)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
_add_relationship({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"store": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
if rel_type.lower() == "alt":
existing_parent['type'] = "king"
# 1. Check forward relationships from parent (siblings)
parent_hash = db.get_file_hash(parent_path_obj)
parent_metadata = db.get_metadata(parent_hash) if parent_hash else None
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue
# Check if already added (case-insensitive hash/path check)
if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue
# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_hash = db.get_file_hash(child_path_obj)
if child_hash:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"store": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to parent but parent doesn't point to siblings
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
# Skip if already added (check by path/hash, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue
child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_hash = db.get_file_hash(child_path_obj)
if child_hash:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"store": "local"
})
except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "store": "local"
# })
except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)
# If we found local relationships, we can stop or merge with Hydrus?
# For now, if we found local ones, let's show them.
# But if the file is also in Hydrus, we might want those too.
# Let's try Hydrus if we have a hash.
hash_hex = get_hash_for_operation(override_hash, result)
if hash_hex and not local_db_checked:
try:
client = hydrus_wrapper.get_client(config)
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
file_rels = rel.get("file_relationships", {})
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names
# 0: potential duplicates, 1: false positives, 2: false positives (alternates),
# 3: duplicates, 4: alternatives, 8: king
# This mapping is approximate based on Hydrus API docs/behavior
rel_map = {
"0": "potential duplicate",
"1": "false positive",
"2": "false positive",
"3": "duplicate",
"4": "alternative",
"8": "king"
}
for rel_type_id, hash_list in this_file_rels.items():
# Skip metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
continue
rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
# Check if we already have this hash from local DB
if not any(r['hash'] == rel_hash for r in found_relationships):
found_relationships.append({
"hash": rel_hash,
"type": rel_name,
"title": rel_hash, # Can't resolve title easily without another API call
"path": None,
"store": "hydrus"
})
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
if not found_relationships:
log("No relationships found.")
return 0
# Display results
table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])
# Sort by type then title
# Custom sort order: King first, then Derivative, then others
def type_sort_key(item):
t = item['type'].lower()
if t == 'king':
return 0
elif t == 'derivative':
return 1
elif t == 'alternative':
return 2
elif t == 'duplicate':
return 3
else:
return 4
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
pipeline_results = []
for i, item in enumerate(found_relationships):
row = table.add_row()
row.add_column("Type", item['type'].title())
row.add_column("Title", item['title'])
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
row.add_column("Store", item['store'])
# Create result object for pipeline
res_obj = {
"title": item['title'],
"hash": item['hash'],
"file_hash": item['hash'],
"relationship_type": item['type'],
"store": item['store']
}
if item['path']:
res_obj["path"] = item['path']
res_obj["file_path"] = item['path']
res_obj["target"] = item['path']
else:
# If Hydrus, target is hash
res_obj["target"] = item['hash']
pipeline_results.append(res_obj)
# Set selection args
# If it has a path, we can use it directly. If hash, maybe get-file -hash?
if item['path']:
table.set_row_selection_args(i, [item['path']])
else:
table.set_row_selection_args(i, ["-hash", item['hash']])
ctx.set_last_result_table(table, pipeline_results)
print(table)
return 0
CMDLET.exec = _run
CMDLET.register()
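# A minimal usage sketch based on the usage string above (the hash is hypothetical):
#   @1 | get-relationship                    # list relationships for the selected file
#   get-relationship -hash <sha256>          # query Hydrus directly by hash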

1382
cmdlet/get_tag.py Normal file

File diff suppressed because it is too large Load Diff

80
cmdlet/get_url.py Normal file
View File

@@ -0,0 +1,80 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import sys
from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log
from Store import Store
class Get_Url(Cmdlet):
"""Get url associated with files via hash+store."""
NAME = "get-url"
SUMMARY = "List url associated with a file"
USAGE = "@1 | get-url"
ARGS = [
SharedArgs.HASH,
SharedArgs.STORE,
]
DETAIL = [
"- Lists all url associated with file identified by hash+store",
]
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get url for file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("Error: No file hash provided")
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Get backend and retrieve url
try:
storage = Store(config)
backend = storage[store_name]
urls = backend.get_url(file_hash)
if urls:
for u in urls:
# Emit rich object for pipeline compatibility
ctx.emit({
"url": u,
"hash": file_hash,
"store": store_name,
})
return 0
else:
ctx.emit("No url found")
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
except Exception as exc:
log(f"Error retrieving url: {exc}", file=sys.stderr)
return 1
# Register cmdlet
register(["get-url", "get_url"])(Get_Url)

819
cmdlet/merge_file.py Normal file
View File

@@ -0,0 +1,819 @@
"""Merge multiple files into a single output file."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import sys
from SYS.logger import log
import subprocess as _subprocess
import shutil as _shutil
from ._shared import (
Cmdlet,
CmdletArg,
create_pipe_object_result,
get_field,
get_pipe_object_hash,
get_pipe_object_path,
normalize_result_input,
parse_cmdlet_args,
should_show_help,
)
import pipeline as ctx
try:
from PyPDF2 import PdfWriter, PdfReader
HAS_PYPDF2 = True
except ImportError:
HAS_PYPDF2 = False
PdfWriter = None
PdfReader = None
try:
from metadata import (
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
def read_tags_from_file(file_path: Path) -> List[str]:
return []
def write_tags_to_file(
file_path: Path,
tags: List[str],
source_hashes: Optional[List[str]] = None,
url: Optional[List[str]] = None,
append: bool = False,
) -> bool:
return False
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
return tags
def write_metadata(*_args: Any, **_kwargs: Any) -> None:
return None
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Merge multiple files into one."""
# Parse help
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
delete_after = parsed.get("delete", False)
output_override: Optional[Path] = None
output_arg = parsed.get("output")
if output_arg:
try:
output_override = Path(str(output_arg)).expanduser()
except Exception:
output_override = None
format_spec = parsed.get("format")
if format_spec:
format_spec = str(format_spec).lower().strip()
# Collect files from piped results
# Use normalize_result_input to handle both single items and lists
files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
if not files_to_merge:
log("No files provided to merge", file=sys.stderr)
return 1
if len(files_to_merge) < 2:
# Only 1 file - pass it through unchanged
# (merge only happens when multiple files are collected)
item = files_to_merge[0]
ctx.emit(item)
return 0
# Extract file paths and metadata from result objects
source_files: List[Path] = []
source_hashes: List[str] = []
source_url: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
source_relationships: List[str] = [] # NEW: collect relationships from source files
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
if isinstance(raw_path, Path):
target_path = raw_path
elif isinstance(raw_path, str) and raw_path.strip():
candidate = Path(raw_path).expanduser()
if candidate.exists():
target_path = candidate
if target_path and target_path.exists():
source_files.append(target_path)
# Track tags from the .tag sidecar for this source (if present)
tags_file = target_path.with_suffix(target_path.suffix + '.tag')
if tags_file.exists() and HAS_METADATA_API:
try:
source_tags.extend(read_tags_from_file(tags_file) or [])
except Exception:
pass
# Extract hash if available in item (as fallback)
hash_value = get_pipe_object_hash(item)
if hash_value and hash_value not in source_hashes:
source_hashes.append(str(hash_value))
# Extract known url if available
url = get_field(item, 'url', [])
if isinstance(url, str):
source_url.append(url)
elif isinstance(url, list):
source_url.extend(url)
else:
title = get_field(item, 'title', 'unknown') or get_field(item, 'id', 'unknown')
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
if len(source_files) < 2:
log("At least 2 valid files required to merge", file=sys.stderr)
return 1
# Detect file types
file_types = set()
for f in source_files:
suffix = f.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
file_types.add('audio')
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
file_types.add('video')
elif suffix in {'.pdf'}:
file_types.add('pdf')
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
file_types.add('text')
else:
file_types.add('other')
if len(file_types) > 1 and 'other' not in file_types:
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
log(f"Can only merge files of the same type", file=sys.stderr)
return 1
file_kind = list(file_types)[0] if file_types else 'other'
# Determine output format
output_format = format_spec or 'auto'
if output_format == 'auto':
if file_kind == 'audio':
output_format = 'mka' # Default audio container - MKA supports chapters and lossless stream copy
elif file_kind == 'video':
output_format = 'mp4' # Default video container
elif file_kind == 'pdf':
output_format = 'pdf'
else:
output_format = 'txt'
# Determine output path
if output_override:
if output_override.is_dir():
base_title = get_field(files_to_merge[0], 'title', 'merged')
base_name = _sanitize_name(str(base_title or 'merged'))
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
else:
output_path = output_override
else:
first_file = source_files[0]
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
# Perform merge based on file type
if file_kind == 'audio':
success = _merge_audio(source_files, output_path, output_format)
elif file_kind == 'video':
success = _merge_video(source_files, output_path, output_format)
elif file_kind == 'pdf':
success = _merge_pdf(source_files, output_path)
elif file_kind == 'text':
success = _merge_text(source_files, output_path)
else:
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
return 1
if not success:
log("Merge failed", file=sys.stderr)
return 1
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
merged_tags: List[str] = [f"title:{output_path.stem}"]
# Create .tag sidecar file for the merged output using unified API
tags_path = output_path.with_suffix(output_path.suffix + '.tag')
try:
# Merge tags from source files using metadata API
if source_tags and HAS_METADATA_API:
# Use dedup function to normalize and deduplicate
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
# Fallback: simple deduplication if metadata API unavailable
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Write merged tags to sidecar file
if HAS_METADATA_API and write_tags_to_file:
# Use unified API for file writing
source_hashes_list = source_hashes if source_hashes else None
source_url_list = source_url if source_url else None
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_url_list)
else:
# Fallback: manual file writing
tags_lines = []
# Add hash first (if available)
if source_hashes:
tags_lines.append(f"hash:{source_hashes[0]}")
# Add regular tags
tags_lines.extend(merged_tags)
# Add known url
if source_url:
for url in source_url:
tags_lines.append(f"url:{url}")
# Add relationships (if available)
if source_relationships:
for rel in source_relationships:
tags_lines.append(f"relationship:{rel}")
with open(tags_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(tags_lines) + '\n')
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
# Also create .metadata file using centralized function
try:
if HAS_METADATA_API and write_metadata:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
# Emit a PipeObject-compatible dict so the merged file can be piped to next command
try:
from SYS.utils import sha256_file
merged_hash = sha256_file(output_path)
merged_item = create_pipe_object_result(
source="local",
identifier=output_path.name,
file_path=str(output_path),
cmdlet_name="merge-file",
title=output_path.stem,
hash_value=merged_hash,
tag=merged_tags,
url=source_url,
media_kind=file_kind,
)
# Clear previous results to ensure only the merged file is passed down
ctx.clear_last_result()
ctx.emit(merged_item)
except Exception as e:
log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
# Still emit a string representation for feedback
ctx.emit(f"Merged: {output_path}")
# Cleanup
# - Delete source files only when -delete is set.
if delete_after:
for f in source_files:
try:
# Delete sidecar tags for the source (if any)
tag_file = f.with_suffix(f.suffix + '.tag')
if tag_file.exists():
try:
tag_file.unlink()
log(f"Deleted: {tag_file.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {tag_file.name}: {e}", file=sys.stderr)
except Exception:
pass
try:
if f.exists():
f.unlink()
log(f"Deleted: {f.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
return 0
def _sanitize_name(text: str) -> str:
"""Sanitize filename."""
allowed = []
for ch in text:
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
return (" ".join("".join(allowed).split()) or "merged").strip()
def _ext_for_format(fmt: str) -> str:
"""Get file extension for format."""
format_map = {
'mp3': 'mp3',
'm4a': 'm4a',
'm4b': 'm4b',
'aac': 'aac',
'opus': 'opus',
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
'mkv': 'mkv',
'mp4': 'mp4',
'webm': 'webm',
'pdf': 'pdf',
'txt': 'txt',
'auto': 'mka', # Default - MKA for chapters
}
return format_map.get(fmt.lower(), 'mka')
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge audio files with chapters based on file boundaries."""
import logging
logger = logging.getLogger(__name__)
ffmpeg_path = _shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
try:
# Step 1: Get duration of each file to calculate chapter timestamps
chapters = []
current_time_ms = 0
log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
logger.info(f"[merge-file] Analyzing files for chapters")
for file_path in files:
# Get duration using ffprobe
try:
ffprobe_cmd = [
'ffprobe', '-v', 'error', '-show_entries',
'format=duration', '-print_format',
'default=noprint_wrappers=1:nokey=1', str(file_path)
]
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
if probe_result.returncode == 0 and probe_result.stdout.strip():
try:
duration_sec = float(probe_result.stdout.strip())
except ValueError:
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
duration_sec = 0
else:
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
duration_sec = 0
except Exception as e:
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
duration_sec = 0
# Create chapter entry - use title: tag from metadata if available
title = file_path.stem # Default to filename without extension
if HAS_METADATA_API:
try:
# Try to read tags from .tag sidecar file
tags_file = file_path.with_suffix(file_path.suffix + '.tag')
if tags_file.exists():
tags = read_tags_from_file(tags_file)
if tags:
# Look for title: tag
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith('title:'):
# Extract the title value after the colon
title = tag.split(':', 1)[1].strip()
break
except Exception as e:
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
pass # Fall back to filename
# Convert seconds to HH:MM:SS.mmm format
hours = int(current_time_ms // 3600000)
minutes = int((current_time_ms % 3600000) // 60000)
seconds = int((current_time_ms % 60000) // 1000)
millis = int(current_time_ms % 1000)
chapters.append({
'time_ms': current_time_ms,
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
'title': title,
'duration_sec': duration_sec
})
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
current_time_ms += int(duration_sec * 1000)
# Step 2: Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
concat_lines = []
for f in files:
# Escape quotes in path
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
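# The concat demuxer file written above contains one line per source, e.g.:
#   file '/path/to/01 - intro.mp3'
#   file '/path/to/02 - part one.mp3'
# (a single quote inside a path is escaped as '\'' per ffmpeg's concat quoting rules)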
# Step 3: Create FFmpeg metadata file with chapters
metadata_file = output.parent / f".metadata_{output.stem}.txt"
metadata_lines = [';FFMETADATA1']
for i, chapter in enumerate(chapters):
# FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
metadata_lines.append('[CHAPTER]')
metadata_lines.append('TIMEBASE=1/1000')
metadata_lines.append(f'START={chapter["time_ms"]}')
# Calculate end time (start of next chapter or end of file)
if i < len(chapters) - 1:
metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
else:
metadata_lines.append(f'END={current_time_ms}')
metadata_lines.append(f'title={chapter["title"]}')
metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
logger.info(f"[merge-file] Created {len(chapters)} chapters")
# Step 4: Build FFmpeg command to merge and embed chapters
# Strategy: First merge audio, then add metadata in separate pass
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
# Add threading options for speed
cmd.extend(['-threads', '0']) # Use all available threads
# Audio codec selection for first input
if output_format == 'mp3':
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
elif output_format in {'m4a', 'm4b'}:
# Re-encode to AAC: the sources may use different codecs, so stream copy is not safe here.
# (Stream copy would be much faster when every input is already AAC, but that is not detected yet.)
cmd.extend(['-c:a', 'aac', '-b:a', '256k'])  # 256k AAC for M4A/M4B output
elif output_format == 'aac':
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'opus':
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
elif output_format == 'mka':
# MKA is a Matroska container that accepts nearly any audio codec, so stream copy
# (no re-encode) is the fastest and safest default here.
cmd.extend(['-c:a', 'copy'])
else:
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
# Add the output file
cmd.append(str(output))
log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
# Run ffmpeg with progress monitoring
try:
from SYS.progress import print_progress, print_final_progress
import re
process = _subprocess.Popen(
cmd,
stdout=_subprocess.PIPE,
stderr=_subprocess.PIPE,
text=True,
encoding='utf-8',
errors='replace'
)
# Monitor progress
duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
total_duration_sec = current_time_ms / 1000.0
while True:
# Read stderr line by line (ffmpeg writes progress to stderr)
if process.stderr:
line = process.stderr.readline()
if not line and process.poll() is not None:
break
if line:
# Parse time=HH:MM:SS.mm
match = duration_re.search(line)
if match and total_duration_sec > 0:
h, m, s, cs = map(int, match.groups())
current_sec = h * 3600 + m * 60 + s + cs / 100.0
# Calculate speed/bitrate if available (optional)
# For now just show percentage
print_progress(
output.name,
int(current_sec * 1000), # Use ms as "bytes" for progress bar
int(total_duration_sec * 1000),
speed=0
)
else:
break
# Wait for completion
stdout, stderr = process.communicate()
if process.returncode != 0:
log(f"FFmpeg error: {stderr}", file=sys.stderr)
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
except Exception as e:
logger.exception(f"[merge-file] ffmpeg process error: {e}")
raise
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
if output_format == 'mka' or output.suffix.lower() == '.mka':
# MKA/MKV format has native chapter support via FFMetadata
# Re-mux the file with chapters embedded (copy streams, no re-encode)
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
temp_output = output.parent / f".temp_{output.stem}.mka"
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
mkvmerge_path = _shutil.which('mkvmerge')
if mkvmerge_path:
# mkvmerge is the best tool for embedding chapters in Matroska files
log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
cmd2 = [
mkvmerge_path, '-o', str(temp_output),
'--chapters', str(metadata_file),
str(output)
]
else:
# Fallback to ffmpeg with proper chapter embedding for Matroska
log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
# For Matroska files, the metadata must be provided via -f ffmetadata input
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
]
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
try:
# Run chapter embedding silently (progress handled by worker thread)
_subprocess.run(
cmd2,
capture_output=True,
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
log(f"✓ Chapters successfully embedded!", file=sys.stderr)
logger.info(f"[merge-file] Chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
try:
temp_output.unlink()
except Exception:
pass
else:
logger.warning(f"[merge-file] Chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in ['.m4a', '.m4b', '.mp4']:
# MP4/M4A format has native chapter support via iTunes metadata atoms
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
]
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
try:
# Run MP4 chapter embedding silently (progress handled by worker thread)
_subprocess.run(
cmd2,
capture_output=True,
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
logger.info(f"[merge-file] MP4 chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
try:
temp_output.unlink()
except Exception:
pass
else:
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
else:
# For other formats, chapters would require external tools
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
# Clean up temp files
try:
concat_file.unlink()
except Exception:
pass
try:
metadata_file.unlink()
except Exception:
pass
return True
except Exception as e:
log(f"Audio merge error: {e}", file=sys.stderr)
logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
return False
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge video files."""
ffmpeg_path = _shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
try:
# Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
concat_lines = []
for f in files:
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
# Build FFmpeg command for video merge
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
# Video codec selection
if output_format == 'mp4':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'mkv':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
else:
cmd.extend(['-c', 'copy']) # Copy without re-encoding
cmd.append(str(output))
log(f"Merging {len(files)} video files...", file=sys.stderr)
result = _subprocess.run(cmd, capture_output=True, text=True)
# Clean up concat file
try:
concat_file.unlink()
except Exception:
pass
if result.returncode != 0:
stderr = (result.stderr or '').strip()
log(f"FFmpeg error: {stderr}", file=sys.stderr)
return False
return True
except Exception as e:
log(f"Video merge error: {e}", file=sys.stderr)
return False
def _merge_text(files: List[Path], output: Path) -> bool:
"""Merge text files."""
try:
with open(output, 'w', encoding='utf-8') as outf:
for i, f in enumerate(files):
if i > 0:
outf.write('\n---\n') # Separator between files
try:
content = f.read_text(encoding='utf-8', errors='replace')
outf.write(content)
except Exception as e:
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
return True
except Exception as e:
log(f"Text merge error: {e}", file=sys.stderr)
return False
def _merge_pdf(files: List[Path], output: Path) -> bool:
"""Merge PDF files."""
if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None):
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
return False
try:
writer = PdfWriter()
for f in files:
try:
reader = PdfReader(f)
for page in reader.pages:
writer.add_page(page)
log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
except Exception as e:
log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
return False
with open(output, 'wb') as outf:
writer.write(outf)
return True
except Exception as e:
log(f"PDF merge error: {e}", file=sys.stderr)
return False
CMDLET = Cmdlet(
name="merge-file",
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
arg=[
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
CmdletArg("-output", description="Override output file path."),
CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
],
detail=[
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
"- Audio files merge with minimal quality loss using specified codec.",
"- Video files merge into MP4 or MKV containers.",
"- PDF files merge into a single PDF document.",
"- Text/document files are concatenated.",
"- Output name derived from first file with ' (merged)' suffix.",
"- -delete flag removes all source files after successful merge.",
],
)
CMDLET.exec = _run
CMDLET.register()
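# Example pipelines (sketch, based on the usage/detail strings above):
#   search-file "lecture" | [1,2,3] | merge-file                      # audio auto-detects to MKA with chapters
#   search-file "audiobook" | [1,2] | merge-file -format m4b -delete  # M4B output, source files removed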

721
cmdlet/screen_shot.py Normal file
View File

@@ -0,0 +1,721 @@
"""Screen-shot cmdlet for capturing screenshots of url in a pipeline.
This cmdlet processes files through the pipeline and creates screenshots using
Playwright, marking them as temporary artifacts for cleanup.
"""
from __future__ import annotations
import contextlib
import hashlib
import sys
import time
import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import urlsplit, quote, urljoin
from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
import pipeline as pipeline_context
# ============================================================================
# CMDLET Metadata Declaration
# ============================================================================
# ============================================================================
# Playwright & Screenshot Dependencies
# ============================================================================
try:
from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
sync_playwright,
)
HAS_PLAYWRIGHT = True
except Exception:
HAS_PLAYWRIGHT = False
PlaywrightTimeoutError = TimeoutError # type: ignore
def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore
raise RuntimeError(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
)
try:
from config import resolve_output_dir
except ImportError:
try:
_parent_dir = str(Path(__file__).parent.parent)
if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
from config import resolve_output_dir
except ImportError:
resolve_output_dir = None
# ============================================================================
# Screenshot Constants & Configuration
# ============================================================================
USER_AGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0.0.0 Safari/537.36"
)
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1280, "height": 1200}
ARCHIVE_TIMEOUT = 30.0
# Configurable selectors for specific websites
SITE_SELECTORS: Dict[str, List[str]] = {
"twitter.com": [
"article[role='article']",
"div[data-testid='tweet']",
"div[data-testid='cellInnerDiv'] article",
],
"x.com": [
"article[role='article']",
"div[data-testid='tweet']",
"div[data-testid='cellInnerDiv'] article",
],
"instagram.com": [
"article[role='presentation']",
"article[role='article']",
"div[role='dialog'] article",
"section main article",
],
"reddit.com": [
"shreddit-post",
"div[data-testid='post-container']",
"div[data-click-id='background']",
"article",
],
"rumble.com": [
"rumble-player, iframe.rumble",
"div.video-item--main",
"main article",
],
}
class ScreenshotError(RuntimeError):
"""Raised when screenshot capture or upload fails."""
@dataclass(slots=True)
class ScreenshotOptions:
"""Options controlling screenshot capture and post-processing."""
output_dir: Path
url: str = ""
output_path: Optional[Path] = None
full_page: bool = True
headless: bool = True
wait_after_load: float = 2.0
wait_for_article: bool = False
replace_video_posters: bool = True
tag: Sequence[str] = ()
archive: bool = False
archive_timeout: float = ARCHIVE_TIMEOUT
output_format: Optional[str] = None
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
selector_timeout_ms: int = 10_000
@dataclass(slots=True)
class ScreenshotResult:
"""Details about the captured screenshot."""
path: Path
tag_applied: List[str]
archive_url: List[str]
url: List[str]
warnings: List[str] = field(default_factory=list)
# ============================================================================
# Helper Functions
# ============================================================================
def _slugify_url(url: str) -> str:
"""Convert URL to filesystem-safe slug."""
parsed = urlsplit(url)
candidate = f"{parsed.netloc}{parsed.path}"
if parsed.query:
candidate += f"?{parsed.query}"
slug = "".join(char if char.isalnum() else "-" for char in candidate.lower())
slug = slug.strip("-") or "screenshot"
return slug[:100]
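# Example: _slugify_url("https://x.com/user/status/123?s=20") -> "x-com-user-status-123-s-20"
# (netloc + path + query, lowercased, every non-alphanumeric mapped to '-', capped at 100 chars)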
def _normalise_format(fmt: Optional[str]) -> str:
"""Normalize output format to valid values."""
if not fmt:
return "png"
value = fmt.strip().lower()
if value in {"jpg", "jpeg"}:
return "jpeg"
if value in {"png", "pdf"}:
return value
return "png"
def _format_suffix(fmt: str) -> str:
"""Get file suffix for format."""
if fmt == "jpeg":
return ".jpg"
return f".{fmt}"
def _selectors_for_url(url: str) -> List[str]:
"""Return a list of likely content selectors for known platforms."""
u = url.lower()
sels: List[str] = []
for domain, selectors in SITE_SELECTORS.items():
if domain in u:
sels.extend(selectors)
return sels or ["article"]
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
u = url.lower()
def _try_click_texts(texts: List[str], passes: int = 2, per_timeout: int = 700) -> int:
clicks = 0
for _ in range(max(1, passes)):
for t in texts:
try:
page.locator(f"text=/{t}/i").first.click(timeout=per_timeout)
clicks += 1
except PlaywrightTimeoutError:
pass
except Exception:
pass
time.sleep(0.1)
return clicks
# Dismiss common cookie/consent prompts
_try_click_texts(["accept", "i agree", "agree", "got it", "allow all", "consent"])
# Platform-specific expansions
if "reddit.com" in u:
_try_click_texts(["see more", "read more", "show more", "more"])
if ("twitter.com" in u) or ("x.com" in u):
_try_click_texts(["show more", "more"])
if "instagram.com" in u:
_try_click_texts(["more", "see more"])
if "tiktok.com" in u:
_try_click_texts(["more", "see more"])
if ("facebook.com" in u) or ("fb.watch" in u):
_try_click_texts(["see more", "show more", "more"])
if "rumble.com" in u:
_try_click_texts(["accept", "agree", "close"])
def _submit_wayback(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Internet Archive Wayback Machine."""
encoded = quote(url, safe="/:?=&")
with HTTPClient() as client:
response = client.get(f"https://web.archive.org/save/{encoded}")
response.raise_for_status()
content_location = response.headers.get("Content-Location")
if content_location:
return urljoin("https://web.archive.org", content_location)
return str(response.url)
def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Archive.today."""
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
response = client.get(f"https://archive.today/submit/?url={encoded}")
response.raise_for_status()
final = str(response.url)
if final and ("archive.today" in final or "archive.ph" in final):
return final
return None
def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Archive.ph."""
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
response = client.get(f"https://archive.ph/submit/?url={encoded}")
response.raise_for_status()
final = str(response.url)
if final and "archive.ph" in final:
return final
return None
def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
"""Submit URL to all available archive services."""
archives: List[str] = []
warnings: List[str] = []
for submitter, label in (
(_submit_wayback, "wayback"),
(_submit_archive_today, "archive.today"),
(_submit_archive_ph, "archive.ph"),
):
try:
log(f"Archiving to {label}...", flush=True)
archived = submitter(url, timeout)
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 429:
warnings.append(f"archive {label} rate limited (HTTP 429)")
log(f"{label}: Rate limited (HTTP 429)", flush=True)
else:
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
except httpx.RequestError as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: Connection error: {exc}", flush=True)
except Exception as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: {exc}", flush=True)
else:
if archived:
archives.append(archived)
log(f"{label}: Success - {archived}", flush=True)
else:
log(f"{label}: No archive link returned", flush=True)
return archives, warnings
def _prepare_output_path(options: ScreenshotOptions) -> Path:
"""Prepare and validate output path for screenshot."""
ensure_directory(options.output_dir)
explicit_format = _normalise_format(options.output_format) if options.output_format else None
inferred_format: Optional[str] = None
if options.output_path is not None:
path = options.output_path
if not path.is_absolute():
path = options.output_dir / path
suffix = path.suffix.lower()
if suffix:
inferred_format = _normalise_format(suffix[1:])
else:
stamp = time.strftime("%Y%m%d_%H%M%S")
filename = f"{_slugify_url(options.url)}_{stamp}"
path = options.output_dir / filename
final_format = explicit_format or inferred_format or "png"
if not path.suffix:
path = path.with_suffix(_format_suffix(final_format))
else:
current_suffix = path.suffix.lower()
expected = _format_suffix(final_format)
if current_suffix != expected:
path = path.with_suffix(expected)
options.output_format = final_format
return unique_path(path)
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
playwright = None
browser = None
context = None
try:
debug("Starting Playwright...", flush=True)
playwright = sync_playwright().start()
log("Launching Chromium browser...", flush=True)
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
browser = playwright.chromium.launch(
headless=headless,
args=["--disable-blink-features=AutomationControlled"],
)
log("Creating browser context...", flush=True)
context = browser.new_context(
user_agent=USER_AGENT,
viewport=DEFAULT_VIEWPORT,
ignore_https_errors=True,
)
page = context.new_page()
log(f"Navigating to {options.url}...", flush=True)
try:
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
log("Page loaded successfully", flush=True)
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
log("Waiting for article element...", flush=True)
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
if (v.poster) {
const img = document.createElement('img');
img.src = v.poster;
img.style.maxWidth = '100%';
img.style.borderRadius = '12px';
v.replaceWith(img);
}
});
"""
)
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
try:
_platform_preprocess(options.url, page, warnings)
except Exception as e:
debug(f"[_capture] Platform preprocess failed: {e}")
pass
selectors = list(options.target_selectors or [])
if not selectors:
selectors = _selectors_for_url(options.url)
debug(f"[_capture] Trying selectors: {selectors}")
for sel in selectors:
try:
log(f"Trying selector: {sel}", flush=True)
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
continue
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
else:
log(f"Capturing full page to {destination}...", flush=True)
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
if options.full_page:
page.screenshot(full_page=True, **screenshot_kwargs)
else:
article = page.query_selector("article")
if article is not None:
article_kwargs = dict(screenshot_kwargs)
article_kwargs.pop("full_page", None)
article.screenshot(**article_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
except Exception as exc:
debug(f"[_capture] Exception: {exc}")
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
finally:
log("Cleaning up browser resources...", flush=True)
with contextlib.suppress(Exception):
if context is not None:
context.close()
with contextlib.suppress(Exception):
if browser is not None:
browser.close()
with contextlib.suppress(Exception):
if playwright is not None:
playwright.stop()
log("Cleanup complete", flush=True)
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
"""Capture a screenshot for the given options."""
debug(f"[_capture_screenshot] Preparing capture for {options.url}")
destination = _prepare_output_path(options)
warnings: List[str] = []
_capture(options, destination, warnings)
# Build URL list from captured url and any archives
url: List[str] = [options.url] if options.url else []
archive_url: List[str] = []
if options.archive and options.url:
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
archive_url.extend(archives)
warnings.extend(archive_warnings)
if archives:
url = unique_preserve_order([*url, *archives])
applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip()))
return ScreenshotResult(
path=destination,
tag_applied=applied_tag,
archive_url=archive_url,
url=url,
warnings=warnings,
)
# ============================================================================
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of url in the pipeline.
Accepts:
- Single result object (dict or PipeObject) with 'path' field
- List of result objects to screenshot each
- Direct URL as string
Emits PipeObject-formatted results for each screenshot with:
- action: 'cmdlet:screen-shot'
- is_temp: True (screenshots are temporary artifacts)
- parent_id: hash of the original file/URL
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
from ._shared import parse_cmdlet_args
debug(f"[_run] screen-shot invoked with args: {args}")
# Help check
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
if not HAS_PLAYWRIGHT:
log(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install",
file=sys.stderr,
)
return 1
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
storage_value = parsed.get("storage")
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
archive_enabled = parsed.get("archive", False)
# Positional URL argument (if provided)
url_arg = parsed.get("url")
positional_url = [str(url_arg)] if url_arg else []
# ========================================================================
# INPUT PROCESSING - Extract url from pipeline or command arguments
# ========================================================================
piped_results = normalize_result_input(result)
url_to_process = []
# Extract url from piped results
if piped_results:
for item in piped_results:
url = (
get_field(item, 'path')
or get_field(item, 'url')
or get_field(item, 'target')
)
if url:
url_to_process.append(str(url))
# Use positional arguments if no pipeline input
if not url_to_process and positional_url:
url_to_process = positional_url
if not url_to_process:
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
return 1
debug(f"[_run] url to process: {url_to_process}")
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
# ========================================================================
screenshot_dir: Optional[Path] = None
# Primary: Use --storage if provided (highest priority)
if storage_value:
try:
screenshot_dir = SharedArgs.resolve_storage(storage_value)
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
# Secondary: Use config-based resolver ONLY if --storage not provided
if screenshot_dir is None and resolve_output_dir is not None:
try:
screenshot_dir = resolve_output_dir(config)
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
except Exception:
pass
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
if screenshot_dir is None and config and config.get("outfile"):
try:
screenshot_dir = Path(config["outfile"]).expanduser()
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
ensure_directory(screenshot_dir)
# ========================================================================
# PREPARE SCREENSHOT OPTIONS
# ========================================================================
format_name = _normalise_format(format_value)
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
target_selectors = filtered_selectors if filtered_selectors else None
all_emitted = []
exit_code = 0
# ========================================================================
# PROCESS URLS AND CAPTURE SCREENSHOTS
# ========================================================================
for url in url_to_process:
# Validate URL format
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
continue
try:
# Create screenshot with provided options
options = ScreenshotOptions(
url=url,
output_dir=screenshot_dir,
output_format=format_name,
archive=archive_enabled,
target_selectors=target_selectors,
prefer_platform_target=False,
wait_for_article=False,
full_page=True,
)
screenshot_result = _capture_screenshot(options)
# Log results and warnings
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
if screenshot_result.archive_url:
log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True)
for warning in screenshot_result.warnings:
log(f"Warning: {warning}", flush=True)
# Compute hash of screenshot file
screenshot_hash = None
try:
with open(screenshot_result.path, 'rb') as f:
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
except Exception:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
pipe_obj = create_pipe_object_result(
source='screenshot',
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
title=f"Screenshot: {Path(screenshot_result.path).name}",
hash_value=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
extra={
'source_url': url,
'archive_url': screenshot_result.archive_url,
'url': screenshot_result.url,
'target': str(screenshot_result.path), # Explicit target for add-file
}
)
# Emit the result so downstream cmdlet (like add-file) can use it
pipeline_context.emit(pipe_obj)
all_emitted.append(pipe_obj)
except ScreenshotError as exc:
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
exit_code = 1
except Exception as exc:
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
if not all_emitted:
log(f"No screenshots were successfully captured", file=sys.stderr)
return 1
# Log completion message
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
return exit_code
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
alias=["screenshot", "ss"],
arg=[
SharedArgs.URL,
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
],
detail=
["""
"""]
)
CMDLET.exec = _run
CMDLET.register()
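# Example usage (sketch, mirroring the USAGE string above):
#   screen-shot https://example.com -format pdf
#   download-data https://example.com | screen-shot
# Each capture is emitted as a temporary PipeObject (is_temp=True) whose 'target'
# points at the saved image, so it can be piped straight into add-file.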

169
cmdlet/search_provider.py Normal file
View File

@@ -0,0 +1,169 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
import sys
import json
import uuid
import importlib
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from ._shared import Cmdlet, CmdletArg, should_show_help
import pipeline as ctx
# Optional dependencies
try:
from config import get_local_storage_path
except Exception: # pragma: no cover
get_local_storage_path = None # type: ignore
class Search_Provider(Cmdlet):
"""Search external content providers."""
def __init__(self):
super().__init__(
name="search-provider",
summary="Search external providers (bandcamp, libgen, soulseek, youtube)",
usage="search-provider <provider> <query> [-limit N]",
arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
],
detail=[
"Search external content providers:",
"- bandcamp: Search for music albums/tracks",
" Example: search-provider bandcamp \"artist:altrusian grace\"",
"- libgen: Search Library Genesis for books",
" Example: search-provider libgen \"python programming\"",
"- soulseek: Search P2P network for music",
" Example: search-provider soulseek \"pink floyd\"",
"- youtube: Search YouTube for videos",
" Example: search-provider youtube \"tutorial\"",
"",
"Query syntax:",
"- bandcamp: Use 'artist:Name' to search by artist",
"- libgen: Supports isbn:, author:, title: prefixes",
"- soulseek: Plain text search",
"- youtube: Plain text search",
"",
"Results can be piped to other cmdlet:",
" search-provider bandcamp \"artist:grace\" | @1 | download-data",
],
exec=self.run
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute search-provider cmdlet."""
if should_show_help(args):
ctx.emit(self.__dict__)
return 0
# Parse arguments
if len(args) < 2:
log("Error: search-provider requires <provider> and <query> arguments", file=sys.stderr)
log(f"Usage: {self.usage}", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
for name, available in sorted(providers.items()):
status = "" if available else ""
log(f" {status} {name}", file=sys.stderr)
return 1
provider_name = args[0]
query = args[1]
# Parse optional limit
limit = 50
if len(args) >= 4 and args[2] in ("-limit", "--limit"):
try:
limit = int(args[3])
except ValueError:
log(f"Warning: Invalid limit value '{args[3]}', using default 50", file=sys.stderr)
debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}")
# Get provider
provider = get_search_provider(provider_name, config)
if not provider:
log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
for name, available in sorted(providers.items()):
if available:
log(f" - {name}", file=sys.stderr)
return 1
from API.folder import API_folder_store
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {}) if get_local_storage_path else None
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
# Use context manager to ensure database is always closed
with API_folder_store(library_root) as db:
try:
db.insert_worker(
worker_id,
"search-provider",
title=f"Search: {query}",
description=f"Provider: {provider_name}, Query: {query}",
pipe=ctx.get_current_command_text()
)
results_list = []
import result_table
importlib.reload(result_table)
from result_table import ResultTable
table_title = f"Search: {query} [{provider_name}]"
preserve_order = provider_name.lower() in ('youtube', 'openlibrary')
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name)
debug(f"[search-provider] Calling {provider_name}.search()")
results = provider.search(query, limit=limit)
debug(f"[search-provider] Got {len(results)} results")
if not results:
log(f"No results found for query: {query}", file=sys.stderr)
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
# Emit results for pipeline
for search_result in results:
item_dict = search_result.to_dict() if hasattr(search_result, 'to_dict') else dict(search_result)
# Ensure table field is set (should be by provider, but just in case)
if 'table' not in item_dict:
item_dict['table'] = provider_name
table.add_result(search_result) # ResultTable handles SearchResult objects
results_list.append(item_dict)
ctx.emit(item_dict)
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
log(f"Found {len(results)} result(s) from {provider_name}", file=sys.stderr)
return 0
except Exception as e:
log(f"Error searching {provider_name}: {e}", file=sys.stderr)
import traceback
debug(traceback.format_exc())
try:
db.update_worker_status(worker_id, 'error')
except Exception:
pass
return 1
# Register cmdlet instance
Search_Provider_Instance = Search_Provider()
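# Example usage (taken from the detail text above; provider availability depends on config):
#   search-provider bandcamp "artist:altrusian grace"
#   search-provider libgen "python programming" -limit 10
#   search-provider bandcamp "artist:grace" | @1 | download-data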

345
cmdlet/search_store.py Normal file
View File

@@ -0,0 +1,345 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
import sys
from SYS.logger import log, debug
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help
import pipeline as ctx
# Optional dependencies
try:
import mutagen # type: ignore
except ImportError: # pragma: no cover
mutagen = None # type: ignore
try:
from config import get_hydrus_url, resolve_output_dir
except Exception: # pragma: no cover
get_hydrus_url = None # type: ignore
resolve_output_dir = None # type: ignore
try:
from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
except ImportError: # pragma: no cover
HydrusNetwork = None # type: ignore
HydrusRequestError = RuntimeError # type: ignore
try:
from SYS.utils import sha256_file
except ImportError: # pragma: no cover
sha256_file = None # type: ignore
try:
from SYS.utils_constant import mime_maps
except ImportError: # pragma: no cover
mime_maps = {} # type: ignore
@dataclass(slots=True)
class SearchRecord:
path: str
size_bytes: int | None = None
duration_seconds: str | None = None
tag: str | None = None
hash: str | None = None
def as_dict(self) -> dict[str, str]:
payload: dict[str, str] = {"path": self.path}
if self.size_bytes is not None:
payload["size"] = str(self.size_bytes)
if self.duration_seconds:
payload["duration"] = self.duration_seconds
if self.tag:
payload["tag"] = self.tag
if self.hash:
payload["hash"] = self.hash
return payload
STORAGE_ORIGINS = {"local", "hydrus", "folder"}
class Search_Store(Cmdlet):
"""Class-based search-store cmdlet for searching storage backends."""
def __init__(self) -> None:
super().__init__(
name="search-store",
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
CmdletArg("type", description="Filter by type: audio, video, image, document"),
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"),
],
detail=[
"Search across storage backends: Folder stores and Hydrus instances",
"Use -store to search a specific backend by name",
"Filter results by: tag, size, type, duration",
"Results include hash for downstream commands (get-file, add-tag, etc.)",
"Examples:",
"search-store foo # Search all storage backends",
"search-store -store home '*' # Search 'home' Hydrus instance",
"search-store -store test 'video' # Search 'test' folder store",
"search-store song -type audio # Search for audio files",
"search-store movie -tag action # Search with tag filter",
],
exec=self.run,
)
self.register()
# --- Helper methods -------------------------------------------------
@staticmethod
def _normalize_extension(ext_value: Any) -> str:
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
ext = str(ext_value or "").strip().lstrip(".")
for sep in (" ", "|", "(", "[", "{", ",", ";"):
if sep in ext:
ext = ext.split(sep, 1)[0]
break
if "." in ext:
ext = ext.split(".")[-1]
ext = "".join(ch for ch in ext if ch.isalnum())
return ext[:5]
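    # Illustrative behaviour (hypothetical inputs): ".tar.gz" -> "gz",
    # "mp3 (audio)" -> "mp3", "webm|video" -> "webm", "jpeg2000" -> "jpeg2".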
def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Ensure storage results have the necessary fields for result_table display."""
store_value = str(payload.get("store") or "").lower()
if store_value not in STORAGE_ORIGINS:
return payload
# Ensure we have title field
if "title" not in payload:
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
# Ensure we have ext field
if "ext" not in payload:
title = str(payload.get("title", ""))
path_obj = Path(title)
if path_obj.suffix:
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
else:
payload["ext"] = payload.get("ext", "")
        # size_bytes is already populated by the backend search; result_table handles
        # formatting and column layout (including max_columns), so no manual columns
        # are created here.
return payload
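    # Illustrative example (hypothetical payload): {"store": "hydrus", "name": "movie.mkv"}
    # gains title="movie.mkv" and ext="mkv"; payloads whose store is not a known
    # storage origin are returned untouched.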
# --- Execution ------------------------------------------------------
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Search storage backends for files."""
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
args_list = [str(arg) for arg in (args or [])]
# Parse arguments
query = ""
tag_filters: List[str] = []
size_filter: Optional[Tuple[str, int]] = None
duration_filter: Optional[Tuple[str, float]] = None
type_filter: Optional[str] = None
storage_backend: Optional[str] = None
limit = 100
searched_backends: List[str] = []
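        # NOTE: only the free-text query, -store and -limit reach the backend search
        # call below; the remaining filters (tag, type, size, duration) are not
        # applied to the result set yet.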
i = 0
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-store", "--store"} and i + 1 < len(args_list):
storage_backend = args_list[i + 1]
i += 2
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
tag_filters.append(args_list[i + 1])
i += 2
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
limit = 100
i += 2
elif low in {"-type", "--type"} and i + 1 < len(args_list):
type_filter = args_list[i + 1].lower()
i += 2
elif not arg.startswith("-"):
query = f"{query} {arg}".strip() if query else arg
i += 1
else:
i += 1
store_filter: Optional[str] = None
if query:
match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
if match:
store_filter = match.group(1).strip() or None
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
query = re.sub(r"\s{2,}", " ", query)
query = query.strip().strip(',')
if store_filter and not storage_backend:
storage_backend = store_filter
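        # Illustrative example: the query "store:home beach sunset" becomes
        # query="beach sunset" with storage_backend="home" (unless -store was given).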
if not query:
log("Provide a search query", file=sys.stderr)
return 1
from API.folder import API_folder_store
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
# Use context manager to ensure database is always closed
with API_folder_store(library_root) as db:
try:
db.insert_worker(
worker_id,
"search-store",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
)
results_list = []
import result_table
import importlib
importlib.reload(result_table)
from result_table import ResultTable
table_title = f"Search: {query}"
if storage_backend:
table_title += f" [{storage_backend}]"
table = ResultTable(table_title)
from Store import Store
storage = Store(config=config or {})
from Store._base import Store as BaseStore
backend_to_search = storage_backend or None
if backend_to_search:
searched_backends.append(backend_to_search)
target_backend = storage[backend_to_search]
if type(target_backend).search is BaseStore.search:
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
results = target_backend.search(query, limit=limit)
else:
from API.HydrusNetwork import is_hydrus_available
hydrus_available = is_hydrus_available(config or {})
from Store.HydrusNetwork import HydrusNetwork
all_results = []
for backend_name in storage.list_searchable_backends():
try:
backend = storage[backend_name]
if isinstance(backend, HydrusNetwork) and not hydrus_available:
continue
searched_backends.append(backend_name)
backend_results = backend.search(query, limit=limit - len(all_results))
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
break
except Exception as exc:
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
results = all_results[:limit]
def _format_storage_label(name: str) -> str:
clean = str(name or "").strip()
if not clean:
return "Unknown"
return clean.replace("_", " ").title()
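        # e.g. (illustrative): _format_storage_label("hydrus_home") -> "Hydrus Home"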
storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
for item in results or []:
store = get_field(item, "store")
if not store:
continue
key = str(store).lower()
if key not in storage_counts:
storage_counts[key] = 0
storage_counts[key] += 1
if storage_counts or query:
display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
summary_line = table.set_storage_summary(display_counts, query, inline=True)
if summary_line:
table.title = summary_line
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
if isinstance(obj, dict):
return dict(obj)
if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
return obj.to_dict() # type: ignore[arg-type]
return {"title": str(obj)}
item_dict = _as_dict(item)
if store_filter:
store_val = str(item_dict.get("store") or "").lower()
if store_filter != store_val:
continue
normalized = self._ensure_storage_columns(item_dict)
                        # Make hash/store available to downstream cmdlets without rerunning the search
                        if not normalized.get("hash"):
                            hash_val = get_field(item, "hash")
                            if hash_val:
                                normalized["hash"] = hash_val
                        if not normalized.get("store"):
                            store_val = get_field(item, "store")
                            if store_val:
                                normalized["store"] = store_val
table.add_result(normalized)
results_list.append(normalized)
ctx.emit(normalized)
# Debug: Verify table rows match items list
debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
if len(table.rows) != len(results_list):
debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:
log("No results found", file=sys.stderr)
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
try:
db.update_worker_status(worker_id, 'error')
except Exception:
pass
return 1
CMDLET = Search_Store()
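# Each emitted row is a plain dict carrying title/ext for display plus hash and store
# when the backend provides them, so downstream cmdlets (get-file, add-tag, ...) can
# act on a selection without rerunning the search.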

297
cmdlet/trim_file.py Normal file
View File

@@ -0,0 +1,297 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
from pathlib import Path
import sys
import json
import subprocess
import shutil
import re
from SYS.logger import log, debug
from SYS.utils import sha256_file
from ._shared import (
Cmdlet,
CmdletArg,
parse_cmdlet_args,
normalize_result_input,
extract_tag_from_result,
extract_title_from_result
)
import pipeline as ctx
CMDLET = Cmdlet(
name="trim-file",
summary="Trim a media file using ffmpeg.",
usage="trim-file [-path <path>] -range <start-end> [-delete]",
arg=[
CmdletArg("-path", description="Path to the file (optional if piped)."),
CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55' or '00:03:45-00:03:55')."),
CmdletArg("-delete", type="flag", description="Delete the original file after trimming."),
],
detail=[
"Creates a new file with 'clip_' prefix in the filename/title.",
"Inherits tag values from the source file.",
"Adds a relationship to the source file (if hash is available).",
"Output can be piped to add-file.",
]
)
def _parse_time(time_str: str) -> float:
"""Convert time string (HH:MM:SS or MM:SS or SS) to seconds."""
parts = time_str.strip().split(':')
if len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
elif len(parts) == 2:
return float(parts[0]) * 60 + float(parts[1])
elif len(parts) == 1:
return float(parts[0])
else:
raise ValueError(f"Invalid time format: {time_str}")
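# Illustrative values (not from the repository's tests):
#   _parse_time("00:03:45") -> 225.0
#   _parse_time("3:45")     -> 225.0
#   _parse_time("90")       -> 90.0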
def _trim_media(input_path: Path, output_path: Path, start_time: str, end_time: str) -> bool:
"""Trim media file using ffmpeg."""
ffmpeg_path = shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
    # Seek strategy: -ss before -i (input seeking) is much faster on long files than
    # output seeking, at the cost of snapping to the nearest keyframe. Input seeking
    # resets timestamps to 0, so the end position must be given as a duration via
    # -t (end - start) rather than as an absolute -to position.
    # Stream copy (-c copy) avoids re-encoding and preserves quality; if a cut lands
    # off a keyframe the clip may start slightly early.
    try:
        s = _parse_time(start_time)
        e = _parse_time(end_time)
        duration = e - s
        if duration <= 0:
            log(f"Invalid range: start {start_time} >= end {end_time}", file=sys.stderr)
            return False
        cmd = [
            ffmpeg_path, '-y',
            '-ss', start_time,
            '-i', str(input_path),
            '-t', str(duration),
            '-c', 'copy',           # Stream copy for speed and quality preservation
            '-map_metadata', '0',   # Copy container metadata
            str(output_path)
        ]
debug(f"Running ffmpeg: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
log(f"ffmpeg error: {result.stderr}", file=sys.stderr)
return False
return True
except Exception as e:
log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
return False
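# Sketch of the resulting command for a ten-second clip starting at 00:03:45
# (illustrative filenames; assumes ffmpeg is on PATH):
#   ffmpeg -y -ss 00:03:45 -i input.mp4 -t 10.0 -c copy -map_metadata 0 clip_input.mp4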
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Trim a media file."""
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
range_arg = parsed.get("range")
if not range_arg or '-' not in range_arg:
log("Error: -range argument required (format: start-end)", file=sys.stderr)
return 1
start_str, end_str = range_arg.split('-', 1)
delete_original = parsed.get("delete", False)
path_arg = parsed.get("path")
# Collect inputs
inputs = normalize_result_input(result)
# If path arg provided, add it to inputs
if path_arg:
inputs.append({"path": path_arg})
if not inputs:
log("No input files provided.", file=sys.stderr)
return 1
success_count = 0
for item in inputs:
# Resolve file path
file_path = None
if isinstance(item, dict):
file_path = item.get("path") or item.get("target")
elif hasattr(item, "path"):
file_path = item.path
elif isinstance(item, str):
file_path = item
if not file_path:
continue
path_obj = Path(file_path)
if not path_obj.exists():
log(f"File not found: {file_path}", file=sys.stderr)
continue
# Determine output path
# Prepend clip_ to filename
new_filename = f"clip_{path_obj.name}"
output_path = path_obj.parent / new_filename
# Trim
log(f"Trimming {path_obj.name} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(path_obj, output_path, start_str, end_str):
log(f"Created clip: {output_path}", file=sys.stderr)
success_count += 1
# Prepare result for pipeline
# 1. Get source hash for relationship
source_hash = None
if isinstance(item, dict):
source_hash = item.get("hash")
elif hasattr(item, "hash"):
source_hash = item.hash
if not source_hash:
try:
source_hash = sha256_file(path_obj)
except Exception:
pass
# 2. Get tag values
tags = extract_tag_from_result(item)
# 3. Get title and modify it
title = extract_title_from_result(item)
if not title:
title = path_obj.stem
new_title = f"clip_{title}"
# Update title tag if present
new_tags = []
has_title_tag = False
for t in tags:
if t.lower().startswith("title:"):
new_tags.append(f"title:{new_title}")
has_title_tag = True
else:
new_tags.append(t)
if not has_title_tag:
new_tags.append(f"title:{new_title}")
# 4. Calculate clip hash and update original file's relationships
clip_hash = None
try:
clip_hash = sha256_file(output_path)
except Exception:
pass
if source_hash and clip_hash:
# Update original file in local DB if possible
try:
from config import get_local_storage_path
from API.folder import API_folder_store
storage_path = get_local_storage_path(config)
if storage_path:
with API_folder_store(storage_path) as db:
# Get original file metadata
# We need to find the original file by hash or path
# Try path first
orig_meta = db.get_metadata(path_obj)
if not orig_meta and source_hash:
# Try by hash
orig_path_resolved = db.search_hash(source_hash)
if orig_path_resolved:
orig_meta = db.get_metadata(orig_path_resolved)
if orig_meta:
# Update relationships
rels = orig_meta.get("relationships", {})
if not isinstance(rels, dict):
rels = {}
# Add clip as "derivative" (since original is the source)
if "derivative" not in rels:
rels["derivative"] = []
if clip_hash not in rels["derivative"]:
rels["derivative"].append(clip_hash)
# Save back to DB
# We need to preserve other metadata
orig_meta["relationships"] = rels
# Ensure hash is set in metadata if we have it
if source_hash and not orig_meta.get("hash"):
orig_meta["hash"] = source_hash
# We need the path to save
save_path = Path(orig_meta.get("path") or path_obj)
db.save_metadata(save_path, orig_meta)
log(f"Updated relationship for original file: {save_path.name}", file=sys.stderr)
except Exception as e:
log(f"Failed to update original file relationships: {e}", file=sys.stderr)
# 5. Construct result
result_dict = {
"path": str(output_path),
"title": new_title,
"tag": new_tags,
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"relationships": {
# The source is the KING of this clip
"king": [source_hash] if source_hash else []
}
}
# Emit result
ctx.emit(result_dict)
# Delete original if requested
if delete_original:
try:
path_obj.unlink()
log(f"Deleted original file: {path_obj}", file=sys.stderr)
                    # Sidecars are left in place; removing them is left to the user
                    # or a cleanup cmdlet.
except Exception as e:
log(f"Failed to delete original: {e}", file=sys.stderr)
else:
log(f"Failed to trim {path_obj.name}", file=sys.stderr)
return 0 if success_count > 0 else 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()
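# Illustrative emitted payload for song.mp3 trimmed with -range 0:10-0:20
# (hashes shortened, values hypothetical):
#   {"path": ".../clip_song.mp3", "title": "clip_song",
#    "tag": [..., "title:clip_song"], "media_kind": "video",
#    "hash": "<clip sha256>", "relationships": {"king": ["<source sha256>"]}}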