AST
cmdlets/__init__.py (new file, 139 lines)
@@ -0,0 +1,139 @@
from __future__ import annotations

from typing import Any, Callable, Dict, Iterable, Sequence
from importlib import import_module as _import_module

# A cmdlet is a callable taking (result, args, config) -> int
Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]

# Registry of command-name -> cmdlet function
REGISTRY: Dict[str, Cmdlet] = {}


def register(names: Iterable[str]):
    """Decorator to register a function under one or more command names.

    Usage:
        @register(["add-tag", "add-tags"])
        def _run(result, args, config) -> int: ...
    """
    def _wrap(fn: Cmdlet) -> Cmdlet:
        for name in names:
            REGISTRY[name.replace('_', '-').lower()] = fn
        return fn
    return _wrap


class AutoRegister:
    """Decorator that automatically registers a cmdlet function using CMDLET.aliases.

    Usage:
        CMDLET = Cmdlet(
            name="delete-file",
            aliases=["del", "del-file"],
            ...
        )

        @AutoRegister(CMDLET)
        def _run(result, args, config) -> int:
            ...

    Registers the cmdlet under:
    - Its main name from CMDLET.name
    - All aliases from CMDLET.aliases

    This allows the help display to show: "cmd: delete-file | aliases: del, del-file"
    """

    def __init__(self, cmdlet):
        self.cmdlet = cmdlet

    def __call__(self, fn: Cmdlet) -> Cmdlet:
        """Register fn for the main name and all aliases in cmdlet."""
        normalized_name = None

        # Register for main name first
        if hasattr(self.cmdlet, 'name') and self.cmdlet.name:
            normalized_name = self.cmdlet.name.replace('_', '-').lower()
            REGISTRY[normalized_name] = fn

        # Register for all aliases
        if hasattr(self.cmdlet, 'aliases') and self.cmdlet.aliases:
            for alias in self.cmdlet.aliases:
                normalized_alias = alias.replace('_', '-').lower()
                # Always register (aliases are separate from main name)
                REGISTRY[normalized_alias] = fn

        return fn


def get(cmd_name: str) -> Cmdlet | None:
    return REGISTRY.get(cmd_name.replace('_', '-').lower())


def format_cmd_help(cmdlet) -> str:
    """Format a cmdlet for help display showing cmd:name and aliases.

    Example output: "cmd: delete-file | aliases: del, del-file"
    """
    if not hasattr(cmdlet, 'name'):
        return str(cmdlet)

    cmd_str = f"cmd: {cmdlet.name}"

    if hasattr(cmdlet, 'aliases') and cmdlet.aliases:
        aliases_str = ", ".join(cmdlet.aliases)
        cmd_str += f" | aliases: {aliases_str}"

    return cmd_str


# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
import os

cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
    if (
        filename.endswith(".py")
        and not filename.startswith("_")
        and filename != "__init__.py"
    ):
        mod_name = filename[:-3]
        try:
            module = _import_module(f".{mod_name}", __name__)

            # Auto-register based on a CMDLET object that carries its own exec function.
            # This allows cmdlets to be fully self-contained in the CMDLET object.
            if hasattr(module, 'CMDLET'):
                cmdlet_obj = module.CMDLET

                # Get the execution function from the CMDLET object
                run_fn = getattr(cmdlet_obj, 'exec', None)

                if callable(run_fn):
                    # Register main name
                    if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
                        normalized_name = cmdlet_obj.name.replace('_', '-').lower()
                        REGISTRY[normalized_name] = run_fn

                    # Register all aliases
                    if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
                        for alias in cmdlet_obj.aliases:
                            normalized_alias = alias.replace('_', '-').lower()
                            REGISTRY[normalized_alias] = run_fn
        except Exception:
            continue

# Import root-level modules that also register cmdlets.
# Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py;
# use search-file -provider libgen, -provider soulseek, or -provider debrid instead.
for _root_mod in ("select_cmdlet",):
    try:
        _import_module(_root_mod)
    except Exception:
        # Allow missing optional modules
        continue

# Also import helper modules that register cmdlets
try:
    import helper.alldebrid as _alldebrid
except Exception:
    pass
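# A minimal sketch of a cmdlet module that the directory scan above would pick up.
# The module name "example_cmdlet.py" and the echoed behaviour are hypothetical,
# shown only to illustrate the @register path documented above:
#
#     from typing import Any, Dict, Sequence
#     from . import register
#
#     @register(["echo-result", "echo"])
#     def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
#         # Print the piped result and report success to the pipeline.
#         print(result)
#         return 0
#
# Modules that instead define a CMDLET object exposing a callable `exec`
# attribute are registered automatically by the loader above without needing
# the decorator.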
cmdlets/_shared.py (new file, 1229 lines)
File diff suppressed because it is too large.

cmdlets/add_file.py (new file, 910 lines)
@@ -0,0 +1,910 @@
from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys

import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
    Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
    extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
    merge_sequences, extract_relationships, extract_duration
)
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file

# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS

# Initialize file storage system
storage = FileStorage()

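# The FileStorage registry above is indexed by backend name later in this module,
# e.g. storage["local"].upload(path, location=..., move=True) and
# storage["hydrus"].upload(path, config=..., tags=...). A hedged sketch of a
# direct call (the source file and destination directory are made-up examples):
#
#     dest = storage["local"].upload(Path("song.mp3"), location="D:/library", move=True)
#
# Backend names other than "local" and "hydrus" are not assumed to exist here.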
def _guess_media_kind_from_suffix(media_path: Path) -> str:
|
||||
suffix = media_path.suffix.lower()
|
||||
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
|
||||
return 'audio'
|
||||
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||
return 'video'
|
||||
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
|
||||
return 'image'
|
||||
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
|
||||
return 'document'
|
||||
return 'other'
|
||||
|
||||
|
||||
def _resolve_media_kind(result: Any, media_path: Path) -> str:
|
||||
if isinstance(result, models.PipeObject):
|
||||
if getattr(result, 'media_kind', None):
|
||||
return str(result.media_kind)
|
||||
elif isinstance(result, dict):
|
||||
media_kind = result.get('media_kind')
|
||||
if media_kind:
|
||||
return str(media_kind)
|
||||
metadata = result.get('metadata')
|
||||
if isinstance(metadata, dict) and metadata.get('media_kind'):
|
||||
return str(metadata['media_kind'])
|
||||
return _guess_media_kind_from_suffix(media_path)
|
||||
|
||||
|
||||
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
|
||||
# For local origin, try to read from local database first
|
||||
if origin and origin.lower() == "local" and config:
|
||||
try:
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from config import get_local_storage_path
|
||||
|
||||
try:
|
||||
db_root = get_local_storage_path(config)
|
||||
except Exception:
|
||||
db_root = None
|
||||
|
||||
if db_root:
|
||||
try:
|
||||
db = LocalLibraryDB(Path(db_root))
|
||||
try:
|
||||
# Get tags and metadata from database
|
||||
tags = db.get_tags(media_path) or []
|
||||
metadata = db.get_metadata(media_path) or {}
|
||||
known_urls = metadata.get("known_urls") or []
|
||||
file_hash = metadata.get("hash")
|
||||
|
||||
if tags or known_urls or file_hash:
|
||||
debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
|
||||
return None, file_hash, tags, known_urls
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fall back to sidecar file lookup
|
||||
try:
|
||||
sidecar_path = find_sidecar(media_path)
|
||||
except Exception:
|
||||
sidecar_path = None
|
||||
if not sidecar_path or not sidecar_path.exists():
|
||||
return None, None, [], []
|
||||
try:
|
||||
hash_value, tags, known_urls = read_sidecar(sidecar_path)
|
||||
return sidecar_path, hash_value, tags or [], known_urls or []
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
|
||||
return sidecar_path, None, [], []
|
||||
|
||||
|
||||
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
|
||||
candidate = None
|
||||
if isinstance(result, models.PipeObject):
|
||||
candidate = result.file_hash
|
||||
elif isinstance(result, dict):
|
||||
candidate = result.get('file_hash') or result.get('hash')
|
||||
candidate = candidate or fallback_hash
|
||||
if candidate:
|
||||
return str(candidate)
|
||||
try:
|
||||
return sha256_file(file_path)
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
|
||||
targets = [
|
||||
media_path.parent / (media_path.name + '.metadata'),
|
||||
media_path.parent / (media_path.name + '.notes'),
|
||||
media_path.parent / (media_path.name + '.tags'),
|
||||
media_path.parent / (media_path.name + '.tags.txt'),
|
||||
]
|
||||
targets.extend(extra_paths)
|
||||
for target in targets:
|
||||
if not target:
|
||||
continue
|
||||
try:
|
||||
path_obj = Path(target)
|
||||
if path_obj.exists():
|
||||
path_obj.unlink()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
||||
def _persist_local_metadata(
|
||||
library_root: Path,
|
||||
dest_path: Path,
|
||||
tags: list[str],
|
||||
known_urls: list[str],
|
||||
file_hash: Optional[str],
|
||||
relationships: Optional[Dict[str, Any]],
|
||||
duration: Optional[float],
|
||||
media_kind: str,
|
||||
) -> None:
|
||||
payload = {
|
||||
'hash': file_hash,
|
||||
'known_urls': known_urls,
|
||||
'relationships': relationships or [],
|
||||
'duration': duration,
|
||||
'size': None,
|
||||
'ext': dest_path.suffix.lower(),
|
||||
'media_type': media_kind,
|
||||
'media_kind': media_kind,
|
||||
}
|
||||
try:
|
||||
payload['size'] = dest_path.stat().st_size
|
||||
except OSError:
|
||||
payload['size'] = None
|
||||
|
||||
try:
|
||||
debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
|
||||
db_path = Path(library_root) / ".downlow_library.db"
|
||||
debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
|
||||
debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
|
||||
debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
|
||||
|
||||
with LocalLibraryDB(library_root) as db:
|
||||
# Save metadata FIRST to ensure file entry is created in DB
|
||||
if any(payload.values()):
|
||||
debug(f"[_persist_local_metadata] Saving metadata payload first")
|
||||
try:
|
||||
db.save_metadata(dest_path, payload)
|
||||
debug(f"[_persist_local_metadata] ✅ Metadata saved")
|
||||
except Exception as meta_exc:
|
||||
log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr)
|
||||
raise
|
||||
|
||||
# Save tags to DB synchronously in same transaction
|
||||
# For local storage, DB is the primary source of truth
|
||||
if tags:
|
||||
try:
|
||||
debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB")
|
||||
db.save_tags(dest_path, tags)
|
||||
debug(f"[_persist_local_metadata] ✅ Tags saved to DB")
|
||||
except Exception as tag_exc:
|
||||
log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr)
|
||||
raise
|
||||
|
||||
# NOTE: Sidecar files are intentionally NOT created for local storage
|
||||
# Local storage uses database as primary source, not sidecar files
|
||||
|
||||
debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
log(traceback.format_exc(), file=sys.stderr)
|
||||
|
||||
|
||||
def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any, config: Optional[Dict[str, Any]] = None) -> Tuple[int, Optional[Path]]:
|
||||
"""Transfer a file to local storage and return (exit_code, destination_path).
|
||||
|
||||
Args:
|
||||
media_path: Path to source file
|
||||
destination_root: Destination directory
|
||||
result: Result object with metadata
|
||||
config: Configuration dictionary
|
||||
|
||||
Returns:
|
||||
Tuple of (exit_code, destination_path)
|
||||
- exit_code: 0 on success, 1 on failure
|
||||
- destination_path: Path to moved file on success, None on failure
|
||||
"""
|
||||
destination_root = destination_root.expanduser()
|
||||
try:
|
||||
destination_root.mkdir(parents=True, exist_ok=True)
|
||||
except Exception as exc:
|
||||
log(f"❌ Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
|
||||
return 1, None
|
||||
|
||||
|
||||
tags_from_result = extract_tags_from_result(result)
|
||||
urls_from_result = extract_known_urls_from_result(result)
|
||||
# Get origin from result if available
|
||||
result_origin = None
|
||||
if hasattr(result, "origin"):
|
||||
result_origin = result.origin
|
||||
elif isinstance(result, dict):
|
||||
result_origin = result.get("origin") or result.get("source")
|
||||
sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)
|
||||
|
||||
# Normalize all title tags to use spaces instead of underscores BEFORE merging
|
||||
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
|
||||
def normalize_title_tag(tag: str) -> str:
|
||||
"""Normalize a title tag by replacing underscores with spaces."""
|
||||
if str(tag).strip().lower().startswith("title:"):
|
||||
parts = tag.split(":", 1)
|
||||
if len(parts) == 2:
|
||||
value = parts[1].replace("_", " ").strip()
|
||||
return f"title:{value}"
|
||||
return tag
|
||||
|
||||
tags_from_result = [normalize_title_tag(t) for t in tags_from_result]
|
||||
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
|
||||
|
||||
# Merge tags carefully: if URL has title tag, don't include sidecar title tags
|
||||
# This prevents duplicate title: tags when URL provides a title
|
||||
has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
|
||||
if has_url_title:
|
||||
# URL has a title, filter out any sidecar title tags to avoid duplication
|
||||
sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
|
||||
merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
|
||||
else:
|
||||
# No URL title, use all sidecar tags
|
||||
merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
|
||||
|
||||
merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
|
||||
relationships = extract_relationships(result)
|
||||
duration = extract_duration(result)
|
||||
|
||||
try:
|
||||
dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
|
||||
except Exception as exc:
|
||||
log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
|
||||
return 1, None
|
||||
|
||||
dest_path = Path(dest_file)
|
||||
file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
|
||||
media_kind = _resolve_media_kind(result, dest_path)
|
||||
|
||||
# Ensure only ONE title tag that matches the actual filename
|
||||
# Remove all existing title tags and add one based on the saved filename
|
||||
merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")]
|
||||
filename_title = dest_path.stem.replace("_", " ").strip()
|
||||
if filename_title:
|
||||
merged_tags_no_titles.insert(0, f"title:{filename_title}")
|
||||
|
||||
_persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind)
|
||||
_cleanup_sidecar_files(media_path, sidecar_path)
|
||||
debug(f"✅ Moved to local library: {dest_path}")
|
||||
return 0, dest_path
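# Minimal usage sketch for _handle_local_transfer above (the paths and the piped
# result are hypothetical; in the real flow _run() supplies them from the pipeline):
#
#     code, dest = _handle_local_transfer(
#         Path("downloads/track.mp3"),   # source file to move
#         Path("D:/library"),            # destination root
#         result,                        # piped result carrying tags/known URLs
#         config,
#     )
#     # code == 0 and dest is the new Path on success; code == 1 and dest is None on failure.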
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Upload/copy a file to specified location.
|
||||
|
||||
Returns 0 on success, non-zero on failure.
|
||||
"""
|
||||
import sys # For stderr output
|
||||
|
||||
# Help
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
debug("Starting add-file cmdlet")
|
||||
|
||||
# Handle list of results (from piped commands that emit multiple items)
|
||||
if isinstance(result, list):
|
||||
debug(f"Processing {len(result)} piped files")
|
||||
success_count = 0
|
||||
for item in result:
|
||||
exit_code = _run(item, _args, config)
|
||||
if exit_code == 0:
|
||||
success_count += 1
|
||||
return 0 if success_count > 0 else 1
|
||||
|
||||
# Parse arguments using CMDLET spec
|
||||
parsed = parse_cmdlet_args(_args, CMDLET)
|
||||
location: Optional[str] = None
|
||||
provider_name: Optional[str] = None
|
||||
delete_after_upload = False
|
||||
|
||||
# Check if -path argument was provided to use direct file path instead of piped result
|
||||
path_arg = parsed.get("path")
|
||||
if path_arg:
|
||||
# Create a pseudo-result object from the file path
|
||||
media_path = Path(str(path_arg).strip())
|
||||
if not media_path.exists():
|
||||
log(f"❌ File not found: {media_path}")
|
||||
return 1
|
||||
# Create result dict with the file path and origin 'wild' for direct path inputs
|
||||
result = {"target": str(media_path), "origin": "wild"}
|
||||
log(f"Using direct file path: {media_path}")
|
||||
|
||||
# Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage"
|
||||
location = parsed.get("storage")
|
||||
if location:
|
||||
location = str(location).lower().strip()
|
||||
|
||||
# Get file provider from parsed args
|
||||
provider_name = parsed.get("provider")
|
||||
if provider_name:
|
||||
provider_name = str(provider_name).lower().strip()
|
||||
|
||||
# Check for delete flag (presence in parsed dict means it was provided)
|
||||
delete_after_upload = "delete" in parsed
|
||||
|
||||
# Either storage or provider must be specified, but not both
|
||||
if location is None and provider_name is None:
|
||||
log("Either -storage or -provider must be specified")
|
||||
log(" -storage options: 'hydrus', 'local', or a directory path")
|
||||
log(" -provider options: '0x0'")
|
||||
return 1
|
||||
|
||||
if location is not None and provider_name is not None:
|
||||
log("❌ Cannot specify both -storage and -provider")
|
||||
return 1
|
||||
|
||||
# Validate location (storage backends)
|
||||
is_valid_location = False
|
||||
if location is not None:
|
||||
valid_locations = {'hydrus', 'local'}
|
||||
is_valid_location = location in valid_locations
|
||||
is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
|
||||
|
||||
if location is not None and not (is_valid_location or is_local_path):
|
||||
log(f"❌ Invalid location: {location}")
|
||||
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
|
||||
return 1
|
||||
|
||||
# Extract tags/known URLs from pipeline objects if available
|
||||
pipe_object_tags = extract_tags_from_result(result)
|
||||
if pipe_object_tags:
|
||||
log(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
|
||||
pipe_known_urls = extract_known_urls_from_result(result)
|
||||
|
||||
# Resolve media path: get from piped result
|
||||
# Support both object attributes (getattr) and dict keys (get)
|
||||
target = None
|
||||
origin = None
|
||||
|
||||
# Try object attributes first
|
||||
if hasattr(result, "target"):
|
||||
target = result.target
|
||||
elif hasattr(result, "path"):
|
||||
target = result.path
|
||||
elif hasattr(result, "file_path"):
|
||||
target = result.file_path
|
||||
# Try dict keys if object attributes failed
|
||||
elif isinstance(result, dict):
|
||||
target = (result.get("target") or result.get("path") or result.get("file_path") or
|
||||
result.get("__file_path") or result.get("__path") or result.get("__target"))
|
||||
|
||||
# Get origin to detect Hydrus files
|
||||
if hasattr(result, "origin"):
|
||||
origin = result.origin
|
||||
elif hasattr(result, "source"):
|
||||
origin = result.source
|
||||
elif isinstance(result, dict):
|
||||
origin = result.get("origin") or result.get("source") or result.get("__source")
|
||||
|
||||
# Convert target to string and preserve URLs (don't let Path() mangle them)
|
||||
target_str = str(target) if target else None
|
||||
|
||||
# Check if this is a playlist item that needs to be downloaded first
|
||||
is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
|
||||
if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
|
||||
# This is a playlist item URL - we need to download it first
|
||||
log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
|
||||
|
||||
# Extract item number if available
|
||||
item_num = None
|
||||
if "__action" in result and result["__action"].startswith("playlist-item:"):
|
||||
item_num = result["__action"].split(":")[1]
|
||||
elif "index" in result:
|
||||
item_num = result["index"]
|
||||
|
||||
# Call download-data to download this specific item
|
||||
# Pass the item number so it knows which track to download
|
||||
from cmdlets import download_data as dl_module
|
||||
|
||||
# Capture emissions from download-data to process them
|
||||
captured_results = []
|
||||
original_emit = ctx.emit
|
||||
|
||||
def capture_emit(obj):
|
||||
captured_results.append(obj)
|
||||
# Also emit to original so user sees progress/output if needed
|
||||
# But since add-file is usually terminal, we might not need to
|
||||
# original_emit(obj)
|
||||
|
||||
# Temporarily hook the pipeline emit function
|
||||
ctx.emit = capture_emit
|
||||
|
||||
try:
|
||||
if item_num:
|
||||
# Pass a marker dict to tell download-data which item to get
|
||||
download_result = dl_module._run(
|
||||
{
|
||||
"__playlist_url": str(target_str),
|
||||
"__playlist_item": int(item_num)
|
||||
},
|
||||
[],
|
||||
config
|
||||
)
|
||||
else:
|
||||
# Fallback: just download the URL (will show all items)
|
||||
download_result = dl_module._run(None, [str(target_str)], config)
|
||||
finally:
|
||||
# Restore original emit function
|
||||
ctx.emit = original_emit
|
||||
|
||||
if download_result != 0:
|
||||
log(f"❌ Failed to download playlist item", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
|
||||
|
||||
# Process the downloaded files recursively
|
||||
success_count = 0
|
||||
for res in captured_results:
|
||||
# Recursively call add-file with the downloaded result
|
||||
# This ensures tags and metadata from download-data are applied
|
||||
if _run(res, _args, config) == 0:
|
||||
success_count += 1
|
||||
|
||||
return 0 if success_count > 0 else 1
|
||||
# Determine media_path from result
|
||||
media_path: Optional[Path] = None
|
||||
is_hydrus_file = origin and origin.lower() == "hydrus"
|
||||
|
||||
if target_str:
|
||||
# Check if it's a URL or Hydrus hash
|
||||
if target_str.lower().startswith(("http://", "https://")):
|
||||
media_path = None # Will handle as Hydrus file below
|
||||
elif not is_hydrus_file:
|
||||
# Only treat as local path if not a Hydrus file
|
||||
media_path = Path(target_str)
|
||||
|
||||
if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
|
||||
# Check if this is a format object from download-data
|
||||
if isinstance(result, dict) and result.get('format_id') is not None:
|
||||
log("❌ Format object received, but add-file expects a downloaded file")
|
||||
log(f" Tip: Use @N to automatically select and download the format")
|
||||
log(f" Streamlined workflow:")
|
||||
log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
|
||||
log(f" (The @N automatically expands to download-data \"URL\" -item N)")
|
||||
return 1
|
||||
log("❌ File not found: provide a piped file result or local file path")
|
||||
return 1
|
||||
|
||||
# Check if this is a Hydrus file - fetch the actual file path from Hydrus
|
||||
if is_hydrus_file and target_str:
|
||||
log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
|
||||
try:
|
||||
from helper import hydrus
|
||||
|
||||
# Get the Hydrus client
|
||||
client = hydrus.get_client(config)
|
||||
if not client:
|
||||
log(f"❌ Hydrus client unavailable", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# target_str is the hash - need to get the actual file path from Hydrus
|
||||
file_hash = target_str
|
||||
|
||||
# Call the /get_files/file_path endpoint to get the actual file path
|
||||
response = client.get_file_path(file_hash)
|
||||
if not response or not isinstance(response, dict):
|
||||
log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_path_str = response.get("path")
|
||||
if not file_path_str:
|
||||
log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
media_path = Path(file_path_str)
|
||||
if not media_path.exists():
|
||||
log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
|
||||
|
||||
except Exception as exc:
|
||||
log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Generic URL handler: if target is a URL and we haven't resolved a local path yet
|
||||
# This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
|
||||
if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
|
||||
log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
|
||||
from cmdlets import download_data as dl_module
|
||||
|
||||
dl_args = []
|
||||
if location:
|
||||
dl_args.extend(["-storage", location])
|
||||
|
||||
# Map provider 0x0 to storage 0x0 for download-data
|
||||
if provider_name == "0x0":
|
||||
dl_args.extend(["-storage", "0x0"])
|
||||
|
||||
return dl_module._run(result, dl_args, config)
|
||||
|
||||
if media_path is None:
|
||||
log("File path could not be resolved")
|
||||
return 1
|
||||
|
||||
if not media_path.exists() or not media_path.is_file():
|
||||
log(f"File not found: {media_path}")
|
||||
return 1
|
||||
|
||||
# Validate file type - only accept Hydrus-supported files
|
||||
file_extension = media_path.suffix.lower()
|
||||
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
|
||||
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
|
||||
log(f"Hydrus supports the following file types:", file=sys.stderr)
|
||||
# Display by category from hydrus_wrapper
|
||||
for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
|
||||
ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
|
||||
log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
|
||||
log(f"Skipping this file: {media_path.name}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Handle based on provider or storage
|
||||
if provider_name is not None:
|
||||
# Use file provider (e.g., 0x0.st)
|
||||
from helper.search_provider import get_file_provider
|
||||
|
||||
log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
|
||||
|
||||
try:
|
||||
file_provider = get_file_provider(provider_name, config)
|
||||
if file_provider is None:
|
||||
log(f"❌ File provider '{provider_name}' not available", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
hoster_url = file_provider.upload(media_path)
|
||||
log(f"✅ File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
|
||||
|
||||
# Associate the URL with the file in Hydrus if possible
|
||||
current_hash = locals().get('file_hash')
|
||||
if not current_hash:
|
||||
current_hash = _resolve_file_hash(result, None, media_path)
|
||||
|
||||
if current_hash:
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
if client:
|
||||
client.associate_url(current_hash, hoster_url)
|
||||
log(f"✅ Associated URL with file hash {current_hash}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
|
||||
|
||||
except Exception as exc:
|
||||
log(f"❌ {provider_name} upload failed: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if delete_after_upload:
|
||||
try:
|
||||
media_path.unlink()
|
||||
_cleanup_sidecar_files(media_path)
|
||||
log(f"✅ Deleted file and sidecar", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
|
||||
|
||||
return 0
|
||||
|
||||
# Handle storage-based operations (location is not None here)
|
||||
valid_locations = {'hydrus', 'local'}
|
||||
is_valid_location = location in valid_locations
|
||||
is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
|
||||
|
||||
if not (is_valid_location or is_local_path):
|
||||
log(f"❌ Invalid location: {location}")
|
||||
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
|
||||
return 1
|
||||
|
||||
if location == 'local':
|
||||
try:
|
||||
from config import get_local_storage_path
|
||||
resolved_dir = get_local_storage_path(config)
|
||||
except Exception:
|
||||
resolved_dir = None
|
||||
|
||||
if not resolved_dir:
|
||||
resolved_dir = config.get("LocalDir") or config.get("OutputDir")
|
||||
|
||||
if not resolved_dir:
|
||||
log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
|
||||
exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
|
||||
|
||||
# After successful local transfer, emit result for pipeline continuation
|
||||
# This allows downstream commands like add-tags to chain automatically
|
||||
if exit_code == 0 and dest_path:
|
||||
# Extract tags from result for emission
|
||||
emit_tags = extract_tags_from_result(result)
|
||||
file_hash = _resolve_file_hash(result, None, dest_path)
|
||||
|
||||
# Extract title from original result, fallback to filename if not available
|
||||
result_title = extract_title_from_result(result) or dest_path.name
|
||||
|
||||
# Always emit result for local files, even if no tags
|
||||
# This allows @N selection and piping to downstream commands
|
||||
result_dict = create_pipe_object_result(
|
||||
source='local',
|
||||
identifier=str(dest_path),
|
||||
file_path=str(dest_path),
|
||||
cmdlet_name='add-file',
|
||||
title=result_title,
|
||||
file_hash=file_hash,
|
||||
tags=emit_tags if emit_tags else [],
|
||||
target=str(dest_path) # Explicit target for get-file
|
||||
)
|
||||
ctx.emit(result_dict)
|
||||
|
||||
# Clear the stage table so downstream @N doesn't try to re-run download-data
|
||||
# Next stage will use these local file results, not format objects
|
||||
ctx.set_current_stage_table(None)
|
||||
|
||||
return exit_code
|
||||
|
||||
elif is_local_path:
|
||||
try:
|
||||
destination_root = Path(location)
|
||||
except Exception as exc:
|
||||
log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"Moving to local path: {destination_root}", file=sys.stderr)
|
||||
exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config)
|
||||
|
||||
# After successful local transfer, emit result for pipeline continuation
|
||||
if exit_code == 0 and dest_path:
|
||||
# Extract tags from result for emission
|
||||
emit_tags = extract_tags_from_result(result)
|
||||
file_hash = _resolve_file_hash(result, None, dest_path)
|
||||
|
||||
# Extract title from original result, fallback to filename if not available
|
||||
result_title = extract_title_from_result(result) or dest_path.name
|
||||
|
||||
# Always emit result for local files, even if no tags
|
||||
# This allows @N selection and piping to downstream commands
|
||||
result_dict = create_pipe_object_result(
|
||||
source='local',
|
||||
identifier=str(dest_path),
|
||||
file_path=str(dest_path),
|
||||
cmdlet_name='add-file',
|
||||
title=result_title,
|
||||
file_hash=file_hash,
|
||||
tags=emit_tags if emit_tags else [],
|
||||
target=str(dest_path) # Explicit target for get-file
|
||||
)
|
||||
ctx.emit(result_dict)
|
||||
|
||||
# Clear the stage table so downstream @N doesn't try to re-run download-data
|
||||
# Next stage will use these local file results, not format objects
|
||||
ctx.set_current_stage_table(None)
|
||||
|
||||
return exit_code
|
||||
|
||||
# location == 'hydrus'
|
||||
# Compute file hash to check if already in Hydrus
|
||||
log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
|
||||
log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
|
||||
try:
|
||||
file_hash = sha256_file(media_path)
|
||||
except Exception as exc:
|
||||
log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
log(f"File hash: {file_hash}", file=sys.stderr)
|
||||
|
||||
# Read sidecar tags and known URLs first (for tagging)
|
||||
|
||||
sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
|
||||
if sidecar_path:
|
||||
log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
|
||||
log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
|
||||
if sidecar_tags:
|
||||
log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
|
||||
if sidecar_urls:
|
||||
log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
|
||||
else:
|
||||
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
|
||||
|
||||
# Normalize all title tags to use spaces instead of underscores BEFORE merging
|
||||
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
|
||||
def normalize_title_tag(tag: str) -> str:
|
||||
"""Normalize a title tag by replacing underscores with spaces."""
|
||||
if str(tag).strip().lower().startswith("title:"):
|
||||
parts = tag.split(":", 1)
|
||||
if len(parts) == 2:
|
||||
value = parts[1].replace("_", " ").strip()
|
||||
return f"title:{value}"
|
||||
return tag
|
||||
|
||||
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
|
||||
pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]
|
||||
|
||||
# Merge tags from PipeObject with tags from sidecar
|
||||
# NOTE: Remove ALL existing title tags and use only filename-based title
|
||||
# The filename is the source of truth for the title
|
||||
tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
|
||||
if not str(t).strip().lower().startswith("title:")]
|
||||
|
||||
# Ensure ONE title tag based on the actual filename
|
||||
filename_title = media_path.stem.replace("_", " ").strip()
|
||||
if filename_title:
|
||||
tags = [f"title:{filename_title}"] + tags_without_titles
|
||||
else:
|
||||
tags = tags_without_titles
|
||||
|
||||
known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
|
||||
|
||||
if pipe_object_tags:
|
||||
log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
|
||||
|
||||
# Write metadata to file before uploading (only for local storage, not for Hydrus)
|
||||
# Hydrus stores tags separately, so we don't need to modify the file
|
||||
if location != 'hydrus':
|
||||
try:
|
||||
if tags:
|
||||
# Determine file kind from extension
|
||||
file_kind = ''
|
||||
sfx = media_path.suffix.lower()
|
||||
if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
|
||||
file_kind = 'audio'
|
||||
elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||
file_kind = 'video'
|
||||
|
||||
if embed_metadata_in_file(media_path, tags, file_kind):
|
||||
log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
|
||||
else:
|
||||
log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
|
||||
else:
|
||||
log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)
|
||||
|
||||
# Use FileStorage backend to upload to Hydrus
|
||||
try:
|
||||
file_hash = storage["hydrus"].upload(
|
||||
media_path,
|
||||
config=config,
|
||||
tags=tags,
|
||||
)
|
||||
log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Associate known URLs in Hydrus metadata
|
||||
url_count = 0
|
||||
if known_urls:
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
if client:
|
||||
for url in known_urls:
|
||||
u = str(url or "").strip()
|
||||
if not u:
|
||||
continue
|
||||
try:
|
||||
client.associate_url(file_hash, u)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
|
||||
continue
|
||||
url_count += 1
|
||||
except Exception as exc:
|
||||
log(f"Failed to associate URLs: {exc}", file=sys.stderr)
|
||||
|
||||
if url_count:
|
||||
log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
|
||||
else:
|
||||
log(f"No URLs to associate", file=sys.stderr)
|
||||
|
||||
_cleanup_sidecar_files(media_path, sidecar_path)
|
||||
|
||||
# Update in-memory result for downstream pipes
|
||||
try:
|
||||
# Only update piped result objects; direct -path usage may have a dummy result
|
||||
setattr(result, "hash_hex", file_hash)
|
||||
# Preserve media_kind for downstream commands (e.g., open)
|
||||
if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
|
||||
# Try to infer media_kind from file extension or keep existing
|
||||
suffix = media_path.suffix.lower()
|
||||
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
|
||||
setattr(result, "media_kind", "document")
|
||||
if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
|
||||
cols = list(getattr(result, "columns"))
|
||||
if ("Hash", file_hash) not in cols:
|
||||
cols.append(("Hash", file_hash))
|
||||
setattr(result, "columns", cols)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If -delete flag is set, delete the file and .tags after successful upload
|
||||
if delete_after_upload:
|
||||
log(f"Deleting local files (as requested)...", file=sys.stderr)
|
||||
try:
|
||||
media_path.unlink()
|
||||
log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
|
||||
except OSError as exc:
|
||||
log(f"Failed to delete file: {exc}", file=sys.stderr)
|
||||
|
||||
# Delete .tags sidecar if it exists
|
||||
if sidecar_path is not None:
|
||||
try:
|
||||
sidecar_path.unlink()
|
||||
log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
|
||||
except OSError as exc:
|
||||
log(f"Failed to delete sidecar: {exc}", file=sys.stderr)
|
||||
|
||||
log(f"✅ Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
|
||||
|
||||
# Emit result for Hydrus uploads so downstream commands know about it
|
||||
if location == 'hydrus':
|
||||
# Extract title from original result, fallback to filename if not available
|
||||
result_title = extract_title_from_result(result) or media_path.name
|
||||
|
||||
result_dict = create_pipe_object_result(
|
||||
source='hydrus',
|
||||
identifier=file_hash,
|
||||
file_path=f"hydrus:{file_hash}",
|
||||
cmdlet_name='add-file',
|
||||
title=result_title,
|
||||
file_hash=file_hash,
|
||||
extra={
|
||||
'storage_source': 'hydrus',
|
||||
'hydrus_hash': file_hash,
|
||||
'tags': tags,
|
||||
'known_urls': known_urls,
|
||||
}
|
||||
)
|
||||
ctx.emit(result_dict)
|
||||
|
||||
# Clear the stage table so downstream @N doesn't try to re-run download-data
|
||||
# Next stage will use these Hydrus file results, not format objects
|
||||
ctx.set_current_stage_table(None)
|
||||
|
||||
return 0
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="add-file",
|
||||
summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
|
||||
usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
|
||||
args=[
|
||||
CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
|
||||
SharedArgs.STORAGE, # For hydrus, local, or directory paths
|
||||
CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
|
||||
CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
|
||||
],
|
||||
details=[
|
||||
"- Storage location options (use -storage):",
|
||||
" hydrus: Upload to Hydrus database with metadata tagging",
|
||||
" local: Copy file to local directory",
|
||||
" <path>: Copy file to specified directory",
|
||||
"- File provider options (use -provider):",
|
||||
" 0x0: Upload to 0x0.st for temporary hosting with public URL",
|
||||
"- Accepts files from official Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.",
|
||||
"- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls",
|
||||
"- When using file provider: uploads to service, adds URL to sidecar",
|
||||
"- When copying locally: copies file with original metadata preserved",
|
||||
"- Use -delete flag to automatically delete the file and .tags after successful operation.",
|
||||
],
|
||||
)
|
||||
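# Example invocations, assembled from the usage string and details above
# (file paths are illustrative):
#
#     add-file -path C:\Music\track.mp3 -storage hydrus
#     add-file -path C:\Music\track.mp3 -storage local -delete
#     download-data "URL" | @1 | add-file -storage local
#     add-file -path C:\clip.mp4 -provider 0x0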
cmdlets/add_note.py (new file, 84 lines)
@@ -0,0 +1,84 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="add-note",
    summary="Add or set a note on a Hydrus file.",
    usage="add-note [-hash <sha256>] <name> <text>",
    args=[
        CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
        CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."),
        CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True),
    ],
    details=[
        "- Notes are stored in the 'my notes' service by default.",
    ],
)


@register(["add-note", "set-note", "add_note"])  # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    from ._shared import parse_cmdlet_args
    parsed = parse_cmdlet_args(args, CMDLET)
    override_hash = parsed.get("hash")
    name = parsed.get("name")
    text_parts = parsed.get("text")

    if not name:
        log("Requires a note name")
        return 1

    name = str(name).strip()

    if isinstance(text_parts, list):
        text = " ".join(text_parts).strip()
    else:
        text = str(text_parts or "").strip()

    if not text:
        log("Empty note text")
        return 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1

    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1

    try:
        service_name = "my notes"
        client.set_notes(hash_hex, {name: text}, service_name)
    except Exception as exc:
        log(f"Hydrus add-note failed: {exc}")
        return 1

    ctx.emit(f"Added note '{name}' ({len(text)} chars)")
    return 0
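# Example invocations, drawn from the usage string above (the hash and note
# text are placeholders):
#
#     add-note comment "Great live version"             # targets the selected result
#     add-note -hash <sha256> source "ripped from CD"   # targets an explicit Hydrus hash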
cmdlets/add_relationship.py (new file, 264 lines)
@@ -0,0 +1,264 @@
|
||||
"""Add file relationships in Hydrus based on relationship tags in sidecar."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from helper import hydrus as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from helper.local_library import read_sidecar, find_sidecar
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="add-relationship",
|
||||
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
|
||||
usage="add-relationship OR add-relationship -path <file>",
|
||||
args=[
|
||||
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
|
||||
],
|
||||
details=[
|
||||
"- Reads relationship tags from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>')",
|
||||
"- Calls Hydrus API to associate the hashes as relationships",
|
||||
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
|
||||
"- Works with piped file results or -path argument for direct invocation",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
|
||||
"""Normalize a hash hex string to lowercase 64-char format."""
|
||||
if not value or not isinstance(value, str):
|
||||
return None
|
||||
normalized = value.strip().lower()
|
||||
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
|
||||
return normalized
|
||||
return None
|
||||
|
||||
|
||||
def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
|
||||
"""Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.
|
||||
|
||||
Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
|
||||
"""
|
||||
result: Dict[str, list[str]] = {}
|
||||
if not isinstance(tag_value, str):
|
||||
return result
|
||||
|
||||
# Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
|
||||
pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
|
||||
matches = re.findall(pattern, tag_value)
|
||||
|
||||
for rel_type, hash_value in matches:
|
||||
normalized = _normalise_hash_hex(hash_value)
|
||||
if normalized:
|
||||
if rel_type not in result:
|
||||
result[rel_type] = []
|
||||
result[rel_type].append(normalized)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@register(["add-relationship", "add-rel"]) # primary name and alias
|
||||
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Associate file relationships in Hydrus.
|
||||
|
||||
Two modes of operation:
|
||||
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
|
||||
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
|
||||
|
||||
Returns 0 on success, non-zero on failure.
|
||||
"""
|
||||
# Help
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Parse arguments using CMDLET spec
|
||||
parsed = parse_cmdlet_args(_args, CMDLET)
|
||||
arg_path: Optional[Path] = None
|
||||
if parsed:
|
||||
# Get the first arg value (e.g., -path)
|
||||
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
|
||||
if first_arg_name and first_arg_name in parsed:
|
||||
arg_value = parsed[first_arg_name]
|
||||
try:
|
||||
arg_path = Path(str(arg_value)).expanduser()
|
||||
except Exception:
|
||||
arg_path = Path(str(arg_value))
|
||||
|
||||
# Get Hydrus client
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
result = result[0]
|
||||
|
||||
# Check if we're in pipeline mode (have a hash) or file mode
|
||||
file_hash = getattr(result, "hash_hex", None)
|
||||
|
||||
# PIPELINE MODE: Track relationships across multiple items
|
||||
if file_hash:
|
||||
file_hash = _normalise_hash_hex(file_hash)
|
||||
if not file_hash:
|
||||
log("Invalid file hash format", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Load or initialize king hash from pipeline context
|
||||
try:
|
||||
king_hash = ctx.load_value("relationship_king")
|
||||
except Exception:
|
||||
king_hash = None
|
||||
|
||||
# If this is the first item, make it the king
|
||||
if not king_hash:
|
||||
try:
|
||||
ctx.store_value("relationship_king", file_hash)
|
||||
log(f"Established king hash: {file_hash}", file=sys.stderr)
|
||||
return 0 # First item just becomes the king, no relationships yet
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If we already have a king and this is a different hash, link them
|
||||
if king_hash and king_hash != file_hash:
|
||||
try:
|
||||
client.set_relationship(file_hash, king_hash, "alt")
|
||||
log(
|
||||
f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}",
|
||||
file=sys.stderr
|
||||
)
|
||||
return 0
|
||||
except Exception as exc:
|
||||
log(f"Failed to set relationship: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
# FILE MODE: Read relationships from sidecar
|
||||
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
|
||||
|
||||
# Resolve media path from -path arg or result target
|
||||
target = getattr(result, "target", None) or getattr(result, "path", None)
|
||||
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
|
||||
if media_path is None:
|
||||
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Validate local file
|
||||
if str(media_path).lower().startswith(("http://", "https://")):
|
||||
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
|
||||
return 1
|
||||
if not media_path.exists() or not media_path.is_file():
|
||||
log(f"File not found: {media_path}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Build Hydrus client
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Read sidecar to find relationship tags
|
||||
sidecar_path = find_sidecar(media_path)
|
||||
if sidecar_path is None:
|
||||
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
_, tags, _ = read_sidecar(sidecar_path)
|
||||
except Exception as exc:
|
||||
log(f"Failed to read sidecar: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
|
||||
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
|
||||
|
||||
if not relationship_tags:
|
||||
log(f"No relationship tags found in sidecar", file=sys.stderr)
|
||||
return 0 # Not an error, just nothing to do
|
||||
|
||||
# Get the file hash from result (should have been set by add-file)
|
||||
file_hash = getattr(result, "hash_hex", None)
|
||||
if not file_hash:
|
||||
log("File hash not available (run add-file first)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_hash = _normalise_hash_hex(file_hash)
|
||||
if not file_hash:
|
||||
log("Invalid file hash format", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Parse relationships from tags and apply them
|
||||
success_count = 0
|
||||
error_count = 0
|
||||
|
||||
for rel_tag in relationship_tags:
|
||||
try:
|
||||
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
|
||||
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
|
||||
|
||||
# Parse relationships
|
||||
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
|
||||
|
||||
# Set the relationships in Hydrus
|
||||
for rel_type, related_hashes in rels.items():
|
||||
if not related_hashes:
|
||||
continue
|
||||
|
||||
for related_hash in related_hashes:
|
||||
# Don't set relationship between hash and itself
|
||||
if file_hash == related_hash:
|
||||
continue
|
||||
|
||||
try:
|
||||
client.set_relationship(file_hash, related_hash, rel_type)
|
||||
log(
|
||||
f"[add-relationship] Set {rel_type} relationship: "
|
||||
f"{file_hash} <-> {related_hash}",
|
||||
file=sys.stderr
|
||||
)
|
||||
success_count += 1
|
||||
except Exception as exc:
|
||||
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
|
||||
error_count += 1
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
|
||||
error_count += 1
|
||||
|
||||
if success_count > 0:
|
||||
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
|
||||
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
|
||||
return 0
|
||||
elif error_count == 0:
|
||||
log(f"No relationships to set", file=sys.stderr)
|
||||
return 0 # Success with nothing to do
|
||||
else:
|
||||
log(f"Failed with {error_count} error(s)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
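For context, the sidecar relationship tag consumed above can be parsed with a small standalone helper like the sketch below. This is an illustration only; the actual _extract_relationships_from_tag in this commit may differ, and the hash values are placeholders.

import re
from typing import Dict, List

def parse_relationship_tag(tag: str) -> Dict[str, List[str]]:
    """Parse 'relationship: hash(king)<HASH>,hash(alt)<HASH>' into {type: [hashes]}."""
    rels: Dict[str, List[str]] = {}
    body = tag.split(":", 1)[1] if ":" in tag else tag
    for match in re.finditer(r"hash\((\w+)\)<([0-9a-fA-F]+)>", body):
        rel_type, hash_hex = match.group(1), match.group(2)
        rels.setdefault(rel_type, []).append(hash_hex.lower())
    return rels

# Example (placeholder hashes):
# parse_relationship_tag("relationship: hash(king)<aaaa>,hash(alt)<bbbb>")
# -> {"king": ["aaaa"], "alt": ["bbbb"]}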
276
cmdlets/add_tags.py
Normal file
@@ -0,0 +1,276 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Sequence, Optional
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from ._shared import normalize_result_input, filter_results_by_temp
|
||||
from helper import hydrus as hydrus_wrapper
|
||||
from helper.local_library import read_sidecar, write_sidecar, find_sidecar, has_sidecar, LocalLibraryDB
|
||||
from metadata import rename_by_metadata
|
||||
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args
|
||||
from config import get_local_storage_path
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="add-tags",
|
||||
summary="Add tags to a Hydrus file or write them to a local .tags sidecar.",
|
||||
usage="add-tags [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
|
||||
args=[
|
||||
CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
|
||||
CmdletArg("tags", type="string", required=True, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax.", variadic=True),
|
||||
],
|
||||
details=[
|
||||
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
|
||||
"- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
|
||||
"- With a Hydrus hash, tags are sent to the 'my tags' service.",
|
||||
"- Multiple tags can be comma-separated or space-separated.",
|
||||
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
|
||||
"- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
|
||||
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
|
||||
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
|
||||
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
|
||||
"- The source namespace must already exist in the file being tagged.",
|
||||
"- Target namespaces that already have a value are skipped (not overwritten).",
|
||||
],
|
||||
)
|
||||
|
||||
@register(["add-tag", "add-tags"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Add tags to a file with smart filtering for pipeline results."""
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
|
||||
# Check for --all flag
|
||||
include_temp = parsed.get("all", False)
|
||||
|
||||
# Normalize input to list
|
||||
results = normalize_result_input(result)
|
||||
|
||||
# Filter by temp status (unless --all is set)
|
||||
if not include_temp:
|
||||
results = filter_results_by_temp(results, include_temp=False)
|
||||
|
||||
if not results:
|
||||
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get tags from arguments
|
||||
raw_tags = parsed.get("tags", [])
|
||||
if isinstance(raw_tags, str):
|
||||
raw_tags = [raw_tags]
|
||||
|
||||
# Handle -list argument (convert to {list} syntax)
|
||||
list_arg = parsed.get("list")
|
||||
if list_arg:
|
||||
for l in list_arg.split(','):
|
||||
l = l.strip()
|
||||
if l:
|
||||
raw_tags.append(f"{{{l}}}")
|
||||
|
||||
# Parse and expand tags
|
||||
tags_to_add = parse_tag_arguments(raw_tags)
|
||||
tags_to_add = expand_tag_groups(tags_to_add)
|
||||
|
||||
# Get other flags
|
||||
hash_override = normalize_hash(parsed.get("hash"))
|
||||
duplicate_arg = parsed.get("duplicate")
|
||||
|
||||
# If no tags provided (and no list), write sidecar files with embedded tags
|
||||
# Note: Since 'tags' is required=True in CMDLET, this block might be unreachable via CLI
|
||||
# unless called programmatically or if required check is bypassed.
|
||||
if not tags_to_add and not duplicate_arg:
|
||||
# Write sidecar files with the tags that are already in the result dicts
|
||||
sidecar_count = 0
|
||||
for res in results:
|
||||
# Handle both dict and PipeObject formats
|
||||
file_path = None
|
||||
tags = []
|
||||
file_hash = ""
|
||||
|
||||
if isinstance(res, models.PipeObject):
|
||||
file_path = res.file_path
|
||||
tags = res.extra.get('tags', [])
|
||||
file_hash = res.file_hash or ""
|
||||
elif isinstance(res, dict):
|
||||
file_path = res.get('file_path')
|
||||
tags = res.get('tags', []) # Check both tags and extra['tags']
|
||||
if not tags and 'extra' in res:
|
||||
tags = res['extra'].get('tags', [])
|
||||
file_hash = res.get('file_hash', "")
|
||||
|
||||
if not file_path:
|
||||
log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
if tags:
|
||||
# Write sidecar file for this file with its tags
|
||||
try:
|
||||
sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash)
|
||||
log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
|
||||
sidecar_count += 1
|
||||
except Exception as e:
|
||||
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
if sidecar_count > 0:
|
||||
log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr)
|
||||
else:
|
||||
log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr)
|
||||
return 0
|
||||
|
||||
# Tags ARE provided - append them to each result and write sidecar files or add to Hydrus
|
||||
sidecar_count = 0
|
||||
for res in results:
|
||||
# Handle both dict and PipeObject formats
|
||||
file_path = None
|
||||
existing_tags = []
|
||||
file_hash = ""
|
||||
storage_source = None
|
||||
hydrus_hash = None
|
||||
|
||||
if isinstance(res, models.PipeObject):
|
||||
file_path = res.file_path
|
||||
existing_tags = res.extra.get('tags', [])
|
||||
file_hash = res.file_hash or ""
|
||||
storage_source = res.extra.get('storage_source') or res.extra.get('source')
|
||||
hydrus_hash = res.extra.get('hydrus_hash')
|
||||
elif isinstance(res, dict):
|
||||
file_path = res.get('file_path') or res.get('path')
|
||||
existing_tags = res.get('tags', [])
|
||||
if not existing_tags and 'extra' in res:
|
||||
existing_tags = res['extra'].get('tags', [])
|
||||
file_hash = res.get('file_hash', "")
|
||||
storage_source = res.get('storage_source') or res.get('source') or res.get('origin')
|
||||
if not storage_source and 'extra' in res:
|
||||
storage_source = res['extra'].get('storage_source') or res['extra'].get('source')
|
||||
# For Hydrus results from search-file, look for hash, hash_hex, or target (all contain the hash)
|
||||
hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex')
|
||||
if not hydrus_hash and 'extra' in res:
|
||||
hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex')
|
||||
else:
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
# Apply hash override if provided
|
||||
if hash_override:
|
||||
hydrus_hash = hash_override
|
||||
# If we have a hash override, we treat it as a Hydrus target
|
||||
storage_source = "hydrus"
|
||||
|
||||
if not file_path and not hydrus_hash:
|
||||
log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
# Handle -duplicate logic (copy existing tags to new namespaces)
|
||||
if duplicate_arg:
|
||||
# Parse duplicate format: source:target1,target2 or source,target1,target2
|
||||
parts = duplicate_arg.split(':')
|
||||
source_ns = ""
|
||||
targets = []
|
||||
|
||||
if len(parts) > 1:
|
||||
# Explicit format: source:target1,target2
|
||||
source_ns = parts[0]
|
||||
targets = parts[1].split(',')
|
||||
else:
|
||||
# Inferred format: source,target1,target2
|
||||
parts = duplicate_arg.split(',')
|
||||
if len(parts) > 1:
|
||||
source_ns = parts[0]
|
||||
targets = parts[1:]
|
||||
|
||||
if source_ns and targets:
|
||||
# Find tags in source namespace
|
||||
source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')]
|
||||
for t in source_tags:
|
||||
value = t.split(':', 1)[1]
|
||||
for target_ns in targets:
|
||||
new_tag = f"{target_ns}:{value}"
|
||||
if new_tag not in existing_tags and new_tag not in tags_to_add:
|
||||
tags_to_add.append(new_tag)
|
||||
|
||||
# Merge new tags with existing tags, handling namespace overwrites
|
||||
# When adding a tag like "namespace:value", remove any existing "namespace:*" tags
|
||||
for new_tag in tags_to_add:
|
||||
# Check if this is a namespaced tag (format: "namespace:value")
|
||||
if ':' in new_tag:
|
||||
namespace = new_tag.split(':', 1)[0]
|
||||
# Remove any existing tags with the same namespace
|
||||
existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))]
|
||||
|
||||
# Add the new tag if not already present
|
||||
if new_tag not in existing_tags:
|
||||
existing_tags.append(new_tag)
|
||||
|
||||
# Update the result's tags
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.extra['tags'] = existing_tags
|
||||
elif isinstance(res, dict):
|
||||
res['tags'] = existing_tags
|
||||
|
||||
# Determine where to add tags: Hydrus, local DB, or sidecar
|
||||
if storage_source and storage_source.lower() == 'hydrus':
|
||||
# Add tags to Hydrus using the API
|
||||
target_hash = hydrus_hash or file_hash
|
||||
if target_hash:
|
||||
try:
|
||||
log(f"[add_tags] Adding {len(existing_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr)
|
||||
hydrus_client = hydrus_wrapper.get_client(config)
|
||||
hydrus_client.add_tags(target_hash, existing_tags, "my tags")
|
||||
log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr)
|
||||
sidecar_count += 1
|
||||
except Exception as e:
|
||||
log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr)
|
||||
else:
|
||||
log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr)
|
||||
elif storage_source and storage_source.lower() == 'local':
|
||||
# For local storage, save directly to DB (no sidecar needed)
|
||||
if file_path:
|
||||
library_root = get_local_storage_path(config)
|
||||
if library_root:
|
||||
try:
|
||||
with LocalLibraryDB(library_root) as db:
|
||||
db.save_tags(Path(file_path), existing_tags)
|
||||
log(f"[add_tags] Saved {len(existing_tags)} tag(s) to local DB", file=sys.stderr)
|
||||
sidecar_count += 1
|
||||
except Exception as e:
|
||||
log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr)
|
||||
else:
|
||||
log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr)
|
||||
else:
|
||||
log(f"[add_tags] Warning: No file path for local storage, skipping", file=sys.stderr)
|
||||
else:
|
||||
# For other storage types or unknown sources, write sidecar file if we have a file path
|
||||
if file_path:
|
||||
try:
|
||||
sidecar_path = write_sidecar(Path(file_path), existing_tags, [], file_hash)
|
||||
log(f"[add_tags] Wrote {len(existing_tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
|
||||
sidecar_count += 1
|
||||
except Exception as e:
|
||||
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
|
||||
|
||||
# Emit the modified result
|
||||
ctx.emit(res)
|
||||
|
||||
log(f"[add_tags] Processed {len(results)} result(s)", file=sys.stderr)
|
||||
return 0
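As a standalone illustration of the -duplicate behaviour described above (copying an existing namespace's values into new namespaces without overwriting targets that already have a value), a minimal sketch might look like this. The function name and signature here are illustrative, not the helpers used by the cmdlet.

from typing import List

def duplicate_namespace(tags: List[str], spec: str) -> List[str]:
    """Expand 'title:album,artist' (or 'title,album,artist') into new namespaced tags."""
    if ":" in spec:
        source, targets = spec.split(":", 1)
        target_list = [t for t in targets.split(",") if t]
    else:
        parts = [p for p in spec.split(",") if p]
        source, target_list = parts[0], parts[1:]
    new_tags: List[str] = []
    for tag in tags:
        if tag.startswith(source + ":"):
            value = tag.split(":", 1)[1]
            for target in target_list:
                candidate = f"{target}:{value}"
                # Skip targets that already carry a value (do not overwrite)
                if not any(t.startswith(target + ":") for t in tags) and candidate not in new_tags:
                    new_tags.append(candidate)
    return new_tags

# duplicate_namespace(["title:Foo"], "title:album,artist") -> ["album:Foo", "artist:Foo"]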
78
cmdlets/add_url.py
Normal file
@@ -0,0 +1,78 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="add-url",
    summary="Associate a URL with a Hydrus file.",
    usage="add-url [-hash <sha256>] <url>",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
        CmdletArg("url", required=True, description="The URL to associate with the file."),
    ],
    details=[
        "- Adds the URL to the Hydrus file's known URL list.",
    ],
)


@register(["add-url", "ass-url", "associate-url", "add_url"])  # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    from ._shared import parse_cmdlet_args
    parsed = parse_cmdlet_args(args, CMDLET)
    override_hash = parsed.get("hash")
    url = parsed.get("url")

    if not url:
        log("Requires a URL argument")
        return 1

    url = str(url).strip()
    if not url:
        log("Requires a non-empty URL")
        return 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        client.associate_url(hash_hex, url)
    except Exception as exc:
        log(f"Hydrus add-url failed: {exc}")
        return 1
    preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
    ctx.emit(f"Associated URL with {preview}: {url}")
    return 0
148
cmdlets/adjective.py
Normal file
@@ -0,0 +1,148 @@
import json
import os
import sys
from typing import List, Dict, Any, Optional, Sequence
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
import pipeline as ctx

ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "helper", "adjective.json")

def _load_adjectives() -> Dict[str, List[str]]:
    try:
        if os.path.exists(ADJECTIVE_FILE):
            with open(ADJECTIVE_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)
    except Exception as e:
        log(f"Error loading adjectives: {e}", file=sys.stderr)
    return {}

def _save_adjectives(data: Dict[str, List[str]]) -> bool:
    try:
        with open(ADJECTIVE_FILE, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        return True
    except Exception as e:
        log(f"Error saving adjectives: {e}", file=sys.stderr)
        return False

def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    data = _load_adjectives()

    # Parse arguments manually first to handle positional args
    # We expect: .adjective [category] [tag] [-add] [-delete]

    # If no args, list categories
    if not args:
        table = ResultTable("Adjective Categories")
        for i, (category, tags) in enumerate(data.items()):
            row = table.add_row()
            row.add_column("#", str(i + 1))
            row.add_column("Category", category)
            row.add_column("Tag Amount", str(len(tags)))

            # Selection expands to: .adjective "Category Name"
            table.set_row_selection_args(i, [category])

        table.set_source_command(".adjective")
        ctx.set_last_result_table_overlay(table, list(data.keys()))
        ctx.set_current_stage_table(table)
        print(table)
        return 0

    # We have args. First arg is likely category.
    category = args[0]

    # Check if we are adding a new category (implicit if it doesn't exist)
    if category not in data:
        # If only category provided, create it
        if len(args) == 1:
            data[category] = []
            _save_adjectives(data)
            log(f"Created new category: {category}")
        # If more args, we might be trying to add to a non-existent category
        elif "-add" in args:
            data[category] = []
            # Continue to add logic

    # Handle operations within category
    remaining_args = list(args[1:])

    # Check for -add flag
    if "-add" in remaining_args:
        # .adjective category -add tag
        # or .adjective category tag -add
        add_idx = remaining_args.index("-add")
        # Tag could be before or after
        tag = None
        if add_idx + 1 < len(remaining_args):
            tag = remaining_args[add_idx + 1]
        elif add_idx > 0:
            tag = remaining_args[add_idx - 1]

        if tag:
            if tag not in data[category]:
                data[category].append(tag)
                _save_adjectives(data)
                log(f"Added '{tag}' to '{category}'")
            else:
                log(f"Tag '{tag}' already exists in '{category}'")
        else:
            log("Error: No tag specified to add")
            return 1

    # Check for -delete flag
    elif "-delete" in remaining_args:
        # .adjective category -delete tag
        # or .adjective category tag -delete
        del_idx = remaining_args.index("-delete")
        tag = None
        if del_idx + 1 < len(remaining_args):
            tag = remaining_args[del_idx + 1]
        elif del_idx > 0:
            tag = remaining_args[del_idx - 1]

        if tag:
            if tag in data[category]:
                data[category].remove(tag)
                _save_adjectives(data)
                log(f"Deleted '{tag}' from '{category}'")
            else:
                log(f"Tag '{tag}' not found in '{category}'")
        else:
            log("Error: No tag specified to delete")
            return 1

    # List tags in category (Default action if no flags or after modification)
    tags = data.get(category, [])
    table = ResultTable(f"Tags in '{category}'")
    for i, tag in enumerate(tags):
        row = table.add_row()
        row.add_column("#", str(i + 1))
        row.add_column("Tag", tag)

        # Selection expands to: .adjective "Category" "Tag"
        # This allows typing @N -delete to delete it
        table.set_row_selection_args(i, [category, tag])

    table.set_source_command(".adjective")
    ctx.set_last_result_table_overlay(table, tags)
    ctx.set_current_stage_table(table)
    print(table)

    return 0

CMDLET = Cmdlet(
    name=".adjective",
    aliases=["adj"],
    summary="Manage adjective categories and tags",
    usage=".adjective [category] [-add tag] [-delete tag]",
    args=[
        CmdletArg(name="category", type="string", description="Category name", required=False),
        CmdletArg(name="tag", type="string", description="Tag name", required=False),
        CmdletArg(name="add", type="flag", description="Add tag"),
        CmdletArg(name="delete", type="flag", description="Delete tag"),
    ],
    exec=_run
)
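The helper/adjective.json file read and written above is assumed to be a flat mapping of category name to tag list. A minimal sketch of that shape (categories and tags below are illustrative only, not contents shipped with this commit):

import json

example = {
    "philosophy": ["epistemology", "ontology"],   # category -> list of tags
    "occult": ["hermeticism"],
}
print(json.dumps(example, indent=2))  # same layout _save_adjectives() produces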
153
cmdlets/check_file_status.py
Normal file
@@ -0,0 +1,153 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json
import sys

from helper.logger import log

from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash


CMDLET = Cmdlet(
    name="check-file-status",
    summary="Check if a file is active, deleted, or corrupted in Hydrus.",
    usage="check-file-status [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="File hash (SHA256) to check. If not provided, uses selected result."),
    ],
    details=[
        "- Shows whether file is active in Hydrus or marked as deleted",
        "- Detects corrupted data (e.g., comma-separated URLs)",
        "- Displays file metadata and service locations",
        "- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
    ],
)


@register(["check-file-status", "check-status", "file-status", "status"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    # Parse arguments
    override_hash: str | None = None
    i = 0
    while i < len(args):
        token = args[i]
        low = str(token).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        i += 1

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))

    if not hash_hex:
        log("No hash provided and no result selected", file=sys.stderr)
        return 1

    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
        return 1

    if client is None:
        log("Hydrus client unavailable", file=sys.stderr)
        return 1

    try:
        result_data = client.fetch_file_metadata(hashes=[hash_hex])
        if not result_data.get("metadata"):
            log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
            return 1

        file_info = result_data["metadata"][0]

        # Status summary
        is_deleted = file_info.get("is_deleted", False)
        is_local = file_info.get("is_local", False)
        is_trashed = file_info.get("is_trashed", False)

        status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
        log(f"File status: {status_str}", file=sys.stderr)

        # File info
        log(f"\n📄 File Information:", file=sys.stderr)
        log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
        log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
        log(f" MIME: {file_info['mime']}", file=sys.stderr)
        log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)

        # Service status
        file_services = file_info.get("file_services", {})
        current_services = file_services.get("current", {})
        deleted_services = file_services.get("deleted", {})

        if current_services:
            log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
            for service_key, service_info in current_services.items():
                sname = service_info.get("name", "unknown")
                stype = service_info.get("type_pretty", "unknown")
                log(f" - {sname} ({stype})", file=sys.stderr)

        if deleted_services:
            log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
            for service_key, service_info in deleted_services.items():
                sname = service_info.get("name", "unknown")
                stype = service_info.get("type_pretty", "unknown")
                time_deleted = service_info.get("time_deleted", "?")
                log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)

        # URL check
        urls = file_info.get("known_urls", [])
        log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr)

        corrupted_count = 0
        for i, url in enumerate(urls, 1):
            if "," in url:
                corrupted_count += 1
                log(f" [{i}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
            else:
                log(f" [{i}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)

        if corrupted_count > 0:
            log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)

        # Tags
        tags_dict = file_info.get("tags", {})
        total_tags = 0
        for service_key, service_data in tags_dict.items():
            service_name = service_data.get("name", "unknown")
            display_tags = service_data.get("display_tags", {}).get("0", [])
            total_tags += len(display_tags)

        if total_tags > 0:
            log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
            for service_key, service_data in tags_dict.items():
                display_tags = service_data.get("display_tags", {}).get("0", [])
                if display_tags:
                    service_name = service_data.get("name", "unknown")
                    log(f" {service_name}:", file=sys.stderr)
                    for tag in display_tags[:5]:  # Show first 5
                        log(f" - {tag}", file=sys.stderr)
                    if len(display_tags) > 5:
                        log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)

        log("\n", file=sys.stderr)
        return 0

    except Exception as exc:
        log(f"Error checking file status: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        return 1
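The fields read above assume a Hydrus Client API style metadata payload roughly shaped like the sketch below (truncated and illustrative; the exact keys depend on the Hydrus API version and on what fetch_file_metadata requests):

example_metadata = {
    "metadata": [
        {
            "hash": "0f" * 32,                      # placeholder SHA256 hex
            "size": 123456,
            "mime": "image/png",
            "is_deleted": False,
            "is_trashed": False,
            "file_services": {"current": {}, "deleted": {}},
            "known_urls": ["https://example.com/post/1"],
            "tags": {},                              # service_key -> {"name": ..., "display_tags": {"0": [...]}}
        }
    ]
}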
110
cmdlets/cleanup.py
Normal file
@@ -0,0 +1,110 @@
"""Cleanup cmdlet for removing temporary artifacts from pipeline.

This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
then emits the remaining non-temporary results for further pipeline stages.
"""

from __future__ import annotations

from typing import Any, Dict, Sequence
from pathlib import Path
import sys

from helper.logger import log

from . import register
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp
import models
import pipeline as pipeline_context


@register(["cleanup"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Remove temporary files from pipeline results.

    Accepts:
    - Single result object with is_temp field
    - List of result objects to clean up

    Process:
    - Filters results by is_temp=True
    - Deletes those files from disk
    - Emits only non-temporary results

    Typical pipeline usage:
    download-data url | screen-shot | add-tag "tag" --all | cleanup
    """

    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            import json
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    # Normalize input to list
    results = normalize_result_input(result)

    if not results:
        log("[cleanup] No results to process", file=sys.stderr)
        return 1

    # Separate temporary and permanent results (using the shared helper imported from ._shared)
    temp_results = filter_results_by_temp(results, include_temp=True)
    perm_results = filter_results_by_temp(results, include_temp=False)

    # Delete temporary files
    deleted_count = 0
    for temp_result in temp_results:
        try:
            file_path = get_pipe_object_path(temp_result)

            if file_path:
                path_obj = Path(file_path)
                if path_obj.exists():
                    # Delete the file
                    path_obj.unlink()
                    log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
                    deleted_count += 1

                    # Clean up any associated sidecar files
                    for ext in ['.tags', '.metadata']:
                        sidecar = path_obj.parent / (path_obj.name + ext)
                        if sidecar.exists():
                            try:
                                sidecar.unlink()
                                log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
                            except Exception as e:
                                log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
                else:
                    log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
        except Exception as e:
            log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)

    # Log summary
    log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)

    # Emit permanent results for downstream processing
    for perm_result in perm_results:
        pipeline_context.emit(perm_result)

    return 0


CMDLET = Cmdlet(
    name="cleanup",
    summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
    usage="cleanup",
    args=[],
    details=[
        "- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
        "- Deletes files marked with is_temp=True from disk",
        "- Also cleans up associated sidecar files (.tags, .metadata)",
        "- Emits only non-temporary results for further processing",
        "- Typical usage at end of pipeline: ... | add-tag \"tag\" --all | cleanup",
        "- Exit code 0 if cleanup successful, 1 if no results to process",
    ],
)
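For reference, a minimal version of the temp-flag filtering used above could look like the sketch below; the real filter_results_by_temp in cmdlets/_shared.py may handle more result shapes, but the idea is the same (a missing is_temp counts as permanent).

from typing import Any, List

def filter_by_temp(results: List[Any], include_temp: bool) -> List[Any]:
    """Keep results whose is_temp flag matches include_temp."""
    kept: List[Any] = []
    for res in results:
        if isinstance(res, dict):
            is_temp = bool(res.get("is_temp", False))
        else:
            is_temp = bool(getattr(res, "is_temp", False))
        if is_temp == include_temp:
            kept.append(res)
    return kept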
242
cmdlets/delete_file.py
Normal file
@@ -0,0 +1,242 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import json
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from helper import hydrus as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, normalize_hash
|
||||
|
||||
|
||||
|
||||
|
||||
def _delete_database_entry(db_path: Path, file_path: str) -> bool:
|
||||
"""Delete file and related entries from local library database.
|
||||
|
||||
Args:
|
||||
db_path: Path to the library.db file
|
||||
file_path: Exact file path string as stored in database
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
try:
|
||||
if not db_path.exists():
|
||||
log(f"Database not found at {db_path}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
log(f"Searching database for file_path: {file_path}", file=sys.stderr)
|
||||
|
||||
# Find the file_id using the exact file_path
|
||||
cursor.execute('SELECT id FROM files WHERE file_path = ?', (file_path,))
|
||||
result = cursor.fetchone()
|
||||
|
||||
if not result:
|
||||
log(f"ERROR: File path not found in database", file=sys.stderr)
|
||||
log(f"Expected: {file_path}", file=sys.stderr)
|
||||
|
||||
# Debug: show sample entries
|
||||
cursor.execute('SELECT id, file_path FROM files LIMIT 3')
|
||||
samples = cursor.fetchall()
|
||||
if samples:
|
||||
log(f"Sample DB entries:", file=sys.stderr)
|
||||
for fid, fpath in samples:
|
||||
log(f"{fid}: {fpath}", file=sys.stderr)
|
||||
|
||||
conn.close()
|
||||
return False
|
||||
|
||||
file_id = result[0]
|
||||
log(f"Found file_id={file_id}, deleting all related records", file=sys.stderr)
|
||||
|
||||
# Delete related records
|
||||
cursor.execute('DELETE FROM metadata WHERE file_id = ?', (file_id,))
|
||||
meta_count = cursor.rowcount
|
||||
|
||||
cursor.execute('DELETE FROM tags WHERE file_id = ?', (file_id,))
|
||||
tags_count = cursor.rowcount
|
||||
|
||||
cursor.execute('DELETE FROM notes WHERE file_id = ?', (file_id,))
|
||||
notes_count = cursor.rowcount
|
||||
|
||||
cursor.execute('DELETE FROM files WHERE id = ?', (file_id,))
|
||||
files_count = cursor.rowcount
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
log(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr)
|
||||
return True
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Database cleanup failed: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
return False
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
result = result[0]
|
||||
|
||||
# Parse overrides and options
|
||||
override_hash: str | None = None
|
||||
conserve: str | None = None
|
||||
lib_root: str | None = None
|
||||
reason_tokens: list[str] = []
|
||||
i = 0
|
||||
while i < len(args):
|
||||
token = args[i]
|
||||
low = str(token).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||
override_hash = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
|
||||
value = str(args[i + 1]).strip().lower()
|
||||
if value in {"local", "hydrus"}:
|
||||
conserve = value
|
||||
i += 2
|
||||
continue
|
||||
if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args):
|
||||
lib_root = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
reason_tokens.append(token)
|
||||
i += 1
|
||||
|
||||
# Handle result as either dict or object
|
||||
if isinstance(result, dict):
|
||||
hash_hex_raw = result.get("hash_hex") or result.get("hash")
|
||||
target = result.get("target")
|
||||
origin = result.get("origin")
|
||||
else:
|
||||
hash_hex_raw = getattr(result, "hash_hex", None) or getattr(result, "hash", None)
|
||||
target = getattr(result, "target", None)
|
||||
origin = getattr(result, "origin", None)
|
||||
|
||||
# For Hydrus files, the target IS the hash
|
||||
if origin and origin.lower() == "hydrus" and not hash_hex_raw:
|
||||
hash_hex_raw = target
|
||||
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
|
||||
reason = " ".join(token for token in reason_tokens if str(token).strip()).strip()
|
||||
|
||||
local_deleted = False
|
||||
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
|
||||
if conserve != "local" and local_target:
|
||||
path = Path(str(target))
|
||||
file_path_str = str(target) # Keep the original string for DB matching
|
||||
try:
|
||||
if path.exists() and path.is_file():
|
||||
path.unlink()
|
||||
local_deleted = True
|
||||
if ctx._PIPE_ACTIVE:
|
||||
ctx.emit(f"Removed local file: {path}")
|
||||
log(f"Deleted: {path.name}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"Local delete failed: {exc}", file=sys.stderr)
|
||||
|
||||
# Remove common sidecars regardless of file removal success
|
||||
for sidecar in (path.with_suffix(".tags"), path.with_suffix(".tags.txt"),
|
||||
path.with_suffix(".metadata"), path.with_suffix(".notes")):
|
||||
try:
|
||||
if sidecar.exists() and sidecar.is_file():
|
||||
sidecar.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Clean up database entry if library root provided - do this regardless of file deletion success
|
||||
if lib_root:
|
||||
lib_root_path = Path(lib_root)
|
||||
db_path = lib_root_path / ".downlow_library.db"
|
||||
log(f"Attempting DB cleanup: lib_root={lib_root}, db_path={db_path}", file=sys.stderr)
|
||||
log(f"Deleting DB entry for: {file_path_str}", file=sys.stderr)
|
||||
if _delete_database_entry(db_path, file_path_str):
|
||||
if ctx._PIPE_ACTIVE:
|
||||
ctx.emit(f"Removed database entry: {path.name}")
|
||||
log(f"Database entry cleaned up", file=sys.stderr)
|
||||
local_deleted = True # Mark as deleted if DB cleanup succeeded
|
||||
else:
|
||||
log(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr)
|
||||
else:
|
||||
log(f"No lib_root provided, skipping database cleanup", file=sys.stderr)
|
||||
|
||||
hydrus_deleted = False
|
||||
if conserve != "hydrus" and hash_hex:
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
if not local_deleted:
|
||||
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
else:
|
||||
if client is None:
|
||||
if not local_deleted:
|
||||
log("Hydrus client unavailable", file=sys.stderr)
|
||||
return 1
|
||||
else:
|
||||
payload: Dict[str, Any] = {"hashes": [hash_hex]}
|
||||
if reason:
|
||||
payload["reason"] = reason
|
||||
try:
|
||||
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
|
||||
hydrus_deleted = True
|
||||
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||
log(f"Deleted from Hydrus: {preview}…", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus delete failed: {exc}", file=sys.stderr)
|
||||
if not local_deleted:
|
||||
return 1
|
||||
|
||||
if hydrus_deleted and hash_hex:
|
||||
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||
if ctx._PIPE_ACTIVE:
|
||||
if reason:
|
||||
ctx.emit(f"Deleted {preview} (reason: {reason}).")
|
||||
else:
|
||||
ctx.emit(f"Deleted {preview}.")
|
||||
|
||||
if hydrus_deleted or local_deleted:
|
||||
return 0
|
||||
|
||||
log("Selected result has neither Hydrus hash nor local file target")
|
||||
return 1
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="delete-file",
|
||||
summary="Delete a file locally and/or from Hydrus, including database entries.",
|
||||
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
|
||||
aliases=["del-file"],
|
||||
args=[
|
||||
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
|
||||
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
|
||||
CmdletArg("reason", description="Optional reason for deletion (free text)."),
|
||||
],
|
||||
details=[
|
||||
"Default removes both the local file and Hydrus file.",
|
||||
"Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
|
||||
"Database entries are automatically cleaned up for local files.",
|
||||
"Any remaining arguments are treated as the Hydrus reason text.",
|
||||
],
|
||||
)
|
||||
|
||||
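The _delete_database_entry helper above assumes a local library database with a files table plus metadata, tags, and notes tables keyed by file_id. A minimal schema sketch follows; the column names beyond id/file_path/file_id are assumptions for illustration, not necessarily the DDL used elsewhere in this commit.

import sqlite3

SCHEMA_SKETCH = """
CREATE TABLE IF NOT EXISTS files    (id INTEGER PRIMARY KEY, file_path TEXT UNIQUE);
CREATE TABLE IF NOT EXISTS metadata (file_id INTEGER REFERENCES files(id), key TEXT, value TEXT);
CREATE TABLE IF NOT EXISTS tags     (file_id INTEGER REFERENCES files(id), tag TEXT);
CREATE TABLE IF NOT EXISTS notes    (file_id INTEGER REFERENCES files(id), name TEXT, body TEXT);
"""

def init_sketch_db(path: str) -> None:
    # Create the illustrative tables so _delete_database_entry-style lookups would succeed.
    with sqlite3.connect(path) as conn:
        conn.executescript(SCHEMA_SKETCH)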
79
cmdlets/delete_note.py
Normal file
@@ -0,0 +1,79 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="delete-note",
    summary="Delete a named note from a Hydrus file.",
    usage="i | del-note [-hash <sha256>] <name>",
    aliases=["del-note"],
    args=[],
    details=[
        "- Removes the note with the given name from the Hydrus file.",
    ],
)


def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    if not args:
        log("Requires the note name/key to delete")
        return 1
    override_hash: str | None = None
    rest: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        rest.append(a)
        i += 1
    if not rest:
        log("Requires the note name/key to delete")
        return 1
    name = str(rest[0] or '').strip()
    if not name:
        log("Requires a non-empty note name/key")
        return 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        service_name = "my notes"
        client.delete_notes(hash_hex, [name], service_name)
    except Exception as exc:
        log(f"Hydrus delete-note failed: {exc}")
        return 1
    log(f"Deleted note '{name}'")
    return 0
219
cmdlets/delete_tag.py
Normal file
@@ -0,0 +1,219 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import json
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from helper import hydrus as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments
|
||||
from helper.logger import log
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="delete-tags",
|
||||
summary="Remove tags from a Hydrus file.",
|
||||
usage="del-tags [-hash <sha256>] <tag>[,<tag>...]",
|
||||
aliases=["del-tag", "del-tags", "delete-tag"],
|
||||
args=[
|
||||
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
|
||||
],
|
||||
details=[
|
||||
"- Requires a Hydrus file (hash present) or explicit -hash override.",
|
||||
"- Multiple tags can be comma-separated or space-separated.",
|
||||
],
|
||||
)
|
||||
|
||||
@register(["del-tag", "del-tags", "delete-tag", "delete-tags"]) # Still needed for backward compatibility
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
|
||||
has_piped_tag = (result and hasattr(result, '__class__') and
|
||||
result.__class__.__name__ == 'TagItem' and
|
||||
hasattr(result, 'tag_name'))
|
||||
|
||||
# Check if we have a piped list of TagItems (from @N selection)
|
||||
has_piped_tag_list = (isinstance(result, list) and result and
|
||||
hasattr(result[0], '__class__') and
|
||||
result[0].__class__.__name__ == 'TagItem')
|
||||
|
||||
if not args and not has_piped_tag and not has_piped_tag_list:
|
||||
log("Requires at least one tag argument")
|
||||
return 1
|
||||
|
||||
# Parse -hash override and collect tags from remaining args
|
||||
override_hash: str | None = None
|
||||
rest: list[str] = []
|
||||
i = 0
|
||||
while i < len(args):
|
||||
a = args[i]
|
||||
low = str(a).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||
override_hash = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
rest.append(a)
|
||||
i += 1
|
||||
|
||||
# Check if first argument is @ syntax (result table selection)
|
||||
# @5 or @{2,5,8} to delete tags from ResultTable by index
|
||||
tags_from_at_syntax = []
|
||||
hash_from_at_syntax = None
|
||||
|
||||
if rest and str(rest[0]).startswith("@"):
|
||||
selector_arg = str(rest[0])
|
||||
pipe_selector = selector_arg[1:].strip()
|
||||
# Parse @N or @{N,M,K} syntax
|
||||
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
|
||||
# @{2,5,8}
|
||||
pipe_selector = pipe_selector[1:-1]
|
||||
try:
|
||||
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
|
||||
except ValueError:
|
||||
log("Invalid selection syntax. Use @2 or @{2,5,8}")
|
||||
return 1
|
||||
|
||||
# Get the last ResultTable from pipeline context
|
||||
try:
|
||||
last_table = ctx._LAST_RESULT_TABLE
|
||||
if last_table:
|
||||
# Extract tags from selected rows
|
||||
for idx in indices:
|
||||
if 1 <= idx <= len(last_table.rows):
|
||||
# Look for a TagItem in _LAST_RESULT_ITEMS by index
|
||||
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
|
||||
item = ctx._LAST_RESULT_ITEMS[idx - 1]
|
||||
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
|
||||
tag_name = getattr(item, 'tag_name', None)
|
||||
if tag_name:
|
||||
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
|
||||
tags_from_at_syntax.append(tag_name)
|
||||
# Also get hash from first item for consistency
|
||||
if not hash_from_at_syntax:
|
||||
hash_from_at_syntax = getattr(item, 'hash_hex', None)
|
||||
|
||||
if not tags_from_at_syntax:
|
||||
log(f"No tags found at indices: {indices}")
|
||||
return 1
|
||||
else:
|
||||
log("No ResultTable in pipeline (use @ after running get-tag)")
|
||||
return 1
|
||||
except Exception as exc:
|
||||
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
|
||||
return 1
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
# If we have a list of TagItems, we want to process ALL of them if no args provided
|
||||
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
|
||||
if not args and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
|
||||
# We will extract tags from the list later
|
||||
pass
|
||||
else:
|
||||
result = result[0]
|
||||
|
||||
# Determine tags and hash to use
|
||||
tags: list[str] = []
|
||||
hash_hex = None
|
||||
|
||||
if tags_from_at_syntax:
|
||||
# Use tags extracted from @ syntax
|
||||
tags = tags_from_at_syntax
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
|
||||
log(f"[delete_tag] Using @ syntax extraction: {len(tags)} tag(s) to delete: {tags}")
|
||||
elif isinstance(result, list) and result and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
|
||||
# Got a list of TagItems (e.g. from delete-tag @1)
|
||||
tags = [getattr(item, 'tag_name') for item in result if getattr(item, 'tag_name', None)]
|
||||
# Use hash from first item
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result[0], "hash_hex", None))
|
||||
elif result and hasattr(result, '__class__') and result.__class__.__name__ == 'TagItem':
|
||||
# Got a piped TagItem - delete this specific tag
|
||||
tag_name = getattr(result, 'tag_name', None)
|
||||
if tag_name:
|
||||
tags = [tag_name]
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||
else:
|
||||
# Traditional mode - parse tag arguments
|
||||
tags = parse_tag_arguments(rest)
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||
|
||||
if not tags:
|
||||
log("No valid tags were provided")
|
||||
return 1
|
||||
|
||||
if not hash_hex:
|
||||
log("Selected result does not include a hash")
|
||||
return 1
|
||||
|
||||
try:
|
||||
service_name = hydrus_wrapper.get_tag_service_name(config)
|
||||
except Exception as exc:
|
||||
log(f"Failed to resolve tag service: {exc}")
|
||||
return 1
|
||||
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}")
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable")
|
||||
return 1
|
||||
|
||||
log(f"[delete_tag] Sending deletion request: hash={hash_hex}, tags={tags}, service={service_name}")
|
||||
try:
|
||||
result = client.delete_tags(hash_hex, tags, service_name)
|
||||
log(f"[delete_tag] Hydrus response: {result}")
|
||||
except Exception as exc:
|
||||
log(f"Hydrus del-tag failed: {exc}")
|
||||
return 1
|
||||
|
||||
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
|
||||
log(f"Removed {len(tags)} tag(s) from {preview} via '{service_name}'.")
|
||||
|
||||
# Re-fetch and emit updated tags after deletion
|
||||
try:
|
||||
payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_urls=False)
|
||||
items = payload.get("metadata") if isinstance(payload, dict) else None
|
||||
if isinstance(items, list) and items:
|
||||
meta = items[0] if isinstance(items[0], dict) else None
|
||||
if isinstance(meta, dict):
|
||||
# Extract tags from updated metadata
|
||||
from cmdlets.get_tag import _extract_my_tags_from_hydrus_meta, TagItem
|
||||
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
|
||||
updated_tags = _extract_my_tags_from_hydrus_meta(meta, service_key, service_name)
|
||||
|
||||
# Emit updated tags as TagItem objects
|
||||
from result_table import ResultTable
|
||||
table = ResultTable("Tags", max_columns=2)
|
||||
tag_items = []
|
||||
for idx, tag_name in enumerate(updated_tags, start=1):
|
||||
tag_item = TagItem(
|
||||
tag_name=tag_name,
|
||||
tag_index=idx,
|
||||
hash_hex=hash_hex,
|
||||
source="hydrus",
|
||||
service_name=service_name,
|
||||
)
|
||||
tag_items.append(tag_item)
|
||||
table.add_result(tag_item)
|
||||
ctx.emit(tag_item)
|
||||
|
||||
# Store items for @ selection in next command (CLI will handle table management)
|
||||
# Don't call set_last_result_table so we don't pollute history or table context
|
||||
except Exception as exc:
|
||||
log(f"Warning: Could not fetch updated tags after deletion: {exc}", file=__import__('sys').stderr)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
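The @-selector handling in delete_tag.py above (accepting @2 or @{2,5,8} to address rows of the last result table) can be illustrated with this small standalone parser. It is a sketch only; the cmdlet itself resolves the parsed indices against the pipeline's last result table rather than returning them directly.

from typing import List

def parse_at_selector(token: str) -> List[int]:
    """Parse '@2' or '@{2,5,8}' into a list of 1-based row indices."""
    if not token.startswith("@"):
        raise ValueError("selector must start with '@'")
    body = token[1:].strip()
    if body.startswith("{") and body.endswith("}"):
        body = body[1:-1]
    return [int(part.strip()) for part in body.split(",") if part.strip()]

# parse_at_selector("@{2,5,8}") -> [2, 5, 8]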
82
cmdlets/delete_url.py
Normal file
@@ -0,0 +1,82 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="delete-url",
    summary="Remove a URL association from a Hydrus file.",
    usage="delete-url [-hash <sha256>] <url>",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
        CmdletArg("<url>", required=True, description="The URL to remove from the file."),
    ],
    details=[
        "- Removes the URL from the Hydrus file's known URL list.",
    ],
)


def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
    override_hash: str | None = None
    rest: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        rest.append(a)
        i += 1
    return override_hash, rest


@register(["del-url", "delete-url", "delete_url"])  # aliases
def delete(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    override_hash, rest = _parse_hash_and_rest(args)
    if not rest:
        log("Requires a URL argument")
        return 1
    url = str(rest[0] or '').strip()
    if not url:
        log("Requires a non-empty URL")
        return 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        client.delete_url(hash_hex, url)
    except Exception as exc:
        log(f"Hydrus del-url failed: {exc}")
        return 1
    log(f"Deleted URL: {url}")
    return 0
2633
cmdlets/download_data.py
Normal file
File diff suppressed because it is too large
1618
cmdlets/get_file.py
Normal file
File diff suppressed because it is too large
246
cmdlets/get_metadata.py
Normal file
@@ -0,0 +1,246 @@
from __future__ import annotations

from typing import Any, Dict, Sequence, Optional
import json
import sys

from helper.logger import log
from pathlib import Path
import mimetypes
import os

from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash


def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    # Helper to get field from both dict and object
    def get_field(obj: Any, field: str, default: Any = None) -> Any:
        if isinstance(obj, dict):
            return obj.get(field, default)
        else:
            return getattr(obj, field, default)

    # Parse -hash override
    override_hash: str | None = None
    args_list = list(_args)
    i = 0
    while i < len(args_list):
        a = args_list[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
            override_hash = str(args_list[i + 1]).strip()
            break
        i += 1

    # Try to determine if this is a local file or Hydrus file
    local_path = get_field(result, "target", None) or get_field(result, "path", None)
    is_local = False
    if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")):
        is_local = True

    # LOCAL FILE PATH
    if is_local and local_path:
        try:
            file_path = Path(str(local_path))
            if file_path.exists() and file_path.is_file():
                # Get the hash from result or compute it
                hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))

                # If no hash, compute SHA256 of the file
                if not hash_hex:
                    try:
                        import hashlib
                        with open(file_path, 'rb') as f:
                            hash_hex = hashlib.sha256(f.read()).hexdigest()
                    except Exception:
                        hash_hex = None

                # Get MIME type
                mime_type, _ = mimetypes.guess_type(str(file_path))
                if not mime_type:
                    mime_type = "unknown"

                # Get file size
                try:
                    file_size = file_path.stat().st_size
                except Exception:
                    file_size = None

                # Try to get duration if it's a media file
                duration_seconds = None
                try:
                    # Try to use ffprobe if available
                    import subprocess
                    result_proc = subprocess.run(
                        ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)],
                        capture_output=True,
                        text=True,
                        timeout=5
                    )
                    if result_proc.returncode == 0 and result_proc.stdout.strip():
                        try:
                            duration_seconds = float(result_proc.stdout.strip())
                        except ValueError:
                            pass
                except Exception:
                    pass

                # Get format helpers from search module
                try:
                    from .search_file import _format_size as _fmt_size
                    from .search_file import _format_duration as _fmt_dur
                except Exception:
                    _fmt_size = lambda x: str(x) if x is not None else ""
                    _fmt_dur = lambda x: str(x) if x is not None else ""

                size_label = _fmt_size(file_size) if file_size is not None else ""
                dur_label = _fmt_dur(duration_seconds) if duration_seconds is not None else ""

                # Get known URLs from sidecar or result
                urls = []
                sidecar_path = Path(str(file_path) + '.tags')
                if sidecar_path.exists():
                    try:
                        with open(sidecar_path, 'r', encoding='utf-8') as f:
                            for line in f:
                                line = line.strip()
                                if line.startswith('known_url:'):
                                    url_value = line.replace('known_url:', '', 1).strip()
                                    if url_value:
                                        urls.append(url_value)
                    except Exception:
                        pass

                # Fallback to result URLs if not in sidecar
                if not urls:
                    urls_from_result = get_field(result, "known_urls", None) or get_field(result, "urls", None)
                    if isinstance(urls_from_result, list):
                        urls.extend([str(u).strip() for u in urls_from_result if u])

                # Display local file metadata
                log(f"PATH: {file_path}")
                if hash_hex:
                    log(f"HASH: {hash_hex}")
                if mime_type:
                    log(f"MIME: {mime_type}")
                if size_label:
                    log(f"Size: {size_label}")
                if dur_label:
                    log(f"Duration: {dur_label}")
                if urls:
                    log("URLs:")
                    for url in urls:
                        log(f"  {url}")

                return 0
        except Exception as exc:
            # Fall through to Hydrus if local file handling fails
            pass

    # HYDRUS PATH
    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash or local path", file=sys.stderr)
        return 1

    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
        return 1

    if client is None:
        log("Hydrus client unavailable", file=sys.stderr)
        return 1

    try:
        payload = client.fetch_file_metadata(
            hashes=[hash_hex],
            include_service_keys_to_tags=False,
            include_file_urls=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
        return 1

    items = payload.get("metadata") if isinstance(payload, dict) else None
    if not isinstance(items, list) or not items:
        log("No metadata found.")
        return 0

    meta = items[0] if isinstance(items[0], dict) else None
    if not isinstance(meta, dict):
        log("No metadata found.")
        return 0

    mime = meta.get("mime")
    size = meta.get("size") or meta.get("file_size")
    duration_value = meta.get("duration")
    inner = meta.get("metadata") if isinstance(meta.get("metadata"), dict) else None
    if duration_value is None and isinstance(inner, dict):
        duration_value = inner.get("duration")

    try:
        from .search_file import _format_size as _fmt_size
        from .search_file import _format_duration as _fmt_dur
        from .search_file import _hydrus_duration_seconds as _dur_secs
    except Exception:
        _fmt_size = lambda x: str(x) if x is not None else ""
        _dur_secs = lambda x: x
        _fmt_dur = lambda x: str(x) if x is not None else ""

    dur_seconds = _dur_secs(duration_value)
    dur_label = _fmt_dur(dur_seconds) if dur_seconds is not None else ""
    size_label = _fmt_size(size)

    # Display Hydrus file metadata
    log(f"PATH: hydrus://file/{hash_hex}")
    log(f"Hash: {hash_hex}")
    if mime:
        log(f"MIME: {mime}")
    if dur_label:
        log(f"Duration: {dur_label}")
    if size_label:
        log(f"Size: {size_label}")

    urls = meta.get("known_urls") or meta.get("urls")
    if isinstance(urls, list) and urls:
        log("URLs:")
        for url in urls:
            try:
                text = str(url).strip()
            except Exception:
                text = ""
            if text:
                log(f"  {text}")

    return 0


CMDLET = Cmdlet(
    name="get-metadata",
    summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).",
    usage="get-metadata [-hash <sha256>]",
    aliases=["meta"],
    args=[
        CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.",
        "- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.",
        "- Automatically detects local vs Hydrus files.",
        "- Local file hashes are computed via SHA256 if not already available.",
    ],
)
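For reference, a minimal sketch of the `.tags` sidecar format read by the local-file branch above (all values are hypothetical; get-metadata only uses the known_url: lines, while other cmdlets in this commit also read hash:, relationship:, and plain tag lines):

# Illustrative .tags sidecar contents; the file name, hash, and URL are made up for the example.
EXAMPLE_SIDECAR = """\
title:My Lecture Recording
hash:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
known_url:https://example.com/view/123
creator:someone
"""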
87
cmdlets/get_note.py
Normal file
87
cmdlets/get_note.py
Normal file
@@ -0,0 +1,87 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="get-note",
    summary="List notes on a Hydrus file.",
    usage="get-note [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- Prints notes by service and note name.",
    ],
)


@register(["get-note", "get-notes", "get_note"])  # aliases
def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Helper to get field from both dict and object
    def get_field(obj: Any, field: str, default: Any = None) -> Any:
        if isinstance(obj, dict):
            return obj.get(field, default)
        else:
            return getattr(obj, field, default)

    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    from ._shared import parse_cmdlet_args
    parsed = parse_cmdlet_args(args, CMDLET)
    override_hash = parsed.get("hash")

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        payload = client.fetch_file_metadata(hashes=[hash_hex], include_service_keys_to_tags=False, include_notes=True)
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}")
        return 1
    items = payload.get("metadata") if isinstance(payload, dict) else None
    meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
    notes = {}
    if isinstance(meta, dict):
        # Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API
        notes = meta.get('notes') or meta.get('service_names_to_notes') or {}
    if notes:
        ctx.emit("Notes:")
        # Print flattened: service -> (name: text)
        if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()):
            for svc, mapping in notes.items():
                ctx.emit(f"- {svc}:")
                if isinstance(mapping, dict):
                    for k, v in mapping.items():
                        ctx.emit(f"  • {k}: {str(v).strip()}")
        elif isinstance(notes, dict):
            for k, v in notes.items():
                ctx.emit(f"- {k}: {str(v).strip()}")
    else:
        ctx.emit("No notes found.")
    return 0
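As a sketch, the two note payload shapes that the flattening logic above handles look roughly like this (service and note names are hypothetical):

# Nested shape: service -> {note name -> note text}
nested_notes = {"my notes service": {"summary": "short description", "source": "scanned copy"}}
# Flat shape: note name -> note text
flat_notes = {"summary": "short description"}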
240
cmdlets/get_relationship.py
Normal file
240
cmdlets/get_relationship.py
Normal file
@@ -0,0 +1,240 @@
from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional
import json
import sys

from helper.logger import log

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes

CMDLET = Cmdlet(
    name="get-relationship",
    summary="Print Hydrus relationships for the selected file.",
    usage="get-relationship [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- Lists relationship data as returned by Hydrus.",
    ],
)


@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"])  # aliases
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass

    # Parse -hash override
    override_hash: str | None = None
    args_list = list(_args)
    i = 0
    while i < len(args_list):
        a = args_list[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
            override_hash = str(args_list[i + 1]).strip()
            break
        i += 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash", file=sys.stderr)
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
        return 1

    if client is None:
        log("Hydrus client unavailable", file=sys.stderr)
        return 1
    try:
        rel = client.get_file_relationships(hash_hex)
    except Exception as exc:
        log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
        return 1
    if not rel:
        log("No relationships found.")
        return 0

    # Extract file_relationships from response
    file_rels = rel.get("file_relationships", {})
    if not file_rels:
        log("No relationships found.")
        return 0

    # Get the relationships dict for this specific hash
    this_file_rels = file_rels.get(hash_hex)
    if not this_file_rels:
        log("No relationships found.")
        return 0

    # Extract related hashes from all relationship types
    # Keys "0", "1", "3", "8" are relationship type IDs
    # Values are lists of hashes
    related_hashes = []
    for rel_type_id, hash_list in this_file_rels.items():
        # Skip non-numeric keys and metadata keys
        if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
            continue
        if isinstance(hash_list, list):
            for rel_hash in hash_list:
                if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
                    related_hashes.append(rel_hash)

    # Remove duplicates while preserving order
    seen = set()
    unique_hashes = []
    for h in related_hashes:
        if h not in seen:
            seen.add(h)
            unique_hashes.append(h)

    if not unique_hashes:
        log("No related files found.")
        return 0

    # Fetch metadata for all related files
    try:
        metadata_payload = client.fetch_file_metadata(
            hashes=unique_hashes,
            include_service_keys_to_tags=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
        return 1

    metadata_list = metadata_payload.get("metadata") if isinstance(metadata_payload, dict) else None
    if not isinstance(metadata_list, list):
        log("Hydrus metadata response was not a list", file=sys.stderr)
        return 1

    # Build metadata map by hash
    meta_by_hash: Dict[str, Dict[str, Any]] = {}
    for item in metadata_list:
        if isinstance(item, dict):
            item_hash = normalize_hash(item.get("hash"))
            if item_hash:
                meta_by_hash[item_hash] = item

    # Helper functions for formatting
    def _format_duration(seconds: Optional[float]) -> str:
        if seconds is None:
            return ""
        try:
            s = int(seconds)
            hours = s // 3600
            minutes = (s % 3600) // 60
            secs = s % 60
            if hours > 0:
                return f"{hours}:{minutes:02d}:{secs:02d}"
            else:
                return f"{minutes}:{secs:02d}"
        except Exception:
            return ""

    def _get_title(meta: Dict[str, Any]) -> str:
        # Try to extract title from tags
        tags_payload = meta.get("tags")
        if isinstance(tags_payload, dict):
            for service_data in tags_payload.values():
                if isinstance(service_data, dict):
                    storage_tags = service_data.get("storage_tags")
                    if isinstance(storage_tags, dict):
                        for tag_list in storage_tags.values():
                            if isinstance(tag_list, list):
                                for tag in tag_list:
                                    tag_str = str(tag).lower()
                                    if tag_str.startswith("title:"):
                                        return str(tag)[6:].strip()
        # Fallback to hash prefix
        h = meta.get("hash")
        return str(h)[:12] if h else "unknown"

    def _get_mime_type(meta: Dict[str, Any]) -> str:
        mime = meta.get("mime", "")
        if not mime:
            return ""
        # Extract type from mime (e.g., "video/mp4" -> "video")
        parts = str(mime).split("/")
        return parts[0] if parts else ""

    # Print header and separator
    log("# | Title | Type | Duration | Size")
    log("--+---------------------------+-------+----------+--------")

    # Create result objects for each related file
    results: List[Any] = []

    # Print each related file
    for idx, rel_hash in enumerate(unique_hashes, start=1):
        meta = meta_by_hash.get(rel_hash)
        if not meta:
            continue

        title = _get_title(meta)
        mime_type = _get_mime_type(meta)

        # Get duration
        duration_value = meta.get("duration")
        if duration_value is None and isinstance(meta.get("metadata"), dict):
            duration_value = meta["metadata"].get("duration")
        duration_str = _format_duration(duration_value)

        # Get size
        size = meta.get("size") or meta.get("file_size")
        size_str = fmt_bytes(size) if size else ""

        # Format and print row
        title_display = title[:25].ljust(25)
        type_display = mime_type[:5].ljust(5)
        duration_display = duration_str[:8].ljust(8)
        size_display = size_str[:7].ljust(7)

        log(f"{idx:2d} | {title_display} | {type_display} | {duration_display} | {size_display}")

        # Create result object for pipeline
        result_obj = type("RelatedFile", (), {
            "hash_hex": rel_hash,
            "title": title,
            "media_kind": mime_type or "other",
            "size": size,
            "duration": duration_value,
            "known_urls": [],
            "annotations": [],
            "columns": [
                ("Title", title),
                ("Type", mime_type),
                ("Duration", duration_str),
                ("Size", size_str),
            ],
        })()
        results.append(result_obj)

    # Emit results to pipeline
    try:
        ctx._PIPE_EMITS.extend(results)
    except Exception:
        pass

    return 0
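For orientation, a minimal sketch of the relationships payload this cmdlet consumes (hashes shortened and values hypothetical); the numeric string keys hold lists of related hashes, while the king/is_king entries are treated as metadata and skipped:

example_payload = {
    "file_relationships": {
        "abc123...": {
            "is_king": True,
            "king": "abc123...",
            "0": ["def456..."],
            "3": ["789abc..."],
            "8": [],
        }
    }
}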
1191
cmdlets/get_tag.py
Normal file
1191
cmdlets/get_tag.py
Normal file
File diff suppressed because it is too large
90
cmdlets/get_url.py
Normal file
90
cmdlets/get_url.py
Normal file
@@ -0,0 +1,90 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
import json

from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log

CMDLET = Cmdlet(
    name="get-url",
    summary="List URLs associated with a Hydrus file.",
    usage="get-url [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- Prints the known URLs for the selected Hydrus file.",
    ],
)


def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
    override_hash: str | None = None
    rest: list[str] = []
    i = 0
    while i < len(args):
        a = args[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
            override_hash = str(args[i + 1]).strip()
            i += 2
            continue
        rest.append(a)
        i += 1
    return override_hash, rest


@register(["get-url", "get-urls", "get_url"])  # aliases
def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    # Helper to get field from both dict and object
    def get_field(obj: Any, field: str, default: Any = None) -> Any:
        if isinstance(obj, dict):
            return obj.get(field, default)
        else:
            return getattr(obj, field, default)

    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    override_hash, _ = _parse_hash_and_rest(args)
    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash")
        return 1
    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}")
        return 1

    if client is None:
        log("Hydrus client unavailable")
        return 1
    try:
        payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True)
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}")
        return 1
    items = payload.get("metadata") if isinstance(payload, dict) else None
    meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
    urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or []
    if urls:
        ctx.emit("URLs:")
        for u in urls:
            text = str(u).strip()
            if text:
                ctx.emit(f"- {text}")
    else:
        ctx.emit("No URLs found.")
    return 0
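A quick sketch of how _parse_hash_and_rest splits the argument list (argument values are illustrative):

override_hash, rest = _parse_hash_and_rest(["-hash", "0123abcd", "https://example.com/a"])
assert override_hash == "0123abcd"
assert rest == ["https://example.com/a"]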
138
cmdlets/manage_config.py
Normal file
138
cmdlets/manage_config.py
Normal file
@@ -0,0 +1,138 @@
from typing import List, Dict, Any
from ._shared import Cmdlet, CmdletArg
from config import load_config, save_config

CMDLET = Cmdlet(
    name=".config",
    summary="Manage configuration settings",
    usage=".config [key] [value]",
    args=[
        CmdletArg(
            name="key",
            description="Configuration key to update (dot-separated)",
            required=False
        ),
        CmdletArg(
            name="value",
            description="New value for the configuration key",
            required=False
        )
    ]
)


def flatten_config(config: Dict[str, Any], parent_key: str = '', sep: str = '.') -> List[Dict[str, Any]]:
    items = []
    for k, v in config.items():
        if k.startswith('_'):  # Skip internal keys
            continue

        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_config(v, new_key, sep=sep))
        else:
            items.append({
                "Key": new_key,
                "Value": str(v),
                "Type": type(v).__name__,
                "_selection_args": [new_key]
            })
    return items


def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
    keys = key.split('.')
    d = config

    # Navigate to the parent dict
    for k in keys[:-1]:
        if k not in d or not isinstance(d[k], dict):
            d[k] = {}
        d = d[k]

    last_key = keys[-1]

    # Try to preserve type if key exists
    if last_key in d:
        current_val = d[last_key]
        if isinstance(current_val, bool):
            if value.lower() in ('true', 'yes', '1', 'on'):
                d[last_key] = True
            elif value.lower() in ('false', 'no', '0', 'off'):
                d[last_key] = False
            else:
                # Not a recognised boolean literal; keep the raw string rather than
                # guessing truthiness - for config, explicit is better.
                print(f"Warning: Could not convert '{value}' to boolean. Using string.")
                d[last_key] = value
        elif isinstance(current_val, int):
            try:
                d[last_key] = int(value)
            except ValueError:
                print(f"Warning: Could not convert '{value}' to int. Using string.")
                d[last_key] = value
        elif isinstance(current_val, float):
            try:
                d[last_key] = float(value)
            except ValueError:
                print(f"Warning: Could not convert '{value}' to float. Using string.")
                d[last_key] = value
        else:
            d[last_key] = value
    else:
        # New key, try to infer type
        if value.lower() in ('true', 'false'):
            d[last_key] = (value.lower() == 'true')
        elif value.isdigit():
            d[last_key] = int(value)
        else:
            d[last_key] = value

    return True


def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
    # Reload config from disk rather than using the passed 'config': the runtime dict
    # may contain live objects (like the worker manager), while load_config() returns
    # a fresh dict from disk (or cache) that is safe to edit and save.
    current_config = load_config()

    # Parse args manually: values may contain spaces, and the CLI's @ expansion
    # logic can pass arguments through as separate tokens.
    if not args:
        # List mode
        items = flatten_config(current_config)
        # Sort by key
        items.sort(key=lambda x: x['Key'])

        # Emit items for ResultTable
        import pipeline as ctx
        for item in items:
            ctx.emit(item)
        return 0

    # Update mode
    key = args[0]

    if len(args) < 2:
        print(f"Error: Value required for key '{key}'")
        return 1

    value = " ".join(args[1:])

    # Remove quotes if present
    if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
        value = value[1:-1]

    try:
        set_nested_config(current_config, key, value)
        save_config(current_config)
        print(f"Updated '{key}' to '{value}'")
        return 0
    except Exception as e:
        print(f"Error updating config: {e}")
        return 1


CMDLET.exec = _run
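A short usage sketch of the two helpers above (keys and values are hypothetical):

cfg = {"hydrus": {"url": "http://localhost:45869", "timeout": 30}}
rows = flatten_config(cfg)
# rows -> [{"Key": "hydrus.url", "Value": "http://localhost:45869", "Type": "str", "_selection_args": ["hydrus.url"]},
#          {"Key": "hydrus.timeout", "Value": "30", "Type": "int", "_selection_args": ["hydrus.timeout"]}]
set_nested_config(cfg, "hydrus.timeout", "60")      # existing int key: value coerced to int 60
set_nested_config(cfg, "hydrus.use_https", "true")  # new key: inferred as bool True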
916
cmdlets/merge_file.py
Normal file
916
cmdlets/merge_file.py
Normal file
@@ -0,0 +1,916 @@
|
||||
"""Merge multiple files into a single output file."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence, List
|
||||
from pathlib import Path
|
||||
import json
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
from helper.download import download_media
|
||||
from models import DownloadOptions
|
||||
from config import resolve_output_dir
|
||||
import subprocess as _subprocess
|
||||
import shutil as _shutil
|
||||
from ._shared import parse_cmdlet_args
|
||||
|
||||
try:
|
||||
from PyPDF2 import PdfWriter, PdfReader
|
||||
HAS_PYPDF2 = True
|
||||
except ImportError:
|
||||
HAS_PYPDF2 = False
|
||||
PdfWriter = None
|
||||
PdfReader = None
|
||||
|
||||
try:
|
||||
from metadata import (
|
||||
read_tags_from_file,
|
||||
write_tags_to_file,
|
||||
dedup_tags_by_namespace,
|
||||
merge_multiple_tag_lists,
|
||||
write_tags,
|
||||
write_metadata
|
||||
)
|
||||
HAS_METADATA_API = True
|
||||
except ImportError:
|
||||
HAS_METADATA_API = False
|
||||
|
||||
from . import register
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
normalize_result_input,
|
||||
get_pipe_object_path,
|
||||
get_pipe_object_hash,
|
||||
)
|
||||
import models
|
||||
import pipeline as ctx
|
||||
|
||||
|
||||
def _get_item_value(item: Any, key: str, default: Any = None) -> Any:
|
||||
"""Helper to read either dict keys or attributes."""
|
||||
if isinstance(item, dict):
|
||||
return item.get(key, default)
|
||||
return getattr(item, key, default)
|
||||
|
||||
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Merge multiple files into one."""
|
||||
|
||||
# Parse help
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
delete_after = parsed.get("delete", False)
|
||||
|
||||
output_override: Optional[Path] = None
|
||||
output_arg = parsed.get("output")
|
||||
if output_arg:
|
||||
try:
|
||||
output_override = Path(str(output_arg)).expanduser()
|
||||
except Exception:
|
||||
output_override = None
|
||||
|
||||
format_spec = parsed.get("format")
|
||||
if format_spec:
|
||||
format_spec = str(format_spec).lower().strip()
|
||||
|
||||
# Collect files from piped results
|
||||
# Use normalize_result_input to handle both single items and lists
|
||||
files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
|
||||
|
||||
if not files_to_merge:
|
||||
log("No files provided to merge", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if len(files_to_merge) < 2:
|
||||
# Only 1 file - pass it through unchanged
|
||||
# (merge only happens when multiple files are collected)
|
||||
item = files_to_merge[0]
|
||||
ctx.emit(item)
|
||||
return 0
|
||||
|
||||
# Extract file paths and metadata from result objects
|
||||
source_files: List[Path] = []
|
||||
source_tags_files: List[Path] = []
|
||||
source_hashes: List[str] = []
|
||||
source_urls: List[str] = []
|
||||
source_tags: List[str] = [] # NEW: collect tags from source files
|
||||
source_relationships: List[str] = [] # NEW: collect relationships from source files
|
||||
|
||||
for item in files_to_merge:
|
||||
raw_path = get_pipe_object_path(item)
|
||||
target_path = None
|
||||
if isinstance(raw_path, Path):
|
||||
target_path = raw_path
|
||||
elif isinstance(raw_path, str) and raw_path.strip():
|
||||
candidate = Path(raw_path).expanduser()
|
||||
if candidate.exists():
|
||||
target_path = candidate
|
||||
|
||||
# Check for playlist item that needs downloading
|
||||
if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
|
||||
try:
|
||||
playlist_url = item.get('__file_path')
|
||||
item_idx = int(item['__action'].split(':')[1])
|
||||
log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
|
||||
|
||||
output_dir = resolve_output_dir(config)
|
||||
opts = DownloadOptions(
|
||||
url=playlist_url,
|
||||
output_dir=output_dir,
|
||||
playlist_items=str(item_idx),
|
||||
mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible
|
||||
)
|
||||
|
||||
res = download_media(opts)
|
||||
if res and res.path and res.path.exists():
|
||||
target_path = res.path
|
||||
log(f"✓ Downloaded: {target_path.name}", flush=True)
|
||||
except Exception as e:
|
||||
log(f"Failed to download playlist item: {e}", file=sys.stderr)
|
||||
|
||||
if target_path and target_path.exists():
|
||||
source_files.append(target_path)
|
||||
|
||||
# Track the .tags file for this source
|
||||
tags_file = target_path.with_suffix(target_path.suffix + '.tags')
|
||||
if tags_file.exists():
|
||||
source_tags_files.append(tags_file)
|
||||
|
||||
# Try to read hash, tags, urls, and relationships from .tags sidecar file
|
||||
try:
|
||||
tags_content = tags_file.read_text(encoding='utf-8')
|
||||
for line in tags_content.split('\n'):
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('hash:'):
|
||||
hash_value = line[5:].strip()
|
||||
if hash_value:
|
||||
source_hashes.append(hash_value)
|
||||
elif line.startswith('known_url:') or line.startswith('url:'):
|
||||
# Extract URLs from tags file
|
||||
url_value = line.split(':', 1)[1].strip() if ':' in line else ''
|
||||
if url_value and url_value not in source_urls:
|
||||
source_urls.append(url_value)
|
||||
elif line.startswith('relationship:'):
|
||||
# Extract relationships from tags file
|
||||
rel_value = line.split(':', 1)[1].strip() if ':' in line else ''
|
||||
if rel_value and rel_value not in source_relationships:
|
||||
source_relationships.append(rel_value)
|
||||
else:
|
||||
# Collect actual tags (not metadata like hash: or known_url:)
|
||||
source_tags.append(line)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Extract hash if available in item (as fallback)
|
||||
hash_value = get_pipe_object_hash(item)
|
||||
if hash_value and hash_value not in source_hashes:
|
||||
source_hashes.append(str(hash_value))
|
||||
|
||||
# Extract known URLs if available
|
||||
known_urls = _get_item_value(item, 'known_urls', [])
|
||||
if isinstance(known_urls, str):
|
||||
source_urls.append(known_urls)
|
||||
elif isinstance(known_urls, list):
|
||||
source_urls.extend(known_urls)
|
||||
else:
|
||||
title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown')
|
||||
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
|
||||
|
||||
if len(source_files) < 2:
|
||||
log("At least 2 valid files required to merge", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Detect file types
|
||||
file_types = set()
|
||||
for f in source_files:
|
||||
suffix = f.suffix.lower()
|
||||
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
|
||||
file_types.add('audio')
|
||||
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
|
||||
file_types.add('video')
|
||||
elif suffix in {'.pdf'}:
|
||||
file_types.add('pdf')
|
||||
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
|
||||
file_types.add('text')
|
||||
else:
|
||||
file_types.add('other')
|
||||
|
||||
if len(file_types) > 1 and 'other' not in file_types:
|
||||
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
|
||||
log(f"Can only merge files of the same type", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
file_kind = list(file_types)[0] if file_types else 'other'
|
||||
|
||||
# Determine output format
|
||||
output_format = format_spec or 'auto'
|
||||
if output_format == 'auto':
|
||||
if file_kind == 'audio':
|
||||
output_format = 'mka' # Default audio codec - mka supports chapters and stream copy
|
||||
elif file_kind == 'video':
|
||||
output_format = 'mp4' # Default video codec
|
||||
elif file_kind == 'pdf':
|
||||
output_format = 'pdf'
|
||||
else:
|
||||
output_format = 'txt'
|
||||
|
||||
# Determine output path
|
||||
if output_override:
|
||||
if output_override.is_dir():
|
||||
            base_name = _sanitize_name(_get_item_value(files_to_merge[0], 'title', 'merged') or 'merged')
|
||||
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
|
||||
else:
|
||||
output_path = output_override
|
||||
else:
|
||||
first_file = source_files[0]
|
||||
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
|
||||
|
||||
# Ensure output directory exists
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
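    # Worked example (hypothetical inputs): three .mp3 files piped in with no -format flag
    # -> file_kind == 'audio', output_format == 'mka' (the auto default),
    #    output_path == <dir of first file>/"<first file stem> (merged).mka"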
|
||||
# Perform merge based on file type
|
||||
if file_kind == 'audio':
|
||||
success = _merge_audio(source_files, output_path, output_format)
|
||||
elif file_kind == 'video':
|
||||
success = _merge_video(source_files, output_path, output_format)
|
||||
elif file_kind == 'pdf':
|
||||
success = _merge_pdf(source_files, output_path)
|
||||
elif file_kind == 'text':
|
||||
success = _merge_text(source_files, output_path)
|
||||
else:
|
||||
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if not success:
|
||||
log("Merge failed", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
|
||||
|
||||
# Create .tags sidecar file for the merged output using unified API
|
||||
tags_path = output_path.with_suffix(output_path.suffix + '.tags')
|
||||
try:
|
||||
# Start with title tag
|
||||
merged_tags = [f"title:{output_path.stem}"]
|
||||
|
||||
# Merge tags from source files using metadata API
|
||||
if source_tags and HAS_METADATA_API:
|
||||
# Use dedup function to normalize and deduplicate
|
||||
merged_source_tags = dedup_tags_by_namespace(source_tags)
|
||||
merged_tags.extend(merged_source_tags)
|
||||
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
|
||||
elif source_tags:
|
||||
# Fallback: simple deduplication if metadata API unavailable
|
||||
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
|
||||
|
||||
# Write merged tags to sidecar file
|
||||
if HAS_METADATA_API and write_tags_to_file:
|
||||
# Use unified API for file writing
|
||||
source_hashes_list = source_hashes if source_hashes else None
|
||||
source_urls_list = source_urls if source_urls else None
|
||||
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list)
|
||||
else:
|
||||
# Fallback: manual file writing
|
||||
tags_lines = []
|
||||
|
||||
# Add hash first (if available)
|
||||
if source_hashes:
|
||||
tags_lines.append(f"hash:{source_hashes[0]}")
|
||||
|
||||
# Add regular tags
|
||||
tags_lines.extend(merged_tags)
|
||||
|
||||
# Add known URLs
|
||||
if source_urls:
|
||||
for url in source_urls:
|
||||
tags_lines.append(f"known_url:{url}")
|
||||
|
||||
# Add relationships (if available)
|
||||
if source_relationships:
|
||||
for rel in source_relationships:
|
||||
tags_lines.append(f"relationship:{rel}")
|
||||
|
||||
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||
f.write('\n'.join(tags_lines) + '\n')
|
||||
|
||||
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
|
||||
|
||||
# Also create .metadata file using centralized function
|
||||
try:
|
||||
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships)
|
||||
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
|
||||
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
|
||||
|
||||
# Emit PipelineItem so the merged file can be piped to next command
|
||||
try:
|
||||
# Try to import PipelineItem from downlow module
|
||||
try:
|
||||
from downlow import PipelineItem
|
||||
except ImportError:
|
||||
# Fallback: create a simple object with the required attributes
|
||||
class SimpleItem:
|
||||
def __init__(self, target, title, media_kind, tags=None, known_urls=None):
|
||||
self.target = target
|
||||
self.title = title
|
||||
self.media_kind = media_kind
|
||||
self.tags = tags or []
|
||||
self.known_urls = known_urls or []
|
||||
PipelineItem = SimpleItem
|
||||
|
||||
merged_item = PipelineItem(
|
||||
target=str(output_path),
|
||||
title=output_path.stem,
|
||||
media_kind=file_kind,
|
||||
tags=merged_tags, # Include merged tags
|
||||
known_urls=source_urls # Include known URLs
|
||||
)
|
||||
ctx.emit(merged_item)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
|
||||
# Still emit a string representation for feedback
|
||||
ctx.emit(f"Merged: {output_path}")
|
||||
|
||||
# Delete source files if requested
|
||||
if delete_after:
|
||||
# First delete all .tags files
|
||||
for tags_file in source_tags_files:
|
||||
try:
|
||||
tags_file.unlink()
|
||||
log(f"Deleted: {tags_file.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
|
||||
|
||||
# Then delete all source files
|
||||
for f in source_files:
|
||||
try:
|
||||
f.unlink()
|
||||
log(f"Deleted: {f.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def _sanitize_name(text: str) -> str:
|
||||
"""Sanitize filename."""
|
||||
allowed = []
|
||||
for ch in text:
|
||||
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
|
||||
return (" ".join("".join(allowed).split()) or "merged").strip()
|
||||
|
||||
|
||||
def _ext_for_format(fmt: str) -> str:
|
||||
"""Get file extension for format."""
|
||||
format_map = {
|
||||
'mp3': 'mp3',
|
||||
'm4a': 'm4a',
|
||||
'aac': 'aac',
|
||||
'opus': 'opus',
|
||||
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
|
||||
'mkv': 'mkv',
|
||||
'mp4': 'mp4',
|
||||
'webm': 'webm',
|
||||
'pdf': 'pdf',
|
||||
'txt': 'txt',
|
||||
'auto': 'mka', # Default - MKA for chapters
|
||||
}
|
||||
return format_map.get(fmt.lower(), 'mka')
|
||||
|
||||
|
||||
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
|
||||
"""Add chapters to an M4A file using mutagen.
|
||||
|
||||
Args:
|
||||
file_path: Path to M4A file
|
||||
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if not chapters:
|
||||
return True
|
||||
|
||||
    try:
        from mutagen.mp4 import MP4  # only MP4 is needed here; the unused low-level Atom imports are dropped
    except ImportError:
        logger.warning("[merge-file] mutagen not available for chapter writing")
        return False
|
||||
|
||||
try:
|
||||
# Load the MP4 file
|
||||
audio = MP4(str(file_path))
|
||||
|
||||
# Build the chapter atom
|
||||
# MP4 chapters are stored in a 'chap' atom with specific structure
|
||||
chapter_data = b''
|
||||
|
||||
for i, chapter in enumerate(chapters, 1):
|
||||
# Each chapter entry: 10-byte header + title
|
||||
title = chapter.get('title', f'Chapter {i}').encode('utf-8')
|
||||
start_time_ms = int(chapter.get('start_ms', 0))
|
||||
|
||||
# Chapter atom format for M4A:
|
||||
# (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more...
|
||||
# This is complex, so we'll use a simpler atom approach
|
||||
pass
|
||||
|
||||
# Unfortunately, mutagen doesn't have built-in chapter writing for MP4
|
||||
# Chapter writing requires low-level atom manipulation
|
||||
# For now, we'll just return and note this limitation
|
||||
logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Error writing chapters: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
|
||||
"""Merge audio files with chapters based on file boundaries."""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ffmpeg_path = _shutil.which('ffmpeg')
|
||||
if not ffmpeg_path:
|
||||
log("ffmpeg not found in PATH", file=sys.stderr)
|
||||
return False
|
||||
|
||||
try:
|
||||
# Step 1: Get duration of each file to calculate chapter timestamps
|
||||
chapters = []
|
||||
current_time_ms = 0
|
||||
|
||||
log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Analyzing files for chapters")
|
||||
|
||||
for file_path in files:
|
||||
# Get duration using ffprobe
|
||||
try:
|
||||
ffprobe_cmd = [
|
||||
'ffprobe', '-v', 'error', '-show_entries',
|
||||
'format=duration', '-print_format',
|
||||
'default=noprint_wrappers=1:nokey=1', str(file_path)
|
||||
]
|
||||
|
||||
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
|
||||
if probe_result.returncode == 0 and probe_result.stdout.strip():
|
||||
try:
|
||||
duration_sec = float(probe_result.stdout.strip())
|
||||
except ValueError:
|
||||
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
|
||||
duration_sec = 0
|
||||
else:
|
||||
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
|
||||
duration_sec = 0
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
|
||||
duration_sec = 0
|
||||
|
||||
# Create chapter entry - use title: tag from metadata if available
|
||||
title = file_path.stem # Default to filename without extension
|
||||
if HAS_METADATA_API:
|
||||
try:
|
||||
# Try to read tags from .tags sidecar file
|
||||
tags_file = file_path.with_suffix(file_path.suffix + '.tags')
|
||||
if tags_file.exists():
|
||||
tags = read_tags_from_file(tags_file)
|
||||
if tags:
|
||||
# Look for title: tag
|
||||
for tag in tags:
|
||||
if isinstance(tag, str) and tag.lower().startswith('title:'):
|
||||
# Extract the title value after the colon
|
||||
title = tag.split(':', 1)[1].strip()
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
|
||||
pass # Fall back to filename
|
||||
|
||||
# Convert seconds to HH:MM:SS.mmm format
|
||||
hours = int(current_time_ms // 3600000)
|
||||
minutes = int((current_time_ms % 3600000) // 60000)
|
||||
seconds = int((current_time_ms % 60000) // 1000)
|
||||
millis = int(current_time_ms % 1000)
|
||||
|
||||
chapters.append({
|
||||
'time_ms': current_time_ms,
|
||||
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
|
||||
'title': title,
|
||||
'duration_sec': duration_sec
|
||||
})
|
||||
|
||||
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
|
||||
current_time_ms += int(duration_sec * 1000)
|
||||
|
||||
# Step 2: Create concat demuxer file
|
||||
concat_file = output.parent / f".concat_{output.stem}.txt"
|
||||
concat_lines = []
|
||||
for f in files:
|
||||
# Escape quotes in path
|
||||
safe_path = str(f).replace("'", "'\\''")
|
||||
concat_lines.append(f"file '{safe_path}'")
|
||||
|
||||
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
|
||||
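        # Illustrative contents of the generated concat list (paths are hypothetical):
        #   file '/downloads/part 01.m4a'
        #   file '/downloads/part 02.m4a'
        # ffmpeg consumes it below via: -f concat -safe 0 -i <list file>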
|
||||
# Step 3: Create FFmpeg metadata file with chapters
|
||||
metadata_file = output.parent / f".metadata_{output.stem}.txt"
|
||||
metadata_lines = [';FFMETADATA1']
|
||||
|
||||
for i, chapter in enumerate(chapters):
|
||||
# FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
|
||||
metadata_lines.append('[CHAPTER]')
|
||||
metadata_lines.append('TIMEBASE=1/1000')
|
||||
metadata_lines.append(f'START={chapter["time_ms"]}')
|
||||
# Calculate end time (start of next chapter or end of file)
|
||||
if i < len(chapters) - 1:
|
||||
metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
|
||||
else:
|
||||
metadata_lines.append(f'END={current_time_ms}')
|
||||
metadata_lines.append(f'title={chapter["title"]}')
|
||||
|
||||
metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
|
||||
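        # Illustrative FFMETADATA produced above for two source files (titles and times hypothetical):
        #   ;FFMETADATA1
        #   [CHAPTER]
        #   TIMEBASE=1/1000
        #   START=0
        #   END=1830000
        #   title=Part One
        #   [CHAPTER]
        #   TIMEBASE=1/1000
        #   START=1830000
        #   END=3600000
        #   title=Part Two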
log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Created {len(chapters)} chapters")
|
||||
|
||||
# Step 4: Build FFmpeg command to merge and embed chapters
|
||||
# Strategy: First merge audio, then add metadata in separate pass
|
||||
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
|
||||
|
||||
# Add threading options for speed
|
||||
cmd.extend(['-threads', '0']) # Use all available threads
|
||||
|
||||
# Audio codec selection for first input
|
||||
if output_format == 'mp3':
|
||||
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
|
||||
elif output_format == 'm4a':
|
||||
# Use copy if possible (much faster), otherwise re-encode
|
||||
# Check if inputs are already AAC/M4A to avoid re-encoding
|
||||
# For now, default to copy if format matches, otherwise re-encode
|
||||
# But since we are merging potentially different codecs, re-encoding is safer
|
||||
# To speed up re-encoding, we can use a faster preset or hardware accel if available
|
||||
cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality
|
||||
elif output_format == 'aac':
|
||||
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
|
||||
elif output_format == 'opus':
|
||||
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
|
||||
elif output_format == 'mka':
|
||||
# FLAC is fast to encode but large. Copy is fastest if inputs are compatible.
|
||||
# If we want speed, copy is best. If we want compatibility, re-encode.
|
||||
# Let's try copy first if inputs are same format, but that's hard to detect here.
|
||||
# Defaulting to copy for MKA as it's a container that supports many codecs
|
||||
cmd.extend(['-c:a', 'copy'])
|
||||
else:
|
||||
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
|
||||
|
||||
# Add the output file
|
||||
cmd.append(str(output))
|
||||
|
||||
log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
|
||||
|
||||
# Run ffmpeg with progress monitoring
|
||||
try:
|
||||
from helper.progress import print_progress, print_final_progress
|
||||
import re
|
||||
|
||||
process = _subprocess.Popen(
|
||||
cmd,
|
||||
stdout=_subprocess.PIPE,
|
||||
stderr=_subprocess.PIPE,
|
||||
text=True,
|
||||
encoding='utf-8',
|
||||
errors='replace'
|
||||
)
|
||||
|
||||
# Monitor progress
|
||||
duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
|
||||
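            # Example ffmpeg stderr fragment matched by duration_re (illustrative):
            #   "size=    2048kB time=00:01:23.45 bitrate= 201.5kbits/s speed=25x"
            # -> groups ("00", "01", "23", "45") -> 83.45 seconds of output processed so far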
total_duration_sec = current_time_ms / 1000.0
|
||||
|
||||
while True:
|
||||
# Read stderr line by line (ffmpeg writes progress to stderr)
|
||||
if process.stderr:
|
||||
line = process.stderr.readline()
|
||||
if not line and process.poll() is not None:
|
||||
break
|
||||
|
||||
if line:
|
||||
# Parse time=HH:MM:SS.mm
|
||||
match = duration_re.search(line)
|
||||
if match and total_duration_sec > 0:
|
||||
h, m, s, cs = map(int, match.groups())
|
||||
current_sec = h * 3600 + m * 60 + s + cs / 100.0
|
||||
|
||||
# Calculate speed/bitrate if available (optional)
|
||||
# For now just show percentage
|
||||
print_progress(
|
||||
output.name,
|
||||
int(current_sec * 1000), # Use ms as "bytes" for progress bar
|
||||
int(total_duration_sec * 1000),
|
||||
speed=0
|
||||
)
|
||||
else:
|
||||
break
|
||||
|
||||
# Wait for completion
|
||||
stdout, stderr = process.communicate()
|
||||
|
||||
if process.returncode != 0:
|
||||
log(f"FFmpeg error: {stderr}", file=sys.stderr)
|
||||
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
|
||||
|
||||
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] ffmpeg process error: {e}")
|
||||
raise
|
||||
|
||||
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
|
||||
|
||||
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
|
||||
if output_format == 'mka' or output.suffix.lower() == '.mka':
|
||||
# MKA/MKV format has native chapter support via FFMetadata
|
||||
# Re-mux the file with chapters embedded (copy streams, no re-encode)
|
||||
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
|
||||
|
||||
temp_output = output.parent / f".temp_{output.stem}.mka"
|
||||
|
||||
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
|
||||
mkvmerge_path = _shutil.which('mkvmerge')
|
||||
|
||||
if mkvmerge_path:
|
||||
# mkvmerge is the best tool for embedding chapters in Matroska files
|
||||
log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
|
||||
cmd2 = [
|
||||
mkvmerge_path, '-o', str(temp_output),
|
||||
'--chapters', str(metadata_file),
|
||||
str(output)
|
||||
]
|
||||
else:
|
||||
# Fallback to ffmpeg with proper chapter embedding for Matroska
|
||||
log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
|
||||
# For Matroska files, the metadata must be provided via -f ffmetadata input
|
||||
cmd2 = [
|
||||
ffmpeg_path, '-y',
|
||||
'-i', str(output), # Input: merged audio
|
||||
'-i', str(metadata_file), # Input: FFMetadata file
|
||||
'-c:a', 'copy', # Copy audio without re-encoding
|
||||
'-threads', '0', # Use all threads
|
||||
'-map', '0', # Map all from first input
|
||||
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
|
||||
str(temp_output) # Output
|
||||
]
|
||||
|
||||
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
|
||||
|
||||
try:
|
||||
# Run chapter embedding silently (progress handled by worker thread)
|
||||
_subprocess.run(
|
||||
cmd2,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
stdin=_subprocess.DEVNULL,
|
||||
timeout=600,
|
||||
check=False
|
||||
)
|
||||
|
||||
# Replace original with temp if successful
|
||||
if temp_output.exists() and temp_output.stat().st_size > 0:
|
||||
try:
|
||||
import shutil
|
||||
if output.exists():
|
||||
output.unlink()
|
||||
shutil.move(str(temp_output), str(output))
|
||||
log(f"✓ Chapters successfully embedded!", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Chapters embedded successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Could not replace file: {e}")
|
||||
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
|
||||
try:
|
||||
temp_output.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
logger.warning(f"[merge-file] Chapter embedding did not create output")
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
|
||||
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||
elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
|
||||
# MP4/M4A format has native chapter support via iTunes metadata atoms
|
||||
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
|
||||
|
||||
temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
|
||||
|
||||
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
|
||||
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
|
||||
cmd2 = [
|
||||
ffmpeg_path, '-y',
|
||||
'-i', str(output), # Input: merged audio
|
||||
'-i', str(metadata_file), # Input: FFMetadata file
|
||||
'-c:a', 'copy', # Copy audio without re-encoding
|
||||
'-threads', '0', # Use all threads
|
||||
'-map', '0', # Map all from first input
|
||||
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
|
||||
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
|
||||
str(temp_output) # Output
|
||||
]
|
||||
|
||||
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
|
||||
|
||||
try:
|
||||
# Run MP4 chapter embedding silently (progress handled by worker thread)
|
||||
_subprocess.run(
|
||||
cmd2,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
stdin=_subprocess.DEVNULL,
|
||||
timeout=600,
|
||||
check=False
|
||||
)
|
||||
|
||||
# Replace original with temp if successful
|
||||
if temp_output.exists() and temp_output.stat().st_size > 0:
|
||||
try:
|
||||
import shutil
|
||||
if output.exists():
|
||||
output.unlink()
|
||||
shutil.move(str(temp_output), str(output))
|
||||
log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
|
||||
logger.info(f"[merge-file] MP4 chapters embedded successfully")
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Could not replace file: {e}")
|
||||
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
|
||||
try:
|
||||
temp_output.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
|
||||
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||
else:
|
||||
# For other formats, chapters would require external tools
|
||||
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
|
||||
log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
|
||||
|
||||
# Clean up temp files
|
||||
try:
|
||||
concat_file.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
metadata_file.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log(f"Audio merge error: {e}", file=sys.stderr)
|
||||
logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
|
||||
"""Merge video files."""
|
||||
ffmpeg_path = _shutil.which('ffmpeg')
|
||||
if not ffmpeg_path:
|
||||
log("ffmpeg not found in PATH", file=sys.stderr)
|
||||
return False
|
||||
|
||||
try:
|
||||
# Create concat demuxer file
|
||||
concat_file = output.parent / f".concat_{output.stem}.txt"
|
||||
concat_lines = []
|
||||
for f in files:
|
||||
safe_path = str(f).replace("'", "'\\''")
|
||||
concat_lines.append(f"file '{safe_path}'")
|
||||
|
||||
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
|
||||
|
||||
# Build FFmpeg command for video merge
|
||||
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
|
||||
|
||||
# Video codec selection
|
||||
if output_format == 'mp4':
|
||||
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
|
||||
elif output_format == 'mkv':
|
||||
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
|
||||
else:
|
||||
cmd.extend(['-c', 'copy']) # Copy without re-encoding
|
||||
|
||||
cmd.append(str(output))
|
||||
|
||||
log(f"Merging {len(files)} video files...", file=sys.stderr)
|
||||
result = _subprocess.run(cmd, capture_output=True, text=True)
|
||||
|
||||
# Clean up concat file
|
||||
try:
|
||||
concat_file.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = (result.stderr or '').strip()
|
||||
log(f"FFmpeg error: {stderr}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log(f"Video merge error: {e}", file=sys.stderr)
|
||||
return False
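# Illustrative sketch (not part of the original commit): the concat demuxer only tolerates
# '-c', 'copy' when every input shares the same codec parameters, which is why the mp4/mkv
# branches above re-encode with libx265. A probe like this could decide when stream copy is
# safe instead; the helper name is an assumption for the example.
def _inputs_share_video_params_example(files: List[Path]) -> bool:
    import json as _json
    seen = set()
    for f in files:
        proc = _subprocess.run(
            ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
             '-show_entries', 'stream=codec_name,width,height,pix_fmt',
             '-of', 'json', str(f)],
            capture_output=True, text=True)
        if proc.returncode != 0:
            return False
        streams = _json.loads(proc.stdout or '{}').get('streams') or [{}]
        s = streams[0]
        seen.add((s.get('codec_name'), s.get('width'), s.get('height'), s.get('pix_fmt')))
    return len(seen) == 1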
|
||||
|
||||
|
||||
def _merge_text(files: List[Path], output: Path) -> bool:
|
||||
"""Merge text files."""
|
||||
try:
|
||||
with open(output, 'w', encoding='utf-8') as outf:
|
||||
for i, f in enumerate(files):
|
||||
if i > 0:
|
||||
outf.write('\n---\n') # Separator between files
|
||||
try:
|
||||
content = f.read_text(encoding='utf-8', errors='replace')
|
||||
outf.write(content)
|
||||
except Exception as e:
|
||||
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log(f"Text merge error: {e}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
|
||||
def _merge_pdf(files: List[Path], output: Path) -> bool:
|
||||
"""Merge PDF files."""
|
||||
if not HAS_PYPDF2:
|
||||
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
|
||||
return False
|
||||
|
||||
try:
    writer = PdfWriter()
|
||||
|
||||
for f in files:
|
||||
try:
|
||||
reader = PdfReader(f)
|
||||
for page in reader.pages:
|
||||
writer.add_page(page)
|
||||
log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
with open(output, 'wb') as outf:
|
||||
writer.write(outf)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
log(f"PDF merge error: {e}", file=sys.stderr)
|
||||
return False
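# Illustrative sketch (not part of the original commit): if per-source navigation is wanted in
# the merged PDF, newer PyPDF2/pypdf releases expose add_outline_item() on the writer (older
# releases called it add_bookmark()); whether it exists depends on the installed version, so
# the hasattr() guard below is deliberate.
def _merge_pdf_with_outline_example(files: List[Path], output: Path) -> bool:
    writer = PdfWriter()
    page_index = 0
    for f in files:
        reader = PdfReader(f)
        start = page_index
        for page in reader.pages:
            writer.add_page(page)
        page_index += len(reader.pages)
        if hasattr(writer, "add_outline_item"):
            writer.add_outline_item(f.stem, start)  # bookmark pointing at this file's first page
    with open(output, 'wb') as outf:
        writer.write(outf)
    return True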
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="merge-file",
|
||||
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
|
||||
usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|aac|opus|mp4|mkv|pdf|txt>]",
|
||||
args=[
|
||||
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
|
||||
CmdletArg("-output", description="Override output file path."),
|
||||
CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
|
||||
],
|
||||
details=[
|
||||
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
|
||||
"- Audio files merge with minimal quality loss using specified codec.",
|
||||
"- Video files merge into MP4 or MKV containers.",
|
||||
"- PDF files merge into a single PDF document.",
|
||||
"- Text/document files are concatenated.",
|
||||
"- Output name derived from first file with ' (merged)' suffix.",
|
||||
"- -delete flag removes all source files after successful merge.",
|
||||
],
|
||||
)
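# Illustrative usage (not part of the original commit): besides the pipeline form shown in the
# details above, the format-specific helpers can be exercised directly, e.g.
#   _merge_text([Path("a.txt"), Path("b.txt")], Path("a (merged).txt"))
# The file names here are assumptions for the example.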
|
||||
335
cmdlets/pipe.py
Normal file
335
cmdlets/pipe.py
Normal file
@@ -0,0 +1,335 @@
|
||||
from typing import Any, Dict, Sequence, List, Optional
|
||||
import sys
|
||||
import json
|
||||
import platform
|
||||
import socket
|
||||
import re
|
||||
import subprocess
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from helper.logger import log
|
||||
from result_table import ResultTable
|
||||
from .get_file import _get_fixed_ipc_pipe
|
||||
import pipeline as ctx
|
||||
|
||||
def _send_ipc_command(command: Dict[str, Any]) -> Optional[Any]:
|
||||
"""Send a command to the MPV IPC pipe and return the response."""
|
||||
ipc_pipe = _get_fixed_ipc_pipe()
|
||||
request = json.dumps(command) + "\n"
|
||||
|
||||
try:
|
||||
if platform.system() == 'Windows':
|
||||
# Windows named pipe
|
||||
# Opening in r+b mode to read response
|
||||
try:
|
||||
with open(ipc_pipe, 'r+b', buffering=0) as pipe:
|
||||
pipe.write(request.encode('utf-8'))
|
||||
pipe.flush()
|
||||
|
||||
# Read response
|
||||
# We'll try to read a line. This might block if MPV is unresponsive.
|
||||
response_line = pipe.readline()
|
||||
if response_line:
|
||||
return json.loads(response_line.decode('utf-8'))
|
||||
except FileNotFoundError:
|
||||
return None # MPV not running
|
||||
except Exception as e:
|
||||
log(f"Windows IPC Error: {e}", file=sys.stderr)
|
||||
return None
|
||||
else:
|
||||
# Unix socket
|
||||
af_unix = getattr(socket, 'AF_UNIX', None)
|
||||
if af_unix is None:
|
||||
log("Unix sockets not supported on this platform", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
sock = socket.socket(af_unix, socket.SOCK_STREAM)
|
||||
sock.settimeout(2.0)
|
||||
sock.connect(ipc_pipe)
|
||||
sock.sendall(request.encode('utf-8'))
|
||||
|
||||
# Read response
|
||||
response_data = b""
|
||||
while True:
|
||||
try:
|
||||
chunk = sock.recv(4096)
|
||||
if not chunk:
|
||||
break
|
||||
response_data += chunk
|
||||
if b"\n" in chunk:
|
||||
break
|
||||
except socket.timeout:
|
||||
break
|
||||
|
||||
sock.close()
|
||||
|
||||
if response_data:
|
||||
# Parse lines, look for response to our request
|
||||
lines = response_data.decode('utf-8').strip().split('\n')
|
||||
for line in lines:
|
||||
try:
|
||||
resp = json.loads(line)
|
||||
# If it has 'error' field, it's a response
|
||||
if 'error' in resp:
|
||||
return resp
|
||||
except Exception:
|
||||
pass
|
||||
except (FileNotFoundError, ConnectionRefusedError):
|
||||
return None # MPV not running
|
||||
except Exception as e:
|
||||
log(f"Unix IPC Error: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
log(f"IPC Error: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
return None
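# Illustrative note (not part of the original commit): the wire format used above is mpv's JSON
# IPC protocol - one JSON object per line. A request/response exchange looks like:
#   -> {"command": ["get_property", "playlist"], "request_id": 100}
#   <- {"data": [{"filename": "...", "title": "...", "current": true}], "request_id": 100, "error": "success"}
# Asynchronous events (e.g. {"event": "file-loaded"}) may be interleaved on the same pipe, which
# is why the reader scans each received line for an object carrying an "error" field.
def _get_property_example(name: str) -> Optional[Any]:
    """Minimal sketch of a property read built on _send_ipc_command; the helper name is an assumption."""
    resp = _send_ipc_command({"command": ["get_property", name], "request_id": 1})
    if resp and resp.get("error") == "success":
        return resp.get("data")
    return None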
|
||||
|
||||
def _get_playlist() -> List[Dict[str, Any]]:
|
||||
"""Get the current playlist from MPV."""
|
||||
cmd = {"command": ["get_property", "playlist"], "request_id": 100}
|
||||
resp = _send_ipc_command(cmd)
|
||||
if resp and resp.get("error") == "success":
|
||||
return resp.get("data", [])
|
||||
return []
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Manage and play items in the MPV playlist via IPC."""
|
||||
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
|
||||
# Handle positional index argument if provided
|
||||
index_arg = parsed.get("index")
|
||||
|
||||
clear_mode = parsed.get("clear")
|
||||
list_mode = parsed.get("list")
|
||||
|
||||
# Handle piped input (add to playlist)
|
||||
if result:
|
||||
# If result is a list of items, add them to playlist
|
||||
items_to_add = []
|
||||
if isinstance(result, list):
|
||||
items_to_add = result
|
||||
elif isinstance(result, dict):
|
||||
items_to_add = [result]
|
||||
|
||||
added_count = 0
|
||||
for i, item in enumerate(items_to_add):
|
||||
# Extract URL/Path
|
||||
target = None
|
||||
title = None
|
||||
|
||||
if isinstance(item, dict):
|
||||
target = item.get("target") or item.get("url") or item.get("path")
|
||||
title = item.get("title") or item.get("name")
|
||||
elif hasattr(item, "target"):
|
||||
target = item.target
|
||||
title = getattr(item, "title", None)
|
||||
elif isinstance(item, str):
|
||||
target = item
|
||||
|
||||
if target:
|
||||
# Add to MPV playlist
|
||||
# We use loadfile with append flag
|
||||
# Configure 1080p limit for streams (bestvideo<=1080p + bestaudio)
|
||||
options = {
|
||||
"ytdl-format": "bestvideo[height<=?1080]+bestaudio/best[height<=?1080]"
|
||||
}
|
||||
|
||||
if title:
|
||||
options["force-media-title"] = title
|
||||
|
||||
cmd = {"command": ["loadfile", target, "append", options], "request_id": 200}
|
||||
resp = _send_ipc_command(cmd)
|
||||
|
||||
if resp is None:
|
||||
# MPV not running (or died)
|
||||
# Start MPV with remaining items
|
||||
_start_mpv(items_to_add[i:])
|
||||
return 0
|
||||
elif resp.get("error") == "success":
|
||||
added_count += 1
|
||||
if title:
|
||||
log(f"Queued: {title}")
|
||||
else:
|
||||
log(f"Queued: {target}")
|
||||
|
||||
if added_count > 0:
|
||||
# If we added items, we might want to play the first one if nothing is playing?
|
||||
# For now, just list the playlist
|
||||
pass
|
||||
|
||||
# Get playlist from MPV
|
||||
items = _get_playlist()
|
||||
|
||||
if not items:
|
||||
log("MPV playlist is empty or MPV is not running.")
|
||||
return 0
|
||||
|
||||
# If index is provided, perform action (Play or Clear)
|
||||
if index_arg is not None:
|
||||
try:
|
||||
# Handle 1-based index
|
||||
idx = int(index_arg) - 1
|
||||
|
||||
if idx < 0 or idx >= len(items):
|
||||
log(f"Index {index_arg} out of range (1-{len(items)}).")
|
||||
return 1
|
||||
|
||||
item = items[idx]
|
||||
title = item.get("title") or item.get("filename") or "Unknown"
|
||||
|
||||
if clear_mode:
|
||||
# Remove item
|
||||
cmd = {"command": ["playlist-remove", idx], "request_id": 101}
|
||||
resp = _send_ipc_command(cmd)
|
||||
if resp and resp.get("error") == "success":
|
||||
log(f"Removed: {title}")
|
||||
# Refresh items for listing
|
||||
items = _get_playlist()
|
||||
list_mode = True
|
||||
index_arg = None
|
||||
else:
|
||||
log(f"Failed to remove item: {resp.get('error') if resp else 'No response'}")
|
||||
return 1
|
||||
else:
|
||||
# Play item
|
||||
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
|
||||
resp = _send_ipc_command(cmd)
|
||||
if resp and resp.get("error") == "success":
|
||||
log(f"Playing: {title}")
|
||||
return 0
|
||||
else:
|
||||
log(f"Failed to play item: {resp.get('error') if resp else 'No response'}")
|
||||
return 1
|
||||
|
||||
except ValueError:
|
||||
log(f"Invalid index: {index_arg}")
|
||||
return 1
|
||||
|
||||
# List items (Default action or after clear)
|
||||
if list_mode or index_arg is None:
|
||||
if not items:
|
||||
log("MPV playlist is empty.")
|
||||
return 0
|
||||
|
||||
table = ResultTable("MPV Playlist")
|
||||
|
||||
for i, item in enumerate(items):
|
||||
is_current = item.get("current", False)
|
||||
title = item.get("title") or ""
|
||||
filename = item.get("filename") or ""
|
||||
|
||||
# Special handling for memory:// M3U playlists (used to pass titles via IPC)
|
||||
if "memory://" in filename and "#EXTINF:" in filename:
|
||||
try:
|
||||
# Extract title from #EXTINF:-1,Title
|
||||
# Use regex to find title between #EXTINF:-1, and newline
|
||||
match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", filename)
|
||||
if match:
|
||||
extracted_title = match.group(1).strip()
|
||||
if not title or title == "memory://":
|
||||
title = extracted_title
|
||||
|
||||
# Extract actual URL
|
||||
# Find the first line that looks like a URL and not a directive
|
||||
lines = filename.splitlines()
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#') and not line.startswith('memory://'):
|
||||
filename = line
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Truncate if too long
|
||||
if len(title) > 57:
|
||||
title = title[:57] + "..."
|
||||
if len(filename) > 27:
|
||||
filename = filename[:27] + "..."
|
||||
|
||||
row = table.add_row()
|
||||
row.add_column("#", str(i + 1))
|
||||
row.add_column("Current", "*" if is_current else "")
|
||||
row.add_column("Title", title)
|
||||
row.add_column("Filename", filename)
|
||||
|
||||
table.set_row_selection_args(i, [str(i + 1)])
|
||||
|
||||
table.set_source_command(".pipe")
|
||||
|
||||
# Register results with pipeline context so @N selection works
|
||||
ctx.set_last_result_table_overlay(table, items)
|
||||
ctx.set_current_stage_table(table)
|
||||
|
||||
print(table)
|
||||
|
||||
return 0
|
||||
|
||||
def _start_mpv(items: List[Any]) -> None:
|
||||
"""Start MPV with a list of items."""
|
||||
ipc_pipe = _get_fixed_ipc_pipe()
|
||||
|
||||
cmd = ['mpv', f'--input-ipc-server={ipc_pipe}']
|
||||
cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]')
|
||||
|
||||
# Add items
|
||||
first_title_set = False
|
||||
|
||||
for item in items:
|
||||
target = None
|
||||
title = None
|
||||
|
||||
if isinstance(item, dict):
|
||||
target = item.get("target") or item.get("url") or item.get("path")
|
||||
title = item.get("title") or item.get("name")
|
||||
elif hasattr(item, "target"):
|
||||
target = item.target
|
||||
title = getattr(item, "title", None)
|
||||
elif isinstance(item, str):
|
||||
target = item
|
||||
|
||||
if target:
|
||||
if not first_title_set and title:
|
||||
cmd.append(f'--force-media-title={title}')
|
||||
first_title_set = True
|
||||
cmd.append(target)
|
||||
|
||||
if len(cmd) > 3: # mpv + ipc + format + at least one file
|
||||
try:
|
||||
kwargs = {}
|
||||
if platform.system() == 'Windows':
|
||||
kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS
|
||||
|
||||
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs)
|
||||
log(f"Started MPV with {len(cmd)-3} items")
|
||||
except Exception as e:
|
||||
log(f"Error starting MPV: {e}", file=sys.stderr)
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name=".pipe",
|
||||
aliases=["pipe", "playlist", "queue", "ls-pipe"],
|
||||
summary="Manage and play items in the MPV playlist via IPC",
|
||||
usage=".pipe [index] [-clear]",
|
||||
args=[
|
||||
CmdletArg(
|
||||
name="index",
|
||||
type="int",
|
||||
description="Index of item to play or clear",
|
||||
required=False
|
||||
),
|
||||
CmdletArg(
|
||||
name="clear",
|
||||
type="flag",
|
||||
description="Remove the selected item from the playlist"
|
||||
),
|
||||
CmdletArg(
|
||||
name="list",
|
||||
type="flag",
|
||||
description="List items (default)"
|
||||
),
|
||||
],
|
||||
exec=_run
|
||||
)
|
||||
|
||||
739
cmdlets/screen_shot.py
Normal file
739
cmdlets/screen_shot.py
Normal file
@@ -0,0 +1,739 @@
|
||||
"""Screen-shot cmdlet for capturing screenshots of URLs in a pipeline.
|
||||
|
||||
This cmdlet processes files through the pipeline and creates screenshots using
|
||||
Playwright, marking them as temporary artifacts for cleanup.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import importlib
import json
|
||||
import sys
|
||||
import time
|
||||
import httpx
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
from urllib.parse import urlsplit, quote, urljoin
|
||||
|
||||
from helper.logger import log
|
||||
from helper.http_client import HTTPClient
|
||||
|
||||
from . import register
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input
|
||||
import models
|
||||
import pipeline as pipeline_context
|
||||
|
||||
# ============================================================================
|
||||
# CMDLET Metadata Declaration
|
||||
# ============================================================================
|
||||
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Playwright & Screenshot Dependencies
|
||||
# ============================================================================
|
||||
|
||||
try:
|
||||
from playwright.sync_api import (
|
||||
TimeoutError as PlaywrightTimeoutError,
|
||||
ViewportSize,
|
||||
sync_playwright,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise RuntimeError(
|
||||
"playwright is required for screenshot capture; install with 'pip install playwright'"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
from config import resolve_output_dir
|
||||
except ImportError:
|
||||
try:
|
||||
_parent_dir = str(Path(__file__).parent.parent)
|
||||
if _parent_dir not in sys.path:
|
||||
sys.path.insert(0, _parent_dir)
|
||||
from config import resolve_output_dir
|
||||
except ImportError:
|
||||
resolve_output_dir = None
|
||||
|
||||
# ============================================================================
|
||||
# Screenshot Constants & Configuration
|
||||
# ============================================================================
|
||||
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200}
|
||||
ARCHIVE_TIMEOUT = 30.0
|
||||
|
||||
|
||||
class ScreenshotError(RuntimeError):
|
||||
"""Raised when screenshot capture or upload fails."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ScreenshotOptions:
|
||||
"""Options controlling screenshot capture and post-processing."""
|
||||
|
||||
url: str
|
||||
output_dir: Path
|
||||
output_path: Optional[Path] = None
|
||||
full_page: bool = True
|
||||
headless: bool = True
|
||||
wait_after_load: float = 2.0
|
||||
wait_for_article: bool = False
|
||||
replace_video_posters: bool = True
|
||||
tags: Sequence[str] = ()
|
||||
archive: bool = False
|
||||
archive_timeout: float = ARCHIVE_TIMEOUT
|
||||
known_urls: Sequence[str] = ()
|
||||
output_format: Optional[str] = None
|
||||
prefer_platform_target: bool = False
|
||||
target_selectors: Optional[Sequence[str]] = None
|
||||
selector_timeout_ms: int = 10_000
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ScreenshotResult:
|
||||
"""Details about the captured screenshot."""
|
||||
|
||||
path: Path
|
||||
url: str
|
||||
tags_applied: List[str]
|
||||
archive_urls: List[str]
|
||||
known_urls: List[str]
|
||||
warnings: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helper Functions
|
||||
# ============================================================================
|
||||
|
||||
def _ensure_directory(path: Path) -> None:
|
||||
"""Ensure directory exists."""
|
||||
if not isinstance(path, Path):
|
||||
path = Path(path)
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def _unique_path(path: Path) -> Path:
|
||||
"""Get unique path by appending numbers if file exists."""
|
||||
if not path.exists():
|
||||
return path
|
||||
stem = path.stem
|
||||
suffix = path.suffix
|
||||
parent = path.parent
|
||||
counter = 1
|
||||
while True:
|
||||
new_path = parent / f"{stem}_{counter}{suffix}"
|
||||
if not new_path.exists():
|
||||
return new_path
|
||||
counter += 1
|
||||
|
||||
|
||||
def _unique_preserve_order(items: Sequence[str]) -> List[str]:
|
||||
"""Remove duplicates while preserving order."""
|
||||
seen = set()
|
||||
result = []
|
||||
for item in items:
|
||||
if item not in seen:
|
||||
seen.add(item)
|
||||
result.append(item)
|
||||
return result
|
||||
|
||||
|
||||
def _slugify_url(url: str) -> str:
|
||||
"""Convert URL to filesystem-safe slug."""
|
||||
parsed = urlsplit(url)
|
||||
candidate = f"{parsed.netloc}{parsed.path}"
|
||||
if parsed.query:
|
||||
candidate += f"?{parsed.query}"
|
||||
slug = "".join(char if char.isalnum() else "-" for char in candidate.lower())
|
||||
slug = slug.strip("-") or "screenshot"
|
||||
return slug[:100]
|
||||
|
||||
|
||||
def _normalise_format(fmt: Optional[str]) -> str:
|
||||
"""Normalize output format to valid values."""
|
||||
if not fmt:
|
||||
return "png"
|
||||
value = fmt.strip().lower()
|
||||
if value in {"jpg", "jpeg"}:
|
||||
return "jpeg"
|
||||
if value in {"png", "pdf"}:
|
||||
return value
|
||||
return "png"
|
||||
|
||||
|
||||
def _format_suffix(fmt: str) -> str:
|
||||
"""Get file suffix for format."""
|
||||
if fmt == "jpeg":
|
||||
return ".jpg"
|
||||
return f".{fmt}"
|
||||
|
||||
|
||||
def _selectors_for_url(url: str) -> List[str]:
|
||||
"""Return a list of likely content selectors for known platforms."""
|
||||
u = url.lower()
|
||||
sels: List[str] = []
|
||||
# Twitter/X
|
||||
if "twitter.com" in u or "x.com" in u:
|
||||
sels.extend([
|
||||
"article[role='article']",
|
||||
"div[data-testid='tweet']",
|
||||
"div[data-testid='cellInnerDiv'] article",
|
||||
])
|
||||
# Instagram
|
||||
if "instagram.com" in u:
|
||||
sels.extend([
|
||||
"article[role='presentation']",
|
||||
"article[role='article']",
|
||||
"div[role='dialog'] article",
|
||||
"section main article",
|
||||
])
|
||||
# Reddit
|
||||
if "reddit.com" in u:
|
||||
sels.extend([
|
||||
"shreddit-post",
|
||||
"div[data-testid='post-container']",
|
||||
"div[data-click-id='background']",
|
||||
"article",
|
||||
])
|
||||
# Rumble (video post)
|
||||
if "rumble.com" in u:
|
||||
sels.extend([
|
||||
"rumble-player, iframe.rumble",
|
||||
"div.video-item--main",
|
||||
"main article",
|
||||
])
|
||||
return sels or ["article"]
|
||||
|
||||
|
||||
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
|
||||
"""Best-effort page tweaks for popular platforms before capture."""
|
||||
u = url.lower()
|
||||
|
||||
def _try_click_texts(texts: List[str], passes: int = 2, per_timeout: int = 700) -> int:
|
||||
clicks = 0
|
||||
for _ in range(max(1, passes)):
|
||||
for t in texts:
|
||||
try:
|
||||
page.locator(f"text=/{t}/i").first.click(timeout=per_timeout)
|
||||
clicks += 1
|
||||
except PlaywrightTimeoutError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
time.sleep(0.1)
|
||||
return clicks
|
||||
|
||||
# Dismiss common cookie/consent prompts
|
||||
_try_click_texts(["accept", "i agree", "agree", "got it", "allow all", "consent"])
|
||||
|
||||
# Platform-specific expansions
|
||||
if "reddit.com" in u:
|
||||
_try_click_texts(["see more", "read more", "show more", "more"])
|
||||
if ("twitter.com" in u) or ("x.com" in u):
|
||||
_try_click_texts(["show more", "more"])
|
||||
if "instagram.com" in u:
|
||||
_try_click_texts(["more", "see more"])
|
||||
if "tiktok.com" in u:
|
||||
_try_click_texts(["more", "see more"])
|
||||
if ("facebook.com" in u) or ("fb.watch" in u):
|
||||
_try_click_texts(["see more", "show more", "more"])
|
||||
if "rumble.com" in u:
|
||||
_try_click_texts(["accept", "agree", "close"])
|
||||
|
||||
|
||||
def _submit_wayback(url: str, timeout: float) -> Optional[str]:
|
||||
"""Submit URL to Internet Archive Wayback Machine."""
|
||||
encoded = quote(url, safe="/:?=&")
|
||||
with HTTPClient() as client:
|
||||
response = client.get(f"https://web.archive.org/save/{encoded}")
|
||||
response.raise_for_status()
|
||||
content_location = response.headers.get("Content-Location")
|
||||
if content_location:
|
||||
return urljoin("https://web.archive.org", content_location)
|
||||
return str(response.url)
|
||||
|
||||
|
||||
def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
|
||||
"""Submit URL to Archive.today."""
|
||||
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
|
||||
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
|
||||
response = client.get(f"https://archive.today/submit/?url={encoded}")
|
||||
response.raise_for_status()
|
||||
final = str(response.url)
|
||||
if final and ("archive.today" in final or "archive.ph" in final):
|
||||
return final
|
||||
return None
|
||||
|
||||
|
||||
def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
|
||||
"""Submit URL to Archive.ph."""
|
||||
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
|
||||
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
|
||||
response = client.get(f"https://archive.ph/submit/?url={encoded}")
|
||||
response.raise_for_status()
|
||||
final = str(response.url)
|
||||
if final and "archive.ph" in final:
|
||||
return final
|
||||
return None
|
||||
|
||||
|
||||
def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
|
||||
"""Submit URL to all available archive services."""
|
||||
archives: List[str] = []
|
||||
warnings: List[str] = []
|
||||
for submitter, label in (
|
||||
(_submit_wayback, "wayback"),
|
||||
(_submit_archive_today, "archive.today"),
|
||||
(_submit_archive_ph, "archive.ph"),
|
||||
):
|
||||
try:
|
||||
log(f"Archiving to {label}...", flush=True)
|
||||
archived = submitter(url, timeout)
|
||||
except httpx.HTTPStatusError as exc:
|
||||
if exc.response.status_code == 429:
|
||||
warnings.append(f"archive {label} rate limited (HTTP 429)")
|
||||
log(f"{label}: Rate limited (HTTP 429)", flush=True)
|
||||
else:
|
||||
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
|
||||
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
|
||||
except httpx.RequestError as exc:
|
||||
warnings.append(f"archive {label} failed: {exc}")
|
||||
log(f"{label}: Connection error: {exc}", flush=True)
|
||||
except Exception as exc:
|
||||
warnings.append(f"archive {label} failed: {exc}")
|
||||
log(f"{label}: {exc}", flush=True)
|
||||
else:
|
||||
if archived:
|
||||
archives.append(archived)
|
||||
log(f"{label}: Success - {archived}", flush=True)
|
||||
else:
|
||||
log(f"{label}: No archive link returned", flush=True)
|
||||
return archives, warnings
|
||||
|
||||
|
||||
def _prepare_output_path(options: ScreenshotOptions) -> Path:
|
||||
"""Prepare and validate output path for screenshot."""
|
||||
_ensure_directory(options.output_dir)
|
||||
explicit_format = _normalise_format(options.output_format) if options.output_format else None
|
||||
inferred_format: Optional[str] = None
|
||||
if options.output_path is not None:
|
||||
path = options.output_path
|
||||
if not path.is_absolute():
|
||||
path = options.output_dir / path
|
||||
suffix = path.suffix.lower()
|
||||
if suffix:
|
||||
inferred_format = _normalise_format(suffix[1:])
|
||||
else:
|
||||
stamp = time.strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"{_slugify_url(options.url)}_{stamp}"
|
||||
path = options.output_dir / filename
|
||||
final_format = explicit_format or inferred_format or "png"
|
||||
if not path.suffix:
|
||||
path = path.with_suffix(_format_suffix(final_format))
|
||||
else:
|
||||
current_suffix = path.suffix.lower()
|
||||
expected = _format_suffix(final_format)
|
||||
if current_suffix != expected:
|
||||
path = path.with_suffix(expected)
|
||||
options.output_format = final_format
|
||||
return _unique_path(path)
|
||||
|
||||
|
||||
def _capture_with_playwright(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
|
||||
"""Capture screenshot using Playwright."""
|
||||
playwright = None
|
||||
browser = None
|
||||
context = None
|
||||
try:
|
||||
log("Starting Playwright...", flush=True)
|
||||
playwright = sync_playwright().start()
|
||||
log("Launching Chromium browser...", flush=True)
|
||||
format_name = _normalise_format(options.output_format)
|
||||
headless = options.headless or format_name == "pdf"
|
||||
if format_name == "pdf" and not options.headless:
|
||||
warnings.append("pdf output requires headless Chromium; overriding headless mode")
|
||||
browser = playwright.chromium.launch(
|
||||
headless=headless,
|
||||
args=["--disable-blink-features=AutomationControlled"],
|
||||
)
|
||||
log("Creating browser context...", flush=True)
|
||||
context = browser.new_context(
|
||||
user_agent=USER_AGENT,
|
||||
viewport=DEFAULT_VIEWPORT,
|
||||
ignore_https_errors=True,
|
||||
)
|
||||
page = context.new_page()
|
||||
log(f"Navigating to {options.url}...", flush=True)
|
||||
try:
|
||||
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
|
||||
log("Page loaded successfully", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("navigation timeout; capturing current page state")
|
||||
log("Navigation timeout; proceeding with current state", flush=True)
|
||||
|
||||
# Skip article lookup by default (wait_for_article defaults to False)
|
||||
if options.wait_for_article:
|
||||
try:
|
||||
log("Waiting for article element...", flush=True)
|
||||
page.wait_for_selector("article", timeout=10_000)
|
||||
log("Article element found", flush=True)
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("<article> selector not found; capturing fallback")
|
||||
log("Article element not found; using fallback", flush=True)
|
||||
|
||||
if options.wait_after_load > 0:
|
||||
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
|
||||
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||
if options.replace_video_posters:
|
||||
log("Replacing video elements with posters...", flush=True)
|
||||
page.evaluate(
|
||||
"""
|
||||
document.querySelectorAll('video').forEach(v => {
|
||||
if (v.poster) {
|
||||
const img = document.createElement('img');
|
||||
img.src = v.poster;
|
||||
img.style.maxWidth = '100%';
|
||||
img.style.borderRadius = '12px';
|
||||
v.replaceWith(img);
|
||||
}
|
||||
});
|
||||
"""
|
||||
)
|
||||
# Attempt platform-specific target capture if requested (and not PDF)
|
||||
element_captured = False
|
||||
if options.prefer_platform_target and format_name != "pdf":
|
||||
log("Attempting platform-specific content capture...", flush=True)
|
||||
try:
|
||||
_platform_preprocess(options.url, page, warnings)
|
||||
except Exception:
|
||||
pass
|
||||
selectors = list(options.target_selectors or [])
|
||||
if not selectors:
|
||||
selectors = _selectors_for_url(options.url)
|
||||
for sel in selectors:
|
||||
try:
|
||||
log(f"Trying selector: {sel}", flush=True)
|
||||
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
|
||||
except PlaywrightTimeoutError:
|
||||
log(f"Selector not found: {sel}", flush=True)
|
||||
continue
|
||||
try:
|
||||
if el is not None:
|
||||
log(f"Found element with selector: {sel}", flush=True)
|
||||
try:
|
||||
el.scroll_into_view_if_needed(timeout=1000)
|
||||
except Exception:
|
||||
pass
|
||||
log(f"Capturing element to {destination}...", flush=True)
|
||||
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
|
||||
element_captured = True
|
||||
log("Element captured successfully", flush=True)
|
||||
break
|
||||
except Exception as exc:
|
||||
warnings.append(f"element capture failed for '{sel}': {exc}")
|
||||
log(f"Failed to capture element: {exc}", flush=True)
|
||||
# Fallback to default capture paths
|
||||
if element_captured:
|
||||
pass
|
||||
elif format_name == "pdf":
|
||||
log("Generating PDF...", flush=True)
|
||||
page.emulate_media(media="print")
|
||||
page.pdf(path=str(destination), print_background=True)
|
||||
log(f"PDF saved to {destination}", flush=True)
|
||||
else:
|
||||
log(f"Capturing full page to {destination}...", flush=True)
|
||||
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
|
||||
if format_name == "jpeg":
|
||||
screenshot_kwargs["type"] = "jpeg"
|
||||
screenshot_kwargs["quality"] = 90
|
||||
if options.full_page:
|
||||
page.screenshot(full_page=True, **screenshot_kwargs)
|
||||
else:
|
||||
article = page.query_selector("article")
|
||||
if article is not None:
|
||||
article_kwargs = dict(screenshot_kwargs)
|
||||
article_kwargs.pop("full_page", None)
|
||||
article.screenshot(**article_kwargs)
|
||||
else:
|
||||
page.screenshot(**screenshot_kwargs)
|
||||
log(f"Screenshot saved to {destination}", flush=True)
|
||||
except Exception as exc:
|
||||
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
|
||||
finally:
|
||||
log("Cleaning up browser resources...", flush=True)
|
||||
with contextlib.suppress(Exception):
|
||||
if context is not None:
|
||||
context.close()
|
||||
with contextlib.suppress(Exception):
|
||||
if browser is not None:
|
||||
browser.close()
|
||||
with contextlib.suppress(Exception):
|
||||
if playwright is not None:
|
||||
playwright.stop()
|
||||
log("Cleanup complete", flush=True)
|
||||
|
||||
|
||||
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
|
||||
"""Capture a screenshot for the given options."""
|
||||
destination = _prepare_output_path(options)
|
||||
warnings: List[str] = []
|
||||
_capture_with_playwright(options, destination, warnings)
|
||||
|
||||
known_urls = _unique_preserve_order([options.url, *options.known_urls])
|
||||
archive_urls: List[str] = []
|
||||
if options.archive:
|
||||
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
|
||||
archive_urls.extend(archives)
|
||||
warnings.extend(archive_warnings)
|
||||
if archives:
|
||||
known_urls = _unique_preserve_order([*known_urls, *archives])
|
||||
|
||||
applied_tags = _unique_preserve_order(list(tag for tag in options.tags if tag.strip()))
|
||||
|
||||
return ScreenshotResult(
|
||||
path=destination,
|
||||
url=options.url,
|
||||
tags_applied=applied_tags,
|
||||
archive_urls=archive_urls,
|
||||
known_urls=known_urls,
|
||||
warnings=warnings,
|
||||
)
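# Illustrative sketch (not part of the original commit): the capture helpers above can also be
# driven outside the cmdlet pipeline. The URL and output directory here are assumptions.
def _capture_example() -> None:
    opts = ScreenshotOptions(
        url="https://example.com",
        output_dir=Path.home() / "Videos",
        output_format="jpeg",
        archive=False,
    )
    shot = _capture_screenshot(opts)
    log(f"saved {shot.path} with {len(shot.warnings)} warning(s)", flush=True)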
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Main Cmdlet Function
|
||||
# ============================================================================
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Take screenshots of URLs in the pipeline.
|
||||
|
||||
Accepts:
|
||||
- Single result object (dict or PipeObject) with 'file_path' field
|
||||
- List of result objects to screenshot each
|
||||
- Direct URL as string
|
||||
|
||||
Emits PipeObject-formatted results for each screenshot with:
|
||||
- action: 'cmdlet:screen-shot'
|
||||
- is_temp: True (screenshots are temporary artifacts)
|
||||
- parent_id: hash of the original file/URL
|
||||
|
||||
Screenshots are created using Playwright and marked as temporary
|
||||
so they can be cleaned up later with the cleanup cmdlet.
|
||||
"""
|
||||
from ._shared import parse_cmdlet_args
|
||||
|
||||
# Help check
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ========================================================================
|
||||
# ARGUMENT PARSING
|
||||
# ========================================================================
|
||||
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
|
||||
format_value = parsed.get("format")
|
||||
storage_value = parsed.get("storage")
|
||||
selector_arg = parsed.get("selector")
|
||||
selectors = [selector_arg] if selector_arg else []
|
||||
archive_enabled = parsed.get("archive", False)
|
||||
|
||||
# Positional URL argument (if provided)
|
||||
url_arg = parsed.get("url")
|
||||
positional_urls = [str(url_arg)] if url_arg else []
|
||||
|
||||
# ========================================================================
|
||||
# INPUT PROCESSING - Extract URLs from pipeline or command arguments
|
||||
# ========================================================================
|
||||
|
||||
piped_results = normalize_result_input(result)
|
||||
urls_to_process = []
|
||||
|
||||
# Extract URLs from piped results
|
||||
if piped_results:
|
||||
for item in piped_results:
|
||||
url = None
|
||||
if isinstance(item, dict):
|
||||
url = item.get('file_path') or item.get('path') or item.get('url') or item.get('target')
|
||||
else:
|
||||
url = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'url', None) or getattr(item, 'target', None)
|
||||
|
||||
if url:
|
||||
urls_to_process.append(str(url))
|
||||
|
||||
# Use positional arguments if no pipeline input
|
||||
if not urls_to_process and positional_urls:
|
||||
urls_to_process = positional_urls
|
||||
|
||||
if not urls_to_process:
|
||||
log(f"No URLs to process for screen-shot cmdlet", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# ========================================================================
|
||||
# OUTPUT DIRECTORY RESOLUTION - Priority chain
|
||||
# ========================================================================
|
||||
|
||||
screenshot_dir: Optional[Path] = None
|
||||
|
||||
# Primary: Use --storage if provided (highest priority)
|
||||
if storage_value:
|
||||
try:
|
||||
screenshot_dir = SharedArgs.resolve_storage(storage_value)
|
||||
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
|
||||
except ValueError as e:
|
||||
log(str(e), file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Secondary: Use config-based resolver ONLY if --storage not provided
|
||||
if screenshot_dir is None and resolve_output_dir is not None:
|
||||
try:
|
||||
screenshot_dir = resolve_output_dir(config)
|
||||
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
|
||||
if screenshot_dir is None and config and config.get("outfile"):
|
||||
try:
|
||||
screenshot_dir = Path(config["outfile"]).expanduser()
|
||||
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Default: User's Videos directory
|
||||
if screenshot_dir is None:
|
||||
screenshot_dir = Path.home() / "Videos"
|
||||
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
|
||||
|
||||
_ensure_directory(screenshot_dir)
|
||||
|
||||
# ========================================================================
|
||||
# PREPARE SCREENSHOT OPTIONS
|
||||
# ========================================================================
|
||||
|
||||
format_name = _normalise_format(format_value)
|
||||
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
|
||||
target_selectors = filtered_selectors if filtered_selectors else None
|
||||
|
||||
all_emitted = []
|
||||
exit_code = 0
|
||||
# ========================================================================
|
||||
# PROCESS URLs AND CAPTURE SCREENSHOTS
|
||||
# ========================================================================
|
||||
|
||||
for url in urls_to_process:
|
||||
# Validate URL format
|
||||
if not url.lower().startswith(("http://", "https://", "file://")):
|
||||
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
try:
|
||||
# Create screenshot with provided options
|
||||
options = ScreenshotOptions(
|
||||
url=url,
|
||||
output_dir=screenshot_dir,
|
||||
output_format=format_name,
|
||||
archive=archive_enabled,
|
||||
target_selectors=target_selectors,
|
||||
prefer_platform_target=False,
|
||||
wait_for_article=False,
|
||||
full_page=True,
|
||||
)
|
||||
|
||||
screenshot_result = _capture_screenshot(options)
|
||||
|
||||
# Log results and warnings
|
||||
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
|
||||
if screenshot_result.archive_urls:
|
||||
log(f"Archives: {', '.join(screenshot_result.archive_urls)}", flush=True)
|
||||
for warning in screenshot_result.warnings:
|
||||
log(f"Warning: {warning}", flush=True)
|
||||
|
||||
# Compute hash of screenshot file
|
||||
screenshot_hash = None
|
||||
try:
|
||||
with open(screenshot_result.path, 'rb') as f:
|
||||
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Create PipeObject result - marked as TEMP since derivative artifact
|
||||
pipe_obj = create_pipe_object_result(
|
||||
source='screenshot',
|
||||
identifier=Path(screenshot_result.path).stem,
|
||||
file_path=str(screenshot_result.path),
|
||||
cmdlet_name='screen-shot',
|
||||
title=f"Screenshot: {Path(screenshot_result.path).name}",
|
||||
file_hash=screenshot_hash,
|
||||
is_temp=True,
|
||||
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
|
||||
extra={
|
||||
'source_url': url,
|
||||
'archive_urls': screenshot_result.archive_urls,
|
||||
'known_urls': screenshot_result.known_urls,
|
||||
'target': str(screenshot_result.path), # Explicit target for add-file
|
||||
}
|
||||
)
|
||||
|
||||
# Emit the result so downstream cmdlets (like add-file) can use it
|
||||
pipeline_context.emit(pipe_obj)
|
||||
all_emitted.append(pipe_obj)
|
||||
|
||||
except ScreenshotError as exc:
|
||||
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||
exit_code = 1
|
||||
except Exception as exc:
|
||||
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
exit_code = 1
|
||||
|
||||
if not all_emitted:
|
||||
log(f"No screenshots were successfully captured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Log completion message
|
||||
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
|
||||
|
||||
return exit_code
|
||||
CMDLET = Cmdlet(
|
||||
name="screen-shot",
|
||||
summary="Capture a screenshot of a URL or file and mark as temporary artifact",
|
||||
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
|
||||
aliases=["screenshot", "ss"],
|
||||
args=[
|
||||
CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"),
|
||||
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
|
||||
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
|
||||
SharedArgs.ARCHIVE, # Use shared archive argument
|
||||
SharedArgs.STORAGE, # Use shared storage argument
|
||||
],
|
||||
details=[
|
||||
"Take screenshots of URLs with optional archiving and element targeting.",
|
||||
"Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.",
|
||||
"",
|
||||
"Arguments:",
|
||||
" url URL to capture (optional if piped from pipeline)",
|
||||
" --format FORMAT Output format: png (default), jpeg, or pdf",
|
||||
" --selector SEL CSS selector for capturing specific element",
|
||||
" --archive, -arch Archive URL to Wayback/Archive.today/Archive.ph",
|
||||
" --storage LOCATION Storage destination: hydrus, local, 0x0, debrid, or ftp",
|
||||
"",
|
||||
"Examples:",
|
||||
" download-data https://example.com | screen-shot --storage local",
|
||||
" download-data https://twitter.com/user/status/123 | screen-shot --selector 'article[role=article]' --storage hydrus --archive",
|
||||
" screen-shot https://example.com --format jpeg --storage 0x0 --archive",
|
||||
]
|
||||
)
|
||||
351
cmdlets/search_file.py
Normal file
351
cmdlets/search_file.py
Normal file
@@ -0,0 +1,351 @@
|
||||
"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable
|
||||
from fnmatch import fnmatchcase
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
from helper.logger import log, debug
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from helper.file_storage import FileStorage
|
||||
from helper.search_provider import get_provider, list_providers, SearchResult
|
||||
from metadata import import_pending_sidecars
|
||||
|
||||
from . import register
|
||||
from ._shared import Cmdlet, CmdletArg
|
||||
import models
|
||||
import pipeline as ctx
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
import mutagen # type: ignore
|
||||
except ImportError: # pragma: no cover
|
||||
mutagen = None # type: ignore
|
||||
|
||||
try:
|
||||
from config import get_hydrus_url, resolve_output_dir
|
||||
except Exception: # pragma: no cover
|
||||
get_hydrus_url = None # type: ignore
|
||||
resolve_output_dir = None # type: ignore
|
||||
|
||||
try:
|
||||
from helper.hydrus import HydrusClient, HydrusRequestError
|
||||
except ImportError: # pragma: no cover
|
||||
HydrusClient = None # type: ignore
|
||||
HydrusRequestError = RuntimeError # type: ignore
|
||||
|
||||
try:
|
||||
from helper.utils import sha256_file
|
||||
except ImportError: # pragma: no cover
|
||||
sha256_file = None # type: ignore
|
||||
|
||||
try:
|
||||
from helper.utils_constant import mime_maps
|
||||
except ImportError: # pragma: no cover
|
||||
mime_maps = {} # type: ignore
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Data Classes (from helper/search.py)
|
||||
# ============================================================================
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SearchRecord:
|
||||
path: str
|
||||
size_bytes: int | None = None
|
||||
duration_seconds: str | None = None
|
||||
tags: str | None = None
|
||||
hash_hex: str | None = None
|
||||
|
||||
def as_dict(self) -> dict[str, str]:
|
||||
payload: dict[str, str] = {"path": self.path}
|
||||
if self.size_bytes is not None:
|
||||
payload["size"] = str(self.size_bytes)
|
||||
if self.duration_seconds:
|
||||
payload["duration"] = self.duration_seconds
|
||||
if self.tags:
|
||||
payload["tags"] = self.tags
|
||||
if self.hash_hex:
|
||||
payload["hash"] = self.hash_hex
|
||||
return payload
|
||||
|
||||
|
||||
@dataclass
|
||||
class ResultItem:
|
||||
origin: str
|
||||
title: str
|
||||
detail: str
|
||||
annotations: List[str]
|
||||
target: str
|
||||
media_kind: str = "other"
|
||||
hash_hex: Optional[str] = None
|
||||
columns: List[tuple[str, str]] = field(default_factory=list)
|
||||
tag_summary: Optional[str] = None
|
||||
duration_seconds: Optional[float] = None
|
||||
size_bytes: Optional[int] = None
|
||||
full_metadata: Optional[Dict[str, Any]] = None
|
||||
tags: Optional[set[str]] = field(default_factory=set)
|
||||
relationships: Optional[List[str]] = field(default_factory=list)
|
||||
known_urls: Optional[List[str]] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {
|
||||
"title": self.title,
|
||||
}
|
||||
|
||||
# Always include these core fields for downstream cmdlets (get-file, download-data, etc)
|
||||
payload["origin"] = self.origin
|
||||
payload["target"] = self.target
|
||||
payload["media_kind"] = self.media_kind
|
||||
|
||||
# Always include full_metadata if present (needed by download-data, etc)
|
||||
# This is NOT for display, but for downstream processing
|
||||
if self.full_metadata:
|
||||
payload["full_metadata"] = self.full_metadata
|
||||
|
||||
# Include columns if defined (result renderer will use these for display)
|
||||
if self.columns:
|
||||
payload["columns"] = list(self.columns)
|
||||
else:
|
||||
# If no columns, include the detail for backwards compatibility
|
||||
payload["detail"] = self.detail
|
||||
payload["annotations"] = list(self.annotations)
|
||||
|
||||
# Include optional fields
|
||||
if self.hash_hex:
|
||||
payload["hash"] = self.hash_hex
|
||||
if self.tag_summary:
|
||||
payload["tags"] = self.tag_summary
|
||||
if self.tags:
|
||||
payload["tags_set"] = list(self.tags)
|
||||
if self.relationships:
|
||||
payload["relationships"] = self.relationships
|
||||
if self.known_urls:
|
||||
payload["known_urls"] = self.known_urls
|
||||
return payload
|
||||
|
||||
|
||||
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
|
||||
|
||||
|
||||
def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Attach Title/Store columns for storage-origin results to keep CLI display compact."""
|
||||
origin_value = str(payload.get("origin") or payload.get("source") or "").lower()
|
||||
if origin_value not in STORAGE_ORIGINS:
|
||||
return payload
|
||||
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
|
||||
store_label = payload.get("origin") or payload.get("source") or origin_value
|
||||
normalized = dict(payload)
|
||||
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
|
||||
return normalized
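# Illustrative example (not part of the original commit): given a storage hit such as
#   {"origin": "hydrus", "title": "song.flac", "target": "...", "hash": "..."}
# _ensure_storage_columns() returns the same payload plus a compact display layout:
#   {"origin": "hydrus", ..., "columns": [("Title", "song.flac"), ("Store", "hydrus")]}
# Non-storage origins (anything outside STORAGE_ORIGINS) are passed through unchanged.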
|
||||
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="search-file",
|
||||
summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).",
|
||||
usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]",
|
||||
args=[
|
||||
CmdletArg("query", description="Search query string"),
|
||||
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
|
||||
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
|
||||
CmdletArg("type", description="Filter by type: audio, video, image, document"),
|
||||
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
|
||||
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
|
||||
CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"),
|
||||
CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
|
||||
],
|
||||
details=[
|
||||
"Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)",
|
||||
"Use -provider to search a specific source, or -storage to search file backends",
|
||||
"Filter results by: tag, size, type, duration",
|
||||
"Results can be piped to other commands",
|
||||
"Examples:",
|
||||
"search-file foo # Search all file backends",
|
||||
"search-file -provider libgen 'python programming' # Search LibGen books",
|
||||
"search-file -provider debrid 'movie' # Search AllDebrid magnets",
|
||||
"search-file 'music' -provider soulseek # Search Soulseek P2P",
|
||||
"search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
|
||||
"search-file song -storage hydrus -type audio # Search only Hydrus audio",
|
||||
"search-file movie -tag action -provider debrid # Debrid with filters",
|
||||
],
|
||||
)
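# Illustrative sketch (not part of the original commit): _run below declares size_filter
# (Tuple[str, int]) and duration_filter (Tuple[str, float]) but its simple argument loop never
# populates them. Parsers matching the usage strings above (-size >100MB, -duration >10:00)
# could look like this; the helper names are assumptions.
def _parse_size_filter_example(spec: str) -> Optional[Tuple[str, int]]:
    import re
    m = re.match(r'^([<>=])\s*(\d+(?:\.\d+)?)\s*(KB|MB|GB|TB)?$', spec.strip(), re.IGNORECASE)
    if not m:
        return None
    op, number, unit = m.group(1), float(m.group(2)), (m.group(3) or 'B').upper()
    factor = {'B': 1, 'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}[unit]
    return op, int(number * factor)

def _parse_duration_filter_example(spec: str) -> Optional[Tuple[str, float]]:
    op, value = spec[0], spec[1:]  # e.g. ">10:00" or "<1:30:00"
    if op not in '<>=' or not value:
        return None
    seconds = 0.0
    for part in value.split(':'):
        seconds = seconds * 60 + float(part)
    return op, seconds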
|
||||
|
||||
|
||||
@register(["search-file", "search"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
|
||||
args_list = [str(arg) for arg in (args or [])]
|
||||
|
||||
# Parse arguments
|
||||
query = ""
|
||||
tag_filters: List[str] = []
|
||||
size_filter: Optional[Tuple[str, int]] = None
|
||||
duration_filter: Optional[Tuple[str, float]] = None
|
||||
type_filter: Optional[str] = None
|
||||
storage_backend: Optional[str] = None
|
||||
provider_name: Optional[str] = None
|
||||
limit = 100
|
||||
|
||||
# Simple argument parsing
|
||||
i = 0
|
||||
while i < len(args_list):
|
||||
arg = args_list[i]
|
||||
low = arg.lower()
|
||||
|
||||
if low in {"-provider", "--provider"} and i + 1 < len(args_list):
|
||||
provider_name = args_list[i + 1].lower()
|
||||
i += 2
|
||||
elif low in {"-storage", "--storage"} and i + 1 < len(args_list):
|
||||
storage_backend = args_list[i + 1].lower()
|
||||
i += 2
|
||||
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
|
||||
tag_filters.append(args_list[i + 1])
|
||||
i += 2
|
||||
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
||||
try:
|
||||
limit = int(args_list[i + 1])
|
||||
except ValueError:
|
||||
limit = 100
|
||||
i += 2
|
||||
elif low in {"-type", "--type"} and i + 1 < len(args_list):
|
||||
type_filter = args_list[i + 1].lower()
|
||||
i += 2
|
||||
elif not query and not arg.startswith("-"):
|
||||
query = arg
|
||||
i += 1
|
||||
else:
|
||||
i += 1
|
||||
|
||||
if not query:
|
||||
log("Provide a search query", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Initialize worker for this search command
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from config import get_local_storage_path
|
||||
import uuid
|
||||
worker_id = str(uuid.uuid4())
|
||||
library_root = get_local_storage_path(config or {})
|
||||
if not library_root:
|
||||
log("No library root configured", file=sys.stderr)
|
||||
return 1
|
||||
db = LocalLibraryDB(library_root)
|
||||
db.insert_worker(
|
||||
worker_id,
|
||||
"search",
|
||||
title=f"Search: {query}",
|
||||
description=f"Query: {query}",
|
||||
pipe=ctx.get_current_command_text()
|
||||
)
|
||||
|
||||
try:
|
||||
results_list = []
|
||||
|
||||
# Try to search using provider (libgen, soulseek, debrid, openlibrary)
|
||||
if provider_name:
|
||||
debug(f"[search_file] Attempting provider search with: {provider_name}")
|
||||
provider = get_provider(provider_name, config)
|
||||
if not provider:
|
||||
log(f"Provider '{provider_name}' not available", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
return 1
|
||||
|
||||
debug(f"[search_file] Provider loaded, calling search with query: {query}")
|
||||
search_result = provider.search(query, limit=limit)
|
||||
debug(f"[search_file] Provider search returned {len(search_result)} results")
|
||||
|
||||
for item in search_result:
|
||||
item_dict = item.to_dict()
|
||||
results_list.append(item_dict)
|
||||
ctx.emit(item_dict)
|
||||
|
||||
debug(f"[search_file] Emitted {len(results_list)} results")
|
||||
|
||||
# Write results to worker stdout
|
||||
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
return 0
|
||||
|
||||
# Otherwise search using FileStorage (Hydrus, Local, Debrid backends)
|
||||
from helper.file_storage import FileStorage
|
||||
storage = FileStorage(config=config or {})
|
||||
|
||||
backend_to_search = storage_backend or None
|
||||
if backend_to_search:
|
||||
# Check if requested backend is available
|
||||
if backend_to_search == "hydrus":
|
||||
from helper.hydrus import is_hydrus_available
|
||||
if not is_hydrus_available(config or {}):
|
||||
log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
return 1
|
||||
if not storage.supports_search(backend_to_search):
|
||||
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
return 1
|
||||
results = storage[backend_to_search].search(query, limit=limit)
|
||||
else:
|
||||
# Search all searchable backends, but skip hydrus if unavailable
|
||||
from helper.hydrus import is_hydrus_available
|
||||
hydrus_available = is_hydrus_available(config or {})
|
||||
|
||||
all_results = []
|
||||
for backend_name in storage.list_searchable_backends():
|
||||
# Skip hydrus if not available
|
||||
if backend_name == "hydrus" and not hydrus_available:
|
||||
continue
|
||||
try:
|
||||
backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
|
||||
if backend_results:
|
||||
all_results.extend(backend_results)
|
||||
if len(all_results) >= limit:
|
||||
break
|
||||
except Exception as exc:
|
||||
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
|
||||
results = all_results[:limit]
|
||||
|
||||
# Emit results and collect for workers table
|
||||
if results:
|
||||
for item in results:
|
||||
if isinstance(item, dict):
|
||||
normalized = _ensure_storage_columns(item)
|
||||
results_list.append(normalized)
|
||||
ctx.emit(normalized)
|
||||
elif isinstance(item, ResultItem):
|
||||
item_dict = item.to_dict()
|
||||
results_list.append(item_dict)
|
||||
ctx.emit(item_dict)
|
||||
else:
|
||||
item_dict = {"title": str(item)}
|
||||
results_list.append(item_dict)
|
||||
ctx.emit(item_dict)
|
||||
|
||||
# Write results to worker stdout
|
||||
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||
else:
|
||||
log("No results found", file=sys.stderr)
|
||||
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
|
||||
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
return 0
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Search failed: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
return 1
|
||||
|
||||
finally:
|
||||
# Always close the database connection
|
||||
try:
|
||||
db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
325
cmdlets/worker.py
Normal file
325
cmdlets/worker.py
Normal file
@@ -0,0 +1,325 @@
"""Worker cmdlet: Display workers table in ResultTable format."""
from __future__ import annotations

from typing import Any, Dict, Sequence, List
import json
import sys
from datetime import datetime, timezone

from . import register
from ._shared import Cmdlet, CmdletArg
import pipeline as ctx
from helper.logger import log
from config import get_local_storage_path


CMDLET = Cmdlet(
    name=".worker",
    summary="Display workers table in result table format.",
    usage=".worker [status] [-limit N] [@N]",
    args=[
        CmdletArg("status", description="Filter by status: running, completed, error (default: all)"),
        CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
        CmdletArg("@N", description="Select worker by index (1-based) and display full logs"),
    ],
    details=[
        "- Shows all background worker tasks and their output",
        "- Can filter by status: running, completed, error",
        "- Search result stdout is captured from each worker",
        "- Use @N to select a specific worker by index and display its full logs",
        "Examples:",
        ".worker                       # Show all workers",
        ".worker running               # Show running workers only",
        ".worker completed -limit 50   # Show 50 most recent completed workers",
        ".worker @3                    # Show full logs for the 3rd worker",
        ".worker running @2            # Show full logs for the 2nd running worker",
    ],
)

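Because the handler below is registered under ".worker", "worker", and "workers", callers reach it through the package registry rather than importing this module directly. A hedged sketch of that dispatch; the empty config dict is an assumption for illustration, and a real caller passes the application's loaded configuration:

import cmdlets

handler = cmdlets.get("workers")   # any alias resolves to the same callable
if handler is not None:
    # result=None (no piped input), args as the shell would tokenize them
    exit_code = handler(None, ["running", "-limit", "10"], {})

With an empty config the library-root lookup will likely come back empty, so the cmdlet exits early with "No library root configured", which is the guard visible a few lines into _run.
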
@register([".worker", "worker", "workers"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Display workers table or show detailed logs for a specific worker."""
|
||||
args_list = [str(arg) for arg in (args or [])]
|
||||
selection_indices = ctx.get_last_selection()
|
||||
selection_requested = bool(selection_indices) and isinstance(result, list) and len(result) > 0
|
||||
|
||||
# Parse arguments for list view
|
||||
status_filter: str | None = None
|
||||
limit = 100
|
||||
clear_requested = False
|
||||
worker_id_arg: str | None = None
|
||||
i = 0
|
||||
while i < len(args_list):
|
||||
arg = args_list[i]
|
||||
low = arg.lower()
|
||||
if low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
||||
try:
|
||||
limit = max(1, int(args_list[i + 1]))
|
||||
except ValueError:
|
||||
limit = 100
|
||||
i += 2
|
||||
elif low in {"-id", "--id"} and i + 1 < len(args_list):
|
||||
worker_id_arg = args_list[i + 1]
|
||||
i += 2
|
||||
elif low in {"-clear", "--clear"}:
|
||||
clear_requested = True
|
||||
i += 1
|
||||
elif low in {"running", "completed", "error", "cancelled"}:
|
||||
status_filter = low
|
||||
i += 1
|
||||
elif not arg.startswith("-"):
|
||||
status_filter = low
|
||||
i += 1
|
||||
else:
|
||||
i += 1
|
||||
|
||||
try:
|
||||
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
library_root = get_local_storage_path(config or {})
|
||||
if not library_root:
|
||||
log("No library root configured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
from helper.local_library import LocalLibraryDB
|
||||
db: LocalLibraryDB | None = None
|
||||
try:
|
||||
db = LocalLibraryDB(library_root)
|
||||
if clear_requested:
|
||||
count = db.clear_finished_workers()
|
||||
log(f"Cleared {count} finished workers.")
|
||||
return 0
|
||||
|
||||
if worker_id_arg:
|
||||
worker = db.get_worker(worker_id_arg)
|
||||
if worker:
|
||||
events = []
|
||||
try:
|
||||
wid = worker.get("worker_id")
|
||||
if wid and hasattr(db, "get_worker_events"):
|
||||
events = db.get_worker_events(wid)
|
||||
except Exception:
|
||||
pass
|
||||
_emit_worker_detail(worker, events)
|
||||
return 0
|
||||
else:
|
||||
log(f"Worker not found: {worker_id_arg}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if selection_requested:
|
||||
return _render_worker_selection(db, result)
|
||||
return _render_worker_list(db, status_filter, limit)
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
except Exception as exc:
|
||||
log(f"Workers query failed: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
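The while-loop above folds flags and an optional bare status word into four values. A quick trace of representative invocations (the worker id is invented for illustration; arguments are assumed to arrive already tokenized):

# ".worker"                      -> status_filter=None,        limit=100
# ".worker running"              -> status_filter="running",   limit=100
# ".worker completed -limit 50"  -> status_filter="completed", limit=50
# ".worker -id abc123"           -> worker_id_arg="abc123"     (detail view)
# ".worker -clear"               -> clear_requested=True       (prunes finished workers)
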
def _render_worker_list(db, status_filter: str | None, limit: int) -> int:
    workers = db.get_all_workers(limit=limit)
    if status_filter:
        workers = [w for w in workers if str(w.get("status", "")).lower() == status_filter]

    if not workers:
        log("No workers found", file=sys.stderr)
        return 0

    for worker in workers:
        started = worker.get("started_at", "")
        ended = worker.get("completed_at", worker.get("last_updated", ""))

        date_str = _extract_date(started)
        start_time = _format_event_timestamp(started)
        end_time = _format_event_timestamp(ended)

        item = {
            "columns": [
                ("Status", worker.get("status", "")),
                ("Pipe", _summarize_pipe(worker.get("pipe"))),
                ("Date", date_str),
                ("Start Time", start_time),
                ("End Time", end_time),
            ],
            "__worker_metadata": worker,
            "_selection_args": ["-id", worker.get("worker_id")],
        }
        ctx.emit(item)
    return 0

def _render_worker_selection(db, selected_items: Any) -> int:
    if not isinstance(selected_items, list):
        log("Selection payload missing", file=sys.stderr)
        return 1

    emitted = False
    for item in selected_items:
        worker = _resolve_worker_record(db, item)
        if not worker:
            continue
        events = []
        try:
            events = db.get_worker_events(worker.get("worker_id")) if hasattr(db, "get_worker_events") else []
        except Exception:
            events = []
        _emit_worker_detail(worker, events)
        emitted = True
    if not emitted:
        log("Selected rows no longer exist", file=sys.stderr)
        return 1
    return 0

def _resolve_worker_record(db, payload: Any) -> Dict[str, Any] | None:
    if not isinstance(payload, dict):
        return None
    worker_data = payload.get("__worker_metadata")
    worker_id = None
    if isinstance(worker_data, dict):
        worker_id = worker_data.get("worker_id")
    else:
        worker_id = payload.get("worker_id")
        worker_data = None
    if worker_id:
        fresh = db.get_worker(worker_id)
        if fresh:
            return fresh
    return worker_data if isinstance(worker_data, dict) else None

def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> None:
    # Parse stdout logs into rows
    stdout_content = worker.get("stdout", "") or ""

    # Try to parse lines if they follow the standard log format
    # Format: YYYY-MM-DD HH:MM:SS - name - level - message
    lines = stdout_content.splitlines()

    for line in lines:
        line = line.strip()
        if not line:
            continue

        # Default values
        timestamp = ""
        level = "INFO"
        message = line

        # Try to parse standard format
        try:
            parts = line.split(" - ", 3)
            if len(parts) >= 4:
                # Full format
                ts_str, _, lvl, msg = parts
                timestamp = _format_event_timestamp(ts_str)
                level = lvl
                message = msg
            elif len(parts) == 3:
                # Missing name or level
                ts_str, lvl, msg = parts
                timestamp = _format_event_timestamp(ts_str)
                level = lvl
                message = msg
        except Exception:
            pass

        item = {
            "columns": [
                ("Time", timestamp),
                ("Level", level),
                ("Message", message),
            ]
        }
        ctx.emit(item)

    # Also emit events if available and not redundant
    # (For now, just focusing on stdout logs as requested)

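The stdout parser above keys off str.split(" - ", 3): four parts is the full "timestamp - name - level - message" shape, three parts drops the logger name, and anything else keeps the raw line with level INFO. A small standalone check, with sample lines invented for illustration:

line = "2024-03-01 14:07:55 - worker - INFO - Download finished"
parts = line.split(" - ", 3)
# parts == ['2024-03-01 14:07:55', 'worker', 'INFO', 'Download finished']
ts_str, _, lvl, msg = parts            # 4 parts: timestamp, name, level, message

bare = "no timestamp, just a message"
# bare.split(" - ", 3) == ['no timestamp, just a message'] -> neither branch matches,
# so the row keeps level "INFO" and the raw line as the message.
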
def _summarize_pipe(pipe_value: Any, limit: int = 60) -> str:
    text = str(pipe_value or "").strip()
    if not text:
        return "(none)"
    return text if len(text) <= limit else text[: limit - 3] + "..."

def _format_event_timestamp(raw_timestamp: Any) -> str:
    dt = _parse_to_local(raw_timestamp)
    if dt:
        return dt.strftime("%H:%M:%S")

    if not raw_timestamp:
        return "--:--:--"
    text = str(raw_timestamp)
    if "T" in text:
        time_part = text.split("T", 1)[1]
    elif " " in text:
        time_part = text.split(" ", 1)[1]
    else:
        time_part = text
    return time_part[:8] if len(time_part) >= 8 else time_part

def _parse_to_local(timestamp_str: Any) -> datetime | None:
    if not timestamp_str:
        return None
    text = str(timestamp_str).strip()
    if not text:
        return None

    try:
        # Check for T separator (Python isoformat - local time)
        if 'T' in text:
            return datetime.fromisoformat(text)

        # Check for space separator (SQLite CURRENT_TIMESTAMP - UTC)
        # Format: YYYY-MM-DD HH:MM:SS
        if ' ' in text:
            # Assume UTC
            dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
            dt = dt.replace(tzinfo=timezone.utc)
            return dt.astimezone()  # Convert to local

    except Exception:
        pass

    return None

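_parse_to_local distinguishes the two stored shapes by separator: a 'T' means a local isoformat string that can be used as-is, while a space means SQLite's CURRENT_TIMESTAMP, which is UTC and still needs converting. A short sketch of that UTC-to-local step (timestamps are illustrative):

from datetime import datetime, timezone

text = "2024-03-01 19:07:55"                      # SQLite CURRENT_TIMESTAMP shape (UTC)
dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
print(dt.astimezone().strftime("%H:%M:%S"))       # local wall-clock, e.g. 14:07:55 at UTC-5

print(datetime.fromisoformat("2024-03-01T14:07:55").strftime("%H:%M:%S"))  # 'T' form used as-is
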
def _extract_date(raw_timestamp: Any) -> str:
    dt = _parse_to_local(raw_timestamp)
    if dt:
        return dt.strftime("%m-%d-%y")

    # Fallback
    if not raw_timestamp:
        return ""
    text = str(raw_timestamp)
    # Extract YYYY-MM-DD part
    date_part = ""
    if "T" in text:
        date_part = text.split("T", 1)[0]
    elif " " in text:
        date_part = text.split(" ", 1)[0]
    else:
        date_part = text

    # Convert YYYY-MM-DD to MM-DD-YY
    try:
        parts = date_part.split("-")
        if len(parts) == 3:
            year, month, day = parts
            return f"{month}-{day}-{year[2:]}"
    except Exception:
        pass
    return date_part