This commit is contained in:
nose
2025-11-25 20:09:33 -08:00
parent d75c644a82
commit bd69119996
80 changed files with 39615 additions and 0 deletions

139
cmdlets/__init__.py Normal file

@@ -0,0 +1,139 @@
from __future__ import annotations
from typing import Any, Callable, Dict, Iterable, Sequence
from importlib import import_module as _import_module
# A cmdlet is a callable taking (result, args, config) -> int
Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]
# Registry of command-name -> cmdlet function
REGISTRY: Dict[str, Cmdlet] = {}
def register(names: Iterable[str]):
"""Decorator to register a function under one or more command names.
Usage:
@register(["add-tag", "add-tags"])
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
for name in names:
REGISTRY[name.replace('_', '-').lower()] = fn
return fn
return _wrap
class AutoRegister:
"""Decorator that automatically registers a cmdlet function using CMDLET.aliases.
Usage:
CMDLET = Cmdlet(
name="delete-file",
aliases=["del", "del-file"],
...
)
@AutoRegister(CMDLET)
def _run(result, args, config) -> int:
...
Registers the cmdlet under:
- Its main name from CMDLET.name
- All aliases from CMDLET.aliases
This allows the help display to show: "cmd: delete-file | aliases: del, del-file"
"""
def __init__(self, cmdlet):
self.cmdlet = cmdlet
def __call__(self, fn: Cmdlet) -> Cmdlet:
"""Register fn for the main name and all aliases in cmdlet."""
normalized_name = None
# Register for main name first
if hasattr(self.cmdlet, 'name') and self.cmdlet.name:
normalized_name = self.cmdlet.name.replace('_', '-').lower()
REGISTRY[normalized_name] = fn
# Register for all aliases
if hasattr(self.cmdlet, 'aliases') and self.cmdlet.aliases:
for alias in self.cmdlet.aliases:
normalized_alias = alias.replace('_', '-').lower()
# Always register (aliases are separate from main name)
REGISTRY[normalized_alias] = fn
return fn
def get(cmd_name: str) -> Cmdlet | None:
return REGISTRY.get(cmd_name.replace('_', '-').lower())
def format_cmd_help(cmdlet) -> str:
"""Format a cmdlet for help display showing cmd:name and aliases.
Example output: "cmd: delete-file | aliases: del, del-file"
"""
if not hasattr(cmdlet, 'name'):
return str(cmdlet)
cmd_str = f"cmd: {cmdlet.name}"
if hasattr(cmdlet, 'aliases') and cmdlet.aliases:
aliases_str = ", ".join(cmdlet.aliases)
cmd_str += f" | aliases: {aliases_str}"
return cmd_str
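# Illustrative sketch (not part of this commit) of how the registry is meant to
# be used by a dispatcher: look the command up with get() and call it with the
# (result, args, config) signature. The command name and arguments below are
# placeholders.
#
#     fn = get("add-tags")
#     if fn is not None:
#         exit_code = fn(selected_result, ["artist:Radiohead"], config)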
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
):
mod_name = filename[:-3]
try:
module = _import_module(f".{mod_name}", __name__)
# Auto-register based on CMDLET object with exec function
# This allows cmdlets to be fully self-contained in the CMDLET object
if hasattr(module, 'CMDLET'):
cmdlet_obj = module.CMDLET
# Get the execution function from the CMDLET object
run_fn = getattr(cmdlet_obj, 'exec', None) if hasattr(cmdlet_obj, 'exec') else None
if callable(run_fn):
# Register main name
if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
normalized_name = cmdlet_obj.name.replace('_', '-').lower()
REGISTRY[normalized_name] = run_fn
# Register all aliases
if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
for alias in cmdlet_obj.aliases:
normalized_alias = alias.replace('_', '-').lower()
REGISTRY[normalized_alias] = run_fn
except Exception:
continue
# Import root-level modules that also register cmdlets
# Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py
# Use search-file -provider libgen, -provider soulseek, or -provider debrid instead
for _root_mod in ("select_cmdlet",):
try:
_import_module(_root_mod)
except Exception:
# Allow missing optional modules
continue
# Also import helper modules that register cmdlets
try:
import helper.alldebrid as _alldebrid
except Exception:
pass

1229
cmdlets/_shared.py Normal file

File diff suppressed because it is too large

910
cmdlets/add_file.py Normal file

@@ -0,0 +1,910 @@
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
merge_sequences, extract_relationships, extract_duration
)
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file
# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
# Initialize file storage system
storage = FileStorage()
def _guess_media_kind_from_suffix(media_path: Path) -> str:
suffix = media_path.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
return 'audio'
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
return 'video'
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
return 'image'
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
return 'document'
return 'other'
def _resolve_media_kind(result: Any, media_path: Path) -> str:
if isinstance(result, models.PipeObject):
if getattr(result, 'media_kind', None):
return str(result.media_kind)
elif isinstance(result, dict):
media_kind = result.get('media_kind')
if media_kind:
return str(media_kind)
metadata = result.get('metadata')
if isinstance(metadata, dict) and metadata.get('media_kind'):
return str(metadata['media_kind'])
return _guess_media_kind_from_suffix(media_path)
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
# For local origin, try to read from local database first
if origin and origin.lower() == "local" and config:
try:
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
try:
db_root = get_local_storage_path(config)
except Exception:
db_root = None
if db_root:
try:
db = LocalLibraryDB(Path(db_root))
try:
# Get tags and metadata from database
tags = db.get_tags(media_path) or []
metadata = db.get_metadata(media_path) or {}
known_urls = metadata.get("known_urls") or []
file_hash = metadata.get("hash")
if tags or known_urls or file_hash:
debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
return None, file_hash, tags, known_urls
finally:
db.close()
except Exception as exc:
log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
except Exception:
pass
# Fall back to sidecar file lookup
try:
sidecar_path = find_sidecar(media_path)
except Exception:
sidecar_path = None
if not sidecar_path or not sidecar_path.exists():
return None, None, [], []
try:
hash_value, tags, known_urls = read_sidecar(sidecar_path)
return sidecar_path, hash_value, tags or [], known_urls or []
except Exception as exc:
log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
return sidecar_path, None, [], []
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
candidate = None
if isinstance(result, models.PipeObject):
candidate = result.file_hash
elif isinstance(result, dict):
candidate = result.get('file_hash') or result.get('hash')
candidate = candidate or fallback_hash
if candidate:
return str(candidate)
try:
return sha256_file(file_path)
except Exception as exc:
log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
return None
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
targets = [
media_path.parent / (media_path.name + '.metadata'),
media_path.parent / (media_path.name + '.notes'),
media_path.parent / (media_path.name + '.tags'),
media_path.parent / (media_path.name + '.tags.txt'),
]
targets.extend(extra_paths)
for target in targets:
if not target:
continue
try:
path_obj = Path(target)
if path_obj.exists():
path_obj.unlink()
except Exception:
continue
def _persist_local_metadata(
library_root: Path,
dest_path: Path,
tags: list[str],
known_urls: list[str],
file_hash: Optional[str],
relationships: Optional[Dict[str, Any]],
duration: Optional[float],
media_kind: str,
) -> None:
payload = {
'hash': file_hash,
'known_urls': known_urls,
'relationships': relationships or [],
'duration': duration,
'size': None,
'ext': dest_path.suffix.lower(),
'media_type': media_kind,
'media_kind': media_kind,
}
try:
payload['size'] = dest_path.stat().st_size
except OSError:
payload['size'] = None
try:
debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
db_path = Path(library_root) / ".downlow_library.db"
debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
with LocalLibraryDB(library_root) as db:
# Save metadata FIRST to ensure file entry is created in DB
if any(payload.values()):
debug(f"[_persist_local_metadata] Saving metadata payload first")
try:
db.save_metadata(dest_path, payload)
debug(f"[_persist_local_metadata] ✅ Metadata saved")
except Exception as meta_exc:
log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr)
raise
# Save tags to DB synchronously in same transaction
# For local storage, DB is the primary source of truth
if tags:
try:
debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB")
db.save_tags(dest_path, tags)
debug(f"[_persist_local_metadata] ✅ Tags saved to DB")
except Exception as tag_exc:
log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr)
raise
# NOTE: Sidecar files are intentionally NOT created for local storage
# Local storage uses database as primary source, not sidecar files
debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
except Exception as exc:
log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
import traceback
log(traceback.format_exc(), file=sys.stderr)
def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any, config: Optional[Dict[str, Any]] = None) -> Tuple[int, Optional[Path]]:
"""Transfer a file to local storage and return (exit_code, destination_path).
Args:
media_path: Path to source file
destination_root: Destination directory
result: Result object with metadata
config: Configuration dictionary
Returns:
Tuple of (exit_code, destination_path)
- exit_code: 0 on success, 1 on failure
- destination_path: Path to moved file on success, None on failure
"""
destination_root = destination_root.expanduser()
try:
destination_root.mkdir(parents=True, exist_ok=True)
except Exception as exc:
log(f"❌ Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
return 1, None
tags_from_result = extract_tags_from_result(result)
urls_from_result = extract_known_urls_from_result(result)
# Get origin from result if available
result_origin = None
if hasattr(result, "origin"):
result_origin = result.origin
elif isinstance(result, dict):
result_origin = result.get("origin") or result.get("source")
sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)
# Normalize all title tags to use spaces instead of underscores BEFORE merging
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
def normalize_title_tag(tag: str) -> str:
"""Normalize a title tag by replacing underscores with spaces."""
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
tags_from_result = [normalize_title_tag(t) for t in tags_from_result]
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
# Merge tags carefully: if URL has title tag, don't include sidecar title tags
# This prevents duplicate title: tags when URL provides a title
has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
if has_url_title:
# URL has a title, filter out any sidecar title tags to avoid duplication
sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
else:
# No URL title, use all sidecar tags
merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
relationships = extract_relationships(result)
duration = extract_duration(result)
try:
dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
except Exception as exc:
log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
return 1, None
dest_path = Path(dest_file)
file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
media_kind = _resolve_media_kind(result, dest_path)
# Ensure only ONE title tag that matches the actual filename
# Remove all existing title tags and add one based on the saved filename
merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")]
filename_title = dest_path.stem.replace("_", " ").strip()
if filename_title:
merged_tags_no_titles.insert(0, f"title:{filename_title}")
_persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind)
_cleanup_sidecar_files(media_path, sidecar_path)
debug(f"✅ Moved to local library: {dest_path}")
return 0, dest_path
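# Illustrative sketch (an assumption, not part of this commit) of the calling
# convention documented above; the source file and destination directory are
# placeholders.
#
#     exit_code, dest_path = _handle_local_transfer(
#         Path("track.mp3"), Path("/home/user/music"), result, config
#     )
#     if exit_code == 0 and dest_path is not None:
#         pass  # tags, URLs and hash were persisted to the local library DB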
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Upload/copy a file to specified location.
Returns 0 on success, non-zero on failure.
"""
import sys # For stderr output
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
debug("Starting add-file cmdlet")
# Handle list of results (from piped commands that emit multiple items)
if isinstance(result, list):
debug(f"Processing {len(result)} piped files")
success_count = 0
for item in result:
exit_code = _run(item, _args, config)
if exit_code == 0:
success_count += 1
return 0 if success_count > 0 else 1
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
location: Optional[str] = None
provider_name: Optional[str] = None
delete_after_upload = False
# Check if -path argument was provided to use direct file path instead of piped result
path_arg = parsed.get("path")
if path_arg:
# Create a pseudo-result object from the file path
media_path = Path(str(path_arg).strip())
if not media_path.exists():
log(f"❌ File not found: {media_path}")
return 1
# Create result dict with the file path and origin 'wild' for direct path inputs
result = {"target": str(media_path), "origin": "wild"}
log(f"Using direct file path: {media_path}")
# Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage"
location = parsed.get("storage")
if location:
location = str(location).lower().strip()
# Get file provider from parsed args
provider_name = parsed.get("provider")
if provider_name:
provider_name = str(provider_name).lower().strip()
# Check for delete flag (presence in parsed dict means it was provided)
delete_after_upload = "delete" in parsed
# Either storage or provider must be specified, but not both
if location is None and provider_name is None:
log("Either -storage or -provider must be specified")
log(" -storage options: 'hydrus', 'local', or a directory path")
log(" -provider options: '0x0'")
return 1
if location is not None and provider_name is not None:
log("❌ Cannot specify both -storage and -provider")
return 1
# Validate location (storage backends)
is_valid_location = False
if location is not None:
valid_locations = {'hydrus', 'local'}
is_valid_location = location in valid_locations
is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
if location is not None and not (is_valid_location or is_local_path):
log(f"❌ Invalid location: {location}")
log(f"Valid options: 'hydrus', '0x0', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
return 1
# Extract tags/known URLs from pipeline objects if available
pipe_object_tags = extract_tags_from_result(result)
if pipe_object_tags:
log(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
pipe_known_urls = extract_known_urls_from_result(result)
# Resolve media path: get from piped result
# Support both object attributes (getattr) and dict keys (get)
target = None
origin = None
# Try object attributes first
if hasattr(result, "target"):
target = result.target
elif hasattr(result, "path"):
target = result.path
elif hasattr(result, "file_path"):
target = result.file_path
# Try dict keys if object attributes failed
elif isinstance(result, dict):
target = (result.get("target") or result.get("path") or result.get("file_path") or
result.get("__file_path") or result.get("__path") or result.get("__target"))
# Get origin to detect Hydrus files
if hasattr(result, "origin"):
origin = result.origin
elif hasattr(result, "source"):
origin = result.source
elif isinstance(result, dict):
origin = result.get("origin") or result.get("source") or result.get("__source")
# Convert target to string and preserve URLs (don't let Path() mangle them)
target_str = str(target) if target else None
# Check if this is a playlist item that needs to be downloaded first
is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
# This is a playlist item URL - we need to download it first
log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
# Extract item number if available
item_num = None
if "__action" in result and result["__action"].startswith("playlist-item:"):
item_num = result["__action"].split(":")[1]
elif "index" in result:
item_num = result["index"]
# Call download-data to download this specific item
# Pass the item number so it knows which track to download
from cmdlets import download_data as dl_module
# Capture emissions from download-data to process them
captured_results = []
original_emit = ctx.emit
def capture_emit(obj):
captured_results.append(obj)
# Also emit to original so user sees progress/output if needed
# But since add-file is usually terminal, we might not need to
# original_emit(obj)
# Temporarily hook the pipeline emit function
ctx.emit = capture_emit
try:
if item_num:
# Pass a marker dict to tell download-data which item to get
download_result = dl_module._run(
{
"__playlist_url": str(target_str),
"__playlist_item": int(item_num)
},
[],
config
)
else:
# Fallback: just download the URL (will show all items)
download_result = dl_module._run(None, [str(target_str)], config)
finally:
# Restore original emit function
ctx.emit = original_emit
if download_result != 0:
log(f"❌ Failed to download playlist item", file=sys.stderr)
return 1
log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
# Process the downloaded files recursively
success_count = 0
for res in captured_results:
# Recursively call add-file with the downloaded result
# This ensures tags and metadata from download-data are applied
if _run(res, _args, config) == 0:
success_count += 1
return 0 if success_count > 0 else 1
# Determine media_path from result
media_path: Optional[Path] = None
is_hydrus_file = origin and origin.lower() == "hydrus"
if target_str:
# Check if it's a URL or Hydrus hash
if target_str.lower().startswith(("http://", "https://")):
media_path = None # Will handle as Hydrus file below
elif not is_hydrus_file:
# Only treat as local path if not a Hydrus file
media_path = Path(target_str)
if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
# Check if this is a format object from download-data
if isinstance(result, dict) and result.get('format_id') is not None:
log("❌ Format object received, but add-file expects a downloaded file")
log(f" Tip: Use @N to automatically select and download the format")
log(f" Streamlined workflow:")
log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
log(f" (The @N automatically expands to download-data \"URL\" -item N)")
return 1
log("❌ File not found: provide a piped file result or local file path")
return 1
# Check if this is a Hydrus file - fetch the actual file path from Hydrus
if is_hydrus_file and target_str:
log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
try:
from helper import hydrus
# Get the Hydrus client
client = hydrus.get_client(config)
if not client:
log(f"❌ Hydrus client unavailable", file=sys.stderr)
return 1
# target_str is the hash - need to get the actual file path from Hydrus
file_hash = target_str
# Call the /get_files/file_path endpoint to get the actual file path
response = client.get_file_path(file_hash)
if not response or not isinstance(response, dict):
log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
return 1
file_path_str = response.get("path")
if not file_path_str:
log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
return 1
media_path = Path(file_path_str)
if not media_path.exists():
log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
return 1
log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
except Exception as exc:
log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
import traceback
log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
return 1
# Generic URL handler: if target is a URL and we haven't resolved a local path yet
# This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
from cmdlets import download_data as dl_module
dl_args = []
if location:
dl_args.extend(["-storage", location])
# Map provider 0x0 to storage 0x0 for download-data
if provider_name == "0x0":
dl_args.extend(["-storage", "0x0"])
return dl_module._run(result, dl_args, config)
if media_path is None:
log("File path could not be resolved")
return 1
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}")
return 1
# Validate file type - only accept Hydrus-supported files
file_extension = media_path.suffix.lower()
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
log(f"Hydrus supports the following file types:", file=sys.stderr)
# Display by category from hydrus_wrapper
for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
log(f"Skipping this file: {media_path.name}", file=sys.stderr)
return 1
# Handle based on provider or storage
if provider_name is not None:
# Use file provider (e.g., 0x0.st)
from helper.search_provider import get_file_provider
log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
try:
file_provider = get_file_provider(provider_name, config)
if file_provider is None:
log(f"❌ File provider '{provider_name}' not available", file=sys.stderr)
return 1
hoster_url = file_provider.upload(media_path)
log(f"✅ File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
# Associate the URL with the file in Hydrus if possible
current_hash = locals().get('file_hash')
if not current_hash:
current_hash = _resolve_file_hash(result, None, media_path)
if current_hash:
try:
client = hydrus_wrapper.get_client(config)
if client:
client.associate_url(current_hash, hoster_url)
log(f"✅ Associated URL with file hash {current_hash}", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
except Exception as exc:
log(f"{provider_name} upload failed: {exc}", file=sys.stderr)
return 1
if delete_after_upload:
try:
media_path.unlink()
_cleanup_sidecar_files(media_path)
log(f"✅ Deleted file and sidecar", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
return 0
# Handle storage-based operations (location is not None here)
valid_locations = {'hydrus', 'local'}
is_valid_location = location in valid_locations
is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
if not (is_valid_location or is_local_path):
log(f"❌ Invalid location: {location}")
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
return 1
if location == 'local':
try:
from config import get_local_storage_path
resolved_dir = get_local_storage_path(config)
except Exception:
resolved_dir = None
if not resolved_dir:
resolved_dir = config.get("LocalDir") or config.get("OutputDir")
if not resolved_dir:
log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
return 1
log(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
# After successful local transfer, emit result for pipeline continuation
# This allows downstream commands like add-tags to chain automatically
if exit_code == 0 and dest_path:
# Extract tags from result for emission
emit_tags = extract_tags_from_result(result)
file_hash = _resolve_file_hash(result, None, dest_path)
# Extract title from original result, fallback to filename if not available
result_title = extract_title_from_result(result) or dest_path.name
# Always emit result for local files, even if no tags
# This allows @N selection and piping to downstream commands
result_dict = create_pipe_object_result(
source='local',
identifier=str(dest_path),
file_path=str(dest_path),
cmdlet_name='add-file',
title=result_title,
file_hash=file_hash,
tags=emit_tags if emit_tags else [],
target=str(dest_path) # Explicit target for get-file
)
ctx.emit(result_dict)
# Clear the stage table so downstream @N doesn't try to re-run download-data
# Next stage will use these local file results, not format objects
ctx.set_current_stage_table(None)
return exit_code
elif is_local_path:
try:
destination_root = Path(location)
except Exception as exc:
log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
return 1
log(f"Moving to local path: {destination_root}", file=sys.stderr)
exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config)
# After successful local transfer, emit result for pipeline continuation
if exit_code == 0 and dest_path:
# Extract tags from result for emission
emit_tags = extract_tags_from_result(result)
file_hash = _resolve_file_hash(result, None, dest_path)
# Extract title from original result, fallback to filename if not available
result_title = extract_title_from_result(result) or dest_path.name
# Always emit result for local files, even if no tags
# This allows @N selection and piping to downstream commands
result_dict = create_pipe_object_result(
source='local',
identifier=str(dest_path),
file_path=str(dest_path),
cmdlet_name='add-file',
title=result_title,
file_hash=file_hash,
tags=emit_tags if emit_tags else [],
target=str(dest_path) # Explicit target for get-file
)
ctx.emit(result_dict)
# Clear the stage table so downstream @N doesn't try to re-run download-data
# Next stage will use these local file results, not format objects
ctx.set_current_stage_table(None)
return exit_code
# location == 'hydrus'
# Compute file hash to check if already in Hydrus
log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
try:
file_hash = sha256_file(media_path)
except Exception as exc:
log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
return 1
log(f"File hash: {file_hash}", file=sys.stderr)
# Read sidecar tags and known URLs first (for tagging)
sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
if sidecar_path:
log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
if sidecar_tags:
log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
if sidecar_urls:
log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
else:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
# Normalize all title tags to use spaces instead of underscores BEFORE merging
# This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
def normalize_title_tag(tag: str) -> str:
"""Normalize a title tag by replacing underscores with spaces."""
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]
# Merge tags from PipeObject with tags from sidecar
# NOTE: Remove ALL existing title tags and use only filename-based title
# The filename is the source of truth for the title
tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
if not str(t).strip().lower().startswith("title:")]
# Ensure ONE title tag based on the actual filename
filename_title = media_path.stem.replace("_", " ").strip()
if filename_title:
tags = [f"title:{filename_title}"] + tags_without_titles
else:
tags = tags_without_titles
known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
if pipe_object_tags:
log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
# Write metadata to file before uploading (only for local storage, not for Hydrus)
# Hydrus stores tags separately, so we don't need to modify the file
if location != 'hydrus':
try:
if tags:
# Determine file kind from extension
file_kind = ''
sfx = media_path.suffix.lower()
if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
file_kind = 'audio'
elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
file_kind = 'video'
if embed_metadata_in_file(media_path, tags, file_kind):
log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
else:
log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr)
except Exception as exc:
log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
else:
log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)
# Use FileStorage backend to upload to Hydrus
try:
file_hash = storage["hydrus"].upload(
media_path,
config=config,
tags=tags,
)
log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
except Exception as exc:
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
return 1
# Associate known URLs in Hydrus metadata
url_count = 0
if known_urls:
try:
client = hydrus_wrapper.get_client(config)
if client:
for url in known_urls:
u = str(url or "").strip()
if not u:
continue
try:
client.associate_url(file_hash, u)
except Exception as exc:
log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
continue
url_count += 1
except Exception as exc:
log(f"Failed to associate URLs: {exc}", file=sys.stderr)
if url_count:
log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
else:
log(f"No URLs to associate", file=sys.stderr)
_cleanup_sidecar_files(media_path, sidecar_path)
# Update in-memory result for downstream pipes
try:
# Only update piped result objects; direct -path usage may have a dummy result
setattr(result, "hash_hex", file_hash)
# Preserve media_kind for downstream commands (e.g., open)
if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
# Try to infer media_kind from file extension or keep existing
suffix = media_path.suffix.lower()
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
setattr(result, "media_kind", "document")
if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
cols = list(getattr(result, "columns"))
if ("Hash", file_hash) not in cols:
cols.append(("Hash", file_hash))
setattr(result, "columns", cols)
except Exception:
pass
# If -delete flag is set, delete the file and .tags after successful upload
if delete_after_upload:
log(f"Deleting local files (as requested)...", file=sys.stderr)
try:
media_path.unlink()
log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
except OSError as exc:
log(f"Failed to delete file: {exc}", file=sys.stderr)
# Delete .tags sidecar if it exists
if sidecar_path is not None:
try:
sidecar_path.unlink()
log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
except OSError as exc:
log(f"Failed to delete sidecar: {exc}", file=sys.stderr)
log(f"✅ Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
# Emit result for Hydrus uploads so downstream commands know about it
if location == 'hydrus':
# Extract title from original result, fallback to filename if not available
result_title = extract_title_from_result(result) or media_path.name
result_dict = create_pipe_object_result(
source='hydrus',
identifier=file_hash,
file_path=f"hydrus:{file_hash}",
cmdlet_name='add-file',
title=result_title,
file_hash=file_hash,
extra={
'storage_source': 'hydrus',
'hydrus_hash': file_hash,
'tags': tags,
'known_urls': known_urls,
}
)
ctx.emit(result_dict)
# Clear the stage table so downstream @N doesn't try to re-run download-data
# Next stage will use these Hydrus file results, not format objects
ctx.set_current_stage_table(None)
return 0
CMDLET = Cmdlet(
name="add-file",
summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
args=[
CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
SharedArgs.STORAGE, # For hydrus, local, or directory paths
CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
],
details=[
"- Storage location options (use -storage):",
" hydrus: Upload to Hydrus database with metadata tagging",
" local: Copy file to local directory",
" <path>: Copy file to specified directory",
"- File provider options (use -provider):",
" 0x0: Upload to 0x0.st for temporary hosting with public URL",
"- Accepts files from official Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.",
"- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls",
"- When using file provider: uploads to service, adds URL to sidecar",
"- When copying locally: copies file with original metadata preserved",
"- Use -delete flag to automatically delete the file and .tags after successful operation.",
],
)
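# Example invocations (hedged sketch based on the usage string and details above;
# URLs and paths are placeholders):
#
#     download-data "URL" | @1 | add-file -storage local
#     add-file -path C:\Music\track.mp3 -storage hydrus
#     add-file -path ./paper.pdf -provider 0x0 -delete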

84
cmdlets/add_note.py Normal file

@@ -0,0 +1,84 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="add-note",
summary="Add or set a note on a Hydrus file.",
usage="add-note [-hash <sha256>] <name> <text>",
args=[
CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."),
CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True),
],
details=[
"- Notes are stored in the 'my notes' service by default.",
],
)
@register(["add-note", "set-note", "add_note"]) # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
name = parsed.get("name")
text_parts = parsed.get("text")
if not name:
log("Requires a note name")
return 1
name = str(name).strip()
if isinstance(text_parts, list):
text = " ".join(text_parts).strip()
else:
text = str(text_parts or "").strip()
if not text:
log("Empty note text")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
service_name = "my notes"
client.set_notes(hash_hex, {name: text}, service_name)
except Exception as exc:
log(f"Hydrus add-note failed: {exc}")
return 1
ctx.emit(f"Added note '{name}' ({len(text)} chars)")
return 0
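# Example invocations (hedged sketch; the note name, note text and hash are
# placeholders):
#
#     @1 | add-note comment "great quality rip"
#     add-note -hash <sha256> source "https://example.org/page"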

264
cmdlets/add_relationship.py Normal file

@@ -0,0 +1,264 @@
"""Add file relationships in Hydrus based on relationship tags in sidecar."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence
import json
import re
from pathlib import Path
import sys
from helper.logger import log
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.local_library import read_sidecar, find_sidecar
CMDLET = Cmdlet(
name="add-relationship",
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="add-relationship OR add-relationship -path <file>",
args=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
],
details=[
"- Reads relationship tags from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>')",
"- Calls Hydrus API to associate the hashes as relationships",
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
"- Works with piped file results or -path argument for direct invocation",
],
)
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
"""Normalize a hash hex string to lowercase 64-char format."""
if not value or not isinstance(value, str):
return None
normalized = value.strip().lower()
if len(normalized) == 64 and all(c in '0123456789abcdef' for c in normalized):
return normalized
return None
def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
"""Parse relationship tag like 'relationship: hash(king)<HASH>,hash(alt)<HASH>'.
Returns a dict like {"king": ["HASH1"], "alt": ["HASH2"], ...}
"""
result: Dict[str, list[str]] = {}
if not isinstance(tag_value, str):
return result
# Match patterns like hash(king)HASH or hash(type)HASH (no angle brackets)
pattern = r'hash\((\w+)\)([a-fA-F0-9]{64})'
matches = re.findall(pattern, tag_value)
for rel_type, hash_value in matches:
normalized = _normalise_hash_hex(hash_value)
if normalized:
if rel_type not in result:
result[rel_type] = []
result[rel_type].append(normalized)
return result
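# Worked example (illustrative; the hashes are shortened to repeated digits so
# they still satisfy the 64-hex-character pattern):
#
#     tag = "relationship: hash(king)" + "0" * 64 + ",hash(alt)" + "1" * 64
#     _extract_relationships_from_tag(tag)
#     # -> {"king": ["000...0"], "alt": ["111...1"]}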
@register(["add-relationship", "add-rel"]) # primary name and alias
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
Two modes of operation:
1. Read from sidecar: Looks for relationship tags in the file's sidecar (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>")
2. Pipeline mode: When piping multiple results, the first becomes "king" and subsequent items become "alt"
Returns 0 on success, non-zero on failure.
"""
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
if parsed:
# Get the first arg value (e.g., -path)
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
if first_arg_name and first_arg_name in parsed:
arg_value = parsed[first_arg_name]
try:
arg_path = Path(str(arg_value)).expanduser()
except Exception:
arg_path = Path(str(arg_value))
# Get Hydrus client
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Check if we're in pipeline mode (have a hash) or file mode
file_hash = getattr(result, "hash_hex", None)
# PIPELINE MODE: Track relationships across multiple items
if file_hash:
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Load or initialize king hash from pipeline context
try:
king_hash = ctx.load_value("relationship_king")
except Exception:
king_hash = None
# If this is the first item, make it the king
if not king_hash:
try:
ctx.store_value("relationship_king", file_hash)
log(f"Established king hash: {file_hash}", file=sys.stderr)
return 0 # First item just becomes the king, no relationships yet
except Exception:
pass
# If we already have a king and this is a different hash, link them
if king_hash and king_hash != file_hash:
try:
client.set_relationship(file_hash, king_hash, "alt")
log(
f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
return 0
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
return 0
# FILE MODE: Read relationships from sidecar
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
# Resolve media path from -path arg or result target
target = getattr(result, "target", None) or getattr(result, "path", None)
media_path = arg_path if arg_path is not None else Path(str(target)) if isinstance(target, str) else None
if media_path is None:
log("Provide -path <file> or pipe a local file result", file=sys.stderr)
return 1
# Validate local file
if str(media_path).lower().startswith(("http://", "https://")):
log("This cmdlet requires a local file path, not a URL", file=sys.stderr)
return 1
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}", file=sys.stderr)
return 1
# Build Hydrus client
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
# Read sidecar to find relationship tags
sidecar_path = find_sidecar(media_path)
if sidecar_path is None:
log(f"No sidecar found for {media_path.name}", file=sys.stderr)
return 1
try:
_, tags, _ = read_sidecar(sidecar_path)
except Exception as exc:
log(f"Failed to read sidecar: {exc}", file=sys.stderr)
return 1
# Find relationship tags (format: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>")
relationship_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("relationship:")]
if not relationship_tags:
log(f"No relationship tags found in sidecar", file=sys.stderr)
return 0 # Not an error, just nothing to do
# Get the file hash from result (should have been set by add-file)
file_hash = getattr(result, "hash_hex", None)
if not file_hash:
log("File hash not available (run add-file first)", file=sys.stderr)
return 1
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Parse relationships from tags and apply them
success_count = 0
error_count = 0
for rel_tag in relationship_tags:
try:
# Parse: "relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>"
rel_str = rel_tag.split(":", 1)[1].strip() # Get part after "relationship:"
# Parse relationships
rels = _extract_relationships_from_tag(f"relationship: {rel_str}")
# Set the relationships in Hydrus
for rel_type, related_hashes in rels.items():
if not related_hashes:
continue
for related_hash in related_hashes:
# Don't set relationship between hash and itself
if file_hash == related_hash:
continue
try:
client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",
file=sys.stderr
)
success_count += 1
except Exception as exc:
log(f"Failed to set {rel_type} relationship: {exc}", file=sys.stderr)
error_count += 1
except Exception as exc:
log(f"Failed to parse relationship tag: {exc}", file=sys.stderr)
error_count += 1
if success_count > 0:
log(f"Successfully set {success_count} relationship(s) for {media_path.name}", file=sys.stderr)
ctx.emit(f"add-relationship: {media_path.name} ({success_count} relationships set)")
return 0
elif error_count == 0:
log(f"No relationships to set", file=sys.stderr)
return 0 # Success with nothing to do
else:
log(f"Failed with {error_count} error(s)", file=sys.stderr)
return 1
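# Example pipeline usage (hedged sketch; the selection indices are placeholders):
#
#     1,2,3 | add-relationship
#
# In pipeline mode the first selected item becomes the "king" and every
# subsequent item is linked to it as an "alt", per the branch above.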

276
cmdlets/add_tags.py Normal file

@@ -0,0 +1,276 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
import json
from pathlib import Path
import sys
from helper.logger import log
from . import register
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from helper import hydrus as hydrus_wrapper
from helper.local_library import read_sidecar, write_sidecar, find_sidecar, has_sidecar, LocalLibraryDB
from metadata import rename_by_metadata
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args
from config import get_local_storage_path
CMDLET = Cmdlet(
name="add-tags",
summary="Add tags to a Hydrus file or write them to a local .tags sidecar.",
usage="add-tags [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
args=[
CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
CmdletArg("tags", type="string", required=True, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax.", variadic=True),
],
details=[
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
"- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
"- With a Hydrus hash, tags are sent to the 'my tags' service.",
"- Multiple tags can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
],
)
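# Example invocations (hedged sketch; tag values and list names are placeholders):
#
#     @1 | add-tags "artist:Radiohead, album:OK Computer"
#     @1 | add-tags -duplicate title:album,artist    # copy title: into album:/artist:
#     @1 | add-tags -list philosophy,occult          # expand lists from adjective.json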
@register(["add-tag", "add-tags"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add tags to a file with smart filtering for pipeline results."""
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
# Check for --all flag
include_temp = parsed.get("all", False)
# Normalize input to list
results = normalize_result_input(result)
# Filter by temp status (unless --all is set)
if not include_temp:
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
return 1
# Get tags from arguments
raw_tags = parsed.get("tags", [])
if isinstance(raw_tags, str):
raw_tags = [raw_tags]
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
l = l.strip()
if l:
raw_tags.append(f"{{{l}}}")
# Parse and expand tags
tags_to_add = parse_tag_arguments(raw_tags)
tags_to_add = expand_tag_groups(tags_to_add)
# Get other flags
hash_override = normalize_hash(parsed.get("hash"))
duplicate_arg = parsed.get("duplicate")
# If no tags provided (and no list), write sidecar files with embedded tags
# Note: Since 'tags' is required=True in CMDLET, this block might be unreachable via CLI
# unless called programmatically or if required check is bypassed.
if not tags_to_add and not duplicate_arg:
# Write sidecar files with the tags that are already in the result dicts
sidecar_count = 0
for res in results:
# Handle both dict and PipeObject formats
file_path = None
tags = []
file_hash = ""
if isinstance(res, models.PipeObject):
file_path = res.file_path
tags = res.extra.get('tags', [])
file_hash = res.file_hash or ""
elif isinstance(res, dict):
file_path = res.get('file_path')
tags = res.get('tags', []) # Check both tags and extra['tags']
if not tags and 'extra' in res:
tags = res['extra'].get('tags', [])
file_hash = res.get('file_hash', "")
if not file_path:
log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr)
ctx.emit(res)
continue
if tags:
# Write sidecar file for this file with its tags
try:
sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash)
log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
ctx.emit(res)
if sidecar_count > 0:
log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr)
else:
log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr)
return 0
# Tags ARE provided - append them to each result and write sidecar files or add to Hydrus
sidecar_count = 0
for res in results:
# Handle both dict and PipeObject formats
file_path = None
existing_tags = []
file_hash = ""
storage_source = None
hydrus_hash = None
if isinstance(res, models.PipeObject):
file_path = res.file_path
existing_tags = res.extra.get('tags', [])
file_hash = res.file_hash or ""
storage_source = res.extra.get('storage_source') or res.extra.get('source')
hydrus_hash = res.extra.get('hydrus_hash')
elif isinstance(res, dict):
file_path = res.get('file_path') or res.get('path')
existing_tags = res.get('tags', [])
if not existing_tags and 'extra' in res:
existing_tags = res['extra'].get('tags', [])
file_hash = res.get('file_hash', "")
storage_source = res.get('storage_source') or res.get('source') or res.get('origin')
if not storage_source and 'extra' in res:
storage_source = res['extra'].get('storage_source') or res['extra'].get('source')
# For Hydrus results from search-file, look for hash, hash_hex, or target (all contain the hash)
hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex')
if not hydrus_hash and 'extra' in res:
hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex')
else:
ctx.emit(res)
continue
# Apply hash override if provided
if hash_override:
hydrus_hash = hash_override
# If we have a hash override, we treat it as a Hydrus target
storage_source = "hydrus"
if not file_path and not hydrus_hash:
log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr)
ctx.emit(res)
continue
# Handle -duplicate logic (copy existing tags to new namespaces)
if duplicate_arg:
# Parse duplicate format: source:target1,target2 or source,target1,target2
parts = duplicate_arg.split(':')
source_ns = ""
targets = []
if len(parts) > 1:
# Explicit format: source:target1,target2
source_ns = parts[0]
targets = parts[1].split(',')
else:
# Inferred format: source,target1,target2
parts = duplicate_arg.split(',')
if len(parts) > 1:
source_ns = parts[0]
targets = parts[1:]
if source_ns and targets:
# Find tags in source namespace
source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')]
for t in source_tags:
value = t.split(':', 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag not in existing_tags and new_tag not in tags_to_add:
tags_to_add.append(new_tag)
# Merge new tags with existing tags, handling namespace overwrites
# When adding a tag like "namespace:value", remove any existing "namespace:*" tags
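# e.g. (illustrative): existing ['title:Old Name', 'artist:X'] plus a new tag
# 'title:New Name' yields ['artist:X', 'title:New Name']; the old title: tag is
# dropped by the namespace filter below before the new tag is appended.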
for new_tag in tags_to_add:
# Check if this is a namespaced tag (format: "namespace:value")
if ':' in new_tag:
namespace = new_tag.split(':', 1)[0]
# Remove any existing tags with the same namespace
existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))]
# Add the new tag if not already present
if new_tag not in existing_tags:
existing_tags.append(new_tag)
# Update the result's tags
if isinstance(res, models.PipeObject):
res.extra['tags'] = existing_tags
elif isinstance(res, dict):
res['tags'] = existing_tags
# Determine where to add tags: Hydrus, local DB, or sidecar
if storage_source and storage_source.lower() == 'hydrus':
# Add tags to Hydrus using the API
target_hash = hydrus_hash or file_hash
if target_hash:
try:
log(f"[add_tags] Adding {len(existing_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr)
hydrus_client = hydrus_wrapper.get_client(config)
hydrus_client.add_tags(target_hash, existing_tags, "my tags")
log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr)
else:
log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr)
elif storage_source and storage_source.lower() == 'local':
# For local storage, save directly to DB (no sidecar needed)
if file_path:
library_root = get_local_storage_path(config)
if library_root:
try:
with LocalLibraryDB(library_root) as db:
db.save_tags(Path(file_path), existing_tags)
log(f"[add_tags] Saved {len(existing_tags)} tag(s) to local DB", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr)
else:
log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr)
else:
log(f"[add_tags] Warning: No file path for local storage, skipping", file=sys.stderr)
else:
# For other storage types or unknown sources, write sidecar file if we have a file path
if file_path:
try:
sidecar_path = write_sidecar(Path(file_path), existing_tags, [], file_hash)
log(f"[add_tags] Wrote {len(existing_tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
sidecar_count += 1
except Exception as e:
log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr)
# Emit the modified result
ctx.emit(res)
log(f"[add_tags] Processed {len(results)} result(s)", file=sys.stderr)
return 0

78
cmdlets/add_url.py Normal file
View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="add-url",
summary="Associate a URL with a Hydrus file.",
usage="add-url [-hash <sha256>] <url>",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("url", required=True, description="The URL to associate with the file."),
],
details=[
"- Adds the URL to the Hydrus file's known URL list.",
],
)
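# Example invocations (illustrative placeholders for the hash and URL):
#   @1 | add-url "https://example.com/post/123"
#   add-url -hash <sha256> "https://example.com/post/123"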
@register(["add-url", "ass-url", "associate-url", "add_url"]) # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
url = parsed.get("url")
if not url:
log("Requires a URL argument")
return 1
url = str(url).strip()
if not url:
log("Requires a non-empty URL")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
client.associate_url(hash_hex, url)
except Exception as exc:
log(f"Hydrus add-url failed: {exc}")
return 1
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
ctx.emit(f"Associated URL with {preview}: {url}")
return 0

148
cmdlets/adjective.py Normal file
View File

@@ -0,0 +1,148 @@
import json
import os
import sys
from typing import List, Dict, Any, Optional, Sequence
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
import pipeline as ctx
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "helper", "adjective.json")
def _load_adjectives() -> Dict[str, List[str]]:
try:
if os.path.exists(ADJECTIVE_FILE):
with open(ADJECTIVE_FILE, 'r', encoding='utf-8') as f:
return json.load(f)
except Exception as e:
log(f"Error loading adjectives: {e}", file=sys.stderr)
return {}
def _save_adjectives(data: Dict[str, List[str]]) -> bool:
try:
with open(ADJECTIVE_FILE, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2)
return True
except Exception as e:
log(f"Error saving adjectives: {e}", file=sys.stderr)
return False
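# Assumed on-disk shape of helper/adjective.json (illustrative values):
# {
#   "mood": ["calm", "angry"],
#   "colour": ["red", "blue"]
# }
# i.e. a mapping of category name -> list of tag strings, which is what
# _load_adjectives() returns and _save_adjectives() writes back.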
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
data = _load_adjectives()
# Parse arguments manually first to handle positional args
# We expect: .adjective [category] [tag] [-add] [-delete]
# If no args, list categories
if not args:
table = ResultTable("Adjective Categories")
for i, (category, tags) in enumerate(data.items()):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Category", category)
row.add_column("Tag Amount", str(len(tags)))
# Selection expands to: .adjective "Category Name"
table.set_row_selection_args(i, [category])
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, list(data.keys()))
ctx.set_current_stage_table(table)
print(table)
return 0
# We have args. First arg is likely category.
category = args[0]
# Check if we are adding a new category (implicit if it doesn't exist)
if category not in data:
# If only category provided, create it
if len(args) == 1:
data[category] = []
_save_adjectives(data)
log(f"Created new category: {category}")
# If more args, we might be trying to add to a non-existent category
elif "-add" in args:
data[category] = []
# Continue to add logic
# Handle operations within category
remaining_args = list(args[1:])
# Check for -add flag
if "-add" in remaining_args:
# .adjective category -add tag
# or .adjective category tag -add
add_idx = remaining_args.index("-add")
# Tag could be before or after
tag = None
if add_idx + 1 < len(remaining_args):
tag = remaining_args[add_idx + 1]
elif add_idx > 0:
tag = remaining_args[add_idx - 1]
if tag:
if tag not in data[category]:
data[category].append(tag)
_save_adjectives(data)
log(f"Added '{tag}' to '{category}'")
else:
log(f"Tag '{tag}' already exists in '{category}'")
else:
log("Error: No tag specified to add")
return 1
# Check for -delete flag
elif "-delete" in remaining_args:
# .adjective category -delete tag
# or .adjective category tag -delete
del_idx = remaining_args.index("-delete")
tag = None
if del_idx + 1 < len(remaining_args):
tag = remaining_args[del_idx + 1]
elif del_idx > 0:
tag = remaining_args[del_idx - 1]
if tag:
if tag in data.get(category, []):
data[category].remove(tag)
_save_adjectives(data)
log(f"Deleted '{tag}' from '{category}'")
else:
log(f"Tag '{tag}' not found in '{category}'")
else:
log("Error: No tag specified to delete")
return 1
# List tags in category (Default action if no flags or after modification)
tags = data.get(category, [])
table = ResultTable(f"Tags in '{category}'")
for i, tag in enumerate(tags):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Tag", tag)
# Selection expands to: .adjective "Category" "Tag"
# This allows typing @N -delete to delete it
table.set_row_selection_args(i, [category, tag])
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, tags)
ctx.set_current_stage_table(table)
print(table)
return 0
CMDLET = Cmdlet(
name=".adjective",
aliases=["adj"],
summary="Manage adjective categories and tags",
usage=".adjective [category] [-add tag] [-delete tag]",
args=[
CmdletArg(name="category", type="string", description="Category name", required=False),
CmdletArg(name="tag", type="string", description="Tag name", required=False),
CmdletArg(name="add", type="flag", description="Add tag"),
CmdletArg(name="delete", type="flag", description="Delete tag"),
],
exec=_run
)

153
cmdlets/check_file_status.py Normal file
View File

@@ -0,0 +1,153 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from helper.logger import log
from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
CMDLET = Cmdlet(
name="check-file-status",
summary="Check if a file is active, deleted, or corrupted in Hydrus.",
usage="check-file-status [-hash <sha256>]",
args=[
CmdletArg("-hash", description="File hash (SHA256) to check. If not provided, uses selected result."),
],
details=[
"- Shows whether file is active in Hydrus or marked as deleted",
"- Detects corrupted data (e.g., comma-separated URLs)",
"- Displays file metadata and service locations",
"- Note: Hydrus keeps deleted files for recovery. Use cleanup-corrupted for full removal.",
],
)
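# Example invocations (illustrative; the hash is a placeholder):
#   @1 | check-file-status
#   check-file-status -hash <64-char sha256>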
@register(["check-file-status", "check-status", "file-status", "status"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse arguments
override_hash: str | None = None
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
i += 1
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("No hash provided and no result selected", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
result_data = client.fetch_file_metadata(hashes=[hash_hex])
if not result_data.get("metadata"):
log(f"File not found: {hash_hex[:16]}...", file=sys.stderr)
return 1
file_info = result_data["metadata"][0]
# Status summary
is_deleted = file_info.get("is_deleted", False)
is_local = file_info.get("is_local", False)
is_trashed = file_info.get("is_trashed", False)
status_str = "DELETED" if is_deleted else ("TRASHED" if is_trashed else "ACTIVE")
log(f"File status: {status_str}", file=sys.stderr)
# File info
log(f"\n📄 File Information:", file=sys.stderr)
log(f" Hash: {file_info['hash'][:16]}...", file=sys.stderr)
log(f" Size: {file_info['size']:,} bytes", file=sys.stderr)
log(f" MIME: {file_info['mime']}", file=sys.stderr)
log(f" Dimensions: {file_info.get('width', '?')}x{file_info.get('height', '?')}", file=sys.stderr)
# Service status
file_services = file_info.get("file_services", {})
current_services = file_services.get("current", {})
deleted_services = file_services.get("deleted", {})
if current_services:
log(f"\n✓ In services ({len(current_services)}):", file=sys.stderr)
for service_key, service_info in current_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
log(f" - {sname} ({stype})", file=sys.stderr)
if deleted_services:
log(f"\n✗ Deleted from services ({len(deleted_services)}):", file=sys.stderr)
for service_key, service_info in deleted_services.items():
sname = service_info.get("name", "unknown")
stype = service_info.get("type_pretty", "unknown")
time_deleted = service_info.get("time_deleted", "?")
log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr)
# URL check
urls = file_info.get("known_urls", [])
log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr)
corrupted_count = 0
for i, url in enumerate(urls, 1):
if "," in url:
corrupted_count += 1
log(f" [{i}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr)
else:
log(f" [{i}] {url[:70]}{'...' if len(url) > 70 else ''}", file=sys.stderr)
if corrupted_count > 0:
log(f"\n⚠️ WARNING: Found {corrupted_count} corrupted URL(s)", file=sys.stderr)
# Tags
tags_dict = file_info.get("tags", {})
total_tags = 0
for service_key, service_data in tags_dict.items():
service_name = service_data.get("name", "unknown")
display_tags = service_data.get("display_tags", {}).get("0", [])
total_tags += len(display_tags)
if total_tags > 0:
log(f"\n🏷️ Tags ({total_tags}):", file=sys.stderr)
for service_key, service_data in tags_dict.items():
display_tags = service_data.get("display_tags", {}).get("0", [])
if display_tags:
service_name = service_data.get("name", "unknown")
log(f" {service_name}:", file=sys.stderr)
for tag in display_tags[:5]: # Show first 5
log(f" - {tag}", file=sys.stderr)
if len(display_tags) > 5:
log(f" ... and {len(display_tags) - 5} more", file=sys.stderr)
log("\n", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error checking file status: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1

110
cmdlets/cleanup.py Normal file
View File

@@ -0,0 +1,110 @@
"""Cleanup cmdlet for removing temporary artifacts from pipeline.
This cmdlet processes result lists and removes temporary files (marked with is_temp=True),
then emits the remaining non-temporary results for further pipeline stages.
"""
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import sys
from helper.logger import log
from . import register
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp
import models
import pipeline as pipeline_context
@register(["cleanup"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Remove temporary files from pipeline results.
Accepts:
- Single result object with is_temp field
- List of result objects to clean up
Process:
- Filters results by is_temp=True
- Deletes those files from disk
- Emits only non-temporary results
Typical pipeline usage:
download-data url | screen-shot | add-tag "tag" --all | cleanup
"""
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
import json
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Normalize input to list
results = normalize_result_input(result)
if not results:
log("[cleanup] No results to process", file=sys.stderr)
return 1
# Separate temporary and permanent results
temp_results = filter_results_by_temp(results, include_temp=True)
perm_results = filter_results_by_temp(results, include_temp=False)
# Delete temporary files
deleted_count = 0
for temp_result in temp_results:
try:
file_path = get_pipe_object_path(temp_result)
if file_path:
path_obj = Path(file_path)
if path_obj.exists():
# Delete the file
path_obj.unlink()
log(f"[cleanup] Deleted temporary file: {path_obj.name}", file=sys.stderr)
deleted_count += 1
# Clean up any associated sidecar files
for ext in ['.tags', '.metadata']:
sidecar = path_obj.parent / (path_obj.name + ext)
if sidecar.exists():
try:
sidecar.unlink()
log(f"[cleanup] Deleted sidecar: {sidecar.name}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Warning: Could not delete sidecar {sidecar.name}: {e}", file=sys.stderr)
else:
log(f"[cleanup] File does not exist: {file_path}", file=sys.stderr)
except Exception as e:
log(f"[cleanup] Error deleting file: {e}", file=sys.stderr)
# Log summary
log(f"[cleanup] Deleted {deleted_count} temporary file(s), emitting {len(perm_results)} permanent result(s)", file=sys.stderr)
# Emit permanent results for downstream processing
for perm_result in perm_results:
pipeline_context.emit(perm_result)
return 0
CMDLET = Cmdlet(
name="cleanup",
summary="Remove temporary artifacts from pipeline (marked with is_temp=True).",
usage="cleanup",
args=[],
details=[
"- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)",
"- Deletes files marked with is_temp=True from disk",
"- Also cleans up associated sidecar files (.tags, .metadata)",
"- Emits only non-temporary results for further processing",
"- Typical usage at end of pipeline: ... | add-tag \"tag\" --all | cleanup",
"- Exit code 0 if cleanup successful, 1 if no results to process",
],
)
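# Note (assumption about upstream cmdlets): a "temporary" result is anything
# marked with a truthy is_temp flag, e.g. a dict/PipeObject shaped roughly like
# {"path": "/tmp/frame_0001.png", "is_temp": True}; the exact check is
# delegated to filter_results_by_temp in _shared.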

242
cmdlets/delete_file.py Normal file
View File

@@ -0,0 +1,242 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from helper.logger import log
import sqlite3
from pathlib import Path
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
def _delete_database_entry(db_path: Path, file_path: str) -> bool:
"""Delete file and related entries from local library database.
Args:
db_path: Path to the library.db file
file_path: Exact file path string as stored in database
Returns:
True if successful, False otherwise
"""
try:
if not db_path.exists():
log(f"Database not found at {db_path}", file=sys.stderr)
return False
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
log(f"Searching database for file_path: {file_path}", file=sys.stderr)
# Find the file_id using the exact file_path
cursor.execute('SELECT id FROM files WHERE file_path = ?', (file_path,))
result = cursor.fetchone()
if not result:
log(f"ERROR: File path not found in database", file=sys.stderr)
log(f"Expected: {file_path}", file=sys.stderr)
# Debug: show sample entries
cursor.execute('SELECT id, file_path FROM files LIMIT 3')
samples = cursor.fetchall()
if samples:
log(f"Sample DB entries:", file=sys.stderr)
for fid, fpath in samples:
log(f"{fid}: {fpath}", file=sys.stderr)
conn.close()
return False
file_id = result[0]
log(f"Found file_id={file_id}, deleting all related records", file=sys.stderr)
# Delete related records
cursor.execute('DELETE FROM metadata WHERE file_id = ?', (file_id,))
meta_count = cursor.rowcount
cursor.execute('DELETE FROM tags WHERE file_id = ?', (file_id,))
tags_count = cursor.rowcount
cursor.execute('DELETE FROM notes WHERE file_id = ?', (file_id,))
notes_count = cursor.rowcount
cursor.execute('DELETE FROM files WHERE id = ?', (file_id,))
files_count = cursor.rowcount
conn.commit()
conn.close()
log(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr)
return True
except Exception as exc:
log(f"Database cleanup failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Parse overrides and options
override_hash: str | None = None
conserve: str | None = None
lib_root: str | None = None
reason_tokens: list[str] = []
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
value = str(args[i + 1]).strip().lower()
if value in {"local", "hydrus"}:
conserve = value
i += 2
continue
if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args):
lib_root = str(args[i + 1]).strip()
i += 2
continue
reason_tokens.append(token)
i += 1
# Handle result as either dict or object
if isinstance(result, dict):
hash_hex_raw = result.get("hash_hex") or result.get("hash")
target = result.get("target")
origin = result.get("origin")
else:
hash_hex_raw = getattr(result, "hash_hex", None) or getattr(result, "hash", None)
target = getattr(result, "target", None)
origin = getattr(result, "origin", None)
# For Hydrus files, the target IS the hash
if origin and origin.lower() == "hydrus" and not hash_hex_raw:
hash_hex_raw = target
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw)
reason = " ".join(token for token in reason_tokens if str(token).strip()).strip()
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
if conserve != "local" and local_target:
path = Path(str(target))
file_path_str = str(target) # Keep the original string for DB matching
try:
if path.exists() and path.is_file():
path.unlink()
local_deleted = True
if ctx._PIPE_ACTIVE:
ctx.emit(f"Removed local file: {path}")
log(f"Deleted: {path.name}", file=sys.stderr)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
# Remove common sidecars regardless of file removal success
for sidecar in (path.with_suffix(".tags"), path.with_suffix(".tags.txt"),
path.with_suffix(".metadata"), path.with_suffix(".notes")):
try:
if sidecar.exists() and sidecar.is_file():
sidecar.unlink()
except Exception:
pass
# Clean up database entry if library root provided - do this regardless of file deletion success
if lib_root:
lib_root_path = Path(lib_root)
db_path = lib_root_path / ".downlow_library.db"
log(f"Attempting DB cleanup: lib_root={lib_root}, db_path={db_path}", file=sys.stderr)
log(f"Deleting DB entry for: {file_path_str}", file=sys.stderr)
if _delete_database_entry(db_path, file_path_str):
if ctx._PIPE_ACTIVE:
ctx.emit(f"Removed database entry: {path.name}")
log(f"Database entry cleaned up", file=sys.stderr)
local_deleted = True # Mark as deleted if DB cleanup succeeded
else:
log(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr)
else:
log(f"No lib_root provided, skipping database cleanup", file=sys.stderr)
hydrus_deleted = False
if conserve != "hydrus" and hash_hex:
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
if not local_deleted:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
else:
if client is None:
if not local_deleted:
log("Hydrus client unavailable", file=sys.stderr)
return 1
else:
payload: Dict[str, Any] = {"hashes": [hash_hex]}
if reason:
payload["reason"] = reason
try:
client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
hydrus_deleted = True
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
log(f"Deleted from Hydrus: {preview}", file=sys.stderr)
except Exception as exc:
log(f"Hydrus delete failed: {exc}", file=sys.stderr)
if not local_deleted:
return 1
if hydrus_deleted and hash_hex:
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
if ctx._PIPE_ACTIVE:
if reason:
ctx.emit(f"Deleted {preview} (reason: {reason}).")
else:
ctx.emit(f"Deleted {preview}.")
if hydrus_deleted or local_deleted:
return 0
log("Selected result has neither Hydrus hash nor local file target")
return 1
CMDLET = Cmdlet(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
aliases=["del-file"],
args=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
CmdletArg("lib-root", description="Path to local library root for database cleanup."),
CmdletArg("reason", description="Optional reason for deletion (free text)."),
],
details=[
"Default removes both the local file and Hydrus file.",
"Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
"Database entries are automatically cleaned up for local files.",
"Any remaining arguments are treated as the Hydrus reason text.",
],
)

79
cmdlets/delete_note.py Normal file
View File

@@ -0,0 +1,79 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="delete-note",
summary="Delete a named note from a Hydrus file.",
usage="i | del-note [-hash <sha256>] <name>",
aliases=["del-note"],
args=[
],
details=[
"- Removes the note with the given name from the Hydrus file.",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
if not args:
log("Requires the note name/key to delete")
return 1
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
if not rest:
log("Requires the note name/key to delete")
return 1
name = str(rest[0] or '').strip()
if not name:
log("Requires a non-empty note name/key")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
service_name = "my notes"
client.delete_notes(hash_hex, [name], service_name)
except Exception as exc:
log(f"Hydrus delete-note failed: {exc}")
return 1
log(f"Deleted note '{name}'")
return 0

219
cmdlets/delete_tag.py Normal file
View File

@@ -0,0 +1,219 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments
from helper.logger import log
CMDLET = Cmdlet(
name="delete-tags",
summary="Remove tags from a Hydrus file.",
usage="del-tags [-hash <sha256>] <tag>[,<tag>...]",
aliases=["del-tag", "del-tags", "delete-tag"],
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
],
details=[
"- Requires a Hydrus file (hash present) or explicit -hash override.",
"- Multiple tags can be comma-separated or space-separated.",
],
)
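# Example invocations (illustrative):
#   delete-tags "creator:alice, title:old"   # comma- or space-separated tags
#   @1 | delete-tags                         # piped TagItem(s) from an @ selection
#   delete-tags @{2,5}                       # rows 2 and 5 of the last get-tag table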
@register(["del-tag", "del-tags", "delete-tag", "delete-tags"]) # Still needed for backward compatibility
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
has_piped_tag = (result and hasattr(result, '__class__') and
result.__class__.__name__ == 'TagItem' and
hasattr(result, 'tag_name'))
# Check if we have a piped list of TagItems (from @N selection)
has_piped_tag_list = (isinstance(result, list) and result and
hasattr(result[0], '__class__') and
result[0].__class__.__name__ == 'TagItem')
if not args and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Parse -hash override and collect tags from remaining args
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
# Check if first argument is @ syntax (result table selection)
# @5 or @{2,5,8} to delete tags from ResultTable by index
tags_from_at_syntax = []
hash_from_at_syntax = None
if rest and str(rest[0]).startswith("@"):
selector_arg = str(rest[0])
pipe_selector = selector_arg[1:].strip()
# Parse @N or @{N,M,K} syntax
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
# @{2,5,8}
pipe_selector = pipe_selector[1:-1]
try:
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
except ValueError:
log("Invalid selection syntax. Use @2 or @{2,5,8}")
return 1
# Get the last ResultTable from pipeline context
try:
last_table = ctx._LAST_RESULT_TABLE
if last_table:
# Extract tags from selected rows
for idx in indices:
if 1 <= idx <= len(last_table.rows):
# Look for a TagItem in _LAST_RESULT_ITEMS by index
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
item = ctx._LAST_RESULT_ITEMS[idx - 1]
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
tag_name = getattr(item, 'tag_name', None)
if tag_name:
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
tags_from_at_syntax.append(tag_name)
# Also get hash from first item for consistency
if not hash_from_at_syntax:
hash_from_at_syntax = getattr(item, 'hash_hex', None)
if not tags_from_at_syntax:
log(f"No tags found at indices: {indices}")
return 1
else:
log("No ResultTable in pipeline (use @ after running get-tag)")
return 1
except Exception as exc:
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
# If we have a list of TagItems, we want to process ALL of them if no args provided
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
if not args and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
# We will extract tags from the list later
pass
else:
result = result[0]
# Determine tags and hash to use
tags: list[str] = []
hash_hex = None
if tags_from_at_syntax:
# Use tags extracted from @ syntax
tags = tags_from_at_syntax
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
log(f"[delete_tag] Using @ syntax extraction: {len(tags)} tag(s) to delete: {tags}")
elif isinstance(result, list) and result and hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'TagItem':
# Got a list of TagItems (e.g. from delete-tag @1)
tags = [getattr(item, 'tag_name') for item in result if getattr(item, 'tag_name', None)]
# Use hash from first item
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result[0], "hash_hex", None))
elif result and hasattr(result, '__class__') and result.__class__.__name__ == 'TagItem':
# Got a piped TagItem - delete this specific tag
tag_name = getattr(result, 'tag_name', None)
if tag_name:
tags = [tag_name]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
else:
# Traditional mode - parse tag arguments
tags = parse_tag_arguments(rest)
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not tags:
log("No valid tags were provided")
return 1
if not hash_hex:
log("Selected result does not include a hash")
return 1
try:
service_name = hydrus_wrapper.get_tag_service_name(config)
except Exception as exc:
log(f"Failed to resolve tag service: {exc}")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
log(f"[delete_tag] Sending deletion request: hash={hash_hex}, tags={tags}, service={service_name}")
try:
result = client.delete_tags(hash_hex, tags, service_name)
log(f"[delete_tag] Hydrus response: {result}")
except Exception as exc:
log(f"Hydrus del-tag failed: {exc}")
return 1
preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '')
log(f"Removed {len(tags)} tag(s) from {preview} via '{service_name}'.")
# Re-fetch and emit updated tags after deletion
try:
payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_urls=False)
items = payload.get("metadata") if isinstance(payload, dict) else None
if isinstance(items, list) and items:
meta = items[0] if isinstance(items[0], dict) else None
if isinstance(meta, dict):
# Extract tags from updated metadata
from cmdlets.get_tag import _extract_my_tags_from_hydrus_meta, TagItem
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
updated_tags = _extract_my_tags_from_hydrus_meta(meta, service_key, service_name)
# Emit updated tags as TagItem objects
from result_table import ResultTable
table = ResultTable("Tags", max_columns=2)
tag_items = []
for idx, tag_name in enumerate(updated_tags, start=1):
tag_item = TagItem(
tag_name=tag_name,
tag_index=idx,
hash_hex=hash_hex,
source="hydrus",
service_name=service_name,
)
tag_items.append(tag_item)
table.add_result(tag_item)
ctx.emit(tag_item)
# Store items for @ selection in next command (CLI will handle table management)
# Don't call set_last_result_table so we don't pollute history or table context
except Exception as exc:
log(f"Warning: Could not fetch updated tags after deletion: {exc}", file=__import__('sys').stderr)
return 0

82
cmdlets/delete_url.py Normal file
View File

@@ -0,0 +1,82 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="delete-url",
summary="Remove a URL association from a Hydrus file.",
usage="delete-url [-hash <sha256>] <url>",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("<url>", required=True, description="The URL to remove from the file."),
],
details=[
"- Removes the URL from the Hydrus file's known URL list.",
],
)
def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
return override_hash, rest
@register(["del-url", "delete-url", "delete_url"]) # aliases
def delete(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
override_hash, rest = _parse_hash_and_rest(args)
if not rest:
log("Requires a URL argument")
return 1
url = str(rest[0] or '').strip()
if not url:
log("Requires a non-empty URL")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
client.delete_url(hash_hex, url)
except Exception as exc:
log(f"Hydrus del-url failed: {exc}")
return 1
log(f"Deleted URL: {url}")
return 0

2633
cmdlets/download_data.py Normal file

File diff suppressed because it is too large Load Diff

1618
cmdlets/get_file.py Normal file

File diff suppressed because it is too large Load Diff

246
cmdlets/get_metadata.py Normal file
View File

@@ -0,0 +1,246 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
import json
import sys
from helper.logger import log
from pathlib import Path
import mimetypes
import os
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(field, default)
else:
return getattr(obj, field, default)
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
# Try to determine if this is a local file or Hydrus file
local_path = get_field(result, "target", None) or get_field(result, "path", None)
is_local = False
if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")):
is_local = True
# LOCAL FILE PATH
if is_local and local_path:
try:
file_path = Path(str(local_path))
if file_path.exists() and file_path.is_file():
# Get the hash from result or compute it
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
# If no hash, compute SHA256 of the file
if not hash_hex:
try:
import hashlib
with open(file_path, 'rb') as f:
hash_hex = hashlib.sha256(f.read()).hexdigest()
except Exception:
hash_hex = None
# Get MIME type
mime_type, _ = mimetypes.guess_type(str(file_path))
if not mime_type:
mime_type = "unknown"
# Get file size
try:
file_size = file_path.stat().st_size
except Exception:
file_size = None
# Try to get duration if it's a media file
duration_seconds = None
try:
# Try to use ffprobe if available
import subprocess
result_proc = subprocess.run(
["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)],
capture_output=True,
text=True,
timeout=5
)
if result_proc.returncode == 0 and result_proc.stdout.strip():
try:
duration_seconds = float(result_proc.stdout.strip())
except ValueError:
pass
except Exception:
pass
# Get format helpers from search module
try:
from .search_file import _format_size as _fmt_size
from .search_file import _format_duration as _fmt_dur
except Exception:
_fmt_size = lambda x: str(x) if x is not None else ""
_fmt_dur = lambda x: str(x) if x is not None else ""
size_label = _fmt_size(file_size) if file_size is not None else ""
dur_label = _fmt_dur(duration_seconds) if duration_seconds is not None else ""
# Get known URLs from sidecar or result
urls = []
sidecar_path = Path(str(file_path) + '.tags')
if sidecar_path.exists():
try:
with open(sidecar_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line.startswith('known_url:'):
url_value = line.replace('known_url:', '', 1).strip()
if url_value:
urls.append(url_value)
except Exception:
pass
# Fallback to result URLs if not in sidecar
if not urls:
urls_from_result = get_field(result, "known_urls", None) or get_field(result, "urls", None)
if isinstance(urls_from_result, list):
urls.extend([str(u).strip() for u in urls_from_result if u])
# Display local file metadata
log(f"PATH: {file_path}")
if hash_hex:
log(f"HASH: {hash_hex}")
if mime_type:
log(f"MIME: {mime_type}")
if size_label:
log(f"Size: {size_label}")
if dur_label:
log(f"Duration: {dur_label}")
if urls:
log("URLs:")
for url in urls:
log(f" {url}")
return 0
except Exception as exc:
# Fall through to Hydrus if local file handling fails
pass
# HYDRUS PATH
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash or local path", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
payload = client.fetch_file_metadata(
hashes=[hash_hex],
include_service_keys_to_tags=False,
include_file_urls=True,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
return 1
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
log("No metadata found.")
return 0
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
log("No metadata found.")
return 0
mime = meta.get("mime")
size = meta.get("size") or meta.get("file_size")
duration_value = meta.get("duration")
inner = meta.get("metadata") if isinstance(meta.get("metadata"), dict) else None
if duration_value is None and isinstance(inner, dict):
duration_value = inner.get("duration")
try:
from .search_file import _format_size as _fmt_size
from .search_file import _format_duration as _fmt_dur
from .search_file import _hydrus_duration_seconds as _dur_secs
except Exception:
_fmt_size = lambda x: str(x) if x is not None else ""
_dur_secs = lambda x: x
_fmt_dur = lambda x: str(x) if x is not None else ""
dur_seconds = _dur_secs(duration_value)
dur_label = _fmt_dur(dur_seconds) if dur_seconds is not None else ""
size_label = _fmt_size(size)
# Display Hydrus file metadata
log(f"PATH: hydrus://file/{hash_hex}")
log(f"Hash: {hash_hex}")
if mime:
log(f"MIME: {mime}")
if dur_label:
log(f"Duration: {dur_label}")
if size_label:
log(f"Size: {size_label}")
urls = meta.get("known_urls") or meta.get("urls")
if isinstance(urls, list) and urls:
log("URLs:")
for url in urls:
try:
text = str(url).strip()
except Exception:
text = ""
if text:
log(f" {text}")
return 0
CMDLET = Cmdlet(
name="get-metadata",
summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).",
usage="get-metadata [-hash <sha256>]",
aliases=["meta"],
args=[
CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.",
"- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.",
"- Automatically detects local vs Hydrus files.",
"- Local file hashes are computed via SHA256 if not already available.",
],
)

87
cmdlets/get_note.py Normal file
View File

@@ -0,0 +1,87 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="get-note",
summary="List notes on a Hydrus file.",
usage="get-note [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Prints notes by service and note name.",
],
)
@register(["get-note", "get-notes", "get_note"]) # aliases
def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(field, default)
else:
return getattr(obj, field, default)
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
payload = client.fetch_file_metadata(hashes=[hash_hex], include_service_keys_to_tags=False, include_notes=True)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}")
return 1
items = payload.get("metadata") if isinstance(payload, dict) else None
meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
notes = {}
if isinstance(meta, dict):
# Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API
notes = meta.get('notes') or meta.get('service_names_to_notes') or {}
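# Assumed note payload shapes handled below (illustrative): either a flat
# mapping {"note name": "note text"} or a per-service mapping such as
# {"my notes": {"note name": "note text"}}.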
if notes:
ctx.emit("Notes:")
# Print flattened: service -> (name: text)
if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()):
for svc, mapping in notes.items():
ctx.emit(f"- {svc}:")
if isinstance(mapping, dict):
for k, v in mapping.items():
ctx.emit(f"{k}: {str(v).strip()}")
elif isinstance(notes, dict):
for k, v in notes.items():
ctx.emit(f"- {k}: {str(v).strip()}")
else:
ctx.emit("No notes found.")
return 0

240
cmdlets/get_relationship.py Normal file
View File

@@ -0,0 +1,240 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from helper.logger import log
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes
CMDLET = Cmdlet(
name="get-relationship",
summary="Print Hydrus relationships for the selected file.",
usage="get-relationship [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Lists relationship data as returned by Hydrus.",
],
)
@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"]) # aliases
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash", file=sys.stderr)
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
return 1
if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
try:
rel = client.get_file_relationships(hash_hex)
except Exception as exc:
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
return 1
if not rel:
log("No relationships found.")
return 0
# Extract file_relationships from response
file_rels = rel.get("file_relationships", {})
if not file_rels:
log("No relationships found.")
return 0
# Get the relationships dict for this specific hash
this_file_rels = file_rels.get(hash_hex)
if not this_file_rels:
log("No relationships found.")
return 0
# Extract related hashes from all relationship types
# Keys "0", "1", "3", "8" are relationship type IDs
# Values are lists of hashes
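# (In the Hydrus Client API these numeric keys conventionally mean
# 0 = potential duplicates, 1 = false positives, 3 = alternates,
# 8 = duplicates; treat that mapping as an assumption - only the hashes
# are used here.)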
related_hashes = []
for rel_type_id, hash_list in this_file_rels.items():
# Skip non-numeric keys and metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
continue
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
related_hashes.append(rel_hash)
# Remove duplicates while preserving order
seen = set()
unique_hashes = []
for h in related_hashes:
if h not in seen:
seen.add(h)
unique_hashes.append(h)
if not unique_hashes:
log("No related files found.")
return 0
# Fetch metadata for all related files
try:
metadata_payload = client.fetch_file_metadata(
hashes=unique_hashes,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
return 1
metadata_list = metadata_payload.get("metadata") if isinstance(metadata_payload, dict) else None
if not isinstance(metadata_list, list):
log("Hydrus metadata response was not a list", file=sys.stderr)
return 1
# Build metadata map by hash
meta_by_hash: Dict[str, Dict[str, Any]] = {}
for item in metadata_list:
if isinstance(item, dict):
item_hash = normalize_hash(item.get("hash"))
if item_hash:
meta_by_hash[item_hash] = item
# Helper functions for formatting
def _format_duration(seconds: Optional[float]) -> str:
if seconds is None:
return ""
try:
s = int(seconds)
hours = s // 3600
minutes = (s % 3600) // 60
secs = s % 60
if hours > 0:
return f"{hours}:{minutes:02d}:{secs:02d}"
else:
return f"{minutes}:{secs:02d}"
except Exception:
return ""
def _get_title(meta: Dict[str, Any]) -> str:
# Try to extract title from tags
tags_payload = meta.get("tags")
if isinstance(tags_payload, dict):
for service_data in tags_payload.values():
if isinstance(service_data, dict):
storage_tags = service_data.get("storage_tags")
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
if isinstance(tag_list, list):
for tag in tag_list:
tag_str = str(tag).lower()
if tag_str.startswith("title:"):
return str(tag)[6:].strip()
# Fallback to hash prefix
h = meta.get("hash")
return str(h)[:12] if h else "unknown"
def _get_mime_type(meta: Dict[str, Any]) -> str:
mime = meta.get("mime", "")
if not mime:
return ""
# Extract type from mime (e.g., "video/mp4" -> "video")
parts = str(mime).split("/")
return parts[0] if parts else ""
# Print header and separator
log("# | Title | Type | Duration | Size")
log("--+---------------------------+-------+----------+--------")
# Create result objects for each related file
results: List[Any] = []
# Print each related file
for idx, rel_hash in enumerate(unique_hashes, start=1):
meta = meta_by_hash.get(rel_hash)
if not meta:
continue
title = _get_title(meta)
mime_type = _get_mime_type(meta)
# Get duration
duration_value = meta.get("duration")
if duration_value is None and isinstance(meta.get("metadata"), dict):
duration_value = meta["metadata"].get("duration")
duration_str = _format_duration(duration_value)
# Get size
size = meta.get("size") or meta.get("file_size")
size_str = fmt_bytes(size) if size else ""
# Format and print row
title_display = title[:25].ljust(25)
type_display = mime_type[:5].ljust(5)
duration_display = duration_str[:8].ljust(8)
size_display = size_str[:7].ljust(7)
log(f"{idx:2d} | {title_display} | {type_display} | {duration_display} | {size_display}")
# Create result object for pipeline
result_obj = type("RelatedFile", (), {
"hash_hex": rel_hash,
"title": title,
"media_kind": mime_type or "other",
"size": size,
"duration": duration_value,
"known_urls": [],
"annotations": [],
"columns": [
("Title", title),
("Type", mime_type),
("Duration", duration_str),
("Size", size_str),
],
})()
results.append(result_obj)
# Emit results to pipeline
try:
ctx._PIPE_EMITS.extend(results)
except Exception:
pass
return 0

1191
cmdlets/get_tag.py Normal file

File diff suppressed because it is too large Load Diff

90
cmdlets/get_url.py Normal file
View File

@@ -0,0 +1,90 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
CMDLET = Cmdlet(
name="get-url",
summary="List URLs associated with a Hydrus file.",
usage="get-url [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Prints the known URLs for the selected Hydrus file.",
],
)
def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
return override_hash, rest
@register(["get-url", "get-urls", "get_url"]) # aliases
def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(field, default)
else:
return getattr(obj, field, default)
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
return 0
except Exception:
pass
override_hash, _ = _parse_hash_and_rest(args)
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True)
except Exception as exc:
log(f"Hydrus metadata fetch failed: {exc}")
return 1
items = payload.get("metadata") if isinstance(payload, dict) else None
meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None
urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or []
if urls:
ctx.emit("URLs:")
for u in urls:
text = str(u).strip()
if text:
ctx.emit(f"- {text}")
else:
ctx.emit("No URLs found.")
return 0

138
cmdlets/manage_config.py Normal file
View File

@@ -0,0 +1,138 @@
from typing import List, Dict, Any
from ._shared import Cmdlet, CmdletArg
from config import load_config, save_config
CMDLET = Cmdlet(
name=".config",
summary="Manage configuration settings",
usage=".config [key] [value]",
args=[
CmdletArg(
name="key",
description="Configuration key to update (dot-separated)",
required=False
),
CmdletArg(
name="value",
description="New value for the configuration key",
required=False
)
]
)
def flatten_config(config: Dict[str, Any], parent_key: str = '', sep: str = '.') -> List[Dict[str, Any]]:
items = []
for k, v in config.items():
if k.startswith('_'): # Skip internal keys
continue
new_key = f"{parent_key}{sep}{k}" if parent_key else k
if isinstance(v, dict):
items.extend(flatten_config(v, new_key, sep=sep))
else:
items.append({
"Key": new_key,
"Value": str(v),
"Type": type(v).__name__,
"_selection_args": [new_key]
})
return items
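# Illustrative example (made-up keys):
#   flatten_config({"hydrus": {"url": "http://127.0.0.1:45869", "timeout": 10}})
#   -> [{"Key": "hydrus.url", "Value": "http://127.0.0.1:45869", "Type": "str", "_selection_args": ["hydrus.url"]},
#       {"Key": "hydrus.timeout", "Value": "10", "Type": "int", "_selection_args": ["hydrus.timeout"]}]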
def set_nested_config(config: Dict[str, Any], key: str, value: str) -> bool:
keys = key.split('.')
d = config
# Navigate to the parent dict
for k in keys[:-1]:
if k not in d or not isinstance(d[k], dict):
d[k] = {}
d = d[k]
last_key = keys[-1]
# Try to preserve type if key exists
if last_key in d:
current_val = d[last_key]
if isinstance(current_val, bool):
if value.lower() in ('true', 'yes', '1', 'on'):
d[last_key] = True
elif value.lower() in ('false', 'no', '0', 'off'):
d[last_key] = False
else:
# Not a recognised boolean literal; keep the raw string and warn rather
# than coercing (bool() of any non-empty string would be True).
print(f"Warning: Could not convert '{value}' to boolean. Using string.")
d[last_key] = value
elif isinstance(current_val, int):
try:
d[last_key] = int(value)
except ValueError:
print(f"Warning: Could not convert '{value}' to int. Using string.")
d[last_key] = value
elif isinstance(current_val, float):
try:
d[last_key] = float(value)
except ValueError:
print(f"Warning: Could not convert '{value}' to float. Using string.")
d[last_key] = value
else:
d[last_key] = value
else:
# New key, try to infer type
if value.lower() in ('true', 'false'):
d[last_key] = (value.lower() == 'true')
elif value.isdigit():
d[last_key] = int(value)
else:
d[last_key] = value
return True
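# Illustrative example: if the config already holds {"hydrus": {"timeout": 10}},
# set_nested_config(config, "hydrus.timeout", "30") stores the int 30 (existing
# type preserved); for a brand-new key, "true"/"false" become booleans, digit
# strings become ints, and everything else is kept as a string.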
def _run(piped_result: Any, args: List[str], config: Dict[str, Any]) -> int:
# Reload the config from disk instead of using the passed-in 'config':
# that dict may hold runtime-only objects (e.g. the worker manager),
# whereas load_config() returns a fresh dict we can safely edit and save.
current_config = load_config()
# Parse args
# Handle args manually because values may contain spaces and because of how
# the @ expansion logic in CLI.py passes args.
if not args:
# List mode
items = flatten_config(current_config)
# Sort by key
items.sort(key=lambda x: x['Key'])
# Emit items for ResultTable
import pipeline as ctx
for item in items:
ctx.emit(item)
return 0
# Update mode
key = args[0]
if len(args) < 2:
print(f"Error: Value required for key '{key}'")
return 1
value = " ".join(args[1:])
# Remove quotes if present
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
value = value[1:-1]
try:
set_nested_config(current_config, key, value)
save_config(current_config)
print(f"Updated '{key}' to '{value}'")
return 0
except Exception as e:
print(f"Error updating config: {e}")
return 1
CMDLET.exec = _run

916
cmdlets/merge_file.py Normal file
View File

@@ -0,0 +1,916 @@
"""Merge multiple files into a single output file."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import json
import sys
from helper.logger import log
from helper.download import download_media
from models import DownloadOptions
from config import resolve_output_dir
import subprocess as _subprocess
import shutil as _shutil
from ._shared import parse_cmdlet_args
try:
from PyPDF2 import PdfWriter, PdfReader
HAS_PYPDF2 = True
except ImportError:
HAS_PYPDF2 = False
PdfWriter = None
PdfReader = None
try:
from metadata import (
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
merge_multiple_tag_lists,
write_tags,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
from . import register
from ._shared import (
Cmdlet,
CmdletArg,
normalize_result_input,
get_pipe_object_path,
get_pipe_object_hash,
)
import models
import pipeline as ctx
def _get_item_value(item: Any, key: str, default: Any = None) -> Any:
"""Helper to read either dict keys or attributes."""
if isinstance(item, dict):
return item.get(key, default)
return getattr(item, key, default)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Merge multiple files into one."""
# Parse help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2, default=str))
return 0
except Exception:
pass
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
delete_after = parsed.get("delete", False)
output_override: Optional[Path] = None
output_arg = parsed.get("output")
if output_arg:
try:
output_override = Path(str(output_arg)).expanduser()
except Exception:
output_override = None
format_spec = parsed.get("format")
if format_spec:
format_spec = str(format_spec).lower().strip()
# Collect files from piped results
# Use normalize_result_input to handle both single items and lists
files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
if not files_to_merge:
log("No files provided to merge", file=sys.stderr)
return 1
if len(files_to_merge) < 2:
# Only 1 file - pass it through unchanged
# (merge only happens when multiple files are collected)
item = files_to_merge[0]
ctx.emit(item)
return 0
# Extract file paths and metadata from result objects
source_files: List[Path] = []
source_tags_files: List[Path] = []
source_hashes: List[str] = []
source_urls: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
source_relationships: List[str] = [] # NEW: collect relationships from source files
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
if isinstance(raw_path, Path):
target_path = raw_path
elif isinstance(raw_path, str) and raw_path.strip():
candidate = Path(raw_path).expanduser()
if candidate.exists():
target_path = candidate
# Check for playlist item that needs downloading
if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
try:
playlist_url = item.get('__file_path')
item_idx = int(item['__action'].split(':')[1])
log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
output_dir = resolve_output_dir(config)
opts = DownloadOptions(
url=playlist_url,
output_dir=output_dir,
playlist_items=str(item_idx),
mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible
)
res = download_media(opts)
if res and res.path and res.path.exists():
target_path = res.path
log(f"✓ Downloaded: {target_path.name}", flush=True)
except Exception as e:
log(f"Failed to download playlist item: {e}", file=sys.stderr)
if target_path and target_path.exists():
source_files.append(target_path)
# Track the .tags file for this source
tags_file = target_path.with_suffix(target_path.suffix + '.tags')
if tags_file.exists():
source_tags_files.append(tags_file)
# Try to read hash, tags, urls, and relationships from .tags sidecar file
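            # Illustrative sidecar contents (hypothetical values) - one entry per line:
            #   hash:3f2a9c...
            #   title:Part 1
            #   known_url:https://example.com/part1
            #   relationship:<related-item-reference>
            # Only the prefixes handled below are interpreted; other lines are plain tags.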
try:
tags_content = tags_file.read_text(encoding='utf-8')
for line in tags_content.split('\n'):
line = line.strip()
if not line:
continue
if line.startswith('hash:'):
hash_value = line[5:].strip()
if hash_value:
source_hashes.append(hash_value)
elif line.startswith('known_url:') or line.startswith('url:'):
# Extract URLs from tags file
url_value = line.split(':', 1)[1].strip() if ':' in line else ''
if url_value and url_value not in source_urls:
source_urls.append(url_value)
elif line.startswith('relationship:'):
# Extract relationships from tags file
rel_value = line.split(':', 1)[1].strip() if ':' in line else ''
if rel_value and rel_value not in source_relationships:
source_relationships.append(rel_value)
else:
# Collect actual tags (not metadata like hash: or known_url:)
source_tags.append(line)
except Exception:
pass
# Extract hash if available in item (as fallback)
hash_value = get_pipe_object_hash(item)
if hash_value and hash_value not in source_hashes:
source_hashes.append(str(hash_value))
# Extract known URLs if available
known_urls = _get_item_value(item, 'known_urls', [])
if isinstance(known_urls, str):
source_urls.append(known_urls)
elif isinstance(known_urls, list):
source_urls.extend(known_urls)
else:
title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown')
log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
if len(source_files) < 2:
log("At least 2 valid files required to merge", file=sys.stderr)
return 1
# Detect file types
file_types = set()
for f in source_files:
suffix = f.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
file_types.add('audio')
elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
file_types.add('video')
elif suffix in {'.pdf'}:
file_types.add('pdf')
elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
file_types.add('text')
else:
file_types.add('other')
if len(file_types) > 1 and 'other' not in file_types:
log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
log(f"Can only merge files of the same type", file=sys.stderr)
return 1
file_kind = list(file_types)[0] if file_types else 'other'
# Determine output format
output_format = format_spec or 'auto'
if output_format == 'auto':
        if file_kind == 'audio':
            output_format = 'mka'  # Default audio container; MKA supports chapters and stream copy
        elif file_kind == 'video':
            output_format = 'mp4'  # Default video container
elif file_kind == 'pdf':
output_format = 'pdf'
else:
output_format = 'txt'
# Determine output path
if output_override:
if output_override.is_dir():
            base_name = _sanitize_name(str(_get_item_value(files_to_merge[0], 'title', 'merged') or 'merged'))
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
else:
output_path = output_override
else:
first_file = source_files[0]
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
# Perform merge based on file type
if file_kind == 'audio':
success = _merge_audio(source_files, output_path, output_format)
elif file_kind == 'video':
success = _merge_video(source_files, output_path, output_format)
elif file_kind == 'pdf':
success = _merge_pdf(source_files, output_path)
elif file_kind == 'text':
success = _merge_text(source_files, output_path)
else:
log(f"Unsupported file type: {file_kind}", file=sys.stderr)
return 1
if not success:
log("Merge failed", file=sys.stderr)
return 1
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
# Create .tags sidecar file for the merged output using unified API
tags_path = output_path.with_suffix(output_path.suffix + '.tags')
try:
# Start with title tag
merged_tags = [f"title:{output_path.stem}"]
# Merge tags from source files using metadata API
if source_tags and HAS_METADATA_API:
# Use dedup function to normalize and deduplicate
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
# Fallback: simple deduplication if metadata API unavailable
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Write merged tags to sidecar file
if HAS_METADATA_API and write_tags_to_file:
# Use unified API for file writing
source_hashes_list = source_hashes if source_hashes else None
source_urls_list = source_urls if source_urls else None
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list)
else:
# Fallback: manual file writing
tags_lines = []
# Add hash first (if available)
if source_hashes:
tags_lines.append(f"hash:{source_hashes[0]}")
# Add regular tags
tags_lines.extend(merged_tags)
# Add known URLs
if source_urls:
for url in source_urls:
tags_lines.append(f"known_url:{url}")
# Add relationships (if available)
if source_relationships:
for rel in source_relationships:
tags_lines.append(f"relationship:{rel}")
with open(tags_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(tags_lines) + '\n')
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
# Also create .metadata file using centralized function
try:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
# Emit PipelineItem so the merged file can be piped to next command
try:
# Try to import PipelineItem from downlow module
try:
from downlow import PipelineItem
except ImportError:
# Fallback: create a simple object with the required attributes
class SimpleItem:
def __init__(self, target, title, media_kind, tags=None, known_urls=None):
self.target = target
self.title = title
self.media_kind = media_kind
self.tags = tags or []
self.known_urls = known_urls or []
PipelineItem = SimpleItem
merged_item = PipelineItem(
target=str(output_path),
title=output_path.stem,
media_kind=file_kind,
tags=merged_tags, # Include merged tags
known_urls=source_urls # Include known URLs
)
ctx.emit(merged_item)
except Exception as e:
log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
# Still emit a string representation for feedback
ctx.emit(f"Merged: {output_path}")
# Delete source files if requested
if delete_after:
# First delete all .tags files
for tags_file in source_tags_files:
try:
tags_file.unlink()
log(f"Deleted: {tags_file.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
# Then delete all source files
for f in source_files:
try:
f.unlink()
log(f"Deleted: {f.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
return 0
def _sanitize_name(text: str) -> str:
"""Sanitize filename."""
allowed = []
for ch in text:
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
return (" ".join("".join(allowed).split()) or "merged").strip()
def _ext_for_format(fmt: str) -> str:
"""Get file extension for format."""
format_map = {
'mp3': 'mp3',
'm4a': 'm4a',
'aac': 'aac',
'opus': 'opus',
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
'mkv': 'mkv',
'mp4': 'mp4',
'webm': 'webm',
'pdf': 'pdf',
'txt': 'txt',
'auto': 'mka', # Default - MKA for chapters
}
return format_map.get(fmt.lower(), 'mka')
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
"""Add chapters to an M4A file using mutagen.
Args:
file_path: Path to M4A file
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
Returns:
True if successful, False otherwise
"""
import logging
logger = logging.getLogger(__name__)
if not chapters:
return True
    try:
        from mutagen.mp4 import MP4
    except ImportError:
        logger.warning("[merge-file] mutagen not available for chapter writing")
        return False
    try:
        # Confirm the file parses as an MP4 container
        MP4(str(file_path))
        # mutagen has no built-in chapter writing for MP4; embedding chapters would
        # require low-level 'chap'/'chpl' atom manipulation, so note the limitation
        # and report failure so the caller can fall back.
        logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
        return False
except Exception as e:
logger.warning(f"[merge-file] Error writing chapters: {e}")
return False
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge audio files with chapters based on file boundaries."""
import logging
logger = logging.getLogger(__name__)
ffmpeg_path = _shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
try:
# Step 1: Get duration of each file to calculate chapter timestamps
chapters = []
current_time_ms = 0
log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
logger.info(f"[merge-file] Analyzing files for chapters")
for file_path in files:
# Get duration using ffprobe
try:
ffprobe_cmd = [
'ffprobe', '-v', 'error', '-show_entries',
'format=duration', '-print_format',
'default=noprint_wrappers=1:nokey=1', str(file_path)
]
probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
if probe_result.returncode == 0 and probe_result.stdout.strip():
try:
duration_sec = float(probe_result.stdout.strip())
except ValueError:
logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
duration_sec = 0
else:
logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
duration_sec = 0
except Exception as e:
logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
duration_sec = 0
# Create chapter entry - use title: tag from metadata if available
title = file_path.stem # Default to filename without extension
if HAS_METADATA_API:
try:
# Try to read tags from .tags sidecar file
tags_file = file_path.with_suffix(file_path.suffix + '.tags')
if tags_file.exists():
tags = read_tags_from_file(tags_file)
if tags:
# Look for title: tag
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith('title:'):
# Extract the title value after the colon
title = tag.split(':', 1)[1].strip()
break
except Exception as e:
logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
pass # Fall back to filename
            # Convert the cumulative start time (ms) to HH:MM:SS.mmm format
hours = int(current_time_ms // 3600000)
minutes = int((current_time_ms % 3600000) // 60000)
seconds = int((current_time_ms % 60000) // 1000)
millis = int(current_time_ms % 1000)
chapters.append({
'time_ms': current_time_ms,
'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
'title': title,
'duration_sec': duration_sec
})
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
current_time_ms += int(duration_sec * 1000)
# Step 2: Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
concat_lines = []
for f in files:
# Escape quotes in path
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
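        # The list uses ffmpeg's concat-demuxer syntax, e.g. (illustrative paths):
        #   file '/path/to/part 1.mp3'
        #   file '/path/to/part 2.mp3'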
# Step 3: Create FFmpeg metadata file with chapters
metadata_file = output.parent / f".metadata_{output.stem}.txt"
metadata_lines = [';FFMETADATA1']
for i, chapter in enumerate(chapters):
# FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
metadata_lines.append('[CHAPTER]')
metadata_lines.append('TIMEBASE=1/1000')
metadata_lines.append(f'START={chapter["time_ms"]}')
# Calculate end time (start of next chapter or end of file)
if i < len(chapters) - 1:
metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
else:
metadata_lines.append(f'END={current_time_ms}')
metadata_lines.append(f'title={chapter["title"]}')
metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
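        # For reference, the generated FFMETADATA file looks roughly like (illustrative values):
        #   ;FFMETADATA1
        #   [CHAPTER]
        #   TIMEBASE=1/1000
        #   START=0
        #   END=183000
        #   title=Part 1
        # with one [CHAPTER] block per source file.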
log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
logger.info(f"[merge-file] Created {len(chapters)} chapters")
# Step 4: Build FFmpeg command to merge and embed chapters
# Strategy: First merge audio, then add metadata in separate pass
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
# Add threading options for speed
cmd.extend(['-threads', '0']) # Use all available threads
# Audio codec selection for first input
if output_format == 'mp3':
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
        elif output_format == 'm4a':
            # Re-encode to AAC: the sources may use different codecs, so stream copy
            # is not generally safe here. 256k keeps quality reasonably high.
            cmd.extend(['-c:a', 'aac', '-b:a', '256k'])
elif output_format == 'aac':
cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'opus':
cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
        elif output_format == 'mka':
            # Matroska audio can hold most codecs, so stream copy (no re-encode)
            # is both the fastest and the safest default here.
            cmd.extend(['-c:a', 'copy'])
else:
cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
# Add the output file
cmd.append(str(output))
log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
# Run ffmpeg with progress monitoring
try:
from helper.progress import print_progress, print_final_progress
import re
process = _subprocess.Popen(
cmd,
stdout=_subprocess.PIPE,
stderr=_subprocess.PIPE,
text=True,
encoding='utf-8',
errors='replace'
)
# Monitor progress
duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
total_duration_sec = current_time_ms / 1000.0
while True:
# Read stderr line by line (ffmpeg writes progress to stderr)
if process.stderr:
line = process.stderr.readline()
if not line and process.poll() is not None:
break
if line:
# Parse time=HH:MM:SS.mm
match = duration_re.search(line)
if match and total_duration_sec > 0:
h, m, s, cs = map(int, match.groups())
current_sec = h * 3600 + m * 60 + s + cs / 100.0
# Calculate speed/bitrate if available (optional)
# For now just show percentage
print_progress(
output.name,
int(current_sec * 1000), # Use ms as "bytes" for progress bar
int(total_duration_sec * 1000),
speed=0
)
else:
break
# Wait for completion
stdout, stderr = process.communicate()
if process.returncode != 0:
log(f"FFmpeg error: {stderr}", file=sys.stderr)
raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
print_final_progress(output.name, int(total_duration_sec * 1000), 0)
except Exception as e:
logger.exception(f"[merge-file] ffmpeg process error: {e}")
raise
log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
# Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
if output_format == 'mka' or output.suffix.lower() == '.mka':
# MKA/MKV format has native chapter support via FFMetadata
# Re-mux the file with chapters embedded (copy streams, no re-encode)
log(f"Embedding chapters into Matroska container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
temp_output = output.parent / f".temp_{output.stem}.mka"
# Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
mkvmerge_path = _shutil.which('mkvmerge')
            if mkvmerge_path:
                # mkvmerge expects chapters in Matroska XML or simple (OGM-style) format,
                # not FFMETADATA, so write a simple chapter file for it.
                ogm_chapters_file = output.parent / f".chapters_{output.stem}.txt"
                ogm_lines = []
                for i, chapter in enumerate(chapters, 1):
                    ogm_lines.append(f"CHAPTER{i:02d}={chapter['time_str']}")
                    ogm_lines.append(f"CHAPTER{i:02d}NAME={chapter['title']}")
                ogm_chapters_file.write_text('\n'.join(ogm_lines), encoding='utf-8')
                log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
                cmd2 = [
                    mkvmerge_path, '-o', str(temp_output),
                    '--chapters', str(ogm_chapters_file),
                    str(output)
                ]
else:
# Fallback to ffmpeg with proper chapter embedding for Matroska
log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
# For Matroska files, the metadata must be provided via -f ffmetadata input
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
]
logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
try:
# Run chapter embedding silently (progress handled by worker thread)
_subprocess.run(
cmd2,
capture_output=True,
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
log(f"✓ Chapters successfully embedded!", file=sys.stderr)
logger.info(f"[merge-file] Chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
try:
temp_output.unlink()
except Exception:
pass
else:
logger.warning(f"[merge-file] Chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
# MP4/M4A format has native chapter support via iTunes metadata atoms
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
# ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
cmd2 = [
ffmpeg_path, '-y',
'-i', str(output), # Input: merged audio
'-i', str(metadata_file), # Input: FFMetadata file
'-c:a', 'copy', # Copy audio without re-encoding
'-threads', '0', # Use all threads
'-map', '0', # Map all from first input
'-map_metadata', '1', # Map metadata from second input (FFMetadata)
'-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
str(temp_output) # Output
]
logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
try:
# Run MP4 chapter embedding silently (progress handled by worker thread)
_subprocess.run(
cmd2,
capture_output=True,
text=True,
stdin=_subprocess.DEVNULL,
timeout=600,
check=False
)
# Replace original with temp if successful
if temp_output.exists() and temp_output.stat().st_size > 0:
try:
import shutil
if output.exists():
output.unlink()
shutil.move(str(temp_output), str(output))
log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
logger.info(f"[merge-file] MP4 chapters embedded successfully")
except Exception as e:
logger.warning(f"[merge-file] Could not replace file: {e}")
log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
try:
temp_output.unlink()
except Exception:
pass
else:
logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
except Exception as e:
logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
else:
# For other formats, chapters would require external tools
logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
        # Clean up temp files
        try:
            concat_file.unlink()
        except Exception:
            pass
        try:
            metadata_file.unlink()
        except Exception:
            pass
        if ogm_chapters_file is not None:
            try:
                ogm_chapters_file.unlink()
            except Exception:
                pass
return True
except Exception as e:
log(f"Audio merge error: {e}", file=sys.stderr)
logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
return False
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge video files."""
ffmpeg_path = _shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
try:
# Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
concat_lines = []
for f in files:
safe_path = str(f).replace("'", "'\\''")
concat_lines.append(f"file '{safe_path}'")
concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
# Build FFmpeg command for video merge
cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
# Video codec selection
if output_format == 'mp4':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
elif output_format == 'mkv':
cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
else:
cmd.extend(['-c', 'copy']) # Copy without re-encoding
cmd.append(str(output))
log(f"Merging {len(files)} video files...", file=sys.stderr)
result = _subprocess.run(cmd, capture_output=True, text=True)
# Clean up concat file
try:
concat_file.unlink()
except Exception:
pass
if result.returncode != 0:
stderr = (result.stderr or '').strip()
log(f"FFmpeg error: {stderr}", file=sys.stderr)
return False
return True
except Exception as e:
log(f"Video merge error: {e}", file=sys.stderr)
return False
def _merge_text(files: List[Path], output: Path) -> bool:
"""Merge text files."""
try:
with open(output, 'w', encoding='utf-8') as outf:
for i, f in enumerate(files):
if i > 0:
outf.write('\n---\n') # Separator between files
try:
content = f.read_text(encoding='utf-8', errors='replace')
outf.write(content)
except Exception as e:
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
return True
except Exception as e:
log(f"Text merge error: {e}", file=sys.stderr)
return False
def _merge_pdf(files: List[Path], output: Path) -> bool:
"""Merge PDF files."""
if not HAS_PYPDF2:
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
return False
    try:
        writer = PdfWriter()
for f in files:
try:
reader = PdfReader(f)
for page in reader.pages:
writer.add_page(page)
log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
except Exception as e:
log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
return False
with open(output, 'wb') as outf:
writer.write(outf)
return True
except Exception as e:
log(f"PDF merge error: {e}", file=sys.stderr)
return False
CMDLET = Cmdlet(
name="merge-file",
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|aac|opus|mp4|mkv|pdf|txt>]",
args=[
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
CmdletArg("-output", description="Override output file path."),
CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
],
details=[
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
"- Audio files merge with minimal quality loss using specified codec.",
"- Video files merge into MP4 or MKV containers.",
"- PDF files merge into a single PDF document.",
"- Text/document files are concatenated.",
"- Output name derived from first file with ' (merged)' suffix.",
"- -delete flag removes all source files after successful merge.",
],
)

335
cmdlets/pipe.py Normal file
View File

@@ -0,0 +1,335 @@
from typing import Any, Dict, Sequence, List, Optional
import sys
import json
import platform
import socket
import re
import subprocess
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
from .get_file import _get_fixed_ipc_pipe
import pipeline as ctx
def _send_ipc_command(command: Dict[str, Any]) -> Optional[Any]:
"""Send a command to the MPV IPC pipe and return the response."""
ipc_pipe = _get_fixed_ipc_pipe()
request = json.dumps(command) + "\n"
try:
if platform.system() == 'Windows':
# Windows named pipe
# Opening in r+b mode to read response
try:
with open(ipc_pipe, 'r+b', buffering=0) as pipe:
pipe.write(request.encode('utf-8'))
pipe.flush()
# Read response
# We'll try to read a line. This might block if MPV is unresponsive.
response_line = pipe.readline()
if response_line:
return json.loads(response_line.decode('utf-8'))
except FileNotFoundError:
return None # MPV not running
except Exception as e:
log(f"Windows IPC Error: {e}", file=sys.stderr)
return None
else:
# Unix socket
af_unix = getattr(socket, 'AF_UNIX', None)
if af_unix is None:
log("Unix sockets not supported on this platform", file=sys.stderr)
return None
try:
sock = socket.socket(af_unix, socket.SOCK_STREAM)
sock.settimeout(2.0)
sock.connect(ipc_pipe)
sock.sendall(request.encode('utf-8'))
# Read response
response_data = b""
while True:
try:
chunk = sock.recv(4096)
if not chunk:
break
response_data += chunk
if b"\n" in chunk:
break
except socket.timeout:
break
sock.close()
if response_data:
# Parse lines, look for response to our request
lines = response_data.decode('utf-8').strip().split('\n')
for line in lines:
try:
resp = json.loads(line)
# If it has 'error' field, it's a response
if 'error' in resp:
return resp
                        except Exception:
                            pass
except (FileNotFoundError, ConnectionRefusedError):
return None # MPV not running
except Exception as e:
log(f"Unix IPC Error: {e}", file=sys.stderr)
return None
except Exception as e:
log(f"IPC Error: {e}", file=sys.stderr)
return None
return None
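# For reference, MPV's JSON IPC exchanges one JSON object per line, roughly:
#   request : {"command": ["get_property", "playlist"], "request_id": 100}
#   response: {"data": [...], "request_id": 100, "error": "success"}
# (illustrative shape; exact fields depend on the MPV version)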
def _get_playlist() -> List[Dict[str, Any]]:
"""Get the current playlist from MPV."""
cmd = {"command": ["get_property", "playlist"], "request_id": 100}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
return resp.get("data", [])
return []
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Manage and play items in the MPV playlist via IPC."""
parsed = parse_cmdlet_args(args, CMDLET)
# Handle positional index argument if provided
index_arg = parsed.get("index")
clear_mode = parsed.get("clear")
list_mode = parsed.get("list")
# Handle piped input (add to playlist)
if result:
# If result is a list of items, add them to playlist
items_to_add = []
        if isinstance(result, list):
            items_to_add = result
        else:
            items_to_add = [result]
added_count = 0
for i, item in enumerate(items_to_add):
# Extract URL/Path
target = None
title = None
if isinstance(item, dict):
target = item.get("target") or item.get("url") or item.get("path")
title = item.get("title") or item.get("name")
elif hasattr(item, "target"):
target = item.target
title = getattr(item, "title", None)
elif isinstance(item, str):
target = item
if target:
# Add to MPV playlist
# We use loadfile with append flag
# Configure 1080p limit for streams (bestvideo<=1080p + bestaudio)
options = {
"ytdl-format": "bestvideo[height<=?1080]+bestaudio/best[height<=?1080]"
}
if title:
options["force-media-title"] = title
cmd = {"command": ["loadfile", target, "append", options], "request_id": 200}
resp = _send_ipc_command(cmd)
if resp is None:
# MPV not running (or died)
# Start MPV with remaining items
_start_mpv(items_to_add[i:])
return 0
elif resp.get("error") == "success":
added_count += 1
if title:
log(f"Queued: {title}")
else:
log(f"Queued: {target}")
        if added_count > 0:
            # Items were queued; fall through to listing the playlist below.
            # (Auto-playing the first queued item could be added here later.)
            pass
# Get playlist from MPV
items = _get_playlist()
if not items:
log("MPV playlist is empty or MPV is not running.")
return 0
# If index is provided, perform action (Play or Clear)
if index_arg is not None:
try:
# Handle 1-based index
idx = int(index_arg) - 1
if idx < 0 or idx >= len(items):
log(f"Index {index_arg} out of range (1-{len(items)}).")
return 1
item = items[idx]
title = item.get("title") or item.get("filename") or "Unknown"
if clear_mode:
# Remove item
cmd = {"command": ["playlist-remove", idx], "request_id": 101}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
log(f"Removed: {title}")
# Refresh items for listing
items = _get_playlist()
list_mode = True
index_arg = None
else:
log(f"Failed to remove item: {resp.get('error') if resp else 'No response'}")
return 1
else:
# Play item
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
log(f"Playing: {title}")
return 0
else:
log(f"Failed to play item: {resp.get('error') if resp else 'No response'}")
return 1
except ValueError:
log(f"Invalid index: {index_arg}")
return 1
# List items (Default action or after clear)
if list_mode or index_arg is None:
if not items:
log("MPV playlist is empty.")
return 0
table = ResultTable("MPV Playlist")
for i, item in enumerate(items):
is_current = item.get("current", False)
title = item.get("title") or ""
filename = item.get("filename") or ""
# Special handling for memory:// M3U playlists (used to pass titles via IPC)
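            # e.g. an entry queued via a memory:// M3U packs the whole playlist text
            # into the filename string, roughly (illustrative):
            #   memory://#EXTM3U
            #   #EXTINF:-1,Some Title
            #   https://example.com/stream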
if "memory://" in filename and "#EXTINF:" in filename:
try:
# Extract title from #EXTINF:-1,Title
# Use regex to find title between #EXTINF:-1, and newline
match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", filename)
if match:
extracted_title = match.group(1).strip()
if not title or title == "memory://":
title = extracted_title
# Extract actual URL
# Find the first line that looks like a URL and not a directive
lines = filename.splitlines()
for line in lines:
line = line.strip()
if line and not line.startswith('#') and not line.startswith('memory://'):
filename = line
break
except Exception:
pass
# Truncate if too long
if len(title) > 57:
title = title[:57] + "..."
if len(filename) > 27:
filename = filename[:27] + "..."
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Current", "*" if is_current else "")
row.add_column("Title", title)
row.add_column("Filename", filename)
table.set_row_selection_args(i, [str(i + 1)])
table.set_source_command(".pipe")
# Register results with pipeline context so @N selection works
ctx.set_last_result_table_overlay(table, items)
ctx.set_current_stage_table(table)
print(table)
return 0
def _start_mpv(items: List[Any]) -> None:
"""Start MPV with a list of items."""
ipc_pipe = _get_fixed_ipc_pipe()
cmd = ['mpv', f'--input-ipc-server={ipc_pipe}']
cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]')
# Add items
first_title_set = False
for item in items:
target = None
title = None
if isinstance(item, dict):
target = item.get("target") or item.get("url") or item.get("path")
title = item.get("title") or item.get("name")
elif hasattr(item, "target"):
target = item.target
title = getattr(item, "title", None)
elif isinstance(item, str):
target = item
if target:
if not first_title_set and title:
cmd.append(f'--force-media-title={title}')
first_title_set = True
cmd.append(target)
if len(cmd) > 3: # mpv + ipc + format + at least one file
try:
kwargs = {}
if platform.system() == 'Windows':
kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs)
log(f"Started MPV with {len(cmd)-3} items")
except Exception as e:
log(f"Error starting MPV: {e}", file=sys.stderr)
CMDLET = Cmdlet(
name=".pipe",
aliases=["pipe", "playlist", "queue", "ls-pipe"],
summary="Manage and play items in the MPV playlist via IPC",
usage=".pipe [index] [-clear]",
args=[
CmdletArg(
name="index",
type="int",
description="Index of item to play or clear",
required=False
),
CmdletArg(
name="clear",
type="flag",
description="Remove the selected item from the playlist"
),
CmdletArg(
name="list",
type="flag",
description="List items (default)"
),
],
exec=_run
)
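# Illustrative usage (hedged; exact pipeline syntax depends on the surrounding CLI):
#   .pipe              -> list the current MPV playlist
#   .pipe 3            -> play playlist entry #3
#   .pipe 3 -clear     -> remove entry #3 from the playlist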

739
cmdlets/screen_shot.py Normal file
View File

@@ -0,0 +1,739 @@
"""Screen-shot cmdlet for capturing screenshots of URLs in a pipeline.
This cmdlet processes files through the pipeline and creates screenshots using
Playwright, marking them as temporary artifacts for cleanup.
"""
from __future__ import annotations
import contextlib
import hashlib
import importlib
import sys
import time
import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import urlsplit, quote, urljoin
from helper.logger import log
from helper.http_client import HTTPClient
from . import register
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input
import models
import pipeline as pipeline_context
# ============================================================================
# CMDLET Metadata Declaration
# ============================================================================
# ============================================================================
# Playwright & Screenshot Dependencies
# ============================================================================
try:
from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
ViewportSize,
sync_playwright,
)
except Exception as exc:
raise RuntimeError(
"playwright is required for screenshot capture; install with 'pip install playwright'"
) from exc
try:
from config import resolve_output_dir
except ImportError:
try:
_parent_dir = str(Path(__file__).parent.parent)
if _parent_dir not in sys.path:
sys.path.insert(0, _parent_dir)
from config import resolve_output_dir
except ImportError:
resolve_output_dir = None
# ============================================================================
# Screenshot Constants & Configuration
# ============================================================================
USER_AGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120.0.0.0 Safari/537.36"
)
DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200}
ARCHIVE_TIMEOUT = 30.0
class ScreenshotError(RuntimeError):
"""Raised when screenshot capture or upload fails."""
@dataclass(slots=True)
class ScreenshotOptions:
"""Options controlling screenshot capture and post-processing."""
url: str
output_dir: Path
output_path: Optional[Path] = None
full_page: bool = True
headless: bool = True
wait_after_load: float = 2.0
wait_for_article: bool = False
replace_video_posters: bool = True
tags: Sequence[str] = ()
archive: bool = False
archive_timeout: float = ARCHIVE_TIMEOUT
known_urls: Sequence[str] = ()
output_format: Optional[str] = None
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
selector_timeout_ms: int = 10_000
@dataclass(slots=True)
class ScreenshotResult:
"""Details about the captured screenshot."""
path: Path
url: str
tags_applied: List[str]
archive_urls: List[str]
known_urls: List[str]
warnings: List[str] = field(default_factory=list)
# ============================================================================
# Helper Functions
# ============================================================================
def _ensure_directory(path: Path) -> None:
"""Ensure directory exists."""
if not isinstance(path, Path):
path = Path(path)
path.mkdir(parents=True, exist_ok=True)
def _unique_path(path: Path) -> Path:
"""Get unique path by appending numbers if file exists."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
new_path = parent / f"{stem}_{counter}{suffix}"
if not new_path.exists():
return new_path
counter += 1
def _unique_preserve_order(items: Sequence[str]) -> List[str]:
"""Remove duplicates while preserving order."""
seen = set()
result = []
for item in items:
if item not in seen:
seen.add(item)
result.append(item)
return result
def _slugify_url(url: str) -> str:
"""Convert URL to filesystem-safe slug."""
parsed = urlsplit(url)
candidate = f"{parsed.netloc}{parsed.path}"
if parsed.query:
candidate += f"?{parsed.query}"
slug = "".join(char if char.isalnum() else "-" for char in candidate.lower())
slug = slug.strip("-") or "screenshot"
return slug[:100]
def _normalise_format(fmt: Optional[str]) -> str:
"""Normalize output format to valid values."""
if not fmt:
return "png"
value = fmt.strip().lower()
if value in {"jpg", "jpeg"}:
return "jpeg"
if value in {"png", "pdf"}:
return value
return "png"
def _format_suffix(fmt: str) -> str:
"""Get file suffix for format."""
if fmt == "jpeg":
return ".jpg"
return f".{fmt}"
def _selectors_for_url(url: str) -> List[str]:
"""Return a list of likely content selectors for known platforms."""
u = url.lower()
sels: List[str] = []
# Twitter/X
if "twitter.com" in u or "x.com" in u:
sels.extend([
"article[role='article']",
"div[data-testid='tweet']",
"div[data-testid='cellInnerDiv'] article",
])
# Instagram
if "instagram.com" in u:
sels.extend([
"article[role='presentation']",
"article[role='article']",
"div[role='dialog'] article",
"section main article",
])
# Reddit
if "reddit.com" in u:
sels.extend([
"shreddit-post",
"div[data-testid='post-container']",
"div[data-click-id='background']",
"article",
])
# Rumble (video post)
if "rumble.com" in u:
sels.extend([
"rumble-player, iframe.rumble",
"div.video-item--main",
"main article",
])
return sels or ["article"]
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
u = url.lower()
def _try_click_texts(texts: List[str], passes: int = 2, per_timeout: int = 700) -> int:
clicks = 0
for _ in range(max(1, passes)):
for t in texts:
try:
page.locator(f"text=/{t}/i").first.click(timeout=per_timeout)
clicks += 1
except PlaywrightTimeoutError:
pass
except Exception:
pass
time.sleep(0.1)
return clicks
# Dismiss common cookie/consent prompts
_try_click_texts(["accept", "i agree", "agree", "got it", "allow all", "consent"])
# Platform-specific expansions
if "reddit.com" in u:
_try_click_texts(["see more", "read more", "show more", "more"])
if ("twitter.com" in u) or ("x.com" in u):
_try_click_texts(["show more", "more"])
if "instagram.com" in u:
_try_click_texts(["more", "see more"])
if "tiktok.com" in u:
_try_click_texts(["more", "see more"])
if ("facebook.com" in u) or ("fb.watch" in u):
_try_click_texts(["see more", "show more", "more"])
if "rumble.com" in u:
_try_click_texts(["accept", "agree", "close"])
def _submit_wayback(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Internet Archive Wayback Machine."""
encoded = quote(url, safe="/:?=&")
with HTTPClient() as client:
response = client.get(f"https://web.archive.org/save/{encoded}")
response.raise_for_status()
content_location = response.headers.get("Content-Location")
if content_location:
return urljoin("https://web.archive.org", content_location)
return str(response.url)
def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Archive.today."""
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
response = client.get(f"https://archive.today/submit/?url={encoded}")
response.raise_for_status()
final = str(response.url)
if final and ("archive.today" in final or "archive.ph" in final):
return final
return None
def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
"""Submit URL to Archive.ph."""
encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
response = client.get(f"https://archive.ph/submit/?url={encoded}")
response.raise_for_status()
final = str(response.url)
if final and "archive.ph" in final:
return final
return None
def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
"""Submit URL to all available archive services."""
archives: List[str] = []
warnings: List[str] = []
for submitter, label in (
(_submit_wayback, "wayback"),
(_submit_archive_today, "archive.today"),
(_submit_archive_ph, "archive.ph"),
):
try:
log(f"Archiving to {label}...", flush=True)
archived = submitter(url, timeout)
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 429:
warnings.append(f"archive {label} rate limited (HTTP 429)")
log(f"{label}: Rate limited (HTTP 429)", flush=True)
else:
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
except httpx.RequestError as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: Connection error: {exc}", flush=True)
except Exception as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: {exc}", flush=True)
else:
if archived:
archives.append(archived)
log(f"{label}: Success - {archived}", flush=True)
else:
log(f"{label}: No archive link returned", flush=True)
return archives, warnings
def _prepare_output_path(options: ScreenshotOptions) -> Path:
"""Prepare and validate output path for screenshot."""
_ensure_directory(options.output_dir)
explicit_format = _normalise_format(options.output_format) if options.output_format else None
inferred_format: Optional[str] = None
if options.output_path is not None:
path = options.output_path
if not path.is_absolute():
path = options.output_dir / path
suffix = path.suffix.lower()
if suffix:
inferred_format = _normalise_format(suffix[1:])
else:
stamp = time.strftime("%Y%m%d_%H%M%S")
filename = f"{_slugify_url(options.url)}_{stamp}"
path = options.output_dir / filename
final_format = explicit_format or inferred_format or "png"
if not path.suffix:
path = path.with_suffix(_format_suffix(final_format))
else:
current_suffix = path.suffix.lower()
expected = _format_suffix(final_format)
if current_suffix != expected:
path = path.with_suffix(expected)
options.output_format = final_format
return _unique_path(path)
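# For example (illustrative), capturing https://example.com/a/b?x=1 with the default
# PNG format would yield something like:
#   <output_dir>/example-com-a-b-x-1_20251125_200933.png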
def _capture_with_playwright(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
"""Capture screenshot using Playwright."""
playwright = None
browser = None
context = None
try:
log("Starting Playwright...", flush=True)
playwright = sync_playwright().start()
log("Launching Chromium browser...", flush=True)
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
if format_name == "pdf" and not options.headless:
warnings.append("pdf output requires headless Chromium; overriding headless mode")
browser = playwright.chromium.launch(
headless=headless,
args=["--disable-blink-features=AutomationControlled"],
)
log("Creating browser context...", flush=True)
context = browser.new_context(
user_agent=USER_AGENT,
viewport=DEFAULT_VIEWPORT,
ignore_https_errors=True,
)
page = context.new_page()
log(f"Navigating to {options.url}...", flush=True)
try:
page.goto(options.url, timeout=90_000, wait_until="domcontentloaded")
log("Page loaded successfully", flush=True)
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
log("Waiting for article element...", flush=True)
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
if (v.poster) {
const img = document.createElement('img');
img.src = v.poster;
img.style.maxWidth = '100%';
img.style.borderRadius = '12px';
v.replaceWith(img);
}
});
"""
)
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
try:
_platform_preprocess(options.url, page, warnings)
except Exception:
pass
selectors = list(options.target_selectors or [])
if not selectors:
selectors = _selectors_for_url(options.url)
for sel in selectors:
try:
log(f"Trying selector: {sel}", flush=True)
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
continue
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
else:
log(f"Capturing full page to {destination}...", flush=True)
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
if options.full_page:
page.screenshot(full_page=True, **screenshot_kwargs)
else:
article = page.query_selector("article")
if article is not None:
article_kwargs = dict(screenshot_kwargs)
article_kwargs.pop("full_page", None)
article.screenshot(**article_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
except Exception as exc:
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
finally:
log("Cleaning up browser resources...", flush=True)
with contextlib.suppress(Exception):
if context is not None:
context.close()
with contextlib.suppress(Exception):
if browser is not None:
browser.close()
with contextlib.suppress(Exception):
if playwright is not None:
playwright.stop()
log("Cleanup complete", flush=True)
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
"""Capture a screenshot for the given options."""
destination = _prepare_output_path(options)
warnings: List[str] = []
_capture_with_playwright(options, destination, warnings)
known_urls = _unique_preserve_order([options.url, *options.known_urls])
archive_urls: List[str] = []
if options.archive:
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
archive_urls.extend(archives)
warnings.extend(archive_warnings)
if archives:
known_urls = _unique_preserve_order([*known_urls, *archives])
applied_tags = _unique_preserve_order(list(tag for tag in options.tags if tag.strip()))
return ScreenshotResult(
path=destination,
url=options.url,
tags_applied=applied_tags,
archive_urls=archive_urls,
known_urls=known_urls,
warnings=warnings,
)
# ============================================================================
# Main Cmdlet Function
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of URLs in the pipeline.
Accepts:
- Single result object (dict or PipeObject) with 'file_path' field
- List of result objects to screenshot each
- Direct URL as string
Emits PipeObject-formatted results for each screenshot with:
- action: 'cmdlet:screen-shot'
- is_temp: True (screenshots are temporary artifacts)
- parent_id: hash of the original file/URL
Screenshots are created using Playwright and marked as temporary
so they can be cleaned up later with the cleanup cmdlet.
"""
from ._shared import parse_cmdlet_args
# Help check
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2, default=str))
return 0
except Exception:
pass
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
parsed = parse_cmdlet_args(args, CMDLET)
format_value = parsed.get("format")
storage_value = parsed.get("storage")
selector_arg = parsed.get("selector")
selectors = [selector_arg] if selector_arg else []
archive_enabled = parsed.get("archive", False)
# Positional URL argument (if provided)
url_arg = parsed.get("url")
positional_urls = [str(url_arg)] if url_arg else []
# ========================================================================
# INPUT PROCESSING - Extract URLs from pipeline or command arguments
# ========================================================================
piped_results = normalize_result_input(result)
urls_to_process = []
# Extract URLs from piped results
if piped_results:
for item in piped_results:
url = None
if isinstance(item, dict):
url = item.get('file_path') or item.get('path') or item.get('url') or item.get('target')
else:
url = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'url', None) or getattr(item, 'target', None)
if url:
urls_to_process.append(str(url))
# Use positional arguments if no pipeline input
if not urls_to_process and positional_urls:
urls_to_process = positional_urls
if not urls_to_process:
log(f"No URLs to process for screen-shot cmdlet", file=sys.stderr)
return 1
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
# ========================================================================
screenshot_dir: Optional[Path] = None
# Primary: Use --storage if provided (highest priority)
if storage_value:
try:
screenshot_dir = SharedArgs.resolve_storage(storage_value)
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
# Secondary: Use config-based resolver ONLY if --storage not provided
if screenshot_dir is None and resolve_output_dir is not None:
try:
screenshot_dir = resolve_output_dir(config)
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
except Exception:
pass
# Tertiary: Use config outfile ONLY if neither --storage nor resolver worked
if screenshot_dir is None and config and config.get("outfile"):
try:
screenshot_dir = Path(config["outfile"]).expanduser()
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
_ensure_directory(screenshot_dir)
# ========================================================================
# PREPARE SCREENSHOT OPTIONS
# ========================================================================
format_name = _normalise_format(format_value)
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
target_selectors = filtered_selectors if filtered_selectors else None
all_emitted = []
exit_code = 0
# ========================================================================
# PROCESS URLs AND CAPTURE SCREENSHOTS
# ========================================================================
for url in urls_to_process:
# Validate URL format
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
continue
try:
# Create screenshot with provided options
options = ScreenshotOptions(
url=url,
output_dir=screenshot_dir,
output_format=format_name,
archive=archive_enabled,
target_selectors=target_selectors,
prefer_platform_target=False,
wait_for_article=False,
full_page=True,
)
screenshot_result = _capture_screenshot(options)
# Log results and warnings
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
if screenshot_result.archive_urls:
log(f"Archives: {', '.join(screenshot_result.archive_urls)}", flush=True)
for warning in screenshot_result.warnings:
log(f"Warning: {warning}", flush=True)
# Compute hash of screenshot file
screenshot_hash = None
try:
with open(screenshot_result.path, 'rb') as f:
screenshot_hash = hashlib.sha256(f.read()).hexdigest()
except Exception:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
pipe_obj = create_pipe_object_result(
source='screenshot',
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
title=f"Screenshot: {Path(screenshot_result.path).name}",
file_hash=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
extra={
'source_url': url,
'archive_urls': screenshot_result.archive_urls,
'known_urls': screenshot_result.known_urls,
'target': str(screenshot_result.path), # Explicit target for add-file
}
)
# Emit the result so downstream cmdlets (like add-file) can use it
pipeline_context.emit(pipe_obj)
all_emitted.append(pipe_obj)
except ScreenshotError as exc:
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
exit_code = 1
except Exception as exc:
log(f"Unexpected error taking screenshot of {url}: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
exit_code = 1
if not all_emitted:
log(f"No screenshots were successfully captured", file=sys.stderr)
return 1
# Log completion message
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
return exit_code
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a screenshot of a URL or file and mark as temporary artifact",
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
aliases=["screenshot", "ss"],
args=[
CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"),
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.ARCHIVE, # Use shared archive argument
SharedArgs.STORAGE, # Use shared storage argument
],
details=[
"Take screenshots of URLs with optional archiving and element targeting.",
"Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.",
"",
"Arguments:",
" url URL to capture (optional if piped from pipeline)",
" --format FORMAT Output format: png (default), jpeg, or pdf",
" --selector SEL CSS selector for capturing specific element",
" --archive, -arch Archive URL to Wayback/Archive.today/Archive.ph",
" --storage LOCATION Storage destination: hydrus, local, 0x0, debrid, or ftp",
"",
"Examples:",
" download-data https://example.com | screen-shot --storage local",
" download-data https://twitter.com/user/status/123 | screen-shot --selector 'article[role=article]' --storage hydrus --archive",
" screen-shot https://example.com --format jpeg --storage 0x0 --archive",
]
)

351
cmdlets/search_file.py Normal file
View File

@@ -0,0 +1,351 @@
"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable
from fnmatch import fnmatchcase
from pathlib import Path
from dataclasses import dataclass, field
import json
import os
import sys
from helper.logger import log, debug
import shutil
import subprocess
from helper.file_storage import FileStorage
from helper.search_provider import get_provider, list_providers, SearchResult
from metadata import import_pending_sidecars
from . import register
from ._shared import Cmdlet, CmdletArg
import models
import pipeline as ctx
# Optional dependencies
try:
import mutagen # type: ignore
except ImportError: # pragma: no cover
mutagen = None # type: ignore
try:
from config import get_hydrus_url, resolve_output_dir
except Exception: # pragma: no cover
get_hydrus_url = None # type: ignore
resolve_output_dir = None # type: ignore
try:
from helper.hydrus import HydrusClient, HydrusRequestError
except ImportError: # pragma: no cover
HydrusClient = None # type: ignore
HydrusRequestError = RuntimeError # type: ignore
try:
from helper.utils import sha256_file
except ImportError: # pragma: no cover
sha256_file = None # type: ignore
try:
from helper.utils_constant import mime_maps
except ImportError: # pragma: no cover
mime_maps = {} # type: ignore
# ============================================================================
# Data Classes (from helper/search.py)
# ============================================================================
@dataclass(slots=True)
class SearchRecord:
path: str
size_bytes: int | None = None
duration_seconds: str | None = None
tags: str | None = None
hash_hex: str | None = None
def as_dict(self) -> dict[str, str]:
payload: dict[str, str] = {"path": self.path}
if self.size_bytes is not None:
payload["size"] = str(self.size_bytes)
if self.duration_seconds:
payload["duration"] = self.duration_seconds
if self.tags:
payload["tags"] = self.tags
if self.hash_hex:
payload["hash"] = self.hash_hex
return payload
@dataclass
class ResultItem:
origin: str
title: str
detail: str
annotations: List[str]
target: str
media_kind: str = "other"
hash_hex: Optional[str] = None
columns: List[tuple[str, str]] = field(default_factory=list)
tag_summary: Optional[str] = None
duration_seconds: Optional[float] = None
size_bytes: Optional[int] = None
full_metadata: Optional[Dict[str, Any]] = None
tags: Optional[set[str]] = field(default_factory=set)
relationships: Optional[List[str]] = field(default_factory=list)
known_urls: Optional[List[str]] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"title": self.title,
}
# Always include these core fields for downstream cmdlets (get-file, download-data, etc)
payload["origin"] = self.origin
payload["target"] = self.target
payload["media_kind"] = self.media_kind
# Always include full_metadata if present (needed by download-data, etc)
# This is NOT for display, but for downstream processing
if self.full_metadata:
payload["full_metadata"] = self.full_metadata
# Include columns if defined (result renderer will use these for display)
if self.columns:
payload["columns"] = list(self.columns)
else:
# If no columns, include the detail for backwards compatibility
payload["detail"] = self.detail
payload["annotations"] = list(self.annotations)
# Include optional fields
if self.hash_hex:
payload["hash"] = self.hash_hex
if self.tag_summary:
payload["tags"] = self.tag_summary
if self.tags:
payload["tags_set"] = list(self.tags)
if self.relationships:
payload["relationships"] = self.relationships
if self.known_urls:
payload["known_urls"] = self.known_urls
return payload
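# Illustrative to_dict() payload (hypothetical values, shape only):
#   {"title": "Example Song", "origin": "hydrus", "target": "abc123...", "media_kind": "audio",
#    "columns": [("Title", "Example Song"), ("Store", "hydrus")], "annotations": [],
#    "hash": "abc123...", "tags": "artist:someone, album:something",
#    "known_urls": ["https://example.com/track"]}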
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
"""Attach Title/Store columns for storage-origin results to keep CLI display compact."""
origin_value = str(payload.get("origin") or payload.get("source") or "").lower()
if origin_value not in STORAGE_ORIGINS:
return payload
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
store_label = payload.get("origin") or payload.get("source") or origin_value
normalized = dict(payload)
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
return normalized
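# Illustrative call (hypothetical values): a local-storage hit gains compact display columns.
#   _ensure_storage_columns({"origin": "local", "title": "song.mp3", "path": "/music/song.mp3"})
#   -> {..., "columns": [("Title", "song.mp3"), ("Store", "local")]}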
CMDLET = Cmdlet(
name="search-file",
summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).",
usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]",
args=[
CmdletArg("query", description="Search query string"),
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
CmdletArg("type", description="Filter by type: audio, video, image, document"),
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"),
CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
],
details=[
"Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)",
"Use -provider to search a specific source, or -storage to search file backends",
"Filter results by: tag, size, type, duration",
"Results can be piped to other commands",
"Examples:",
"search-file foo # Search all file backends",
"search-file -provider libgen 'python programming' # Search LibGen books",
"search-file -provider debrid 'movie' # Search AllDebrid magnets",
"search-file 'music' -provider soulseek # Search Soulseek P2P",
"search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
"search-file song -storage hydrus -type audio # Search only Hydrus audio",
"search-file movie -tag action -provider debrid # Debrid with filters",
],
)
@register(["search-file", "search"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
args_list = [str(arg) for arg in (args or [])]
# Parse arguments
query = ""
tag_filters: List[str] = []
size_filter: Optional[Tuple[str, int]] = None
duration_filter: Optional[Tuple[str, float]] = None
type_filter: Optional[str] = None
storage_backend: Optional[str] = None
provider_name: Optional[str] = None
limit = 100
# Simple argument parsing
i = 0
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-provider", "--provider"} and i + 1 < len(args_list):
provider_name = args_list[i + 1].lower()
i += 2
elif low in {"-storage", "--storage"} and i + 1 < len(args_list):
storage_backend = args_list[i + 1].lower()
i += 2
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
tag_filters.append(args_list[i + 1])
i += 2
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
limit = 100
i += 2
elif low in {"-type", "--type"} and i + 1 < len(args_list):
type_filter = args_list[i + 1].lower()
i += 2
elif not query and not arg.startswith("-"):
query = arg
i += 1
else:
i += 1
if not query:
log("Provide a search query", file=sys.stderr)
return 1
# Initialize worker for this search command
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
db = LocalLibraryDB(library_root)
db.insert_worker(
worker_id,
"search",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
)
try:
results_list = []
# Try to search using provider (libgen, soulseek, debrid, openlibrary)
if provider_name:
debug(f"[search_file] Attempting provider search with: {provider_name}")
provider = get_provider(provider_name, config)
if not provider:
log(f"Provider '{provider_name}' not available", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
debug(f"[search_file] Provider loaded, calling search with query: {query}")
search_result = provider.search(query, limit=limit)
debug(f"[search_file] Provider search returned {len(search_result)} results")
for item in search_result:
item_dict = item.to_dict()
results_list.append(item_dict)
ctx.emit(item_dict)
debug(f"[search_file] Emitted {len(results_list)} results")
# Write results to worker stdout
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
# Otherwise search using FileStorage (Hydrus, Local, Debrid backends)
from helper.file_storage import FileStorage
storage = FileStorage(config=config or {})
backend_to_search = storage_backend or None
if backend_to_search:
# Check if requested backend is available
if backend_to_search == "hydrus":
from helper.hydrus import is_hydrus_available
if not is_hydrus_available(config or {}):
log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
if not storage.supports_search(backend_to_search):
log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
results = storage[backend_to_search].search(query, limit=limit)
else:
# Search all searchable backends, but skip hydrus if unavailable
from helper.hydrus import is_hydrus_available
hydrus_available = is_hydrus_available(config or {})
all_results = []
for backend_name in storage.list_searchable_backends():
# Skip hydrus if not available
if backend_name == "hydrus" and not hydrus_available:
continue
try:
backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
break
except Exception as exc:
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
results = all_results[:limit]
# Emit results and collect for workers table
if results:
for item in results:
if isinstance(item, dict):
normalized = _ensure_storage_columns(item)
results_list.append(normalized)
ctx.emit(normalized)
elif isinstance(item, ResultItem):
item_dict = item.to_dict()
results_list.append(item_dict)
ctx.emit(item_dict)
else:
item_dict = {"title": str(item)}
results_list.append(item_dict)
ctx.emit(item_dict)
# Write results to worker stdout
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:
log("No results found", file=sys.stderr)
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
finally:
# Always close the database connection
try:
db.close()
except Exception:
pass

325
cmdlets/worker.py Normal file
View File

@@ -0,0 +1,325 @@
"""Worker cmdlet: Display workers table in ResultTable format."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List
import json
import sys
from datetime import datetime, timezone
from . import register
from ._shared import Cmdlet, CmdletArg
import pipeline as ctx
from helper.logger import log
from config import get_local_storage_path
CMDLET = Cmdlet(
name=".worker",
summary="Display workers table in result table format.",
usage=".worker [status] [-limit N] [@N]",
args=[
CmdletArg("status", description="Filter by status: running, completed, error (default: all)"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
CmdletArg("@N", description="Select worker by index (1-based) and display full logs"),
],
details=[
"- Shows all background worker tasks and their output",
"- Can filter by status: running, completed, error",
"- Search result stdout is captured from each worker",
"- Use @N to select a specific worker by index and display its full logs",
"Examples:",
".worker # Show all workers",
".worker running # Show running workers only",
".worker completed -limit 50 # Show 50 most recent completed workers",
".worker @3 # Show full logs for the 3rd worker",
".worker running @2 # Show full logs for the 2nd running worker",
],
)
@register([".worker", "worker", "workers"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Display workers table or show detailed logs for a specific worker."""
args_list = [str(arg) for arg in (args or [])]
selection_indices = ctx.get_last_selection()
selection_requested = bool(selection_indices) and isinstance(result, list) and len(result) > 0
# Parse arguments for list view
status_filter: str | None = None
limit = 100
clear_requested = False
worker_id_arg: str | None = None
i = 0
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-limit", "--limit"} and i + 1 < len(args_list):
try:
limit = max(1, int(args_list[i + 1]))
except ValueError:
limit = 100
i += 2
elif low in {"-id", "--id"} and i + 1 < len(args_list):
worker_id_arg = args_list[i + 1]
i += 2
elif low in {"-clear", "--clear"}:
clear_requested = True
i += 1
elif low in {"running", "completed", "error", "cancelled"}:
status_filter = low
i += 1
elif not arg.startswith("-"):
status_filter = low
i += 1
else:
i += 1
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
# Cmdlet/CmdletArg objects are not directly JSON-serialisable; fall back to their attribute dicts.
log(json.dumps(CMDLET, ensure_ascii=False, indent=2, default=lambda obj: getattr(obj, "__dict__", str(obj))))
return 0
except Exception:
pass
library_root = get_local_storage_path(config or {})
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
try:
from helper.local_library import LocalLibraryDB
db: LocalLibraryDB | None = None
try:
db = LocalLibraryDB(library_root)
if clear_requested:
count = db.clear_finished_workers()
log(f"Cleared {count} finished workers.")
return 0
if worker_id_arg:
worker = db.get_worker(worker_id_arg)
if worker:
events = []
try:
wid = worker.get("worker_id")
if wid and hasattr(db, "get_worker_events"):
events = db.get_worker_events(wid)
except Exception:
pass
_emit_worker_detail(worker, events)
return 0
else:
log(f"Worker not found: {worker_id_arg}", file=sys.stderr)
return 1
if selection_requested:
return _render_worker_selection(db, result)
return _render_worker_list(db, status_filter, limit)
finally:
if db:
db.close()
except Exception as exc:
log(f"Workers query failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
def _render_worker_list(db, status_filter: str | None, limit: int) -> int:
workers = db.get_all_workers(limit=limit)
if status_filter:
workers = [w for w in workers if str(w.get("status", "")).lower() == status_filter]
if not workers:
log("No workers found", file=sys.stderr)
return 0
for worker in workers:
started = worker.get("started_at", "")
ended = worker.get("completed_at", worker.get("last_updated", ""))
date_str = _extract_date(started)
start_time = _format_event_timestamp(started)
end_time = _format_event_timestamp(ended)
item = {
"columns": [
("Status", worker.get("status", "")),
("Pipe", _summarize_pipe(worker.get("pipe"))),
("Date", date_str),
("Start Time", start_time),
("End Time", end_time),
],
"__worker_metadata": worker,
"_selection_args": ["-id", worker.get("worker_id")]
}
ctx.emit(item)
return 0
def _render_worker_selection(db, selected_items: Any) -> int:
if not isinstance(selected_items, list):
log("Selection payload missing", file=sys.stderr)
return 1
emitted = False
for item in selected_items:
worker = _resolve_worker_record(db, item)
if not worker:
continue
events = []
try:
events = db.get_worker_events(worker.get("worker_id")) if hasattr(db, "get_worker_events") else []
except Exception:
events = []
_emit_worker_detail(worker, events)
emitted = True
if not emitted:
log("Selected rows no longer exist", file=sys.stderr)
return 1
return 0
def _resolve_worker_record(db, payload: Any) -> Dict[str, Any] | None:
if not isinstance(payload, dict):
return None
worker_data = payload.get("__worker_metadata")
worker_id = None
if isinstance(worker_data, dict):
worker_id = worker_data.get("worker_id")
else:
worker_id = payload.get("worker_id")
worker_data = None
if worker_id:
fresh = db.get_worker(worker_id)
if fresh:
return fresh
return worker_data if isinstance(worker_data, dict) else None
def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> None:
# Parse stdout logs into rows
stdout_content = worker.get("stdout", "") or ""
# Try to parse lines if they follow the standard log format
# Format: YYYY-MM-DD HH:MM:SS - name - level - message
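# Example (hypothetical line): "2025-11-25 20:09:33 - search - INFO - Emitted 12 results"
# would render as Level "INFO", Message "Emitted 12 results", with Time showing the
# timestamp converted to local time (HH:MM:SS).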
lines = stdout_content.splitlines()
for line in lines:
line = line.strip()
if not line:
continue
# Default values
timestamp = ""
level = "INFO"
message = line
# Try to parse standard format
try:
parts = line.split(" - ", 3)
if len(parts) >= 4:
# Full format
ts_str, _, lvl, msg = parts
timestamp = _format_event_timestamp(ts_str)
level = lvl
message = msg
elif len(parts) == 3:
# Missing name or level
ts_str, lvl, msg = parts
timestamp = _format_event_timestamp(ts_str)
level = lvl
message = msg
except Exception:
pass
item = {
"columns": [
("Time", timestamp),
("Level", level),
("Message", message)
]
}
ctx.emit(item)
# Also emit events if available and not redundant
# (For now, just focusing on stdout logs as requested)
def _summarize_pipe(pipe_value: Any, limit: int = 60) -> str:
text = str(pipe_value or "").strip()
if not text:
return "(none)"
return text if len(text) <= limit else text[: limit - 3] + "..."
def _format_event_timestamp(raw_timestamp: Any) -> str:
dt = _parse_to_local(raw_timestamp)
if dt:
return dt.strftime("%H:%M:%S")
if not raw_timestamp:
return "--:--:--"
text = str(raw_timestamp)
if "T" in text:
time_part = text.split("T", 1)[1]
elif " " in text:
time_part = text.split(" ", 1)[1]
else:
time_part = text
return time_part[:8] if len(time_part) >= 8 else time_part
def _parse_to_local(timestamp_str: Any) -> datetime | None:
if not timestamp_str:
return None
text = str(timestamp_str).strip()
if not text:
return None
try:
# Check for T separator (Python isoformat - Local time)
if 'T' in text:
return datetime.fromisoformat(text)
# Check for space separator (SQLite CURRENT_TIMESTAMP - UTC)
# Format: YYYY-MM-DD HH:MM:SS
if ' ' in text:
# Assume UTC
dt = datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone() # Convert to local
except Exception:
pass
return None
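# Illustrative behaviour (timestamps are hypothetical):
#   _parse_to_local("2025-11-25T20:09:33")  -> naive datetime, treated as local time
#   _parse_to_local("2025-11-25 20:09:33")  -> parsed as UTC, then converted to the local zone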
def _extract_date(raw_timestamp: Any) -> str:
dt = _parse_to_local(raw_timestamp)
if dt:
return dt.strftime("%m-%d-%y")
# Fallback
if not raw_timestamp:
return ""
text = str(raw_timestamp)
# Extract YYYY-MM-DD part
date_part = ""
if "T" in text:
date_part = text.split("T", 1)[0]
elif " " in text:
date_part = text.split(" ", 1)[0]
else:
date_part = text
# Convert YYYY-MM-DD to MM-DD-YY
try:
parts = date_part.split("-")
if len(parts) == 3:
year, month, day = parts
return f"{month}-{day}-{year[2:]}"
except Exception:
pass
return date_part
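# Illustrative behaviour (hypothetical input): _extract_date("2025-11-25 20:09:33")
# parses the UTC timestamp, converts it to local time, and returns "11-25-25"
# (the local zone may shift the date across midnight).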