""" """
|
|
|
|
from __future__ import annotations

import base64
import hashlib
import json
import re
import shutil
import sys
import tempfile
from collections.abc import Iterable as IterableABC
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set

from SYS import models
from SYS.logger import log


@dataclass
class CmdletArg:
    """Represents a single cmdlet argument with optional enum choices."""

    name: str
    """Argument name, e.g., '-path' or 'location'"""

    type: str = "string"
    """Argument type: 'string', 'int', 'flag', 'enum', etc."""

    required: bool = False
    """Whether this argument is required"""

    description: str = ""
    """Human-readable description of the argument"""

    choices: List[str] = field(default_factory=list)
    """Optional list of valid choices for enum/autocomplete, e.g., ['hydrus', 'local', '0x0.st']"""

    alias: str = ""
    """Optional alias for the argument name, e.g., 'loc' for 'location'"""

    handler: Optional[Any] = None
    """Optional handler function/callable for processing this argument's value"""

    variadic: bool = False
    """Whether this argument accepts multiple values (consumes remaining positional args)"""

    usage: str = ""
    """Optional usage string for this argument shown in help text."""

    requires_db: bool = False
    """Whether this argument requires the local DB/library root to be configured."""

    # Query-mapping support:
    # Some cmdlets use a unified `-query` string. When configured, individual args
    # can be populated from fields inside `-query` (e.g., -query "hash:<sha256>").
    query_key: Optional[str] = None
    """Field name inside -query that maps to this argument (e.g., 'hash')."""

    query_aliases: List[str] = field(default_factory=list)
    """Additional field names inside -query that map to this argument."""

    query_only: bool = False
    """When True, do not accept a dedicated CLI flag for this arg; only map from -query."""

    def resolve(self, value: Any) -> Any:
        """Resolve/process the argument value using the handler if available.

        Args:
            value: The raw argument value to process

        Returns:
            Processed value from handler, or original value if no handler

        Example:
            # For an arg defined with a handler:
            CmdletArg("size", type="int", handler=int).resolve("42")  # -> 42
        """
        if self.handler is not None and callable(self.handler):
            return self.handler(value)
        return value

    def to_flags(self) -> tuple[str, ...]:
        """Generate all flag variants (short and long form) for this argument.

        Returns a tuple of all valid flag forms for this argument, including:
        - Long form with double dash: --name
        - Single-dash multi-char form: -name (for convenience)
        - Short form with single dash: -alias (if alias exists)

        For flag-type arguments, negation forms are also generated:
        - --no-name and -no<name>
        - -n<alias> (if an alias exists)

        Returns:
            Tuple of flag strings, e.g., ('--store', '-store') or, for a flag with an
            alias: ('--archive', '-archive', '-arch', '--no-archive', '-noarchive', '-narch')

        Example:
            archive_flags = SharedArgs.ARCHIVE.to_flags()
            # Returns: ('--archive', '-archive', '-arch', '--no-archive', '-noarchive', '-narch')

            store_flags = SharedArgs.STORE.to_flags()
            # Returns: ('--store', '-store')
        """
        flags = [
            f"--{self.name}",
            f"-{self.name}",
        ]  # Both double-dash and single-dash variants

        # Add short form if alias exists
        if self.alias:
            flags.append(f"-{self.alias}")

        # Add negation forms for flag type
        if self.type == "flag":
            flags.append(f"--no-{self.name}")
            flags.append(f"-no{self.name}")  # Single-dash negation variant
            if self.alias:
                flags.append(f"-n{self.alias}")

        return tuple(flags)
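
# Illustrative usage sketch (comment only; the 'verbose' and 'size' arguments below are
# hypothetical, not shipped shared arguments):
#
#     verbose = CmdletArg("verbose", type="flag", alias="v", description="Verbose output")
#     verbose.to_flags()
#     # -> ('--verbose', '-verbose', '-v', '--no-verbose', '-noverbose', '-nv')
#
#     size = CmdletArg("size", type="int", handler=int)
#     size.resolve("42")
#     # -> 42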
|
|
|
|
|
|
def QueryArg(
    name: str,
    *,
    key: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    type: str = "string",
    required: bool = False,
    description: str = "",
    choices: Optional[Sequence[str]] = None,
    handler: Optional[Any] = None,
    query_only: bool = True,
) -> CmdletArg:
    """Create an argument that can be populated from `-query` fields.

    By default, this does NOT create a dedicated flag (query_only=True). This is
    useful for deprecating bloat flags like `-hash` while still making `hash:` a
    first-class, documented, reusable field.
    """
    key_text = str(key or name).strip().lower()
    return CmdletArg(
        name=str(name),
        type=str(type or "string"),
        required=bool(required),
        description=str(description or ""),
        choices=list(choices or []),
        handler=handler,
        query_key=key_text if key_text else None,
        query_aliases=[str(a).strip().lower() for a in (aliases or []) if str(a).strip()],
        query_only=bool(query_only),
    )
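
# Illustrative usage sketch (hypothetical 'hash' field):
#
#     arg = QueryArg("hash", aliases=["sha256"], description="Target file hash")
#     (arg.query_key, arg.query_aliases, arg.query_only)
#     # -> ('hash', ['sha256'], True)
#
# Because query_only defaults to True, parse_cmdlet_args() will not accept a dedicated
# -hash flag for this arg; it is populated only from -query "hash:...".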
|
|
|
|
|
|
# ============================================================================
# SHARED ARGUMENTS - Reusable argument definitions across cmdlets
# ============================================================================
|
|
|
|
|
|
class SharedArgs:
|
|
"""Registry of shared CmdletArg definitions used across multiple cmdlet.
|
|
|
|
This class provides a centralized location for common arguments so they're
|
|
defined once and used consistently everywhere. Reduces duplication and ensures
|
|
all cmdlet handle the same arguments identically.
|
|
|
|
Example:
|
|
CMDLET = Cmdlet(
|
|
name="my-cmdlet",
|
|
summary="Does something",
|
|
usage="my-cmdlet",
|
|
args=[
|
|
SharedArgs.QUERY, # Use predefined shared arg (e.g., -query "hash:<sha256>")
|
|
SharedArgs.LOCATION, # Use another shared arg
|
|
CmdletArg(...), # Mix with custom args
|
|
]
|
|
)
|
|
"""
|
|
|
|
# NOTE: This project no longer exposes a dedicated -hash flag.
|
|
# Use SharedArgs.QUERY with `hash:` syntax instead (e.g., -query "hash:<sha256>").
|
|
|
|
STORE = CmdletArg(
|
|
name="store",
|
|
type="enum",
|
|
choices=[], # Dynamically populated via get_store_choices()
|
|
description="Selects store",
|
|
query_key="store",
|
|
)
|
|
|
|
URL = CmdletArg(
|
|
name="url",
|
|
type="string",
|
|
description="http parser",
|
|
)
|
|
PROVIDER = CmdletArg(
|
|
name="provider",
|
|
type="string",
|
|
description="selects provider",
|
|
)
|
|
|
|
@staticmethod
|
|
def get_store_choices(config: Optional[Dict[str, Any]] = None) -> List[str]:
|
|
"""Get list of available store backend names.
|
|
|
|
This method dynamically discovers all configured storage backends
|
|
instead of using a static list. Should be called when building
|
|
autocomplete choices or validating store names.
|
|
|
|
Args:
|
|
config: Optional config dict. If not provided, will try to load from config module.
|
|
|
|
Returns:
|
|
List of backend names (e.g., ['default', 'test', 'home', 'work'])
|
|
|
|
Example:
|
|
SharedArgs.STORE.choices = SharedArgs.get_store_choices(config)
|
|
"""
|
|
try:
|
|
# Use the non-instantiating helper so autocomplete doesn't trigger backend init.
|
|
from Store.registry import list_configured_backend_names
|
|
|
|
# If no config provided, try to load it
|
|
if config is None:
|
|
try:
|
|
from SYS.config import load_config
|
|
config = load_config()
|
|
except Exception:
|
|
return []
|
|
|
|
return list_configured_backend_names(config)
|
|
except Exception:
|
|
# Fallback to empty list if FileStorage isn't available
|
|
return []
|
|
|
|
LOCATION = CmdletArg(
|
|
"location",
|
|
type="enum",
|
|
choices=["hydrus",
|
|
"0x0",
|
|
"local"],
|
|
required=True,
|
|
description="Destination location",
|
|
)
|
|
|
|
DELETE = CmdletArg(
|
|
"delete",
|
|
type="flag",
|
|
description="Delete the file and its .tag after successful operation.",
|
|
)
|
|
|
|
# Metadata arguments
|
|
ARTIST = CmdletArg(
|
|
"artist",
|
|
type="string",
|
|
description="Filter by artist name (case-insensitive, partial match).",
|
|
)
|
|
|
|
ALBUM = CmdletArg(
|
|
"album",
|
|
type="string",
|
|
description="Filter by album name (case-insensitive, partial match).",
|
|
)
|
|
|
|
TRACK = CmdletArg(
|
|
"track",
|
|
type="string",
|
|
description="Filter by track title (case-insensitive, partial match).",
|
|
)
|
|
|
|
# Library/Search arguments
|
|
LIBRARY = CmdletArg(
|
|
"library",
|
|
type="string",
|
|
choices=["hydrus",
|
|
"local",
|
|
"soulseek",
|
|
"libgen",
|
|
"ftp"],
|
|
description="Search library or source location.",
|
|
)
|
|
|
|
TIMEOUT = CmdletArg(
|
|
"timeout",
|
|
type="integer",
|
|
description="Search or operation timeout in seconds."
|
|
)
|
|
|
|
LIMIT = CmdletArg(
|
|
"limit",
|
|
type="integer",
|
|
description="Maximum number of results to return."
|
|
)
|
|
|
|
# Path/File arguments
|
|
PATH = CmdletArg("path", type="string", description="File or directory path.")
|
|
|
|
OUTPUT = CmdletArg("output", type="string", description="Output file path.")
|
|
|
|
# Generic arguments
|
|
QUERY = CmdletArg(
|
|
"query",
|
|
type="string",
|
|
description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>}).",
|
|
)
|
|
|
|
REASON = CmdletArg(
|
|
"reason",
|
|
type="string",
|
|
description="Reason or explanation for the operation."
|
|
)
|
|
|
|
ARCHIVE = CmdletArg(
|
|
"archive",
|
|
type="flag",
|
|
description=
|
|
"Archive the URL to Wayback Machine, Archive.today, and Archive.ph (requires URL argument in cmdlet).",
|
|
alias="arch",
|
|
)
|
|
|
|
@staticmethod
|
|
def resolve_storage(
|
|
storage_value: Optional[str],
|
|
default: Optional[Path] = None
|
|
) -> Path:
|
|
"""Resolve a storage location name to a filesystem Path.
|
|
|
|
Maps storage identifiers (hydrus, local, ftp) to their actual
|
|
filesystem paths. This is the single source of truth for storage location resolution.
|
|
Note: 0x0.st is now accessed via file providers (-provider 0x0), not storage.
|
|
|
|
Args:
|
|
storage_value: One of 'hydrus', 'local', 'ftp', or None
|
|
default: Path to return if storage_value is None (defaults to Videos)
|
|
|
|
Returns:
|
|
Resolved Path object for the storage location
|
|
|
|
Raises:
|
|
ValueError: If storage_value is not a recognized storage type
|
|
|
|
Example:
|
|
# In a cmdlet:
|
|
storage_path = SharedArgs.resolve_storage(parsed.storage)
|
|
|
|
# With defaults:
|
|
path = SharedArgs.resolve_storage(None) # Returns home/Videos
|
|
path = SharedArgs.resolve_storage('local') # Returns home/Videos
|
|
path = SharedArgs.resolve_storage('hydrus') # Returns home/.hydrus/client_files
|
|
"""
|
|
storage_map = {
|
|
"local": Path.home() / "Videos",
|
|
"hydrus": Path.home() / ".hydrus" / "client_files",
|
|
"ftp": Path.home() / "FTP",
|
|
"matrix": Path.home() / "Matrix", # Placeholder, not used for upload path
|
|
}
|
|
|
|
if storage_value is None:
|
|
return default or (Path.home() / "Videos")
|
|
|
|
storage_lower = storage_value.lower()
|
|
if storage_lower not in storage_map:
|
|
raise ValueError(
|
|
f"Unknown storage location '{storage_value}'. "
|
|
f"Must be one of: {', '.join(storage_map.keys())}"
|
|
)
|
|
|
|
return storage_map[storage_lower]
|
|
|
|
@classmethod
|
|
def get(cls, name: str) -> Optional[CmdletArg]:
|
|
"""Get a shared argument by name.
|
|
|
|
Args:
|
|
name: Uppercase name like 'HASH', 'LOCATION', etc.
|
|
|
|
Returns:
|
|
CmdletArg if found, None otherwise
|
|
|
|
Example:
|
|
arg = SharedArgs.get('QUERY') # Returns SharedArgs.QUERY
|
|
"""
|
|
try:
|
|
return getattr(cls, name.upper())
|
|
except AttributeError:
|
|
return None
|
|
|
|
|
|
@dataclass
|
|
class Cmdlet:
|
|
"""Represents a cmdlet with metadata and arguments.
|
|
|
|
Example:
|
|
cmd = Cmdlet(
|
|
name="add-file",
|
|
summary="Upload a media file",
|
|
usage="add-file <location>",
|
|
aliases=["add-file-alias"],
|
|
args=[
|
|
CmdletArg("location", required=True, description="Destination location"),
|
|
CmdletArg("-delete", type="flag", description="Delete after upload"),
|
|
],
|
|
details=[
|
|
"- This is a detail line",
|
|
"- Another detail",
|
|
]
|
|
)
|
|
|
|
# Access properties
|
|
log(cmd.name) # "add-file"
|
|
log(cmd.summary) # "Upload a media file"
|
|
log(cmd.args[0].name) # "location"
|
|
"""
|
|
|
|
name: str
|
|
""""""
|
|
summary: str
|
|
"""One-line summary of the cmdlet"""
|
|
usage: str
|
|
"""Usage string, e.g., 'add-file <location> [-delete]'"""
|
|
alias: List[str] = field(default_factory=list)
|
|
"""List of aliases for this cmdlet, e.g., ['add', 'add-f']"""
|
|
arg: List[CmdletArg] = field(default_factory=list)
|
|
"""List of arguments accepted by this cmdlet"""
|
|
detail: List[str] = field(default_factory=list)
|
|
"""Detailed explanation lines (for help text)"""
|
|
examples: List[str] = field(default_factory=list)
|
|
"""Example invocations shown in `.help`."""
|
|
# Execution function: func(result, args, config) -> int
exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)
|
|
|
|
def _collect_names(self) -> List[str]:
|
|
"""Collect primary name plus aliases, de-duplicated and normalized."""
|
|
names: List[str] = []
|
|
if self.name:
|
|
names.append(self.name)
|
|
for alias in self.alias or []:
|
|
if alias:
|
|
names.append(alias)
|
|
for alias in getattr(self, "aliases", None) or []:
|
|
if alias:
|
|
names.append(alias)
|
|
|
|
seen: Set[str] = set()
|
|
deduped: List[str] = []
|
|
for name in names:
|
|
key = name.replace("_", "-").lower()
|
|
if key in seen:
|
|
continue
|
|
seen.add(key)
|
|
deduped.append(name)
|
|
return deduped
|
|
|
|
def register(self) -> "Cmdlet":
|
|
"""Register this cmdlet's exec under its name and aliases."""
|
|
if not callable(self.exec):
|
|
return self
|
|
try:
|
|
from . import (
|
|
register_callable as _register_callable,
|
|
) # Local import to avoid circular import cost
|
|
except Exception:
|
|
return self
|
|
|
|
names = self._collect_names()
|
|
if not names:
|
|
return self
|
|
|
|
_register_callable(names, self.exec)
|
|
return self
|
|
|
|
def get_flags(self, arg_name: str) -> set[str]:
|
|
"""Generate -name and --name flag variants for an argument.
|
|
|
|
Args:
|
|
arg_name: The argument name (e.g., 'library', 'tag', 'size')
|
|
|
|
Returns:
|
|
Set containing both single-dash and double-dash variants
|
|
(e.g., {'-library', '--library'})
|
|
|
|
Example:
|
|
if low in cmdlet.get_flags('library'):
|
|
# handle library flag
|
|
"""
|
|
return {f"-{arg_name}",
|
|
f"--{arg_name}"}
|
|
|
|
def build_flag_registry(self) -> Dict[str, set[str]]:
|
|
"""Build a registry of all flag variants for this cmdlet's arguments.
|
|
|
|
Automatically generates all -name and --name variants for each argument.
|
|
Useful for parsing command-line arguments without hardcoding flags.
|
|
|
|
Returns:
|
|
Dict mapping argument names to their flag sets
|
|
(e.g., {'library': {'-library', '--library'}, 'tag': {'-tag', '--tag'}})
|
|
|
|
Example:
|
|
flags = cmdlet.build_flag_registry()
|
|
|
|
if low in flags.get('library', set()):
|
|
# handle library
|
|
elif low in flags.get('tag', set()):
|
|
# handle tag
|
|
"""
|
|
return {
|
|
arg.name: self.get_flags(arg.name)
|
|
for arg in self.arg
|
|
}
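
# Illustrative usage sketch (hypothetical cmdlet definition):
#
#     demo = Cmdlet(
#         name="demo-cmdlet",
#         summary="Example only",
#         usage="demo-cmdlet <path> [-limit N]",
#         arg=[CmdletArg("path"), CmdletArg("limit", type="integer")],
#     )
#     demo.build_flag_registry()
#     # -> {'path': {'-path', '--path'}, 'limit': {'-limit', '--limit'}}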
|
|
|
|
|
|
# Tag groups cache (loaded from JSON config file)
|
|
_TAG_GROUPS_CACHE: Optional[Dict[str, List[str]]] = None
|
|
_TAG_GROUPS_MTIME: Optional[float] = None
|
|
|
|
# Path to tag groups configuration (set by caller or lazily discovered)
|
|
TAG_GROUPS_PATH: Optional[Path] = None
|
|
|
|
|
|
def set_tag_groups_path(path: Path) -> None:
|
|
"""Set the path to the tag groups JSON file."""
|
|
global TAG_GROUPS_PATH
|
|
TAG_GROUPS_PATH = path
|
|
|
|
|
|
def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Cmdlet) -> Dict[str, Any]:
|
|
"""Parse command-line arguments based on cmdlet specification.
|
|
|
|
Extracts argument values from command-line tokens using the argument names
|
|
and types defined in the cmdlet metadata. Automatically supports single-dash
|
|
and double-dash variants of flag names. Arguments without dashes in definition
|
|
are treated as positional arguments.
|
|
|
|
Args:
|
|
args: Command-line arguments (e.g., ["-path", "/home/file.txt", "-foo", "bar"])
|
|
cmdlet_spec: Cmdlet whose `arg` list defines the accepted arguments.
Argument names may be defined with or without a dash prefix; names
without a dash are treated as positional arguments.

Returns:
Dict mapping canonical arg names to their parsed values. If an arg is not
provided, it will not be in the dict. Lookup will normalize prefixes.

Example:
spec = Cmdlet(
name="example",
summary="Example only",
usage="example <path> [-count N]",
arg=[
CmdletArg("path"),               # Positional - matches bare value or -path/--path
CmdletArg("count", type="int"),  # Positional - matches bare value or -count/--count
],
)
result = parse_cmdlet_args(["value1", "-count", "5"], spec)
# result = {"path": "value1", "count": "5"}
"""
|
|
result: Dict[str, Any] = {}
|
|
|
|
# Only accept Cmdlet objects
|
|
if not isinstance(cmdlet_spec, Cmdlet):
|
|
raise TypeError(f"Expected Cmdlet, got {type(cmdlet_spec).__name__}")
|
|
|
|
# Build arg specs from cmdlet
|
|
arg_specs: List[CmdletArg] = cmdlet_spec.arg
|
|
positional_args: List[CmdletArg] = [] # args without prefix in definition
|
|
flagged_args: List[CmdletArg] = [] # args with prefix in definition
|
|
query_mapped_args: List[CmdletArg] = []
|
|
|
|
arg_spec_map: Dict[str, str] = {}  # prefix variant -> canonical name (without prefix)
|
|
|
|
for spec in arg_specs:
|
|
name = spec.name
|
|
if not name:
|
|
continue
|
|
|
|
# Track args that can be populated from -query.
|
|
try:
|
|
if getattr(spec, "query_key", None):
|
|
query_mapped_args.append(spec)
|
|
except Exception:
|
|
pass
|
|
|
|
name_str = str(name)
|
|
canonical_name = name_str.lstrip("-")
|
|
|
|
# Query-only args do not register dedicated flags/positionals.
|
|
try:
|
|
if bool(getattr(spec, "query_only", False)):
|
|
continue
|
|
except Exception:
|
|
pass
|
|
|
|
# Determine if this is positional (no dashes in original definition)
|
|
if "-" not in name_str:
|
|
positional_args.append(spec)
|
|
else:
|
|
flagged_args.append(spec)
|
|
|
|
# Register all prefix variants for flagged lookup
|
|
arg_spec_map[canonical_name.lower()] = canonical_name # bare name
|
|
arg_spec_map[f"-{canonical_name}".lower()] = canonical_name # single dash
|
|
arg_spec_map[f"--{canonical_name}".lower()] = canonical_name # double dash
|
|
|
|
# Parse arguments
|
|
i = 0
|
|
positional_index = 0 # Track which positional arg we're on
|
|
|
|
while i < len(args):
|
|
token = str(args[i])
|
|
token_lower = token.lower()
|
|
|
|
# Legacy guidance: -hash/--hash was removed in favor of -query "hash:...".
|
|
# However, some cmdlets may explicitly re-introduce a -hash flag.
|
|
if token_lower in {"-hash",
|
|
"--hash"} and token_lower not in arg_spec_map:
|
|
try:
|
|
log(
|
|
'Legacy flag -hash is no longer supported. Use: -query "hash:<sha256>"',
|
|
file=sys.stderr,
|
|
)
|
|
except Exception:
|
|
pass
|
|
i += 1
|
|
continue
|
|
|
|
# Check if this token is a known flagged argument
|
|
if token_lower in arg_spec_map:
|
|
canonical_name = arg_spec_map[token_lower]
|
|
spec = next(
|
|
(
|
|
s for s in arg_specs
|
|
if str(s.name).lstrip("-").lower() == canonical_name.lower()
|
|
),
|
|
None,
|
|
)
|
|
|
|
# Check if it's a flag type (which doesn't consume next value, just marks presence)
|
|
is_flag = spec and spec.type == "flag"
|
|
|
|
if is_flag:
|
|
# For flags, just mark presence without consuming next token
|
|
result[canonical_name] = True
|
|
i += 1
|
|
else:
|
|
# For non-flags, consume next token as the value
|
|
if i + 1 < len(args) and not str(args[i + 1]).startswith("-"):
|
|
value = args[i + 1]
|
|
|
|
# Check if variadic
|
|
is_variadic = spec and spec.variadic
|
|
if is_variadic:
|
|
if canonical_name not in result:
|
|
result[canonical_name] = []
|
|
elif not isinstance(result[canonical_name], list):
|
|
result[canonical_name] = [result[canonical_name]]
|
|
result[canonical_name].append(value)
|
|
else:
|
|
result[canonical_name] = value
|
|
i += 2
|
|
else:
|
|
i += 1
|
|
# Otherwise treat as positional if we have positional args remaining
|
|
elif positional_index < len(positional_args):
|
|
positional_spec = positional_args[positional_index]
|
|
canonical_name = str(positional_spec.name).lstrip("-")
|
|
is_variadic = positional_spec.variadic
|
|
|
|
if is_variadic:
|
|
# For variadic args, append to a list
|
|
if canonical_name not in result:
|
|
result[canonical_name] = []
|
|
elif not isinstance(result[canonical_name], list):
|
|
# Should not happen if logic is correct, but safety check
|
|
result[canonical_name] = [result[canonical_name]]
|
|
|
|
result[canonical_name].append(token)
|
|
# Do not increment positional_index so subsequent tokens also match this arg
|
|
# Note: Variadic args should typically be the last positional argument
|
|
i += 1
|
|
else:
|
|
result[canonical_name] = token
|
|
positional_index += 1
|
|
i += 1
|
|
else:
|
|
# Unknown token, skip it
|
|
i += 1
|
|
|
|
# Populate query-mapped args from the unified -query string.
|
|
try:
|
|
raw_query = result.get("query")
|
|
except Exception:
|
|
raw_query = None
|
|
|
|
if query_mapped_args and raw_query is not None:
|
|
try:
|
|
from SYS.cli_syntax import parse_query as _parse_query
|
|
|
|
parsed_query = _parse_query(str(raw_query))
|
|
fields = parsed_query.get("fields",
|
|
{}) if isinstance(parsed_query,
|
|
dict) else {}
|
|
norm_fields = (
|
|
{
|
|
str(k).strip().lower(): v
|
|
for k, v in fields.items()
|
|
} if isinstance(fields,
|
|
dict) else {}
|
|
)
|
|
except Exception:
|
|
norm_fields = {}
|
|
|
|
for spec in query_mapped_args:
|
|
canonical_name = str(getattr(spec, "name", "") or "").lstrip("-")
|
|
if not canonical_name:
|
|
continue
|
|
# Do not override explicit flags.
|
|
if canonical_name in result and result.get(canonical_name) not in (None, ""):
|
|
continue
|
|
try:
|
|
key = str(getattr(spec, "query_key", "") or "").strip().lower()
|
|
aliases = getattr(spec, "query_aliases", None)
|
|
alias_list = [
|
|
str(a).strip().lower() for a in (aliases or []) if str(a).strip()
|
|
]
|
|
except Exception:
|
|
key = ""
|
|
alias_list = []
|
|
candidates = [k for k in [key, canonical_name] + alias_list if k]
|
|
val = None
|
|
for k in candidates:
|
|
if k in norm_fields:
|
|
val = norm_fields.get(k)
|
|
break
|
|
if val is None:
|
|
continue
|
|
try:
|
|
result[canonical_name] = spec.resolve(val)
|
|
except Exception:
|
|
result[canonical_name] = val
|
|
|
|
return result
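
# Illustrative sketch of the -query mapping path (hypothetical cmdlet spec; assumes
# SYS.cli_syntax.parse_query exposes the value under parsed["fields"]["hash"]):
#
#     spec = Cmdlet(
#         name="example",
#         summary="Example only",
#         usage='example -query "hash:<sha256>"',
#         arg=[SharedArgs.QUERY, QueryArg("hash")],
#     )
#     parse_cmdlet_args(["-query", "hash:" + "ab" * 32], spec)
#     # -> {"query": "hash:abab...", "hash": "abab..."}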
|
|
|
|
|
|
def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
|
|
"""Normalize a hash string to lowercase, or return None if invalid.
|
|
|
|
Args:
|
|
hash_hex: String that should be a hex hash
|
|
|
|
Returns:
|
|
Lowercase hash string, or None if input is not a string or is empty
|
|
"""
|
|
if not isinstance(hash_hex, str):
|
|
return None
|
|
text = hash_hex.strip().lower()
|
|
if not text:
|
|
return None
|
|
if len(text) != 64:
|
|
return None
|
|
if not all(ch in "0123456789abcdef" for ch in text):
|
|
return None
|
|
return text
|
|
|
|
|
|
def parse_hash_query(query: Optional[str]) -> List[str]:
|
|
"""Parse a unified query string for `hash:` into normalized SHA256 hashes.
|
|
|
|
Supported examples:
|
|
- hash:<h1>
|
|
- hash:<h1>,<h2>,<h3>
|
|
- Hash: <h1> <h2> <h3>
|
|
- hash:{<h1>, <h2>}
|
|
|
|
Returns:
|
|
List of unique normalized 64-hex SHA256 hashes.
|
|
"""
|
|
q = str(query or "").strip()
|
|
if not q:
|
|
return []
|
|
|
|
m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
|
|
if not m:
|
|
return []
|
|
|
|
rest = (m.group(1) or "").strip()
|
|
if rest.startswith("{") and rest.endswith("}"):
|
|
rest = rest[1:-1].strip()
|
|
if rest.startswith("[") and rest.endswith("]"):
|
|
rest = rest[1:-1].strip()
|
|
|
|
raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
|
|
out: List[str] = []
|
|
for part in raw_parts:
|
|
h = normalize_hash(part)
|
|
if not h:
|
|
continue
|
|
if h not in out:
|
|
out.append(h)
|
|
return out
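
# Illustrative sketch:
#
#     h = "ab" * 32
#     parse_hash_query(f"hash:{{{h}, {h.upper()}}}")
#     # -> [h]   (braces stripped, whitespace/comma split, normalized, de-duplicated)
#     parse_hash_query("title:something")
#     # -> []    (only hash:/hashes: prefixes are recognized)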
|
|
|
|
|
|
def parse_single_hash_query(query: Optional[str]) -> Optional[str]:
|
|
"""Parse `hash:` query and require exactly one hash."""
|
|
hashes = parse_hash_query(query)
|
|
if len(hashes) != 1:
|
|
return None
|
|
return hashes[0]
|
|
|
|
|
|
def get_hash_for_operation(
|
|
override_hash: Optional[str],
|
|
result: Any,
|
|
field_name: str = "hash"
|
|
) -> Optional[str]:
|
|
"""Get normalized hash from override or result object, consolidating common pattern.
|
|
|
|
Eliminates repeated pattern: normalize_hash(override) if override else normalize_hash(get_field(result, ...))
|
|
|
|
Args:
|
|
override_hash: Hash passed as command argument (takes precedence)
|
|
result: Object containing hash field (fallback)
|
|
field_name: Name of hash field in result object (default: "hash")
|
|
|
|
Returns:
|
|
Normalized hash string, or None if neither override nor result provides valid hash
|
|
"""
|
|
if override_hash:
|
|
return normalize_hash(override_hash)
|
|
hash_value = (
get_field(result, field_name)
or getattr(result, field_name, None)
or getattr(result, "hash", None)
)
|
|
return normalize_hash(hash_value)
|
|
|
|
|
|
def fetch_hydrus_metadata(
    config: Any,
    hash_hex: str,
    *,
    store_name: Optional[str] = None,
    hydrus_client: Any = None,
    **kwargs,
) -> tuple[Optional[Dict[str, Any]], Optional[int]]:
|
|
"""Fetch metadata from Hydrus for a given hash, consolidating common fetch pattern.
|
|
|
|
Eliminates repeated boilerplate: client initialization, error handling, metadata extraction.
|
|
|
|
Args:
|
|
config: Configuration object (passed to hydrus_wrapper.get_client)
|
|
hash_hex: File hash to fetch metadata for
|
|
store_name: Optional Hydrus store name. When provided, do not fall back to a global/default Hydrus client.
|
|
hydrus_client: Optional explicit Hydrus client. When provided, takes precedence.
|
|
**kwargs: Additional arguments to pass to client.fetch_file_metadata()
|
|
Common: include_service_keys_to_tags, include_notes, include_file_url, include_duration, etc.
|
|
|
|
Returns:
|
|
Tuple of (metadata_dict, error_code)
|
|
- metadata_dict: Dict from Hydrus (first item in metadata list) or None if unavailable
|
|
- error_code: 0 on success, 1 on any error (suitable for returning from cmdlet execute())
|
|
"""
|
|
from API import HydrusNetwork
|
|
|
|
hydrus_wrapper = HydrusNetwork
|
|
|
|
client = hydrus_client
|
|
if client is None:
|
|
if store_name:
|
|
# Store specified: do not fall back to a global/default Hydrus client.
|
|
try:
|
|
from Store import Store
|
|
|
|
store = Store(config)
|
|
backend = store[str(store_name)]
|
|
candidate = getattr(backend, "_client", None)
|
|
if candidate is not None and hasattr(candidate, "fetch_file_metadata"):
|
|
client = candidate
|
|
except Exception as exc:
|
|
log(f"Hydrus client unavailable for store '{store_name}': {exc}")
|
|
client = None
|
|
if client is None:
|
|
log(f"Hydrus client unavailable for store '{store_name}'")
|
|
return None, 1
|
|
else:
|
|
try:
|
|
client = hydrus_wrapper.get_client(config)
|
|
except Exception as exc:
|
|
log(f"Hydrus client unavailable: {exc}")
|
|
return None, 1
|
|
|
|
if client is None:
|
|
log("Hydrus client unavailable")
|
|
return None, 1
|
|
|
|
try:
|
|
payload = client.fetch_file_metadata(hashes=[hash_hex], **kwargs)
|
|
except Exception as exc:
|
|
log(f"Hydrus metadata fetch failed: {exc}")
|
|
return None, 1
|
|
|
|
items = payload.get("metadata") if isinstance(payload, dict) else None
|
|
meta = items[0] if (
|
|
isinstance(items,
|
|
list) and items and isinstance(items[0],
|
|
dict)
|
|
) else None
|
|
|
|
return meta, 0
|
|
|
|
|
|
def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
|
|
"""Extract a field from either a dict or object with fallback default.
|
|
|
|
Handles both dict.get(field) and getattr(obj, field) access patterns.
|
|
Also handles lists by accessing the first element.
|
|
For PipeObjects, checks the extra field as well.
|
|
Used throughout cmdlet to uniformly access fields from mixed types.
|
|
|
|
Args:
|
|
obj: Dict, object, or list to extract from
|
|
field: Field name to retrieve
|
|
default: Value to return if field not found (default: None)
|
|
|
|
Returns:
|
|
Field value if found, otherwise the default value
|
|
|
|
Examples:
|
|
get_field(result, "hash") # From dict or object
|
|
get_field(result, "table", "unknown") # With default
|
|
"""
|
|
# Handle lists by accessing the first element
|
|
if isinstance(obj, list):
|
|
if not obj:
|
|
return default
|
|
obj = obj[0]
|
|
|
|
if isinstance(obj, dict):
|
|
return obj.get(field, default)
|
|
else:
|
|
# Try direct attribute access first
|
|
value = getattr(obj, field, None)
|
|
if value is not None:
|
|
return value
|
|
|
|
# For PipeObjects, also check the extra field
|
|
extra_val = getattr(obj, "extra", None)
|
|
if isinstance(extra_val, dict):
|
|
return extra_val.get(field, default)
|
|
|
|
return default
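
# Illustrative sketch:
#
#     get_field({"hash": "abc"}, "hash")                # -> "abc"
#     get_field([{"title": "First"}, {}], "title")      # -> "First" (lists use the first element)
#     get_field(object(), "missing", default="n/a")     # -> "n/a"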
|
|
|
|
|
|
def should_show_help(args: Sequence[str]) -> bool:
|
|
"""Check if help flag was passed in arguments.
|
|
|
|
Consolidates repeated pattern of checking for help flags across cmdlet.
|
|
|
|
Args:
|
|
args: Command arguments to check
|
|
|
|
Returns:
|
|
True if any help flag is present (-?, /?, --help, -h, help, --cmdlet)
|
|
|
|
Examples:
|
|
if should_show_help(args):
|
|
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
|
return 0
|
|
"""
|
|
try:
|
|
return any(
str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"}
for a in args
)
|
|
except Exception:
|
|
return False
|
|
|
|
|
|
def looks_like_hash(candidate: Optional[str]) -> bool:
|
|
"""Check if a string looks like a SHA256 hash (64 hex chars).
|
|
|
|
Args:
|
|
candidate: String to test
|
|
|
|
Returns:
|
|
True if the string is 64 lowercase hex characters
|
|
"""
|
|
if not isinstance(candidate, str):
|
|
return False
|
|
text = candidate.strip().lower()
|
|
return len(text) == 64 and all(ch in "0123456789abcdef" for ch in text)
|
|
|
|
|
|
def pipeline_item_local_path(item: Any) -> Optional[str]:
|
|
"""Extract local file path from a pipeline item.
|
|
|
|
Supports both dataclass objects with .path attribute and dicts.
|
|
Returns None for HTTP/HTTPS url.
|
|
|
|
Args:
|
|
item: Pipeline item (PipelineItem dataclass, dict, or other)
|
|
|
|
Returns:
|
|
Local file path string, or None if item is not a local file
|
|
"""
|
|
path_value: Optional[str] = None
|
|
if hasattr(item, "path"):
|
|
path_value = getattr(item, "path", None)
|
|
elif isinstance(item, dict):
|
|
raw = item.get("path") or item.get("url")
|
|
path_value = str(raw) if raw is not None else None
|
|
if not isinstance(path_value, str):
|
|
return None
|
|
text = path_value.strip()
|
|
if not text:
|
|
return None
|
|
if text.lower().startswith(("http://", "https://")):
|
|
return None
|
|
return text
|
|
|
|
|
|
def collect_relationship_labels(
    payload: Any,
    label_stack: List[str] | None = None,
    mapping: Dict[str, str] | None = None,
) -> Dict[str, str]:
|
|
"""Recursively extract hash-to-label mappings from nested relationship data.
|
|
|
|
Walks through nested dicts/lists looking for sha256-like strings (64 hex chars)
|
|
and builds a mapping from hash to its path in the structure.
|
|
|
|
Example:
|
|
data = {
|
|
"duplicates": [
|
|
"abc123...", # Will be mapped to "duplicates"
|
|
{"type": "related", "items": ["def456..."]} # Will be mapped to "duplicates / type / items"
|
|
]
|
|
}
|
|
result = collect_relationship_labels(data)
|
|
# result = {"abc123...": "duplicates", "def456...": "duplicates / type / items"}
|
|
|
|
Args:
|
|
payload: Nested data structure (dict, list, string, etc.)
|
|
label_stack: Internal use - tracks path during recursion
|
|
mapping: Internal use - accumulates hash->label mappings
|
|
|
|
Returns:
|
|
Dict mapping hash strings to their path labels
|
|
"""
|
|
if label_stack is None:
|
|
label_stack = []
|
|
if mapping is None:
|
|
mapping = {}
|
|
|
|
if isinstance(payload, dict):
|
|
for key, value in payload.items():
|
|
next_stack = label_stack
|
|
if isinstance(key, str) and key:
|
|
formatted = key.replace("_", " ").strip()
|
|
next_stack = label_stack + [formatted]
|
|
collect_relationship_labels(value, next_stack, mapping)
|
|
elif isinstance(payload, (list, tuple, set)):
|
|
for value in payload:
|
|
collect_relationship_labels(value, label_stack, mapping)
|
|
elif isinstance(payload, str) and looks_like_hash(payload):
|
|
hash_value = payload.lower()
|
|
if label_stack:
|
|
label = " / ".join(item for item in label_stack if item)
|
|
else:
|
|
label = "related"
|
|
mapping.setdefault(hash_value, label)
|
|
|
|
return mapping
|
|
|
|
|
|
def parse_tag_arguments(arguments: Sequence[str]) -> List[str]:
|
|
"""Parse tag arguments from command line tokens.
|
|
|
|
- Supports comma-separated tags.
|
|
- Supports pipe namespace shorthand: "artist:A|B|C" -> artist:A, artist:B, artist:C.
|
|
|
|
Args:
|
|
arguments: Sequence of argument strings
|
|
|
|
Returns:
|
|
List of normalized tag strings (empty strings filtered out)
|
|
"""
|
|
|
|
def _expand_pipe_namespace(text: str) -> List[str]:
|
|
parts = text.split("|")
|
|
expanded: List[str] = []
|
|
last_ns: Optional[str] = None
|
|
for part in parts:
|
|
segment = part.strip()
|
|
if not segment:
|
|
continue
|
|
if ":" in segment:
|
|
ns, val = segment.split(":", 1)
|
|
ns = ns.strip()
|
|
val = val.strip()
|
|
last_ns = ns or last_ns
|
|
if last_ns and val:
|
|
expanded.append(f"{last_ns}:{val}")
|
|
elif ns or val:
|
|
expanded.append(f"{ns}:{val}".strip(":"))
|
|
else:
|
|
if last_ns:
|
|
expanded.append(f"{last_ns}:{segment}")
|
|
else:
|
|
expanded.append(segment)
|
|
return expanded
|
|
|
|
tags: List[str] = []
|
|
for argument in arguments:
|
|
for token in argument.split(","):
|
|
text = token.strip()
|
|
if not text:
|
|
continue
|
|
# Expand namespace shorthand with pipes
|
|
pipe_expanded = _expand_pipe_namespace(text)
|
|
for entry in pipe_expanded:
|
|
candidate = entry.strip()
|
|
if not candidate:
|
|
continue
|
|
if ":" in candidate:
|
|
ns, val = candidate.split(":", 1)
|
|
ns = ns.strip()
|
|
val = val.strip()
|
|
candidate = f"{ns}:{val}" if ns or val else ""
|
|
if candidate:
|
|
tags.append(candidate)
|
|
return tags
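
# Illustrative sketch:
#
#     parse_tag_arguments(["artist:A|B, genre:rock"])
#     # -> ["artist:A", "artist:B", "genre:rock"]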
|
|
|
|
|
|
def fmt_bytes(n: Optional[int]) -> str:
|
|
"""Format bytes as human-readable with 1 decimal place (MB/GB).
|
|
|
|
Args:
|
|
n: Number of bytes, or None
|
|
|
|
Returns:
|
|
Formatted string like "1.5 MB" or "2.0 GB", or "unknown"
|
|
"""
|
|
if n is None or n < 0:
|
|
return "unknown"
|
|
gb = n / (1024.0 * 1024.0 * 1024.0)
|
|
if gb >= 1.0:
|
|
return f"{gb:.1f} GB"
|
|
mb = n / (1024.0 * 1024.0)
|
|
return f"{mb:.1f} MB"
|
|
|
|
|
|
def _normalise_tag_group_entry(value: Any) -> Optional[str]:
|
|
"""Internal: Normalize a single tag group entry."""
|
|
if not isinstance(value, str):
|
|
value = str(value)
|
|
text = value.strip()
|
|
return text or None
|
|
|
|
|
|
def _load_tag_groups() -> Dict[str, List[str]]:
|
|
"""Load tag group definitions from JSON file with caching."""
|
|
global _TAG_GROUPS_CACHE, _TAG_GROUPS_MTIME, TAG_GROUPS_PATH
|
|
|
|
# Auto-discover adjective.json if not set
|
|
if TAG_GROUPS_PATH is None:
|
|
# Try to find adjective.json in the script directory or helper subdirectory
|
|
try:
|
|
script_dir = Path(__file__).parent.parent
|
|
|
|
# Check root directory
|
|
candidate = script_dir / "adjective.json"
|
|
if candidate.exists():
|
|
TAG_GROUPS_PATH = candidate
|
|
else:
|
|
# Check helper directory
|
|
candidate = script_dir / "helper" / "adjective.json"
|
|
if candidate.exists():
|
|
TAG_GROUPS_PATH = candidate
|
|
except Exception:
|
|
pass
|
|
|
|
if TAG_GROUPS_PATH is None:
|
|
return {}
|
|
|
|
path = TAG_GROUPS_PATH
|
|
try:
|
|
stat_result = path.stat()
|
|
except FileNotFoundError:
|
|
_TAG_GROUPS_CACHE = {}
|
|
_TAG_GROUPS_MTIME = None
|
|
return {}
|
|
except OSError as exc:
|
|
log(f"Failed to read tag groups: {exc}", file=sys.stderr)
|
|
_TAG_GROUPS_CACHE = {}
|
|
_TAG_GROUPS_MTIME = None
|
|
return {}
|
|
|
|
mtime = stat_result.st_mtime
|
|
if _TAG_GROUPS_CACHE is not None and _TAG_GROUPS_MTIME == mtime:
|
|
return _TAG_GROUPS_CACHE
|
|
|
|
try:
|
|
payload = json.loads(path.read_text(encoding="utf-8"))
|
|
except (OSError, json.JSONDecodeError) as exc:
|
|
log(f"Invalid tag group JSON ({path}): {exc}", file=sys.stderr)
|
|
_TAG_GROUPS_CACHE = {}
|
|
_TAG_GROUPS_MTIME = mtime
|
|
return {}
|
|
|
|
groups: Dict[str, List[str]] = {}
|
|
if isinstance(payload, dict):
|
|
for key, value in payload.items():
|
|
if not isinstance(key, str):
|
|
continue
|
|
name = key.strip().lower()
|
|
if not name:
|
|
continue
|
|
members: List[str] = []
|
|
if isinstance(value, list):
|
|
for entry in value:
|
|
normalised = _normalise_tag_group_entry(entry)
|
|
if normalised:
|
|
members.append(normalised)
|
|
elif isinstance(value, str):
|
|
normalised = _normalise_tag_group_entry(value)
|
|
if normalised:
|
|
members.extend(
|
|
token.strip() for token in normalised.split(",")
|
|
if token.strip()
|
|
)
|
|
if members:
|
|
groups[name] = members
|
|
|
|
_TAG_GROUPS_CACHE = groups
|
|
_TAG_GROUPS_MTIME = mtime
|
|
return groups
|
|
|
|
|
|
def expand_tag_groups(raw_tags: Iterable[str]) -> List[str]:
|
|
"""Expand tag group references (e.g., {my_group}) into member tags.
|
|
|
|
Tag groups are defined in JSON and can be nested. Groups are referenced
|
|
with curly braces: {group_name}.
|
|
|
|
Args:
|
|
raw_tags: Sequence of tag strings, some may reference groups like "{group_name}"
|
|
|
|
Returns:
|
|
List of expanded tags with group references replaced
|
|
"""
|
|
groups = _load_tag_groups()
|
|
if not groups:
|
|
return [tag for tag in raw_tags if isinstance(tag, str) and tag.strip()]
|
|
|
|
def _expand(tokens: Iterable[str], seen: Set[str]) -> List[str]:
|
|
result: List[str] = []
|
|
for token in tokens:
|
|
if not isinstance(token, str):
|
|
continue
|
|
candidate = token.strip()
|
|
if not candidate:
|
|
continue
|
|
if candidate.startswith("{") and candidate.endswith("}") and len(candidate
|
|
) > 2:
|
|
name = candidate[1:-1].strip().lower()
|
|
if not name:
|
|
continue
|
|
if name in seen:
|
|
log(
|
|
f"Tag group recursion detected for {{{name}}}; skipping",
|
|
file=sys.stderr
|
|
)
|
|
continue
|
|
members = groups.get(name)
|
|
if not members:
|
|
log(f"Unknown tag group {{{name}}}", file=sys.stderr)
|
|
result.append(candidate)
|
|
continue
|
|
result.extend(_expand(members, seen | {name}))
|
|
else:
|
|
result.append(candidate)
|
|
return result
|
|
|
|
return _expand(raw_tags, set())
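
# Illustrative sketch (assumes a tag-group JSON such as {"colours": ["red", "blue"]}
# has been registered via set_tag_groups_path()):
#
#     expand_tag_groups(["{colours}", "title:Example"])
#     # -> ["red", "blue", "title:Example"]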
|
|
|
|
|
|
def first_title_tag(source: Optional[Iterable[str]]) -> Optional[str]:
|
|
"""Find the first tag starting with "title:" in a collection.
|
|
|
|
Args:
|
|
source: Iterable of tag strings
|
|
|
|
Returns:
|
|
First title: tag found, or None
|
|
"""
|
|
if not source:
|
|
return None
|
|
for item in source:
|
|
if not isinstance(item, str):
|
|
continue
|
|
candidate = item.strip()
|
|
if candidate and candidate.lower().startswith("title:"):
|
|
return candidate
|
|
return None
|
|
|
|
|
|
def apply_preferred_title(tags: List[str], preferred: Optional[str]) -> List[str]:
|
|
"""Replace any title: tags with a preferred title tag.
|
|
|
|
Args:
|
|
tags: List of tags (may contain multiple "title:" entries)
|
|
preferred: Preferred title tag to use (full "title: ..." format)
|
|
|
|
Returns:
|
|
List with old title tags removed and preferred title added (at most once)
|
|
"""
|
|
if not preferred:
|
|
return tags
|
|
preferred_clean = preferred.strip()
|
|
if not preferred_clean:
|
|
return tags
|
|
preferred_lower = preferred_clean.lower()
|
|
filtered: List[str] = []
|
|
has_preferred = False
|
|
for tag in tags:
|
|
candidate = tag.strip()
|
|
if not candidate:
|
|
continue
|
|
if candidate.lower().startswith("title:"):
|
|
if candidate.lower() == preferred_lower:
|
|
if not has_preferred:
|
|
filtered.append(candidate)
|
|
has_preferred = True
|
|
continue
|
|
filtered.append(candidate)
|
|
if not has_preferred:
|
|
filtered.append(preferred_clean)
|
|
return filtered
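
# Illustrative sketch:
#
#     apply_preferred_title(["title:Old Name", "artist:X"], "title:New Name")
#     # -> ["artist:X", "title:New Name"]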
|
|
|
|
|
|
# ============================================================================
|
|
# PIPEOBJECT UTILITIES (for chainable cmdlet and multi-action pipelines)
|
|
# ============================================================================
|
|
|
|
|
|
def create_pipe_object_result(
|
|
source: str,
|
|
identifier: str,
|
|
file_path: str,
|
|
cmdlet_name: str,
|
|
title: Optional[str] = None,
|
|
hash_value: Optional[str] = None,
|
|
is_temp: bool = False,
|
|
parent_hash: Optional[str] = None,
|
|
tag: Optional[List[str]] = None,
|
|
**extra: Any,
|
|
) -> Dict[str, Any]:
|
|
"""Create a PipeObject-compatible result dict for pipeline chaining.
|
|
|
|
This is a helper to emit results in the standard format that downstream
|
|
cmdlet can process (filter, tag, cleanup, etc.).
|
|
|
|
Args:
|
|
source: Source system (e.g., 'local', 'hydrus', 'download')
|
|
identifier: Unique ID from source
|
|
file_path: Path to the file
|
|
cmdlet_name: Name of the cmdlet that created this (e.g., 'download-data', 'screen-shot')
|
|
title: Human-readable title
|
|
hash_value: SHA-256 hash of file (for integrity)
|
|
is_temp: If True, this is a temporary/intermediate artifact
|
|
parent_hash: Hash of the parent file in the chain (for provenance)
|
|
tag: List of tag values to apply
|
|
**extra: Additional fields
|
|
|
|
Returns:
|
|
Dict with all PipeObject fields for emission
|
|
"""
|
|
result: Dict[str, Any] = {
|
|
"source": source,
|
|
"id": identifier,
|
|
"path": file_path,
|
|
"action": f"cmdlet:{cmdlet_name}", # Format: cmdlet:cmdlet_name
|
|
}
|
|
|
|
if title:
|
|
result["title"] = title
|
|
if hash_value:
|
|
result["hash"] = hash_value
|
|
if is_temp:
|
|
result["is_temp"] = True
|
|
if parent_hash:
|
|
result["parent_hash"] = parent_hash
|
|
if tag:
|
|
result["tag"] = tag
|
|
|
|
# Canonical store field: use source for compatibility
|
|
try:
|
|
if source:
|
|
result["store"] = source
|
|
except Exception:
|
|
pass
|
|
|
|
# Add any extra fields
|
|
result.update(extra)
|
|
|
|
return result
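
# Illustrative sketch (hypothetical values):
#
#     create_pipe_object_result(
#         source="download",
#         identifier="abc123",
#         file_path="/tmp/clip.mp4",
#         cmdlet_name="download-data",
#         title="Example clip",
#         is_temp=True,
#     )
#     # -> {"source": "download", "id": "abc123", "path": "/tmp/clip.mp4",
#     #     "action": "cmdlet:download-data", "title": "Example clip",
#     #     "is_temp": True, "store": "download"}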
|
|
|
|
|
|
def mark_as_temp(pipe_object: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Mark a PipeObject dict as temporary (intermediate artifact).
|
|
|
|
Args:
|
|
pipe_object: Result dict from cmdlet emission
|
|
|
|
Returns:
|
|
Modified dict with is_temp=True
|
|
"""
|
|
pipe_object["is_temp"] = True
|
|
return pipe_object
|
|
|
|
|
|
def set_parent_hash(pipe_object: Dict[str, Any], parent_hash: str) -> Dict[str, Any]:
|
|
"""Set the parent_hash for provenance tracking.
|
|
|
|
Args:
|
|
pipe_object: Result dict
|
|
parent_hash: Parent file's hash
|
|
|
|
Returns:
|
|
Modified dict with parent_hash set to the hash
|
|
"""
|
|
pipe_object["parent_hash"] = parent_hash
|
|
return pipe_object
|
|
|
|
|
|
def get_pipe_object_path(pipe_object: Any) -> Optional[str]:
|
|
"""Extract file path from PipeObject, dict, or pipeline-friendly object."""
|
|
if pipe_object is None:
|
|
return None
|
|
for attr in ("path", "target"):
|
|
if hasattr(pipe_object, attr):
|
|
value = getattr(pipe_object, attr)
|
|
if value:
|
|
return value
|
|
if isinstance(pipe_object, dict):
|
|
for key in ("path", "target"):
|
|
value = pipe_object.get(key)
|
|
if value:
|
|
return value
|
|
return None
|
|
|
|
|
|
def _extract_flag_value(args: Sequence[str], *flags: str) -> Optional[str]:
|
|
"""Return the value for the first matching flag in args.
|
|
|
|
This is intentionally lightweight (no cmdlet spec required) so callers in CLI/pipeline
|
|
can share the same behavior.
|
|
"""
|
|
if not args:
|
|
return None
|
|
want = {str(f).strip().lower() for f in flags if str(f).strip()}
|
|
if not want:
|
|
return None
|
|
try:
|
|
tokens = [str(a) for a in args]
|
|
except Exception:
|
|
tokens = list(args) # type: ignore[list-item]
|
|
for i, tok in enumerate(tokens):
|
|
low = str(tok).strip().lower()
|
|
if low in want:
|
|
if i + 1 >= len(tokens):
|
|
return None
|
|
nxt = str(tokens[i + 1])
|
|
# Allow paths like "-"? Treat missing value as None.
|
|
if not nxt.strip():
|
|
return None
|
|
# Don't consume another flag as value.
|
|
if nxt.startswith("-"):
|
|
return None
|
|
return nxt
|
|
return None
|
|
|
|
|
|
def _unique_destination_path(dest: Path) -> Path:
|
|
"""Generate a non-colliding destination path by appending " (N)"."""
|
|
try:
|
|
if not dest.exists():
|
|
return dest
|
|
except Exception:
|
|
return dest
|
|
|
|
parent = dest.parent
|
|
stem = dest.stem
|
|
suffix = dest.suffix
|
|
for i in range(1, 10_000):
|
|
candidate = parent / f"{stem} ({i}){suffix}"
|
|
try:
|
|
if not candidate.exists():
|
|
return candidate
|
|
except Exception:
|
|
return candidate
|
|
return dest
|
|
|
|
|
|
def _print_live_safe_stderr(message: str) -> None:
|
|
"""Print to stderr without breaking Rich Live progress output."""
|
|
try:
|
|
from SYS.rich_display import stderr_console # type: ignore
|
|
except Exception:
|
|
return
|
|
|
|
cm = None
|
|
try:
|
|
from SYS import pipeline as _pipeline_ctx # type: ignore
|
|
|
|
suspend = getattr(_pipeline_ctx, "suspend_live_progress", None)
|
|
cm = suspend() if callable(suspend) else None
|
|
except Exception:
|
|
cm = None
|
|
|
|
try:
|
|
from contextlib import nullcontext
|
|
except Exception:
|
|
nullcontext = None # type: ignore
|
|
if cm is None:
|
|
cm = nullcontext() if callable(nullcontext) else None
|
|
|
|
try:
|
|
if cm is not None:
|
|
with cm:
|
|
stderr_console.print(str(message))
|
|
else:
|
|
stderr_console.print(str(message))
|
|
except Exception:
|
|
return
|
|
|
|
|
|
def apply_output_path_from_pipeobjects(
|
|
*,
|
|
cmd_name: str,
|
|
args: Sequence[str],
|
|
emits: Sequence[Any],
|
|
) -> List[Any]:
|
|
"""If the user supplied `-path`, move emitted temp/PATH files there.
|
|
|
|
This enables a dynamic pattern:
|
|
- Any cmdlet can include `SharedArgs.PATH`.
|
|
- If it emits a file-backed PipeObject (`path` exists on disk) and the item is
|
|
a temp/PATH artifact, then `-path <dest>` will save it to that location.
|
|
|
|
Rules:
|
|
- Only affects items whose `action` matches the current cmdlet.
|
|
- Only affects items that look like local artifacts (`is_temp` True or `store` == PATH).
|
|
- Updates the emitted object's `path` (and `target` when it points at the same file).
|
|
"""
|
|
dest_raw = _extract_flag_value(args, "-path", "--path")
|
|
if not dest_raw:
|
|
return list(emits or [])
|
|
|
|
# Guard: users sometimes pass a URL into -path by mistake (e.g. `-path https://...`).
|
|
# Treat that as invalid for filesystem moves and avoid breaking Rich Live output.
|
|
try:
|
|
dest_str = str(dest_raw).strip()
|
|
if "://" in dest_str:
|
|
_print_live_safe_stderr(
|
|
f"Ignoring -path value that looks like a URL: {dest_str}"
|
|
)
|
|
return list(emits or [])
|
|
except Exception:
|
|
pass
|
|
|
|
cmd_norm = str(cmd_name or "").replace("_", "-").strip().lower()
|
|
if not cmd_norm:
|
|
return list(emits or [])
|
|
|
|
try:
|
|
dest_hint_dir = str(dest_raw).endswith(("/", "\\"))
|
|
except Exception:
|
|
dest_hint_dir = False
|
|
|
|
try:
|
|
dest_path = Path(str(dest_raw)).expanduser()
|
|
except Exception:
|
|
return list(emits or [])
|
|
|
|
items = list(emits or [])
|
|
# Identify which emitted items are actually file artifacts produced by this cmdlet.
|
|
artifact_indices: List[int] = []
|
|
artifact_paths: List[Path] = []
|
|
for idx, item in enumerate(items):
|
|
action = str(get_field(item, "action", "") or "").strip().lower()
|
|
if not action.startswith("cmdlet:"):
|
|
continue
|
|
action_name = action.split(":", 1)[-1].strip().lower()
|
|
if action_name != cmd_norm:
|
|
continue
|
|
|
|
store = str(get_field(item, "store", "") or "").strip().lower()
|
|
is_temp = bool(get_field(item, "is_temp", False))
|
|
if not (is_temp or store == "path"):
|
|
continue
|
|
|
|
src_str = get_pipe_object_path(item)
|
|
if not src_str:
|
|
continue
|
|
try:
|
|
src = Path(str(src_str)).expanduser()
|
|
except Exception:
|
|
continue
|
|
try:
|
|
if not src.exists() or not src.is_file():
|
|
continue
|
|
except Exception:
|
|
continue
|
|
|
|
artifact_indices.append(idx)
|
|
artifact_paths.append(src)
|
|
|
|
if not artifact_indices:
|
|
return items
|
|
|
|
# Decide whether the destination is a directory or a single file.
|
|
if len(artifact_indices) > 1:
|
|
# Multiple artifacts: always treat destination as a directory.
|
|
if dest_path.suffix:
|
|
dest_dir = dest_path.parent
|
|
else:
|
|
dest_dir = dest_path
|
|
try:
|
|
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
except Exception as exc:
|
|
_print_live_safe_stderr(
|
|
f"Failed to create destination directory: {dest_dir} ({exc})"
|
|
)
|
|
return items
|
|
|
|
for idx, src in zip(artifact_indices, artifact_paths):
|
|
final = dest_dir / src.name
|
|
final = _unique_destination_path(final)
|
|
try:
|
|
if src.resolve() == final.resolve():
|
|
_apply_saved_path_update(
|
|
items[idx],
|
|
old_path=str(src),
|
|
new_path=str(final)
|
|
)
|
|
_print_saved_output_panel(items[idx], final)
|
|
continue
|
|
except Exception:
|
|
pass
|
|
try:
|
|
shutil.move(str(src), str(final))
|
|
except Exception as exc:
|
|
_print_live_safe_stderr(f"Failed to save output to {final}: {exc}")
|
|
continue
|
|
_apply_saved_path_update(items[idx], old_path=str(src), new_path=str(final))
|
|
_print_saved_output_panel(items[idx], final)
|
|
|
|
return items
|
|
|
|
# Single artifact: destination can be a directory or a concrete file path.
|
|
src = artifact_paths[0]
|
|
idx = artifact_indices[0]
|
|
final: Path
|
|
try:
|
|
if dest_hint_dir or (dest_path.exists() and dest_path.is_dir()):
|
|
final = dest_path / src.name
|
|
else:
|
|
final = dest_path
|
|
except Exception:
|
|
final = dest_path
|
|
|
|
try:
|
|
final.parent.mkdir(parents=True, exist_ok=True)
|
|
except Exception as exc:
|
|
_print_live_safe_stderr(
|
|
f"Failed to create destination directory: {final.parent} ({exc})"
|
|
)
|
|
return items
|
|
|
|
final = _unique_destination_path(final)
|
|
try:
|
|
if src.resolve() != final.resolve():
|
|
shutil.move(str(src), str(final))
|
|
except Exception as exc:
|
|
_print_live_safe_stderr(f"Failed to save output to {final}: {exc}")
|
|
return items
|
|
|
|
_apply_saved_path_update(items[idx], old_path=str(src), new_path=str(final))
|
|
_print_saved_output_panel(items[idx], final)
|
|
return items
|
|
|
|
|
|
def _print_saved_output_panel(item: Any, final_path: Path) -> None:
|
|
"""When -path is used, print a Rich panel summarizing the saved output.
|
|
|
|
Shows: Title, Location, Hash.
|
|
Best-effort: reads existing fields first to avoid recomputing hashes.
|
|
"""
|
|
try:
|
|
from rich.panel import Panel # type: ignore
|
|
from rich.table import Table # type: ignore
|
|
from SYS.rich_display import stderr_console # type: ignore
|
|
except Exception:
|
|
return
|
|
|
|
# If Rich Live progress is active, pause it while printing so the panel
|
|
# doesn't get overwritten/truncated by Live's cursor control.
|
|
try:
|
|
from SYS import pipeline as _pipeline_ctx # type: ignore
|
|
|
|
suspend = getattr(_pipeline_ctx, "suspend_live_progress", None)
|
|
cm = suspend() if callable(suspend) else None
|
|
except Exception:
|
|
cm = None
|
|
|
|
try:
|
|
from contextlib import nullcontext
|
|
except Exception:
|
|
nullcontext = None # type: ignore
|
|
|
|
if cm is None:
|
|
cm = nullcontext() if callable(nullcontext) else None
|
|
|
|
try:
|
|
location = str(final_path)
|
|
except Exception:
|
|
location = ""
|
|
|
|
title = ""
|
|
try:
|
|
title = str(get_field(item, "title") or get_field(item, "name") or "").strip()
|
|
except Exception:
|
|
title = ""
|
|
if not title:
|
|
try:
|
|
title = str(final_path.stem or final_path.name)
|
|
except Exception:
|
|
title = ""
|
|
|
|
file_hash = ""
|
|
try:
|
|
file_hash = str(get_field(item, "hash") or get_field(item, "sha256") or "").strip()
|
|
except Exception:
|
|
file_hash = ""
|
|
if not file_hash:
|
|
try:
|
|
from SYS.utils import sha256_file # type: ignore
|
|
|
|
file_hash = str(sha256_file(final_path) or "").strip()
|
|
except Exception:
|
|
file_hash = ""
|
|
|
|
grid = Table.grid(padding=(0, 1))
|
|
grid.add_column(justify="right", style="bold")
|
|
grid.add_column()
|
|
grid.add_row("Title", title or "(unknown)")
|
|
grid.add_row("Location", location or "(unknown)")
|
|
grid.add_row("Hash", file_hash or "(unknown)")
|
|
|
|
try:
|
|
if cm is not None:
|
|
with cm:
|
|
stderr_console.print(Panel(grid, title="Saved", expand=False))
|
|
else:
|
|
stderr_console.print(Panel(grid, title="Saved", expand=False))
|
|
except Exception:
|
|
return
|
|
|
|
|
|
def _apply_saved_path_update(item: Any, *, old_path: str, new_path: str) -> None:
|
|
"""Update a PipeObject-like item after its backing file has moved."""
|
|
old_str = str(old_path)
|
|
new_str = str(new_path)
|
|
if isinstance(item, dict):
|
|
try:
|
|
if str(item.get("path") or "") == old_str:
|
|
item["path"] = new_str
|
|
except Exception:
|
|
pass
|
|
try:
|
|
if str(item.get("target") or "") == old_str:
|
|
item["target"] = new_str
|
|
except Exception:
|
|
pass
|
|
try:
|
|
extra = item.get("extra")
|
|
if isinstance(extra, dict):
|
|
if str(extra.get("target") or "") == old_str:
|
|
extra["target"] = new_str
|
|
if str(extra.get("path") or "") == old_str:
|
|
extra["path"] = new_str
|
|
except Exception:
|
|
pass
|
|
return
|
|
|
|
# models.PipeObject or PipeObject-ish
|
|
try:
|
|
if getattr(item, "path", None) == old_str:
|
|
setattr(item, "path", new_str)
|
|
except Exception:
|
|
pass
|
|
try:
|
|
extra = getattr(item, "extra", None)
|
|
if isinstance(extra, dict):
|
|
if str(extra.get("target") or "") == old_str:
|
|
extra["target"] = new_str
|
|
if str(extra.get("path") or "") == old_str:
|
|
extra["path"] = new_str
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
def get_pipe_object_hash(pipe_object: Any) -> Optional[str]:
|
|
"""Extract file hash from PipeObject, dict, or pipeline-friendly object."""
|
|
if pipe_object is None:
|
|
return None
|
|
for attr in ("hash",
|
|
):
|
|
if hasattr(pipe_object, attr):
|
|
value = getattr(pipe_object, attr)
|
|
if value:
|
|
return value
|
|
if isinstance(pipe_object, dict):
|
|
for key in ("hash",
|
|
):
|
|
value = pipe_object.get(key)
|
|
if value:
|
|
return value
|
|
return None
|
|
|
|
|
|
def normalize_result_input(result: Any) -> List[Dict[str, Any]]:
|
|
"""Normalize input result to a list of dicts.
|
|
|
|
Handles:
|
|
- None -> []
|
|
- Dict -> [dict]
|
|
- List of dicts -> list as-is
|
|
- PipeObject -> [dict]
|
|
- List of PipeObjects -> list of dicts
|
|
|
|
Args:
|
|
result: Result from piped input
|
|
|
|
Returns:
|
|
List of result dicts (may be empty)
|
|
"""
|
|
if result is None:
|
|
return []
|
|
|
|
# Single dict
|
|
if isinstance(result, dict):
|
|
return [result]
|
|
|
|
# List - convert each item to dict if needed
|
|
if isinstance(result, list):
|
|
output = []
|
|
for item in result:
|
|
if isinstance(item, dict):
|
|
output.append(item)
|
|
elif hasattr(item, "to_dict"):
|
|
output.append(item.to_dict())
|
|
else:
|
|
# Try as-is
|
|
output.append(item)
|
|
return output
|
|
|
|
# PipeObject or other object with to_dict
|
|
if hasattr(result, "to_dict"):
|
|
return [result.to_dict()]
|
|
|
|
# Fallback: wrap it
|
|
if isinstance(result, dict):
|
|
return [result]
|
|
|
|
return []
|
|
|
|
|
|
def filter_results_by_temp(results: List[Any], include_temp: bool = False) -> List[Any]:
|
|
"""Filter results by temporary status.
|
|
|
|
Args:
|
|
results: List of result dicts or PipeObjects
|
|
include_temp: If True, keep temp files; if False, exclude them
|
|
|
|
Returns:
|
|
Filtered list
|
|
"""
|
|
if include_temp:
|
|
return results
|
|
|
|
filtered = []
|
|
for result in results:
|
|
is_temp = False
|
|
|
|
# Check PipeObject
|
|
if hasattr(result, "is_temp"):
|
|
is_temp = result.is_temp
|
|
# Check dict
|
|
elif isinstance(result, dict):
|
|
is_temp = result.get("is_temp", False)
|
|
|
|
if not is_temp:
|
|
filtered.append(result)
|
|
|
|
return filtered
|
|
|
|
|
|
def merge_sequences(*sources: Optional[Iterable[Any]], case_sensitive: bool = True) -> list[str]:
|
|
"""Merge iterable sources while preserving order and removing duplicates."""
|
|
seen: set[str] = set()
|
|
merged: list[str] = []
|
|
for source in sources:
|
|
if not source:
|
|
continue
|
|
if isinstance(source, str) or not isinstance(source, IterableABC):
|
|
iterable = [source]
|
|
else:
|
|
iterable = source
|
|
for value in iterable:
|
|
if value is None:
|
|
continue
|
|
text = str(value).strip()
|
|
if not text:
|
|
continue
|
|
key = text if case_sensitive else text.lower()
|
|
if key in seen:
|
|
continue
|
|
seen.add(key)
|
|
merged.append(text)
|
|
return merged
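
# Illustrative usage (hedged sketch):
#   merge_sequences(["b", "a"], ("a", "c"), None)            -> ["b", "a", "c"]
#   merge_sequences(["Tag"], ["tag"], case_sensitive=False)  -> ["Tag"]  (first spelling wins)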


def collapse_namespace_tags(
    tags: Optional[Iterable[Any]],
    namespace: str,
    prefer: str = "last",
) -> list[str]:
    """Reduce tags so only one entry for a given namespace remains.

    Keeps either the first or last occurrence (default last) while preserving overall order
    for non-matching tags. Useful for ensuring a single title: tag.
    """
    if not tags:
        return []
    ns = str(namespace or "").strip().lower()
    if not ns:
        return list(tags)

    prefer_last = str(prefer or "last").lower() != "first"
    ns_prefix = ns + ":"

    items = list(tags)
    if prefer_last:
        kept: list[str] = []
        seen_ns = False
        for tag in reversed(items):
            text = str(tag)
            if text.lower().startswith(ns_prefix):
                if seen_ns:
                    continue
                seen_ns = True
            kept.append(text)
        kept.reverse()
        return kept
    else:
        kept_ns = False
        result: list[str] = []
        for tag in items:
            text = str(tag)
            if text.lower().startswith(ns_prefix):
                if kept_ns:
                    continue
                kept_ns = True
            result.append(text)
        return result
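
# Illustrative usage (hedged sketch):
#   tags = ["title:old", "creator:foo", "title:new"]
#   collapse_namespace_tags(tags, "title")                  -> ["creator:foo", "title:new"]
#   collapse_namespace_tags(tags, "title", prefer="first")  -> ["title:old", "creator:foo"]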


def collapse_namespace_tag(
    tags: Optional[Iterable[Any]],
    namespace: str,
    prefer: str = "last",
) -> list[str]:
    """Singular alias for collapse_namespace_tags.

    Some cmdlets prefer the singular name; keep behavior centralized.
    """
    return collapse_namespace_tags(tags, namespace, prefer=prefer)


def extract_tag_from_result(result: Any) -> list[str]:
    """Collect tags from a PipeObject, a tag-bearing object, or a result dict (including extra)."""
    tag: list[str] = []
    if isinstance(result, models.PipeObject):
        tag.extend(result.tag or [])
        if isinstance(result.extra, dict):
            extra_tag = result.extra.get("tag")
            if isinstance(extra_tag, list):
                tag.extend(extra_tag)
            elif isinstance(extra_tag, str):
                tag.append(extra_tag)
    elif hasattr(result, "tag"):
        # Handle objects with a tag attribute (e.g. SearchResult)
        val = getattr(result, "tag")
        if isinstance(val, (list, set, tuple)):
            tag.extend(val)
        elif isinstance(val, str):
            tag.append(val)

    if isinstance(result, dict):
        raw_tag = result.get("tag")
        if isinstance(raw_tag, list):
            tag.extend(raw_tag)
        elif isinstance(raw_tag, str):
            tag.append(raw_tag)
        extra = result.get("extra")
        if isinstance(extra, dict):
            extra_tag = extra.get("tag")
            if isinstance(extra_tag, list):
                tag.extend(extra_tag)
            elif isinstance(extra_tag, str):
                tag.append(extra_tag)
    return merge_sequences(tag, case_sensitive=True)
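
# Illustrative usage (hedged sketch; the tag values are assumptions):
#   extract_tag_from_result({"tag": ["artist:foo", "title:bar"], "extra": {"tag": "title:bar"}})
#       -> ["artist:foo", "title:bar"]   (extra tags merged, duplicates removed, order preserved)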


def extract_title_from_result(result: Any) -> Optional[str]:
    """Extract the title from a result dict or PipeObject."""
    if isinstance(result, models.PipeObject):
        return result.title
    elif hasattr(result, "title"):
        return getattr(result, "title")
    elif isinstance(result, dict):
        return result.get("title")
    return None


def extract_url_from_result(result: Any) -> list[str]:
    """Collect URL values from a PipeObject, a URL-bearing object, or a result dict."""
    url: list[str] = []

    def _extend(candidate: Any) -> None:
        if not candidate:
            return
        if isinstance(candidate, list):
            url.extend(candidate)
        elif isinstance(candidate, str):
            url.append(candidate)

    if isinstance(result, models.PipeObject):
        _extend(result.extra.get("url"))
        if isinstance(result.metadata, dict):
            _extend(result.metadata.get("url"))
        fm = getattr(result, "full_metadata", None)
        if isinstance(fm, dict):
            _extend(fm.get("url"))
    elif hasattr(result, "url"):
        # Handle objects with a url attribute
        _extend(getattr(result, "url", None))

    if isinstance(result, dict):
        _extend(result.get("url"))
        fm = result.get("full_metadata")
        if isinstance(fm, dict):
            _extend(fm.get("url"))
        extra = result.get("extra")
        if isinstance(extra, dict):
            _extend(extra.get("url"))

    return merge_sequences(url, case_sensitive=True)
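
# Illustrative usage (hedged sketch; URLs are placeholders):
#   extract_url_from_result({"url": "https://a.example", "extra": {"url": ["https://a.example", "https://b.example"]}})
#       -> ["https://a.example", "https://b.example"]   (deduplicated, order preserved)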


def extract_relationships(result: Any) -> Optional[Dict[str, Any]]:
    """Extract a non-empty relationships mapping from a PipeObject or result dict."""
    if isinstance(result, models.PipeObject):
        relationships = result.get_relationships()
        return relationships or None
    if isinstance(result, dict):
        relationships = result.get("relationships")
        if isinstance(relationships, dict) and relationships:
            return relationships
    return None


def extract_duration(result: Any) -> Optional[float]:
    """Extract a duration in seconds (as float) from a PipeObject or result dict."""
    duration = None
    if isinstance(result, models.PipeObject):
        duration = result.duration
    elif isinstance(result, dict):
        duration = result.get("duration")
        if duration is None:
            metadata = result.get("metadata")
            if isinstance(metadata, dict):
                duration = metadata.get("duration")
    if duration is None:
        return None
    try:
        return float(duration)
    except (TypeError, ValueError):
        return None
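
# Illustrative usage (hedged sketch):
#   extract_duration({"duration": "12.5"})             -> 12.5
#   extract_duration({"metadata": {"duration": 300}})  -> 300.0
#   extract_duration({"title": "no duration here"})    -> None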


def coerce_to_pipe_object(
    value: Any,
    default_path: Optional[str] = None,
) -> models.PipeObject:
    """Normalize any incoming result to a PipeObject for single-source-of-truth state.

    Uses hash+store canonical pattern.
    """
    # Debug: Print ResultItem details if coming from search_file.py
    try:
        from SYS.logger import is_debug_enabled, debug

        if (is_debug_enabled() and hasattr(value, "__class__")
                and value.__class__.__name__ == "ResultItem"):
            debug("[ResultItem -> PipeObject conversion]")
            debug(f" title={getattr(value, 'title', None)}")
            debug(f" target={getattr(value, 'target', None)}")
            debug(f" hash={getattr(value, 'hash', None)}")
            debug(f" media_kind={getattr(value, 'media_kind', None)}")
            debug(f" tag={getattr(value, 'tag', None)}")
            debug(f" tag_summary={getattr(value, 'tag_summary', None)}")
            debug(f" size_bytes={getattr(value, 'size_bytes', None)}")
            debug(f" duration_seconds={getattr(value, 'duration_seconds', None)}")
            debug(f" relationships={getattr(value, 'relationships', None)}")
            debug(f" url={getattr(value, 'url', None)}")
            debug(
                f" full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}"
            )
    except Exception:
        pass

    if isinstance(value, models.PipeObject):
        return value

    known_keys = {
        "hash", "store", "tag", "title", "url", "source_url", "duration",
        "metadata", "warnings", "path", "relationships", "is_temp", "action",
        "parent_hash",
    }

    # Convert common object-like results into a dict so we can preserve fields like
    # hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
    #
    # Priority:
    #   1) explicit to_dict()
    #   2) best-effort attribute extraction for known PipeObject-ish fields
    if hasattr(value, "to_dict"):
        value = value.to_dict()
    elif not isinstance(value, dict):
        try:
            obj_map: Dict[str, Any] = {}
            for k in (
                "hash", "store", "provider", "prov", "tag", "title", "url",
                "source_url", "duration", "duration_seconds", "metadata",
                "full_metadata", "warnings", "path", "target", "relationships",
                "is_temp", "action", "parent_hash", "extra", "media_kind",
            ):
                if hasattr(value, k):
                    obj_map[k] = getattr(value, k)
            if obj_map:
                value = obj_map
        except Exception:
            pass

    if isinstance(value, dict):
        # Extract hash and store (canonical identifiers)
        hash_val = value.get("hash")
        store_val = value.get("store") or "PATH"
        if not store_val or store_val == "PATH":
            try:
                extra_store = value.get("extra", {}).get("store")
            except Exception:
                extra_store = None
            if extra_store:
                store_val = extra_store

        # If no hash, try to compute from path or use placeholder
        if not hash_val:
            path_val = value.get("path")
            if path_val:
                try:
                    from SYS.utils import sha256_file

                    hash_val = sha256_file(Path(path_val))
                except Exception:
                    hash_val = "unknown"
            else:
                hash_val = "unknown"

        # Extract title from filename if not provided
        title_val = value.get("title")
        if not title_val:
            path_val = value.get("path")
            if path_val:
                try:
                    title_val = Path(path_val).stem
                except Exception:
                    pass

        extra = {k: v for k, v in value.items() if k not in known_keys}

        # Extract URL: prefer direct url field, then url list
        from SYS.metadata import normalize_urls

        url_list = normalize_urls(value.get("url"))
        url_val = url_list[0] if url_list else None
        if len(url_list) > 1:
            extra["url"] = url_list

        # Extract relationships
        rels = value.get("relationships") or {}

        # Canonical tag: accept list or single string
        tag_val: list[str] = []
        if "tag" in value:
            raw_tag = value["tag"]
            if isinstance(raw_tag, list):
                tag_val = [str(t) for t in raw_tag if t is not None]
            elif isinstance(raw_tag, str):
                tag_val = [raw_tag]

        # Consolidate path: prefer explicit path key, but NOT target if it's a URL
        path_val = value.get("path")
        # Only use target as path if it's not a URL (url should stay in url field)
        if not path_val and "target" in value:
            target = value["target"]
            if target and not (isinstance(target, str)
                               and (target.startswith("http://")
                                    or target.startswith("https://"))):
                path_val = target

        # If the path value is actually a URL, move it to url_val and clear path_val
        try:
            if isinstance(path_val, str) and (path_val.startswith("http://")
                                              or path_val.startswith("https://")):
                # Prefer existing url_val if present, otherwise move path_val into url_val
                if not url_val:
                    url_val = path_val
                path_val = None
        except Exception:
            pass

        # Extract media_kind if available
        if "media_kind" in value:
            extra["media_kind"] = value["media_kind"]

        pipe_obj = models.PipeObject(
            hash=hash_val,
            store=store_val,
            provider=str(
                value.get("provider") or value.get("prov") or extra.get("provider") or ""
            ).strip() or None,
            tag=tag_val,
            title=title_val,
            url=url_val,
            source_url=value.get("source_url"),
            duration=value.get("duration") or value.get("duration_seconds"),
            metadata=value.get("metadata") or value.get("full_metadata") or {},
            warnings=list(value.get("warnings") or []),
            path=path_val,
            relationships=rels,
            is_temp=bool(value.get("is_temp", False)),
            action=value.get("action"),
            parent_hash=value.get("parent_hash"),
            extra=extra,
        )

        return pipe_obj

    # Fallback: build from path argument or bare value
    hash_val = "unknown"
    path_val = default_path or getattr(value, "path", None)
    url_val: Optional[str] = None
    title_val = None

    # If the raw value is a string, treat it as either a URL or a file path.
    # This is important for @-selection results that are plain URL strings.
    if isinstance(value, str):
        s = value.strip()
        if s.lower().startswith(("http://", "https://")):
            url_val = s
            path_val = None
        else:
            path_val = s

    if path_val and path_val != "unknown":
        try:
            from SYS.utils import sha256_file

            path_obj = Path(path_val)
            hash_val = sha256_file(path_obj)
            # Extract title from filename (without extension)
            title_val = path_obj.stem
        except Exception:
            pass

    # When coming from a raw URL string, mark it explicitly as URL.
    # Otherwise treat it as a local path.
    store_val = "URL" if url_val else "PATH"

    pipe_obj = models.PipeObject(
        hash=hash_val,
        store=store_val,
        provider=None,
        path=str(path_val) if path_val and path_val != "unknown" else None,
        title=title_val,
        url=url_val,
        source_url=url_val,
        tag=[],
        extra={},
    )

    return pipe_obj
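
# Illustrative usage (hedged sketch; paths/URLs are placeholders, not fixtures):
#   po = coerce_to_pipe_object({"path": "/tmp/clip.mp4", "tag": ["title:Clip"]})
#     -> po.store == "PATH", po.hash is the file's SHA-256 (or "unknown" if unreadable),
#        po.title falls back to the filename stem when no title is supplied.
#   po = coerce_to_pipe_object("https://example.com/file.mp4")
#     -> po.store == "URL", po.url == po.source_url == the input string, po.path is None.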


def register_url_with_local_library(
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
) -> bool:
    """Register a file's URLs in the local library database.

    This is called automatically by download cmdlets to ensure URLs are persisted
    without requiring a separate add-url step in the pipeline.

    Args:
        pipe_obj: PipeObject with path and url
        config: Config dict containing local library path

    Returns:
        True if URLs were registered (or already present), False otherwise
    """

    try:
        from SYS.config import get_local_storage_path
        from API.folder import API_folder_store

        file_path = get_field(pipe_obj, "path")
        url_field = get_field(pipe_obj, "url", [])
        urls: List[str] = []
        if isinstance(url_field, str):
            urls = [u.strip() for u in url_field.split(",") if u.strip()]
        elif isinstance(url_field, (list, tuple)):
            urls = [u for u in url_field if isinstance(u, str) and u.strip()]

        if not file_path or not urls:
            return False

        path_obj = Path(file_path)
        if not path_obj.exists():
            return False

        storage_path = get_local_storage_path(config)
        if not storage_path:
            return False

        with API_folder_store(storage_path) as db:
            file_hash = db.get_file_hash(path_obj)
            if not file_hash:
                return False
            metadata = db.get_metadata(file_hash) or {}
            existing_url = metadata.get("url") or []

            # Add any new URLs
            changed = False
            for u in urls:
                if u not in existing_url:
                    existing_url.append(u)
                    changed = True

            if changed:
                metadata["url"] = existing_url
                db.save_metadata(path_obj, metadata)
                return True

            return True  # URLs already existed
    except Exception:
        return False
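
# Illustrative usage (hedged sketch; assumes `config` resolves to a local library root via
# SYS.config.get_local_storage_path and that the file is already indexed by API_folder_store):
#   po = coerce_to_pipe_object({"path": "/library/clip.mp4", "url": "https://example.com/clip"})
#   register_url_with_local_library(po, config)
#     -> True when the URL is added or already recorded,
#        False when the path/URLs are missing or the library lookup fails.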


def resolve_tidal_manifest_path(item: Any) -> Optional[str]:
    """Persist the Tidal manifest from search results and return a local path."""

    metadata = None
    if isinstance(item, dict):
        metadata = item.get("full_metadata") or item.get("metadata")
    else:
        metadata = getattr(item, "full_metadata", None) or getattr(item, "metadata", None)

    if not isinstance(metadata, dict):
        return None

    existing_path = metadata.get("_tidal_manifest_path")
    if existing_path:
        try:
            resolved = Path(str(existing_path))
            if resolved.is_file():
                return str(resolved)
        except Exception:
            pass

    existing_url = metadata.get("_tidal_manifest_url")
    if existing_url and isinstance(existing_url, str):
        candidate = existing_url.strip()
        if candidate:
            return candidate

    raw_manifest = metadata.get("manifest")
    if not raw_manifest:
        # When piping directly from the HIFI search table, we may only have a track id.
        # Fetch track details from the proxy so downstream stages can decode the manifest.
        try:
            already = bool(metadata.get("_tidal_track_details_fetched"))
        except Exception:
            already = False

        track_id = metadata.get("trackId") or metadata.get("id")
        if track_id is None:
            try:
                if isinstance(item, dict):
                    candidate_path = item.get("path") or item.get("url")
                else:
                    candidate_path = getattr(item, "path", None) or getattr(item, "url", None)
            except Exception:
                candidate_path = None

            if candidate_path:
                m = re.search(
                    r"hifi:(?://)?track[\\/](\d+)",
                    str(candidate_path),
                    flags=re.IGNORECASE,
                )
                if m:
                    track_id = m.group(1)

        if (not already) and track_id is not None:
            try:
                track_int = int(track_id)
            except Exception:
                track_int = None

            if track_int and track_int > 0:
                try:
                    import httpx

                    resp = httpx.get(
                        "https://tidal-api.binimum.org/track/",
                        params={"id": str(track_int)},
                        timeout=10.0,
                    )
                    resp.raise_for_status()
                    payload = resp.json()
                    data = payload.get("data") if isinstance(payload, dict) else None
                    if isinstance(data, dict) and data:
                        try:
                            metadata.update(data)
                        except Exception:
                            pass
                        try:
                            metadata["_tidal_track_details_fetched"] = True
                        except Exception:
                            pass
                    if not metadata.get("url"):
                        try:
                            resp_info = httpx.get(
                                "https://tidal-api.binimum.org/info/",
                                params={"id": str(track_int)},
                                timeout=10.0,
                            )
                            resp_info.raise_for_status()
                            info_payload = resp_info.json()
                            info_data = info_payload.get("data") if isinstance(info_payload, dict) else None
                            if isinstance(info_data, dict) and info_data:
                                try:
                                    for k, v in info_data.items():
                                        if k not in metadata:
                                            metadata[k] = v
                                except Exception:
                                    pass
                                try:
                                    if info_data.get("url"):
                                        metadata["url"] = info_data.get("url")
                                except Exception:
                                    pass
                        except Exception:
                            pass
                except Exception:
                    pass

        raw_manifest = metadata.get("manifest")
        if not raw_manifest:
            return None

    manifest_str = "".join(str(raw_manifest or "").split())
    if not manifest_str:
        return None

    manifest_bytes: bytes
    try:
        manifest_bytes = base64.b64decode(manifest_str, validate=True)
    except Exception:
        try:
            manifest_bytes = base64.b64decode(manifest_str, validate=False)
        except Exception:
            try:
                manifest_bytes = manifest_str.encode("utf-8")
            except Exception:
                return None

    if not manifest_bytes:
        return None

    head = (manifest_bytes[:1024] or b"").lstrip()
    if head.startswith((b"{", b"[")):
        try:
            text = manifest_bytes.decode("utf-8", errors="ignore")
            payload = json.loads(text)
            urls = payload.get("urls") or []
            selected_url = None
            for candidate in urls:
                if isinstance(candidate, str):
                    candidate = candidate.strip()
                    if candidate:
                        selected_url = candidate
                        break
            if selected_url:
                try:
                    metadata["_tidal_manifest_url"] = selected_url
                except Exception:
                    pass
                return selected_url
            try:
                metadata["_tidal_manifest_error"] = "JSON manifest contained no urls"
            except Exception:
                pass
            log(
                f"[hifi] JSON manifest for track {metadata.get('trackId') or metadata.get('id')} had no playable urls",
                file=sys.stderr,
            )
        except Exception as exc:
            try:
                metadata["_tidal_manifest_error"] = (
                    f"Failed to parse JSON manifest: {exc}"
                )
            except Exception:
                pass
            log(
                f"[hifi] Failed to parse JSON manifest for track {metadata.get('trackId') or metadata.get('id')}: {exc}",
                file=sys.stderr,
            )
        return None

    looks_like_mpd = (
        head.startswith(b"<?xml")
        or head.startswith(b"<MPD")
        or b"<MPD" in head
    )

    if not looks_like_mpd:
        manifest_mime = str(metadata.get("manifestMimeType") or "").strip().lower()
        try:
            metadata["_tidal_manifest_error"] = (
                f"Decoded manifest is not an MPD XML (mime: {manifest_mime or 'unknown'})"
            )
        except Exception:
            pass
        try:
            log(
                f"[hifi] Decoded manifest is not an MPD XML for track {metadata.get('trackId') or metadata.get('id')} (mime {manifest_mime or 'unknown'})",
                file=sys.stderr,
            )
        except Exception:
            pass
        return None

    manifest_hash = str(metadata.get("manifestHash") or "").strip()
    track_id = metadata.get("trackId") or metadata.get("id")
    identifier = manifest_hash or hashlib.sha256(manifest_bytes).hexdigest()
    identifier_safe = re.sub(r"[^A-Za-z0-9_-]+", "_", identifier)[:64]
    if not identifier_safe:
        identifier_safe = hashlib.sha256(manifest_bytes).hexdigest()[:12]

    track_safe = "tidal"
    if track_id is not None:
        track_safe = re.sub(r"[^A-Za-z0-9_-]+", "_", str(track_id))[:32]
        if not track_safe:
            track_safe = "tidal"

    # Persist as .mpd for DASH manifests.
    ext = "mpd"

    manifest_dir = Path(tempfile.gettempdir()) / "medeia" / "hifi"
    try:
        manifest_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    filename = f"hifi-{track_safe}-{identifier_safe[:24]}.{ext}"
    target_path = manifest_dir / filename
    try:
        with open(target_path, "wb") as fh:
            fh.write(manifest_bytes)
        metadata["_tidal_manifest_path"] = str(target_path)
        if isinstance(item, dict):
            if item.get("full_metadata") is metadata:
                item["full_metadata"] = metadata
            elif item.get("metadata") is metadata:
                item["metadata"] = metadata
        else:
            extra = getattr(item, "extra", None)
            if isinstance(extra, dict):
                extra["_tidal_manifest_path"] = str(target_path)
    except Exception:
        return None

    return str(target_path)
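
# Illustrative outcome (hedged sketch; `row` is assumed to be a HIFI search-result dict
# carrying "full_metadata" with a base64 "manifest" or a numeric track id):
#   resolve_tidal_manifest_path(row)
#     -> a temp-dir .mpd path (e.g. <tempdir>/medeia/hifi/hifi-<track>-<hash>.mpd) for DASH manifests,
#        a direct stream URL for JSON manifests, or None when nothing usable can be recovered.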