update and cleanup repo

This commit is contained in:
2026-05-26 15:32:01 -07:00
parent 5041d9fbb9
commit 0db899d0c3
72 changed files with 788 additions and 1884 deletions
+5 -776
View File
@@ -17,7 +17,6 @@ from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from SYS.logger import log, debug, debug_panel
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
from dataclasses import dataclass, field
from SYS import models
from SYS import pipeline as pipeline_context
from SYS.item_accessors import get_field as _item_accessor_get_field
@@ -25,533 +24,11 @@ from SYS.payload_builders import build_file_result_payload, build_table_result_p
from SYS.result_publication import publish_result_table
from SYS.result_table import Table
from SYS.rich_display import stderr_console as get_stderr_console
from SYS.cmdlet_spec import Cmdlet, CmdletArg, QueryArg, SharedArgs, parse_cmdlet_args
from rich.prompt import Confirm
from contextlib import AbstractContextManager, nullcontext
@dataclass
class CmdletArg:
    """Represents a single cmdlet argument with optional enum choices.

    Attributes:
        name: Argument name, e.g., '-path' or 'location'. Leading dashes are
            ignored when flag variants are generated.
        type: Argument type: 'string', 'int', 'flag', 'enum', etc.
        required: Whether this argument is required.
        description: Human-readable description of the argument.
        choices: Optional list of valid choices for enum/autocomplete,
            e.g., ['hydrus', 'local', '0x0.st'].
        alias: Optional alias for the argument name, e.g., 'loc' for 'location'.
        handler: Optional callable used by `resolve` to process the value.
        variadic: Whether this argument accepts multiple values (consumes
            remaining positional args).
        usage: Free-form usage text for this argument; not interpreted by
            this class.
        requires_db: Whether this argument requires the local DB/library root
            to be configured.
        query_key: Field name inside -query that maps to this argument
            (e.g., 'hash'). Some cmdlets use a unified `-query` string; when
            configured, individual args can be populated from fields inside
            it (e.g., -query "hash:<sha256>").
        query_aliases: Additional field names inside -query that map to this
            argument.
        query_only: When True, do not accept a dedicated CLI flag for this
            arg; only map from -query.
    """

    name: str
    type: str = "string"
    required: bool = False
    description: str = ""
    choices: List[str] = field(default_factory=list)
    alias: str = ""
    handler: Optional[Any] = None
    variadic: bool = False
    usage: str = ""
    requires_db: bool = False
    query_key: Optional[str] = None
    query_aliases: List[str] = field(default_factory=list)
    query_only: bool = False

    def resolve(self, value: Any) -> Any:
        """Process an argument value through the handler, if one is set.

        Args:
            value: The raw argument value to process.

        Returns:
            The handler's return value when `handler` is callable, otherwise
            `value` unchanged.
        """
        if callable(self.handler):
            return self.handler(value)
        return value

    def to_flags(self) -> tuple[str, ...]:
        """Generate all flag variants (short and long form) for this argument.

        Leading dashes on both `name` and `alias` are stripped before the
        variants are built, and duplicates are removed while keeping order.

        Variants, in order:
            - Long form with double dash: --name
            - Single dash multi-char form: -name
            - Alias form: -alias (if an alias exists)

        For arguments of type 'flag', negation forms follow:
            - --no-name
            - -noname
            - -nalias (if an alias exists)

        Returns:
            Tuple of flag strings; empty when the name is empty.

        Example:
            CmdletArg("archive", type="flag", alias="arch").to_flags()
            # ('--archive', '-archive', '-arch',
            #  '--no-archive', '-noarchive', '-narch')
            CmdletArg("storage", alias="s").to_flags()
            # ('--storage', '-storage', '-s')
        """
        normalized_name = str(self.name or "").lstrip("-")
        if not normalized_name:
            return tuple()
        # Normalize the alias the same way as the name so an alias declared
        # as "-d" does not turn into "--d" / "-n-d".
        normalized_alias = str(self.alias or "").lstrip("-")

        flags = [f"--{normalized_name}", f"-{normalized_name}"]
        if normalized_alias:
            flags.append(f"-{normalized_alias}")
        if self.type == "flag":
            flags.append(f"--no-{normalized_name}")
            flags.append(f"-no{normalized_name}")
            if normalized_alias:
                flags.append(f"-n{normalized_alias}")
        # dict.fromkeys drops duplicates (e.g. alias == name) but keeps order.
        return tuple(dict.fromkeys(flags))
def QueryArg(
    name: str,
    *,
    key: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    type: str = "string",
    required: bool = False,
    description: str = "",
    choices: Optional[Sequence[str]] = None,
    handler: Optional[Any] = None,
    query_only: bool = True,
) -> CmdletArg:
    """Build a CmdletArg whose value can come from a field inside `-query`.

    The query field name is `key` (falling back to `name`), stripped and
    lower-cased; it is None when that text is blank. Blank aliases are dropped
    and the rest are stripped and lower-cased.

    With the default query_only=True no dedicated flag is created. This is
    useful for deprecating bloat flags like `-hash` while still making `hash:`
    a first-class, documented, reusable field.
    """
    field_name = str(key or name).strip()

    extra_fields = []
    for candidate in aliases or []:
        cleaned = str(candidate).strip()
        if cleaned:
            extra_fields.append(cleaned.lower())

    return CmdletArg(
        name=str(name),
        type=str(type or "string"),
        required=bool(required),
        description=str(description or ""),
        choices=list(choices or []),
        handler=handler,
        query_key=field_name.lower() if field_name else None,
        query_aliases=extra_fields,
        query_only=bool(query_only),
    )
# ============================================================================
# SHARED ARGUMENTS - Reusable argument definitions shared across cmdlets
# ============================================================================
class SharedArgs:
    """Registry of shared CmdletArg definitions used across multiple cmdlets.

    This class provides a centralized location for common arguments so they're
    defined once and used consistently everywhere.

    Example:
        CMDLET = Cmdlet(
            name="my-cmdlet",
            summary="Does something",
            usage="my-cmdlet",
            arg=[
                SharedArgs.QUERY,     # Shared arg (e.g., -query "hash:<sha256>")
                SharedArgs.LOCATION,  # Another shared arg
                CmdletArg(...),       # Mix with custom args
            ]
        )
    """

    # NOTE: This project no longer exposes a dedicated -hash flag.
    # Use SharedArgs.QUERY with `hash:` syntax instead (e.g., -query "hash:<sha256>").
    STORE = CmdletArg(
        name="store",
        type="enum",
        choices=[],  # Dynamically populated via get_store_choices()
        description="Selects a storage backend",
        query_key="store",
    )
    INSTANCE = CmdletArg(
        name="instance",
        type="string",
        description="Selects a plugin instance",
        query_key="instance",
    )
    URL = CmdletArg(
        name="url",
        type="string",
        description="http parser",
    )
    PLUGIN = CmdletArg(
        name="plugin",
        type="string",
        description="selects plugin",
    )

    @staticmethod
    def get_store_choices(config: Optional[Dict[str, Any]] = None, force: bool = False) -> List[str]:
        """Get the list of available store backend names.

        Returns the cached list when one exists and `force` is False;
        otherwise performs a full refresh first.

        Args:
            config: Optional config dict. Used if force=True or no cache exists.
            force: If True, force a fresh check of the backends.

        Returns:
            A new list of backend names. A copy is returned so that callers
            (which may assign it to an argument's `choices`) cannot mutate
            the cache.

        Example:
            SharedArgs.STORE.choices = SharedArgs.get_store_choices(config)
        """
        # The cache attribute is created lazily, so its presence (not its
        # truthiness) marks "a refresh has already happened".
        if force or not hasattr(SharedArgs, "_cached_available_stores"):
            # Autocomplete and shared arg choices must only expose backends that
            # actually initialized successfully, so do a full refresh.
            SharedArgs._refresh_store_choices_cache(config, skip_instantiation=False)
        return list(SharedArgs._cached_available_stores or [])

    @staticmethod
    def _refresh_store_choices_cache(config: Optional[Dict[str, Any]] = None, skip_instantiation: bool = False) -> None:
        """Refresh the cached store choices list.

        The cache is always reset to an empty list first. When
        `skip_instantiation` is True it stays empty rather than surfacing
        configured-but-disabled store names. Any failure leaves the cache
        empty; this method does not raise.

        Args:
            config: Config dict. If not provided, it is loaded via
                SYS.config.load_config(emit_summary=False).
            skip_instantiation: When True, do not query the plugin registry.
        """
        try:
            if config is None:
                try:
                    from SYS.config import load_config

                    config = load_config(emit_summary=False)
                except Exception:
                    SharedArgs._cached_available_stores = []
                    return

            SharedArgs._cached_available_stores = []

            # A lightweight pass must not expose configured names that may be
            # disabled or unavailable.
            if skip_instantiation:
                return

            names: set[str] = set()

            # Plugin-based multi-instance backends
            # (config["plugin"] / config["provider"] sections).
            try:
                from PluginCore.registry import REGISTRY

                plugin_instances = REGISTRY.list_storage_plugin_instances(config)
                for instance_names in plugin_instances.values():
                    names.update(instance_names)
            except Exception:
                # Best-effort: a missing or failing registry means no choices.
                pass

            if names:
                SharedArgs._cached_available_stores = sorted(names)
        except Exception:
            SharedArgs._cached_available_stores = []

    LOCATION = CmdletArg(
        "location",
        type="enum",
        choices=["hydrus", "0x0"],
        required=True,
        description="Destination location",
    )
    DELETE = CmdletArg(
        "delete",
        type="flag",
        description="Delete the file after successful operation.",
    )

    # Metadata arguments
    ARTIST = CmdletArg(
        "artist",
        type="string",
        description="Filter by artist name (case-insensitive, partial match).",
    )
    ALBUM = CmdletArg(
        "album",
        type="string",
        description="Filter by album name (case-insensitive, partial match).",
    )
    TRACK = CmdletArg(
        "track",
        type="string",
        description="Filter by track title (case-insensitive, partial match).",
    )

    # Library/Search arguments
    LIBRARY = CmdletArg(
        "library",
        type="string",
        choices=["hydrus", "local", "soulseek", "libgen", "ftp"],
        description="Search library or source location.",
    )
    TIMEOUT = CmdletArg(
        "timeout",
        type="integer",
        description="Search or operation timeout in seconds."
    )
    LIMIT = CmdletArg(
        "limit",
        type="integer",
        description="Maximum number of results to return."
    )

    # Path/File arguments
    PATH = CmdletArg("path", type="string", description="File or directory path.")

    # Generic arguments
    QUERY = CmdletArg(
        "query",
        type="string",
        description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>}).",
    )
    REASON = CmdletArg(
        "reason",
        type="string",
        description="Reason or explanation for the operation."
    )
    ARCHIVE = CmdletArg(
        "archive",
        type="flag",
        description=
        "Archive the URL to Wayback Machine, Archive.today, and Archive.ph (requires URL argument in cmdlet).",
        alias="arch",
    )

    @staticmethod
    def resolve_storage(
        storage_value: Optional[str],
        default: Optional[Path] = None
    ) -> Path:
        """Resolve a storage location name to a filesystem Path.

        There is no longer a hardcoded map for 'hydrus' (~/.hydrus) or
        'local' (~/Videos); `storage_value` is accepted for compatibility but
        is not consulted. This keeps the result environment-independent.

        Args:
            storage_value: Storage identifier (currently ignored).
            default: Path to return instead of the system temp directory.

        Returns:
            `default` when it is not None, otherwise
            Path(tempfile.gettempdir()).

        Example:
            storage_path = SharedArgs.resolve_storage(parsed.get('storage'))
            # Returns Path(tempfile.gettempdir())
        """
        if default is not None:
            return default
        return Path(tempfile.gettempdir())

    @classmethod
    def get(cls, name: str) -> Optional[CmdletArg]:
        """Get a shared argument by name.

        Args:
            name: Attribute name in any case, e.g. 'query' or 'LOCATION';
                it is upper-cased before lookup.

        Returns:
            The class attribute with that upper-cased name, or None when no
            such attribute exists.

        Example:
            arg = SharedArgs.get('QUERY')  # Returns SharedArgs.QUERY
        """
        try:
            return getattr(cls, name.upper())
        except AttributeError:
            # Also covers a `name` that has no .upper() (e.g. None).
            return None
@dataclass
class Cmdlet:
    """Represents a cmdlet with metadata and arguments.

    Attributes:
        name: Primary cmdlet name, e.g. 'add-file'.
        summary: One-line summary of the cmdlet.
        usage: Usage string, e.g., 'add-file <location> [-delete]'.
        alias: List of aliases for this cmdlet, e.g., ['add', 'add-f'].
        arg: List of arguments accepted by this cmdlet.
        detail: Detailed explanation lines (for help text).
        examples: Example invocations shown in `.help`.
        exec: Execution function: func(result, args, config) -> int.

    Example:
        cmd = Cmdlet(
            name="add-file",
            summary="Upload a media file",
            usage="add-file <location>",
            alias=["add-file-alias"],
            arg=[
                CmdletArg("location", required=True, description="Destination location"),
                CmdletArg("-delete", type="flag", description="Delete after upload"),
            ],
            detail=[
                "- This is a detail line",
                "- Another detail",
            ]
        )
        # Access properties
        log(cmd.name)         # "add-file"
        log(cmd.summary)      # "Upload a media file"
        log(cmd.arg[0].name)  # "location"
    """

    name: str
    summary: str
    usage: str
    alias: List[str] = field(default_factory=list)
    arg: List["CmdletArg"] = field(default_factory=list)
    detail: List[str] = field(default_factory=list)
    examples: List[str] = field(default_factory=list)
    exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)

    def _collect_names(self) -> List[str]:
        """Collect primary name plus aliases, de-duplicated.

        Names are compared case-insensitively with '_' treated as '-'; the
        first spelling encountered is the one kept.
        """
        names: List[str] = []
        if self.name:
            names.append(self.name)
        names.extend(a for a in (self.alias or []) if a)
        # An `aliases` attribute is not a declared field; it is honored only
        # if something attached it to the instance.
        names.extend(a for a in (getattr(self, "aliases", None) or []) if a)

        seen: Set[str] = set()
        deduped: List[str] = []
        for name in names:
            key = name.replace("_", "-").lower()
            if key in seen:
                continue
            seen.add(key)
            deduped.append(name)
        return deduped

    def register(self) -> "Cmdlet":
        """Register this cmdlet's exec under its name and aliases.

        Does nothing when `exec` is not callable, when the package-level
        `register_callable` cannot be imported, or when there are no names.

        Returns:
            self, so the call can be chained.
        """
        if not callable(self.exec):
            return self
        try:
            from . import (
                register_callable as _register_callable,
            )  # Local import to avoid circular import cost
        except Exception:
            return self
        names = self._collect_names()
        if not names:
            return self
        _register_callable(names, self.exec)
        return self

    def get_flags(self, arg_name: str) -> set[str]:
        """Generate -name and --name flag variants for an argument.

        Leading dashes on `arg_name` are stripped first, so 'library',
        '-library' and '--library' all give the same result.

        Args:
            arg_name: The argument name (e.g., 'library', 'tag', 'size').

        Returns:
            Set containing both single-dash and double-dash variants
            (e.g., {'-library', '--library'}).

        Example:
            if low in cmdlet.get_flags('library'):
                # handle library flag
        """
        bare_name = str(arg_name).lstrip("-")
        return {f"-{bare_name}", f"--{bare_name}"}

    def build_flag_registry(self) -> Dict[str, set[str]]:
        """Build a registry of all flag variants for this cmdlet's arguments.

        Each argument contributes the lower-cased result of its `to_flags()`;
        if that call fails, the plain -name/--name pair from `get_flags` is
        used instead.

        Returns:
            Dict mapping each argument's `name` (as declared) to its flag set
            (e.g., {'library': {'-library', '--library'}, ...}).

        Example:
            flags = cmdlet.build_flag_registry()
            if low in flags.get('library', set()):
                # handle library
            elif low in flags.get('tag', set()):
                # handle tag
        """
        registry: Dict[str, set[str]] = {}
        for arg in self.arg:
            try:
                registry[arg.name] = {str(flag).lower() for flag in arg.to_flags()}
            except Exception:
                registry[arg.name] = {flag.lower() for flag in self.get_flags(arg.name)}
        return registry
# Tag groups cache (loaded from JSON config file).
# Both start as None, i.e. nothing has been loaded yet.
# _TAG_GROUPS_CACHE maps a string key to a list of strings.
# NOTE(review): _TAG_GROUPS_MTIME is presumably the modification time of the
# file the cache was loaded from - confirm against the loader.
_TAG_GROUPS_CACHE: Optional[Dict[str, List[str]]] = None
_TAG_GROUPS_MTIME: Optional[float] = None
@@ -566,240 +43,6 @@ def set_tag_groups_path(path: Path) -> None:
TAG_GROUPS_PATH = path
def parse_cmdlet_args(args: Sequence[str],
                      cmdlet_spec: Dict[str, Any] | Cmdlet) -> Dict[str, Any]:
    """Parse command-line arguments based on a cmdlet specification.

    The call is first delegated to `SYS.cmdlet_spec.parse_cmdlet_args`. Only
    if that import or call raises does the local implementation below run.

    Local implementation:
        - Every arg that is not `query_only` is reachable through its bare
          name and through every variant returned by its `to_flags()`
          (matched case-insensitively).
        - Args whose declared name contains no '-' are additionally filled
          from bare tokens, in declaration order. A variadic positional
          collects all remaining bare tokens into a list.
        - 'flag' typed args take no value: a positive form stores True and a
          negation form (--no-name, -noname, -n<alias>) stores False.
        - Other args consume the next token as their value unless it starts
          with '-'. Values are stored as given (no type conversion).
        - Unknown tokens are skipped. An unregistered -hash/--hash logs a
          hint to use -query instead.
        - Args with a `query_key` that were not given explicitly are filled
          from the fields of the parsed `-query` string and passed through
          the arg's `resolve`.

    Args:
        args: Command-line tokens (e.g., ["-path", "/home/file.txt"]).
        cmdlet_spec: Cmdlet object. The local implementation accepts only
            Cmdlet instances.

    Returns:
        Dict mapping canonical arg names (leading dashes removed) to parsed
        values. Args that were not provided are absent.

    Raises:
        TypeError: In the local implementation, when `cmdlet_spec` is not a
            Cmdlet.

    Example:
        cmdlet = Cmdlet(
            name="demo", summary="", usage="",
            arg=[CmdletArg("path"), CmdletArg("count", type="int")],
        )
        result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet)
        # result = {"path": "value1", "count": "5"}
    """
    try:
        from SYS.cmdlet_spec import parse_cmdlet_args as _parse_cmdlet_args_fast
        return _parse_cmdlet_args_fast(args, cmdlet_spec)
    except Exception:
        # Deliberate best-effort: fall back to the local implementation below
        # to preserve behavior if the lightweight parser is unavailable.
        pass

    # Only accept Cmdlet objects
    if not isinstance(cmdlet_spec, Cmdlet):
        raise TypeError(f"Expected Cmdlet, got {type(cmdlet_spec).__name__}")

    result: Dict[str, Any] = {}
    arg_specs: List[CmdletArg] = cmdlet_spec.arg
    positional_args: List[CmdletArg] = []  # args without '-' in definition
    query_mapped_args: List[CmdletArg] = []
    arg_spec_map: Dict[str, str] = {}  # flag variant -> canonical name
    negation_tokens: Set[str] = set()  # variants that mean "flag is off"

    # canonical (lower-case) name -> first spec declaring it, so each token
    # does not need a linear scan over the specs.
    spec_by_canonical: Dict[str, CmdletArg] = {}
    for spec in arg_specs:
        spec_by_canonical.setdefault(str(spec.name).lstrip("-").lower(), spec)

    for spec in arg_specs:
        name = spec.name
        if not name:
            continue

        # Track args that can be populated from -query.
        if getattr(spec, "query_key", None):
            query_mapped_args.append(spec)

        name_str = str(name)
        canonical_name = name_str.lstrip("-")
        canonical_lower = canonical_name.lower()

        # Query-only args do not register dedicated flags/positionals.
        if getattr(spec, "query_only", False):
            continue

        # Positional when the original definition contains no dash at all.
        if "-" not in name_str:
            positional_args.append(spec)

        # Bare canonical name is always a positive form.
        arg_spec_map[canonical_lower] = canonical_name
        negation_tokens.discard(canonical_lower)

        try:
            variants = [str(flag).lower() for flag in spec.to_flags()]
        except Exception:
            variants = [f"-{canonical_lower}", f"--{canonical_lower}"]

        # Work out which variants are negations. The alias is considered both
        # as declared and with leading dashes removed, so this holds whichever
        # way to_flags() spells it. A variant that is also a positive form of
        # this same arg stays positive.
        alias_text = str(getattr(spec, "alias", "") or "").lower()
        alias_forms = {a for a in (alias_text, alias_text.lstrip("-")) if a}
        positive_forms = {f"--{canonical_lower}", f"-{canonical_lower}"}
        positive_forms.update(f"-{a}" for a in alias_forms)
        negative_forms: Set[str] = set()
        if spec.type == "flag":
            negative_forms = {f"--no-{canonical_lower}", f"-no{canonical_lower}"}
            negative_forms.update(f"-n{a}" for a in alias_forms)
            negative_forms -= positive_forms

        for variant in variants:
            # A later spec overwrites an earlier one for the same token, so
            # the negation marker must follow the current owner.
            arg_spec_map[variant] = canonical_name
            if variant in negative_forms:
                negation_tokens.add(variant)
            else:
                negation_tokens.discard(variant)

    # Parse arguments
    i = 0
    positional_index = 0  # Track which positional arg we're on
    while i < len(args):
        token = str(args[i])
        token_lower = token.lower()

        # Legacy guidance: -hash/--hash was removed in favor of -query "hash:...".
        # However, some cmdlets may explicitly re-introduce a -hash flag.
        if token_lower in {"-hash", "--hash"} and token_lower not in arg_spec_map:
            try:
                log(
                    'Legacy flag -hash is no longer supported. Use: -query "hash:<sha256>"',
                    file=sys.stderr,
                )
            except Exception:
                pass
            i += 1
            continue

        if token_lower in arg_spec_map:
            canonical_name = arg_spec_map[token_lower]
            spec = spec_by_canonical.get(canonical_name.lower())

            if spec and spec.type == "flag":
                # Flags never consume the next token. Negation forms switch
                # the flag off instead of on.
                result[canonical_name] = token_lower not in negation_tokens
                i += 1
            elif i + 1 < len(args) and not str(args[i + 1]).startswith("-"):
                # Non-flags consume the next token as the value.
                value = args[i + 1]
                if spec and spec.variadic:
                    if canonical_name not in result:
                        result[canonical_name] = []
                    elif not isinstance(result[canonical_name], list):
                        result[canonical_name] = [result[canonical_name]]
                    result[canonical_name].append(value)
                else:
                    result[canonical_name] = value
                i += 2
            else:
                # No usable value follows; skip just the flag token.
                i += 1
        elif positional_index < len(positional_args):
            positional_spec = positional_args[positional_index]
            canonical_name = str(positional_spec.name).lstrip("-")
            if positional_spec.variadic:
                if canonical_name not in result:
                    result[canonical_name] = []
                elif not isinstance(result[canonical_name], list):
                    # Value was set earlier through the flag form.
                    result[canonical_name] = [result[canonical_name]]
                result[canonical_name].append(token)
                # positional_index is not advanced, so subsequent bare tokens
                # also land here. Variadic args should typically be last.
            else:
                result[canonical_name] = token
                positional_index += 1
            i += 1
        else:
            # Unknown token, skip it
            i += 1

    # Populate query-mapped args from the unified -query string.
    raw_query = result.get("query")
    if query_mapped_args and raw_query is not None:
        try:
            from SYS.cli_syntax import parse_query as _parse_query

            parsed_query = _parse_query(str(raw_query))
            fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {}
            norm_fields = (
                {str(k).strip().lower(): v for k, v in fields.items()}
                if isinstance(fields, dict) else {}
            )
        except Exception:
            norm_fields = {}

        for spec in query_mapped_args:
            canonical_name = str(getattr(spec, "name", "") or "").lstrip("-")
            if not canonical_name:
                continue
            # Do not override explicit flags.
            if canonical_name in result and result.get(canonical_name) not in (None, ""):
                continue

            key = str(getattr(spec, "query_key", "") or "").strip().lower()
            alias_list = [
                str(a).strip().lower()
                for a in (getattr(spec, "query_aliases", None) or [])
                if str(a).strip()
            ]

            # First candidate present in the query wins: key, name, aliases.
            val = None
            for candidate in [key, canonical_name] + alias_list:
                if candidate and candidate in norm_fields:
                    val = norm_fields.get(candidate)
                    break
            if val is None:
                continue
            try:
                result[canonical_name] = spec.resolve(val)
            except Exception:
                # A failing handler must not lose the raw value.
                result[canonical_name] = val
    return result
def resolve_target_dir(
parsed: Dict[str, Any],
config: Dict[str, Any],
@@ -3011,20 +2254,6 @@ def collapse_namespace_tags(
kept_ns = True
result.append(text)
return result
def collapse_namespace_tag(
    tags: Optional[Iterable[Any]],
    namespace: str,
    prefer: str = "last"
) -> list[str]:
    """Singular-name alias that forwards to collapse_namespace_tags.

    Some cmdlets prefer the singular name; the behavior lives in the plural
    function so it stays in one place.
    """
    collapsed = collapse_namespace_tags(tags, namespace, prefer=prefer)
    return collapsed
def extract_tag_from_result(result: Any) -> list[str]:
"""Extract all tags from a result dict or PipeObject.
@@ -3395,11 +2624,11 @@ def coerce_to_pipe_object(
pipe_obj = models.PipeObject(
hash=hash_val,
store=store_val,
provider=str(
value.get("provider")
plugin=str(
value.get("plugin")
or value.get("prov")
or value.get("source")
or extra.get("provider")
or extra.get("plugin")
or extra.get("source")
or ""
).strip() or None,
@@ -3456,7 +2685,7 @@ def coerce_to_pipe_object(
pipe_obj = models.PipeObject(
hash=hash_val,
store=store_val,
provider=None,
plugin=None,
path=str(path_val) if path_val and path_val != "unknown" else None,
title=title_val,
url=url_val,