nose
2025-12-20 02:12:45 -08:00
parent b0b198df95
commit b75faa49a2
27 changed files with 2883 additions and 3329 deletions

CLI.py
File diff suppressed because it is too large

View File

@@ -523,7 +523,7 @@ local function _refresh_store_cache(timeout_seconds)
if not resp or not resp.success or type(resp.choices) ~= 'table' then
_lua_log('stores: failed to load store choices via helper; stderr=' .. tostring(resp and resp.stderr or '') .. ' error=' .. tostring(resp and resp.error or ''))
-- Fallback: directly call Python to import CLI.get_store_choices().
-- Fallback: directly call Python to import MedeiaCLI.get_store_choices().
-- This avoids helper IPC issues and still stays in sync with the REPL.
local python = (opts and opts.python_path) and tostring(opts.python_path) or 'python'
local cli_path = (opts and opts.cli_path) and tostring(opts.cli_path) or nil
@@ -537,7 +537,7 @@ local function _refresh_store_cache(timeout_seconds)
if cli_path and cli_path ~= '' then
local root = tostring(cli_path):match('(.*)[/\\]') or ''
if root ~= '' then
local code = "import json, sys; sys.path.insert(0, r'" .. root .. "'); from CLI import get_store_choices; print(json.dumps(get_store_choices()))"
local code = "import json, sys; sys.path.insert(0, r'" .. root .. "'); from CLI import MedeiaCLI; print(json.dumps(MedeiaCLI.get_store_choices()))"
local res = utils.subprocess({
args = { python, '-c', code },
cancellable = false,
@@ -1027,7 +1027,7 @@ local function _start_download_flow_for_current()
return
end
ensure_mpv_ipc_server()
M.run_pipeline('get-file -store ' .. quote_pipeline_arg(store_hash.store) .. ' -hash ' .. store_hash.hash .. ' -path ' .. quote_pipeline_arg(folder))
M.run_pipeline('get-file -store ' .. quote_pipeline_arg(store_hash.store) .. ' -query ' .. quote_pipeline_arg('hash:' .. store_hash.hash) .. ' -path ' .. quote_pipeline_arg(folder))
mp.osd_message('Download started', 2)
return
end

View File

@@ -136,33 +136,10 @@ def _run_op(op: str, data: Any) -> Dict[str, Any]:
# Provide store backend choices using the same source as CLI/Typer autocomplete.
if op_name in {"store-choices", "store_choices", "get-store-choices", "get_store_choices"}:
# Preferred: call the same choice function used by the CLI completer.
try:
from CLI import get_store_choices # noqa: WPS433
from CLI import MedeiaCLI # noqa: WPS433
backends = get_store_choices()
choices = sorted({str(n) for n in (backends or []) if str(n).strip()})
except Exception:
# Fallback: direct Store registry enumeration using loaded config.
try:
cfg = load_config() or {}
except Exception:
cfg = {}
try:
from Store import Store # noqa: WPS433
storage = Store(cfg, suppress_debug=True)
backends = storage.list_backends() or []
choices = sorted({str(n) for n in backends if str(n).strip()})
except Exception as exc:
return {
"success": False,
"stdout": "",
"stderr": "",
"error": f"{type(exc).__name__}: {exc}",
"table": None,
"choices": [],
}
backends = MedeiaCLI.get_store_choices()
choices = sorted({str(n) for n in (backends or []) if str(n).strip()})
return {
"success": True,

View File

@@ -23,6 +23,57 @@ _SOULSEEK_NOISE_SUBSTRINGS = (
)
@contextlib.asynccontextmanager
async def _suppress_aioslsk_asyncio_task_noise() -> Any:
"""Suppress non-fatal aioslsk task exceptions emitted via asyncio's loop handler.
aioslsk may spawn background tasks (e.g. direct peer connection attempts) that
can fail with ConnectionFailedError. These are often expected and should not
end a successful download with a scary "Task exception was never retrieved"
traceback.
We only swallow those specific cases and delegate everything else to the
previous/default handler.
"""
try:
loop = asyncio.get_running_loop()
except RuntimeError:
# Not in an event loop.
yield
return
previous_handler = loop.get_exception_handler()
def _handler(loop: asyncio.AbstractEventLoop, context: Dict[str, Any]) -> None:
try:
exc = context.get("exception")
msg = str(context.get("message") or "")
# Only suppress un-retrieved task exceptions from aioslsk connection failures.
if msg == "Task exception was never retrieved" and exc is not None:
cls = getattr(exc, "__class__", None)
name = getattr(cls, "__name__", "")
mod = getattr(cls, "__module__", "")
if name == "ConnectionFailedError" and str(mod).startswith("aioslsk"):
return
except Exception:
# If our filter logic fails, fall through to default handling.
pass
if previous_handler is not None:
previous_handler(loop, context)
else:
loop.default_exception_handler(context)
loop.set_exception_handler(_handler)
try:
yield
finally:
try:
loop.set_exception_handler(previous_handler)
except Exception:
pass
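# Usage sketch (mirrors the call site added later in this commit): wrap client startup so
# un-retrieved aioslsk ConnectionFailedError tasks are silenced while everything else still
# reaches the previous/default asyncio exception handler.
#   async with _suppress_aioslsk_asyncio_task_noise():
#       await client.start()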
def _configure_aioslsk_logging() -> None:
"""Reduce aioslsk internal log noise.
@@ -508,7 +559,8 @@ async def download_soulseek_file(
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
try:
await client.start()
async with _suppress_aioslsk_asyncio_task_noise():
await client.start()
await client.login()
debug(f"[soulseek] Logged in as {login_user}")

View File

@@ -11,7 +11,7 @@ from datetime import datetime
from threading import Thread, Lock
import time
from ..API.folder import API_folder_store
from API.folder import API_folder_store
from SYS.logger import log
logger = logging.getLogger(__name__)

View File

@@ -27,10 +27,7 @@ from cmdlet import REGISTRY
from config import get_local_storage_path, load_config
from SYS.worker_manager import WorkerManager
try: # Reuse the CLI selection parser instead of reimplementing it.
from CLI import _parse_selection_syntax
except ImportError: # pragma: no cover - fallback for atypical environments
_parse_selection_syntax = None # type: ignore
from CLI import MedeiaCLI
@dataclass(slots=True)
@@ -368,11 +365,8 @@ class PipelineExecutor:
@staticmethod
def _parse_selection(token: str) -> Optional[Sequence[int]]:
if _parse_selection_syntax:
parsed = _parse_selection_syntax(token)
if parsed:
return sorted(parsed)
return None
parsed = MedeiaCLI.parse_selection_syntax(token)
return sorted(parsed) if parsed else None
class _WorkerSession:

View File

@@ -109,19 +109,15 @@ class SharedArgs:
summary="Does something",
usage="my-cmdlet",
args=[
SharedArgs.HASH, # Use predefined shared arg
SharedArgs.QUERY, # Use predefined shared arg (e.g., -query "hash:<sha256>")
SharedArgs.LOCATION, # Use another shared arg
CmdletArg(...), # Mix with custom args
]
)
"""
# File/Hash arguments
HASH = CmdletArg(
name="hash",
type="string",
description="File hash (SHA256, 64-char hex string)",
)
# NOTE: This project no longer exposes a dedicated -hash flag.
# Use SharedArgs.QUERY with `hash:` syntax instead (e.g., -query "hash:<sha256>").
STORE = CmdletArg(
name="store",
@@ -248,7 +244,7 @@ class SharedArgs:
QUERY = CmdletArg(
"query",
type="string",
description="Search query string."
description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>})."
)
REASON = CmdletArg(
@@ -321,7 +317,7 @@ class SharedArgs:
CmdletArg if found, None otherwise
Example:
arg = SharedArgs.get('HASH') # Returns SharedArgs.HASH
arg = SharedArgs.get('QUERY') # Returns SharedArgs.QUERY
"""
try:
return getattr(cls, name.upper())
@@ -527,6 +523,16 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
while i < len(args):
token = str(args[i])
token_lower = token.lower()
# Legacy guidance: -hash/--hash was removed in favor of -query "hash:...".
# We don't error hard here because some cmdlets also accept free-form args.
if token_lower in {"-hash", "--hash"}:
try:
log("Legacy flag -hash is no longer supported. Use: -query \"hash:<sha256>\"", file=sys.stderr)
except Exception:
pass
i += 1
continue
# Check if this token is a known flagged argument
if token_lower in arg_spec_map:
@@ -608,6 +614,53 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
return text
def parse_hash_query(query: Optional[str]) -> List[str]:
"""Parse a unified query string for `hash:` into normalized SHA256 hashes.
Supported examples:
- hash:<h1>
- hash:<h1>,<h2>,<h3>
- Hash: <h1> <h2> <h3>
- hash:{<h1>, <h2>}
Returns:
List of unique normalized 64-hex SHA256 hashes.
"""
import re
q = str(query or "").strip()
if not q:
return []
m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
if not m:
return []
rest = (m.group(1) or "").strip()
if rest.startswith("{") and rest.endswith("}"):
rest = rest[1:-1].strip()
if rest.startswith("[") and rest.endswith("]"):
rest = rest[1:-1].strip()
raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
out: List[str] = []
for part in raw_parts:
h = normalize_hash(part)
if not h:
continue
if h not in out:
out.append(h)
return out
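# Illustrative behavior (not part of this commit), assuming normalize_hash() lowercases and
# validates 64-hex strings:
#   parse_hash_query("hash:<h1>,<h2>")    -> ["<h1>", "<h2>"]   (order preserved, duplicates dropped)
#   parse_hash_query("Hashes: {<h1>}")    -> ["<h1>"]           (prefix is case-insensitive; braces stripped)
#   parse_hash_query("title:something")   -> []                 (only hash:/hashes: queries match)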
def parse_single_hash_query(query: Optional[str]) -> Optional[str]:
"""Parse `hash:` query and require exactly one hash."""
hashes = parse_hash_query(query)
if len(hashes) != 1:
return None
return hashes[0]
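# Illustrative: parse_single_hash_query("hash:<h1>") -> "<h1>", while "hash:<h1>,<h2>" -> None
# because exactly one hash is required.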
def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]:
"""Get normalized hash from override or result object, consolidating common pattern.

View File

@@ -48,7 +48,6 @@ class Add_File(Cmdlet):
arg=[
SharedArgs.PATH,
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg(name="provider", type="string", required=False, description="File hosting provider (e.g., 0x0)", alias="prov"),
CmdletArg(
name="room",
@@ -1746,6 +1745,62 @@ class Add_File(Cmdlet):
# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
# If we're moving/copying from one store to another, also copy the source store's
# existing associated URLs so they aren't lost.
try:
from metadata import normalize_urls
source_store = None
source_hash = None
if isinstance(result, dict):
source_store = result.get("store")
source_hash = result.get("hash")
if not source_store:
source_store = getattr(pipe_obj, "store", None)
if not source_hash:
source_hash = getattr(pipe_obj, "hash", None)
if (not source_hash) and isinstance(pipe_obj.extra, dict):
source_hash = pipe_obj.extra.get("hash")
source_store = str(source_store or "").strip()
source_hash = str(source_hash or "").strip().lower()
if (
source_store
and source_hash
and len(source_hash) == 64
and source_store.lower() != str(backend_name or "").strip().lower()
):
source_backend = None
try:
if source_store in store.list_backends():
source_backend = store[source_store]
except Exception:
source_backend = None
if source_backend is not None:
try:
src_urls = normalize_urls(source_backend.get_url(source_hash) or [])
except Exception:
src_urls = []
try:
dst_urls = normalize_urls(url or [])
except Exception:
dst_urls = []
merged: list[str] = []
seen: set[str] = set()
for u in list(dst_urls or []) + list(src_urls or []):
if not u:
continue
if u in seen:
continue
seen.add(u)
merged.append(u)
url = merged
except Exception:
pass
# Collect relationship pairs for post-ingest DB/API persistence.
if collect_relationship_pairs is not None:
rels = Add_File._get_relationships(result, pipe_obj)

View File

@@ -25,11 +25,11 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note -store <store> [-hash <sha256>] <name> <text...>",
usage="add-note -store <store> [-query \"hash:<sha256>\"] <name> <text...>",
alias=[""],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
],
@@ -72,7 +72,10 @@ class Add_Note(Cmdlet):
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[add_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
note_name = str(parsed.get("name") or "").strip()
text_parts = parsed.get("text")
@@ -91,10 +94,10 @@ class Add_Note(Cmdlet):
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
log("[add_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
return 1
store_registry = Store(config)
@@ -161,7 +164,7 @@ class Add_Note(Cmdlet):
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
override_hash=str(query_hash) if query_hash else None,
)
if not resolved_hash:
log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)

View File

@@ -31,7 +31,7 @@ CMDLET = Cmdlet(
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
@@ -372,7 +372,7 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
refresh_args.extend(["-query", f"hash:{target_hash}"])
get_relationship(subject, refresh_args, config)
except Exception:
pass
@@ -396,7 +396,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
override_store = parsed.get("store")
override_hash = parsed.get("hash")
override_hashes = sh.parse_hash_query(parsed.get("query"))
if parsed.get("query") and not override_hashes:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
king_arg = parsed.get("king")
alt_arg = parsed.get("alt")
rel_type = parsed.get("type", "alt")
@@ -436,20 +439,12 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
resolved_alt_items = [{"hash": h, "store": str(override_store)} for h in hashes]
items_to_process = normalize_result_input(resolved_alt_items)
# Allow explicit -hash operation (store/hash-first)
if (not items_to_process) and override_hash:
# Support comma-separated hashes
raw = str(override_hash)
parts = [p.strip() for p in raw.replace(";", ",").split(",")]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr)
return 1
# Use the selected/override store; required in this mode
# Allow explicit store/hash-first operation via -query "hash:<sha256>" (supports multiple hash: tokens)
if (not items_to_process) and override_hashes:
if not override_store:
log("-store is required when using -hash without piped items", file=sys.stderr)
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
items_to_process = [{"hash": h, "store": str(override_store)} for h in hashes]
items_to_process = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)

View File

@@ -205,7 +205,7 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
if not target_hash or not store_name:
return
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
refresh_args: List[str] = ["-query", f"hash:{target_hash}", "-store", store_name]
get_tag = None
try:
@@ -237,10 +237,10 @@ class Add_Tag(Cmdlet):
super().__init__(
name="add-tag",
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
usage="add-tag -store <store> [-query \"hash:<sha256>\"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
@@ -249,7 +249,7 @@ class Add_Tag(Cmdlet):
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
"- Requires a store backend: use -store or pipe items that include store.",
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
"- If -query is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tag can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
@@ -258,7 +258,6 @@ class Add_Tag(Cmdlet):
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
],
exec=self.run,
)
@@ -273,6 +272,11 @@ class Add_Tag(Cmdlet):
# Parse arguments
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[add_tag] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
# If add-tag is in the middle of a pipeline (has downstream stages), default to
# including temp files. This enables common flows like:
# @N | download-media | add-tag ... | add-file ...
@@ -337,24 +341,12 @@ class Add_Tag(Cmdlet):
tag_to_add = parse_tag_arguments(raw_tag)
tag_to_add = expand_tag_groups(tag_to_add)
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
extracted_hash = None
filtered_tag: List[str] = []
for tag in tag_to_add:
if isinstance(tag, str) and tag.lower().startswith("hash:"):
_, _, hash_val = tag.partition(":")
if hash_val:
extracted_hash = normalize_hash(hash_val.strip())
continue
filtered_tag.append(tag)
tag_to_add = filtered_tag
if not tag_to_add:
log("No tag provided to add", file=sys.stderr)
return 1
# Get other flags (hash override can come from -hash or hash: token)
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
# Get other flags
hash_override = normalize_hash(query_hash) if query_hash else None
duplicate_arg = parsed.get("duplicate")
# tag ARE provided - apply them to each store-backed result

View File

@@ -18,7 +18,7 @@ class Add_Url(sh.Cmdlet):
summary="Associate a URL with a file",
usage="@1 | add-url <url>",
arg=[
sh.SharedArgs.HASH,
sh.SharedArgs.QUERY,
sh.SharedArgs.STORE,
sh.CmdletArg("url", required=True, description="URL to associate"),
],
@@ -33,14 +33,19 @@ class Add_Url(sh.Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = sh.parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -20,10 +20,10 @@ class Delete_File(sh.Cmdlet):
super().__init__(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
usage="delete-file [-query \"hash:<sha256>\"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
alias=["del-file"],
arg=[
sh.CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
sh.SharedArgs.QUERY,
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
@@ -196,6 +196,7 @@ class Delete_File(sh.Cmdlet):
return 0
# Parse arguments
override_query: str | None = None
override_hash: str | None = None
conserve: str | None = None
lib_root: str | None = None
@@ -205,8 +206,8 @@ class Delete_File(sh.Cmdlet):
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
if low in {"-query", "--query", "query"} and i + 1 < len(args):
override_query = str(args[i + 1]).strip()
i += 2
continue
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
@@ -222,6 +223,11 @@ class Delete_File(sh.Cmdlet):
reason_tokens.append(token)
i += 1
override_hash = sh.parse_single_hash_query(override_query) if override_query else None
if override_query and not override_hash:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
# If no lib_root provided, try to get the first folder store from config
if not lib_root:
try:

View File

@@ -26,11 +26,11 @@ class Delete_Note(Cmdlet):
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-hash <sha256>] <name>",
usage="delete-note -store <store> [-query \"hash:<sha256>\"] <name>",
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
],
detail=[
@@ -68,7 +68,10 @@ class Delete_Note(Cmdlet):
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[delete_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
note_name_override = str(parsed.get("name") or "").strip()
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
@@ -78,10 +81,10 @@ class Delete_Note(Cmdlet):
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
log("[delete_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
return 1
store_registry = Store(config)
@@ -109,7 +112,7 @@ class Delete_Note(Cmdlet):
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
override_hash=str(query_hash) if query_hash else None,
)
if not resolved_hash:
ctx.emit(res)

View File

@@ -117,7 +117,7 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
refresh_args.extend(["-query", f"hash:{target_hash}"])
cmd = get_cmdlet("get-relationship")
if not cmd:
@@ -148,24 +148,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
delete_all_flag = parsed_args.get("all", False)
rel_type_filter = parsed_args.get("type")
override_store = parsed_args.get("store")
override_hash = parsed_args.get("hash")
override_hashes = sh.parse_hash_query(parsed_args.get("query"))
if parsed_args.get("query") and not override_hashes:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
raw_path = parsed_args.get("path")
# Normalize input
results = normalize_result_input(result)
# Allow store/hash-first usage when no pipeline items were provided
if (not results) and override_hash:
raw = str(override_hash)
parts = [p.strip() for p in raw.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (normalize_hash(p) for p in parts) if h]
if not hashes:
log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr)
return 1
if (not results) and override_hashes:
if not override_store:
log("-store is required when using -hash without piped items", file=sys.stderr)
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
results = [{"hash": h, "store": str(override_store)} for h in hashes]
results = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not results:
# Legacy -path mode below may still apply
@@ -228,7 +225,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
file_hash = None
if not file_hash:
log("Could not extract file hash for deletion (use -hash or ensure pipeline includes hash)", file=sys.stderr)
log("Could not extract file hash for deletion (use -query \"hash:<sha256>\" or ensure pipeline includes hash)", file=sys.stderr)
return 1
meta = db.get_metadata(file_hash) or {}
@@ -380,7 +377,7 @@ CMDLET = Cmdlet(
arg=[
CmdletArg("path", type="string", description="Specify the local file path (legacy mode, if not piping a result)."),
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
],

View File

@@ -65,7 +65,7 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
refresh_args: list[str] = []
if file_hash:
refresh_args.extend(["-hash", file_hash])
refresh_args.extend(["-query", f"hash:{file_hash}"])
if store_name:
refresh_args.extend(["-store", store_name])
get_tag(subject, refresh_args, config)
@@ -76,14 +76,14 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
CMDLET = Cmdlet(
name="delete-tag",
summary="Remove tags from a file in a store.",
usage="delete-tag -store <store> [-hash <sha256>] <tag>[,<tag>...]",
usage="delete-tag -store <store> [-query \"hash:<sha256>\"] <tag>[,<tag>...]",
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
],
detail=[
"- Requires a Hydrus file (hash present) or explicit -hash override.",
"- Requires a Hydrus file (hash present) or explicit -query override.",
"- Multiple tags can be comma-separated or space-separated.",
],
)
@@ -111,11 +111,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
has_piped_tag = _looks_like_tag_row(result)
has_piped_tag_list = isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
if not args and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Parse -hash override and collect tags from remaining args
# Parse -query/-store overrides and collect remaining args.
override_query: str | None = None
override_hash: str | None = None
override_store: str | None = None
rest: list[str] = []
@@ -123,8 +120,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
if low in {"-query", "--query", "query"} and i + 1 < len(args):
override_query = str(args[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args):
@@ -133,64 +130,37 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
continue
rest.append(a)
i += 1
# Check if first argument is @ syntax (result table selection)
# @5 or @{2,5,8} to delete tags from ResultTable by index
tags_from_at_syntax = []
hash_from_at_syntax = None
path_from_at_syntax = None
store_from_at_syntax = None
if rest and str(rest[0]).startswith("@"):
selector_arg = str(rest[0])
pipe_selector = selector_arg[1:].strip()
# Parse @N or @{N,M,K} syntax
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
# @{2,5,8}
pipe_selector = pipe_selector[1:-1]
try:
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
except ValueError:
log("Invalid selection syntax. Use @2 or @{2,5,8}")
return 1
# Get the last ResultTable from pipeline context
try:
last_table = ctx._LAST_RESULT_TABLE
if last_table:
# Extract tags from selected rows
for idx in indices:
if 1 <= idx <= len(last_table.rows):
# Look for a TagItem in _LAST_RESULT_ITEMS by index
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
item = ctx._LAST_RESULT_ITEMS[idx - 1]
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
tag_name = get_field(item, 'tag_name')
if tag_name:
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
tags_from_at_syntax.append(tag_name)
# Also get hash from first item for consistency
if not hash_from_at_syntax:
hash_from_at_syntax = get_field(item, 'hash')
if not path_from_at_syntax:
path_from_at_syntax = get_field(item, 'path')
if not store_from_at_syntax:
store_from_at_syntax = get_field(item, 'store')
if not tags_from_at_syntax:
log(f"No tags found at indices: {indices}")
return 1
else:
log("No ResultTable in pipeline (use @ after running get-tag)")
return 1
except Exception as exc:
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
return 1
# Handle @N selection which creates a list - extract the first item
# If we have a list of TagItems, we want to process ALL of them if no args provided
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
# Also handles: delete-tag @1,2 (where we want to delete tags from multiple files)
override_hash = sh.parse_single_hash_query(override_query) if override_query else None
if override_query and not override_hash:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
# Selection syntax (@...) is handled by the pipeline runner, not by this cmdlet.
# If @ reaches here as a literal argument, it's almost certainly user error.
if rest and str(rest[0]).startswith("@") and not (has_piped_tag or has_piped_tag_list):
log("Selection syntax is only supported via piping. Use: @N | delete-tag")
return 1
# Special case: grouped tag selection created by the pipeline runner.
# This represents "delete these selected tags" (not "delete tags from this file").
grouped_table = ""
try:
grouped_table = str(get_field(result, "table") or "").strip().lower()
except Exception:
grouped_table = ""
grouped_tags = get_field(result, "tag") if result is not None else None
tags_arg = parse_tag_arguments(rest)
if grouped_table == "tag.selection" and isinstance(grouped_tags, list) and grouped_tags and not tags_arg:
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash"))
store_name = override_store or get_field(result, "store")
path = get_field(result, "path") or get_field(result, "target")
tags = [str(t) for t in grouped_tags if t]
return 0 if _process_deletion(tags, file_hash, path, store_name, config) else 1
if not tags_arg and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Normalize result to a list for processing
items_to_process = []
@@ -198,6 +168,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
items_to_process = result
elif result:
items_to_process = [result]
# Process each item
success_count = 0
# If we have TagItems and no args, we are deleting the tags themselves
# If we have Files (or other objects) and args, we are deleting tags FROM those files
@@ -206,81 +179,66 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
is_tag_item_mode = bool(items_to_process) and _looks_like_tag_row(items_to_process[0])
if is_tag_item_mode:
# Collect all tags to delete from the TagItems
# Group by hash/file_path to batch operations if needed, or just process one by one
# For simplicity, we'll process one by one or group by file
pass
# Collect all tags to delete from the TagItems and batch per file.
# This keeps delete-tag efficient (one backend call per file).
groups: Dict[tuple[str, str, str], list[str]] = {}
for item in items_to_process:
tag_name = get_field(item, "tag_name")
if not tag_name:
continue
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_store = override_store or get_field(item, "store")
item_path = get_field(item, "path") or get_field(item, "target")
key = (str(item_hash or ""), str(item_store or ""), str(item_path or ""))
groups.setdefault(key, []).append(str(tag_name))
for (h, s, p), tag_list in groups.items():
if not tag_list:
continue
if _process_deletion(tag_list, h or None, p or None, s or None, config):
success_count += 1
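# Illustrative grouping (not part of this commit; store name hypothetical): two TagItems piped
# from get-tag that share the same (hash, store, path) key collapse into a single
# _process_deletion call, e.g.
#   groups == {("<sha256>", "local", ""): ["creator:foo", "title:bar"]}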
return 0 if success_count > 0 else 1
else:
# "Delete tags from files" mode
# We need args (tags to delete)
if not args and not tags_from_at_syntax:
if not tags_arg:
log("Requires at least one tag argument when deleting from files")
return 1
# Process each item
success_count = 0
# If we have tags from @ syntax (e.g. delete-tag @{1,2}), we ignore the piped result for tag selection
# but we might need the piped result for the file context if @ selection was from a Tag table
# Actually, the @ selection logic above already extracted tags.
if tags_from_at_syntax:
# Special case: @ selection of tags.
# We already extracted tags and hash/path.
# Just run the deletion once using the extracted info.
# This preserves the existing logic for @ selection.
tags = tags_from_at_syntax
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
path = path_from_at_syntax
store_name = override_store or store_from_at_syntax
if _process_deletion(tags, file_hash, path, store_name, config):
success_count += 1
else:
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
tags_arg = parse_tag_arguments(rest)
for item in items_to_process:
tags_to_delete = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
)
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
# It's a tag row (TagItem or PipeObject/dict with tag_name)
if tags_arg:
# User provided tags to delete FROM this file (ignoring the tag name in the item?)
# Or maybe they want to delete the tag in the item AND the args?
# Usually if piping TagItems, we delete THOSE tags.
# If args are present, maybe we should warn?
# For now, if args are present, assume they override or add to the tag item?
# Let's assume if args are present, we use args. If not, we use the tag name.
tags_to_delete = tags_arg
else:
tag_name = get_field(item, 'tag_name')
if tag_name:
tags_to_delete = [tag_name]
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
for item in items_to_process:
tags_to_delete: list[str] = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
)
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
if tags_arg:
tags_to_delete = tags_arg
else:
# It's a File or other object
if tags_arg:
tags_to_delete = tags_arg
else:
# No tags provided for a file object - skip or error?
# We already logged an error if no args and not TagItem mode globally,
# but inside the loop we might have mixed items? Unlikely.
continue
if tags_to_delete:
if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config):
success_count += 1
tag_name = get_field(item, 'tag_name')
if tag_name:
tags_to_delete = [str(tag_name)]
else:
if tags_arg:
tags_to_delete = tags_arg
else:
continue
if tags_to_delete:
if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config):
success_count += 1
if success_count > 0:
return 0

View File

@@ -27,7 +27,7 @@ class Delete_Url(Cmdlet):
summary="Remove a URL association from a file",
usage="@1 | delete-url <url>",
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to remove"),
],
@@ -42,14 +42,19 @@ class Delete_Url(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete URL from file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -1124,7 +1124,8 @@ def download_media(
# If we downloaded sections, look for files with the session_id pattern
if opts.clip_sections and session_id:
# Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
# Also includes sidecars like "{session_id}_1.en.vtt".
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)')
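# Illustrative matches for the relaxed pattern: f"{session_id}_1.mkv" and f"{session_id}_1.en.vtt"
# both match, with group(1) == "1" (the section index used for sorting/grouping below).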
matching_files = [f for f in files if section_pattern.search(f.name)]
if matching_files:
@@ -1136,38 +1137,116 @@ def download_media(
matching_files.sort(key=extract_section_num)
debug(f"Found {len(matching_files)} section file(s) matching pattern")
# Now rename section files to use hash-based names
# This ensures unique filenames for each section content
renamed_files = []
for idx, section_file in enumerate(matching_files, 1):
# Now rename section *media* files to use hash-based names.
# Sidecars (subtitles) are renamed to match the media hash so they can be
# attached as notes later (and not emitted as separate pipeline items).
by_index: Dict[int, List[Path]] = {}
for f in matching_files:
m = section_pattern.search(f.name)
if not m:
continue
try:
# Calculate hash for the file
file_hash = sha256_file(section_file)
ext = section_file.suffix
new_name = f"{file_hash}{ext}"
new_path = opts.output_dir / new_name
if new_path.exists() and new_path != section_file:
# If file with same hash exists, use it and delete the temp one
debug(f"File with hash {file_hash} already exists, using existing file.")
n = int(m.group(1))
except Exception:
continue
by_index.setdefault(n, []).append(f)
renamed_media_files: List[Path] = []
for sec_num in sorted(by_index.keys()):
group = by_index.get(sec_num) or []
if not group:
continue
def _is_subtitle(p: Path) -> bool:
try:
return p.suffix.lower() in _SUBTITLE_EXTS
except Exception:
return False
media_candidates = [p for p in group if not _is_subtitle(p)]
subtitle_candidates = [p for p in group if _is_subtitle(p)]
# Pick the primary media file for this section.
# Prefer non-json, non-info sidecars.
media_file: Optional[Path] = None
for cand in media_candidates:
try:
if cand.suffix.lower() in {".json", ".info.json"}:
continue
except Exception:
pass
media_file = cand
break
if media_file is None and media_candidates:
media_file = media_candidates[0]
if media_file is None:
# No media file found for this section; skip.
continue
try:
media_hash = sha256_file(media_file)
except Exception as e:
debug(f"Failed to hash section media file {media_file.name}: {e}")
renamed_media_files.append(media_file)
continue
# Preserve any suffix tail after the section index so language tags survive.
# Example: <session>_1.en.vtt -> <hash>.en.vtt
prefix = f"{session_id}_{sec_num}"
def _tail(name: str) -> str:
try:
if name.startswith(prefix):
return name[len(prefix):]
except Exception:
pass
# Fallback: keep just the last suffix.
try:
return Path(name).suffix
except Exception:
return ""
# Rename media file to <hash><tail> (tail typically like .mkv).
try:
new_media_name = f"{media_hash}{_tail(media_file.name)}"
new_media_path = opts.output_dir / new_media_name
if new_media_path.exists() and new_media_path != media_file:
debug(f"File with hash {media_hash} already exists, using existing file.")
try:
section_file.unlink()
media_file.unlink()
except OSError:
pass
renamed_files.append(new_path)
else:
section_file.rename(new_path)
debug(f"Renamed section file: {section_file.name} {new_name}")
renamed_files.append(new_path)
media_file.rename(new_media_path)
debug(f"Renamed section file: {media_file.name} -> {new_media_name}")
renamed_media_files.append(new_media_path)
except Exception as e:
debug(f"Failed to process section file {section_file.name}: {e}")
renamed_files.append(section_file)
media_path = renamed_files[0]
media_paths = renamed_files
debug(f"Failed to rename section media file {media_file.name}: {e}")
renamed_media_files.append(media_file)
new_media_path = media_file
# Rename subtitle sidecars to match media hash for later note attachment.
for sub_file in subtitle_candidates:
try:
new_sub_name = f"{media_hash}{_tail(sub_file.name)}"
new_sub_path = opts.output_dir / new_sub_name
if new_sub_path.exists() and new_sub_path != sub_file:
try:
sub_file.unlink()
except OSError:
pass
else:
sub_file.rename(new_sub_path)
debug(f"Renamed section file: {sub_file.name} -> {new_sub_name}")
except Exception as e:
debug(f"Failed to rename section subtitle file {sub_file.name}: {e}")
media_path = renamed_media_files[0] if renamed_media_files else matching_files[0]
media_paths = renamed_media_files if renamed_media_files else None
if not opts.quiet:
debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
count = len(media_paths) if isinstance(media_paths, list) else 1
debug(f"✓ Downloaded {count} section media file(s) (session: {session_id})")
else:
# Fallback to most recent file if pattern not found
media_path = files[0]
@@ -1398,9 +1477,14 @@ class Download_Media(Cmdlet):
alias=[""],
arg=[
SharedArgs.URL,
SharedArgs.QUERY,
CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
CmdletArg(
name="clip",
type="string",
description="Extract time range(s) or keyed spec (e.g., clip:3m4s-3m14s,item:2-3)",
),
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
SharedArgs.PATH
],
@@ -1483,6 +1567,34 @@ class Download_Media(Cmdlet):
# Get other options
clip_spec = parsed.get("clip")
query_spec = parsed.get("query")
# download-media supports a small keyed spec language inside -query.
# Examples:
# -query "hash:<sha256>"
# -query "clip:1m-1m15s,2m1s-2m11s"
# -query "hash:<sha256>,clip:1m-1m15s,item:2-3"
query_keyed: Dict[str, List[str]] = {}
if query_spec:
try:
query_keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
except Exception:
query_keyed = {}
# Optional: allow an explicit hash via -query "hash:<sha256>".
# This is used as the preferred king hash for multi-clip relationships.
query_hash_override: Optional[str] = None
try:
hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else []
hash_candidate = (hash_values[-1] if hash_values else None)
if hash_candidate:
# Re-wrap for the shared parser which expects the `hash:` prefix.
query_hash_override = sh.parse_single_hash_query(f"hash:{hash_candidate}")
else:
# Backwards-compatible: treat a non-keyed query as a hash query.
query_hash_override = sh.parse_single_hash_query(str(query_spec)) if query_spec else None
except Exception:
query_hash_override = None
# Always enable chapters + subtitles so downstream pipes (e.g. mpv) can consume them.
embed_chapters = True
@@ -1492,12 +1604,38 @@ class Download_Media(Cmdlet):
# Parse clip range(s) if specified
clip_ranges: Optional[List[tuple[int, int]]] = None
clip_values: List[str] = []
item_values: List[str] = []
if clip_spec:
clip_ranges = self._parse_time_ranges(str(clip_spec))
# Support keyed clip syntax:
# -clip "clip:3m4s-3m14s,1h22m-1h33m,item:2-3"
keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip")
clip_values.extend(keyed.get("clip", []) or [])
item_values.extend(keyed.get("item", []) or [])
# Allow the same keyed spec language inside -query so users can do:
# download-media <url> -query "clip:1m-1m15s,2m1s-2m11s"
if query_keyed:
clip_values.extend(query_keyed.get("clip", []) or [])
item_values.extend(query_keyed.get("item", []) or [])
if item_values and not parsed.get("item"):
parsed["item"] = ",".join([v for v in item_values if v])
if clip_values:
clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v]))
if not clip_ranges:
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
bad_spec = clip_spec or query_spec
log(f"Invalid clip format: {bad_spec}", file=sys.stderr)
return 1
if clip_ranges:
try:
debug(f"Clip ranges: {clip_ranges}")
except Exception:
pass
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
storage = None
@@ -1886,56 +2024,37 @@ class Download_Media(Cmdlet):
formats = list_formats(url, no_playlist=False)
if formats and len(formats) > 1:
# Filter formats: multiple videos (640x+, one per resolution tier) + 1 best audio
video_formats = []
audio_formats = []
for fmt in formats:
width = fmt.get("width") or 0
height = fmt.get("height") or 0
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
# Classify as video or audio
if vcodec != "none" and acodec == "none" and width >= 640:
video_formats.append(fmt)
elif acodec != "none" and vcodec == "none":
audio_formats.append(fmt)
# Group videos by resolution and select best format per resolution
filtered_formats = []
if video_formats:
# Group by height (resolution tier)
from collections import defaultdict
by_resolution = defaultdict(list)
for f in video_formats:
height = f.get("height") or 0
by_resolution[height].append(f)
# For each resolution, prefer AV1, then highest bitrate
for height in sorted(by_resolution.keys(), reverse=True):
candidates = by_resolution[height]
av1_formats = [f for f in candidates if "av01" in f.get("vcodec", "")]
if av1_formats:
best = max(av1_formats, key=lambda f: f.get("tbr") or 0)
else:
best = max(candidates, key=lambda f: f.get("tbr") or 0)
filtered_formats.append(best)
# Select best audio: highest bitrate (any format)
if audio_formats:
best_audio = max(audio_formats, key=lambda f: f.get("tbr") or f.get("abr") or 0)
filtered_formats.append(best_audio)
if not filtered_formats:
# Fallback to all formats if filtering resulted in nothing
filtered_formats = formats
debug(f"Filtered to {len(filtered_formats)} formats from {len(formats)} total")
# Show format selection table
log(f"Available formats for {url}:", file=sys.stderr)
log("", file=sys.stderr)
# Formatlist filtering
#
# Goal:
# - Keep the list useful (hide non-media entries like storyboards)
# - But NEVER filter down so far that the user can't browse/pick formats.
#
# The old filtering was too aggressive (e.g. width>=640, one per resolution),
# which often hid most YouTube formats.
def _is_browseable_format(fmt: Any) -> bool:
if not isinstance(fmt, dict):
return False
format_id = str(fmt.get("format_id") or "").strip()
if not format_id:
return False
ext = str(fmt.get("ext") or "").strip().lower()
if ext in {"mhtml", "json"}:
return False
note = str(fmt.get("format_note") or "").lower()
if "storyboard" in note:
return False
if format_id.lower().startswith("sb"):
return False
vcodec = str(fmt.get("vcodec", "none"))
acodec = str(fmt.get("acodec", "none"))
# Keep anything with at least one stream.
return not (vcodec == "none" and acodec == "none")
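# Illustrative results (not part of this commit) for the filter above:
#   {"format_id": "sb0", "ext": "mhtml"}                      -> False  (storyboard / non-media)
#   {"format_id": "251", "vcodec": "none", "acodec": "opus"}  -> True   (audio-only kept)
#   {"format_id": "299", "vcodec": "avc1", "acodec": "none"}  -> True   (video-only kept)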
candidate_formats = [f for f in formats if _is_browseable_format(f)]
filtered_formats = candidate_formats if candidate_formats else list(formats)
debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})")
# Build the base command that will be replayed with @N selection
# Include any additional args from the original command
@@ -1946,8 +2065,10 @@ class Download_Media(Cmdlet):
base_cmd += ' ' + ' '.join(remaining_args)
# Create result table for display
table = ResultTable()
table.title = f"Available formats for {url}"
# NOTE: ResultTable defaults to max_columns=5; for formatlist we want more columns
# (including Size) so the user can compare formats.
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-media", [url])
# Collect results for table
@@ -1960,6 +2081,7 @@ class Download_Media(Cmdlet):
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
filesize_approx = fmt.get("filesize_approx")
format_id = fmt.get("format_id", "")
# If the chosen format is video-only (no audio stream), automatically
@@ -1971,11 +2093,20 @@ class Download_Media(Cmdlet):
except Exception:
selection_format_id = format_id
# Format size
# Format size (prefer exact filesize; fall back to filesize_approx)
size_str = ""
if filesize:
size_mb = filesize / (1024 * 1024)
size_str = f"{size_mb:.1f}MB"
size_prefix = ""
size_bytes = filesize
if not size_bytes:
size_bytes = filesize_approx
if size_bytes:
size_prefix = "~"
try:
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
size_mb = float(size_bytes) / (1024 * 1024)
size_str = f"{size_prefix}{size_mb:.1f}MB"
except Exception:
size_str = ""
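                    # Worked example (illustrative): filesize=None, filesize_approx=52428800 -> "~50.0MB";
                    # an exact filesize of 1048576 -> "1.0MB" (no "~" prefix).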
# Build format description
desc_parts = []
@@ -2002,42 +2133,67 @@ class Download_Media(Cmdlet):
"annotations": [ext, resolution] if resolution else [ext],
"media_kind": "format",
"cmd": base_cmd,
# Put Size early so it's visible even with smaller column caps.
"columns": [
("#", str(idx)),
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),
("Video", vcodec),
("Audio", acodec),
("Size", size_str or "N/A"),
],
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", selection_format_id]
"_selection_args": None,
}
# Preserve clip settings across @N selection.
# Some runners only append row selection args; make sure clip intent
# survives even when it was provided via -query "clip:...".
selection_args: List[str] = ["-format", selection_format_id]
try:
if (not clip_spec) and clip_values:
selection_args.extend(["-clip", ",".join([v for v in clip_values if v])])
except Exception:
pass
format_dict["_selection_args"] = selection_args
# Add to results list and table (don't emit - formats should wait for @N selection)
results_list.append(format_dict)
table.add_result(format_dict)
# Render and display the table
# Table is displayed by pipeline runner via set_current_stage_table
# Some runners (e.g. cmdnat) do not automatically render stage tables.
# Since this branch is explicitly interactive (user must pick @N), always
# print the table here and mark it as already rendered to avoid duplicates
# in runners that also print tables (e.g. CLI.py).
try:
sys.stderr.write(table.format_plain() + "\n")
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
# Set the result table so it displays and is available for @N selection
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
log(f"", file=sys.stderr)
log(f"Use: @N | download-media to select and download format", file=sys.stderr)
log(f"Use: @N to select and download format", file=sys.stderr)
return 0
# Download each URL
downloaded_count = 0
clip_sections_spec = self._build_clip_sections_spec(clip_ranges)
if clip_sections_spec:
try:
debug(f"Clip sections spec: {clip_sections_spec}")
except Exception:
pass
for url in supported_url:
try:
debug(f"Processing: {url}")
@@ -2136,6 +2292,13 @@ class Download_Media(Cmdlet):
p_path = Path(p)
except Exception:
continue
# Sidecars (subtitles) should never be piped as standalone items.
# They are handled separately and attached to notes.
try:
if p_path.suffix.lower() in _SUBTITLE_EXTS:
continue
except Exception:
pass
if not p_path.exists() or p_path.is_dir():
continue
try:
@@ -2189,6 +2352,12 @@ class Download_Media(Cmdlet):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
# We keep subtitles as notes; do not leave a sidecar that later stages
# might try to ingest as a file.
try:
sub_path.unlink()
except Exception:
pass
pipe_objects.append(po)
@@ -2196,7 +2365,7 @@ class Download_Media(Cmdlet):
# Relationship tags are only added when multiple clips exist.
try:
if clip_ranges and len(pipe_objects) == len(clip_ranges):
source_hash = self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
source_hash = query_hash_override or self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
except Exception:
pass
@@ -2234,8 +2403,8 @@ class Download_Media(Cmdlet):
if formats:
formats_to_show = formats
table = ResultTable()
table.title = f"Available formats for {url}"
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-media", [str(a) for a in (args or [])])
results_list: List[Dict[str, Any]] = []
@@ -2245,6 +2414,7 @@ class Download_Media(Cmdlet):
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
filesize_approx = fmt.get("filesize_approx")
format_id = fmt.get("format_id", "")
selection_format_id = format_id
@@ -2255,12 +2425,18 @@ class Download_Media(Cmdlet):
selection_format_id = format_id
size_str = ""
if filesize:
try:
size_mb = float(filesize) / (1024 * 1024)
size_str = f"{size_mb:.1f}MB"
except Exception:
size_str = ""
size_prefix = ""
size_bytes = filesize
if not size_bytes:
size_bytes = filesize_approx
if size_bytes:
size_prefix = "~"
try:
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
size_mb = float(size_bytes) / (1024 * 1024)
size_str = f"{size_prefix}{size_mb:.1f}MB"
except Exception:
size_str = ""
desc_parts: List[str] = []
if resolution and resolution != "audio only":
@@ -2283,13 +2459,12 @@ class Download_Media(Cmdlet):
"detail": format_desc,
"media_kind": "format",
"columns": [
("#", str(idx)),
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),
("Video", vcodec),
("Audio", acodec),
("Size", size_str or "N/A"),
],
"full_metadata": {
"format_id": format_id,
@@ -2305,6 +2480,13 @@ class Download_Media(Cmdlet):
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
# See comment in the main formatlist path: always print for interactive selection.
try:
sys.stderr.write(table.format_plain() + "\n")
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
# Returning 0 with no emits lets the CLI pause the pipeline for @N selection.
log("Requested format is not available; select a working format with @N", file=sys.stderr)
return 0
@@ -2387,6 +2569,25 @@ class Download_Media(Cmdlet):
if not ts:
return None
# Support compact units like 3m4s, 1h22m, 1h2m3s
# (case-insensitive; seconds may be fractional but are truncated to int)
try:
unit_match = re.fullmatch(
r"(?i)\s*(?:(?P<h>\d+)h)?\s*(?:(?P<m>\d+)m)?\s*(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
ts,
)
except Exception:
unit_match = None
if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")):
try:
hours = int(unit_match.group("h") or 0)
minutes = int(unit_match.group("m") or 0)
seconds = float(unit_match.group("s") or 0)
total = (hours * 3600) + (minutes * 60) + seconds
return int(total)
except Exception:
return None
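# Worked examples for the compact form: "3m4s" -> 184, "1h22m" -> 4920,
# "1h2m3.5s" -> 3723 (fractional seconds are truncated by int()).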
if ":" in ts:
parts = [p.strip() for p in ts.split(":")]
if len(parts) == 2:
@@ -2430,6 +2631,46 @@ class Download_Media(Cmdlet):
return ranges
@staticmethod
def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]:
"""Parse comma-separated values with optional sticky `key:` prefixes.
Example:
clip:3m4s-3m14s,1h22m-1h33m,item:2-3
Rules:
- Items are split on commas.
- If an item begins with `key:` then key becomes active for subsequent items.
- If an item has no `key:` prefix, it belongs to the last active key.
- If no key has been set yet, values belong to default_key.
"""
out: Dict[str, List[str]] = {}
if not isinstance(spec, str):
spec = str(spec)
text = spec.strip()
if not text:
return out
active = (default_key or "").strip().lower() or "clip"
key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$")
for raw_piece in text.split(","):
piece = raw_piece.strip()
if not piece:
continue
m = key_pattern.match(piece)
if m:
active = (m.group(1) or "").strip().lower() or active
value = (m.group(2) or "").strip()
if value:
out.setdefault(active, []).append(value)
continue
out.setdefault(active, []).append(piece)
return out
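# Per the rules above, the docstring example parses (with default_key="clip") to
# {"clip": ["3m4s-3m14s", "1h22m-1h33m"], "item": ["2-3"]}: the bare middle range
# inherits the sticky "clip" key, and "item:" switches the key for what follows.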
def _build_clip_sections_spec(
self,
clip_ranges: Optional[List[tuple[int, int]]],

View File

@@ -25,14 +25,14 @@ class Get_File(sh.Cmdlet):
summary="Export file to local path",
usage="@1 | get-file -path C:\\Downloads",
arg=[
sh.SharedArgs.HASH,
sh.SharedArgs.QUERY,
sh.SharedArgs.STORE,
sh.SharedArgs.PATH,
sh.CmdletArg("name", description="Output filename (default: from metadata title)"),
],
detail=[
"- Exports file from storage backend to local path",
"- Uses hash+store to retrieve file",
"- Uses selected item's hash, or -query \"hash:<sha256>\"",
"- Preserves file extension and metadata",
],
exec=self.run,
@@ -44,9 +44,14 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] run() called with result type: {type(result)}")
parsed = sh.parse_cmdlet_args(args, self)
debug(f"[get-file] parsed args: {parsed}")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
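# parse_single_hash_query is expected to yield the normalized 64-hex digest for a
# "hash:<sha256>" query and a falsy value otherwise, so an explicit -query that
# fails to parse is rejected here instead of being silently ignored.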
# Extract hash and store from result or args
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
@@ -54,7 +59,7 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... store_name={store_name}")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -26,16 +26,16 @@ class Get_Metadata(Cmdlet):
super().__init__(
name="get-metadata",
summary="Print metadata for files by hash and storage backend.",
usage="get-metadata [-hash <sha256>] [-store <backend>]",
usage="get-metadata [-query \"hash:<sha256>\"] [-store <backend>]",
alias=["meta"],
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
],
detail=[
"- Retrieves metadata from storage backend using file hash as identifier.",
"- Shows hash, MIME type, size, duration/pages, known url, and import timestamp.",
"- Hash and store are taken from piped result or can be overridden with -hash/-store flags.",
"- Hash and store are taken from piped result or can be overridden with -query/-store flags.",
"- All metadata is retrieved from the storage backend's database (single source of truth).",
],
exec=self.run,
@@ -153,13 +153,18 @@ class Get_Metadata(Cmdlet):
"""Main execution entry point."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
return 1
# Get hash and store from parsed args or result
file_hash = parsed.get("hash") or get_field(result, "hash")
file_hash = query_hash or get_field(result, "hash")
storage_source = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available - use -hash to specify", file=sys.stderr)
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
return 1
if not storage_source:

View File

@@ -25,11 +25,11 @@ class Get_Note(Cmdlet):
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-hash <sha256>]",
usage="get-note -store <store> [-query \"hash:<sha256>\"]",
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
],
detail=[
"- Notes are retrieved via the selected store backend.",
@@ -66,14 +66,17 @@ class Get_Note(Cmdlet):
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[get_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
log("[get_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
return 1
store_registry = Store(config)
@@ -94,7 +97,7 @@ class Get_Note(Cmdlet):
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
override_hash=str(query_hash) if query_hash else None,
)
if not resolved_hash:
continue

View File

@@ -29,12 +29,12 @@ from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-hash <sha256>]",
usage="get-relationship [-query \"hash:<sha256>\"]",
alias=[
"get-rel",
],
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
],
detail=[
@@ -48,20 +48,28 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse -hash and -store override
override_hash: str | None = None
# Parse -query and -store override
override_query: str | None = None
override_store: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
if low in {"-query", "--query", "query"} and i + 1 < len(args_list):
override_query = str(args_list[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args_list):
override_store = str(args_list[i + 1]).strip()
i += 2
continue
i += 1
override_hash: str | None = sh.parse_single_hash_query(override_query) if override_query else None
if override_query and not override_hash:
log("get-relationship requires -query \"hash:<sha256>\"", file=sys.stderr)
return 1
# Handle @N selection which creates a list
# This cmdlet is single-subject; require disambiguation when multiple items are provided.
@@ -69,7 +77,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -hash", file=sys.stderr)
log("get-relationship expects a single item; select one row (e.g. @1) or pass -query \"hash:<sha256>\"", file=sys.stderr)
return 1
else:
result = result[0]
@@ -439,8 +447,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pipeline_results.append(res_obj)
# Set selection args
# If it has a path, we can use it directly; otherwise get-file can fetch it via -query "hash:<sha256>".
table.set_row_selection_args(i, ["-store", str(item['store']), "-hash", item['hash']])
table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"])
ctx.set_last_result_table(table, pipeline_results)
print(table)

View File

@@ -801,11 +801,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get tags from Hydrus, local sidecar, or URL metadata.
Usage:
get-tag [-hash <sha256>] [--store <key>] [--emit]
get-tag [-query "hash:<sha256>"] [--store <key>] [--emit]
get-tag -scrape <url|provider>
Options:
-hash <sha256>: Override hash to use instead of result's hash
-query "hash:<sha256>": Override hash to use instead of result's hash
--store <key>: Store result to this key for pipeline
--emit: Emit result without interactive prompt (quiet mode)
-scrape <url|provider>: Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks)
@@ -843,22 +843,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
scrape_flag_present = any(str(arg).lower() in {"-scrape", "--scrape"} for arg in args_list)
# Extract values
hash_override_raw = parsed_args.get("hash")
hash_override = normalize_hash(hash_override_raw)
query_raw = parsed_args.get("query")
hash_override = sh.parse_single_hash_query(query_raw)
if query_raw and not hash_override:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
store_key = parsed_args.get("store")
emit_requested = parsed_args.get("emit", False)
scrape_url = parsed_args.get("scrape")
scrape_requested = scrape_flag_present or scrape_url is not None
explicit_hash_flag = any(str(arg).lower() in {"-hash", "--hash"} for arg in raw_args)
if hash_override_raw is not None:
if not hash_override or not looks_like_hash(hash_override):
debug(f"[get_tag] Ignoring invalid hash override '{hash_override_raw}' (explicit_flag={explicit_hash_flag})")
if explicit_hash_flag:
log("Invalid hash format: expected 64 hex characters", file=sys.stderr)
return 1
hash_override = None
if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
log("-scrape requires a URL or provider name", file=sys.stderr)
return 1
@@ -1182,10 +1176,10 @@ class Get_Tag(Cmdlet):
super().__init__(
name="get-tag",
summary="Get tag values from Hydrus or local sidecar metadata",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
usage="get-tag [-query \"hash:<sha256>\"] [--store <key>] [--emit] [-scrape <url|provider>]",
alias=[],
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg(
name="-store",
type="string",
@@ -1211,7 +1205,7 @@ class Get_Tag(Cmdlet):
" Hydrus: Using file hash if available",
" Local: From sidecar files or local library database",
"- Options:",
" -hash: Override hash to look up in Hydrus",
" -query: Override hash to look up in Hydrus (use: -query \"hash:<sha256>\")",
" -store: Store result to key for downstream pipeline",
" -emit: Quiet mode (no interactive selection)",
" -scrape: Scrape metadata from URL or metadata provider",

View File

@@ -34,7 +34,7 @@ class Get_Url(Cmdlet):
summary="List url associated with a file",
usage="@1 | get-url",
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
],
detail=[
@@ -47,13 +47,18 @@ class Get_Url(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get url for file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -12,7 +12,7 @@ from SYS.logger import log, debug
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag = (
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag, parse_hash_query = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
@@ -20,6 +20,7 @@ Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, firs
sh.should_show_help,
sh.normalize_hash,
sh.first_title_tag,
sh.parse_hash_query,
)
import pipeline as ctx
@@ -34,7 +35,7 @@ class Search_Store(Cmdlet):
super().__init__(
name="search-store",
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [query] [-store BACKEND] [-limit N]",
usage="search-store [-query <query>] [-store BACKEND] [-limit N]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
@@ -46,51 +47,18 @@ class Search_Store(Cmdlet):
"URL search: url:* (any URL) or url:<value> (URL substring)",
"Results include hash for downstream commands (get-file, add-tag, etc.)",
"Examples:",
"search-store foo # Search all storage backends",
"search-store -store home '*' # Search 'home' Hydrus instance",
"search-store -store test 'video' # Search 'test' folder store",
"search-store 'url:*' # Files that have any URL",
"search-store 'url:youtube.com' # Files whose URL contains substring",
"search-store -query foo # Search all storage backends",
"search-store -store home -query '*' # Search 'home' Hydrus instance",
"search-store -store test -query 'video' # Search 'test' folder store",
"search-store -query 'hash:deadbeef...' # Search by SHA256 hash",
"search-store -query 'url:*' # Files that have any URL",
"search-store -query 'url:youtube.com' # Files whose URL contains substring",
],
exec=self.run,
)
self.register()
# --- Helper methods -------------------------------------------------
@staticmethod
def _parse_hash_query(query: str) -> List[str]:
"""Parse a `hash:` query into a list of normalized 64-hex SHA256 hashes.
Supported examples:
- hash:<h1>,<h2>,<h3>
- Hash: <h1> <h2> <h3>
- hash:{<h1>, <h2>}
"""
q = str(query or "").strip()
if not q:
return []
m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
if not m:
return []
rest = (m.group(1) or "").strip()
if rest.startswith("{") and rest.endswith("}"):
rest = rest[1:-1].strip()
if rest.startswith("[") and rest.endswith("]"):
rest = rest[1:-1].strip()
# Split on commas and whitespace.
raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
out: List[str] = []
for part in raw_parts:
h = normalize_hash(part)
if not h:
continue
if h not in out:
out.append(h)
return out
@staticmethod
def _normalize_extension(ext_value: Any) -> str:
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
@@ -142,6 +110,7 @@ class Search_Store(Cmdlet):
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
flag_registry = self.build_flag_registry()
query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})}
store_flags = {f.lower() for f in (flag_registry.get("store") or {"-store", "--store"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
@@ -155,6 +124,11 @@ class Search_Store(Cmdlet):
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in query_flags and i + 1 < len(args_list):
chunk = args_list[i + 1]
query = f"{query} {chunk}".strip() if query else chunk
i += 2
continue
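# Repeated -query flags are concatenated with spaces, so
# `-query foo -query bar` ends up searching for "foo bar".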
if low in store_flags and i + 1 < len(args_list):
storage_backend = args_list[i + 1]
i += 2
@@ -182,7 +156,7 @@ class Search_Store(Cmdlet):
if store_filter and not storage_backend:
storage_backend = store_filter
hash_query = self._parse_hash_query(query)
hash_query = parse_hash_query(query)
if not query:
log("Provide a search query", file=sys.stderr)

View File

@@ -7,7 +7,7 @@ root_dir = Path(__file__).parent
if str(root_dir) not in sys.path:
sys.path.insert(0, str(root_dir))
from CLI import main
from CLI import MedeiaCLI
if __name__ == "__main__":
main()
MedeiaCLI().run()

View File

@@ -9,22 +9,38 @@ Features:
- Interactive selection with user input
- Input options for cmdlet arguments (location, source selection, etc.)
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union, Callable, Tuple
from typing import Any, Dict, List, Optional, Callable
from pathlib import Path
import json
import shutil
# Optional Textual imports - graceful fallback if not available
try:
from textual.widgets import Tree, DataTable
from textual.containers import Horizontal, Vertical
from textual.widgets import Static, Button
from textual.widgets import Tree
TEXTUAL_AVAILABLE = True
except ImportError:
TEXTUAL_AVAILABLE = False
def _sanitize_cell_text(value: Any) -> str:
"""Coerce to a single-line, tab-free string suitable for ASCII tables."""
if value is None:
return ""
text = str(value)
if not text:
return ""
return (
text
.replace("\r\n", " ")
.replace("\n", " ")
.replace("\r", " ")
.replace("\t", " ")
)
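# For example, _sanitize_cell_text("line one\r\nline two\tend") returns
# "line one line two end", keeping each table cell on a single physical line.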
@dataclass
class InputOption:
"""Represents an interactive input option (cmdlet argument) in a table.
@@ -120,18 +136,7 @@ class ResultRow:
def add_column(self, name: str, value: Any) -> None:
"""Add a column to this row."""
str_value = str(value) if value is not None else ""
# Tables are single-line per row: normalize hard line breaks inside cells
# so values (e.g., long descriptions) don't break the ASCII box shape.
if str_value:
str_value = (
str_value
.replace("\r\n", " ")
.replace("\n", " ")
.replace("\r", " ")
.replace("\t", " ")
)
str_value = _sanitize_cell_text(value)
# Normalize extension columns globally and cap to 5 characters
if str(name).strip().lower() == "ext":
@@ -180,7 +185,7 @@ class ResultTable:
>>> print(result_table)
"""
def __init__(self, title: str = "", title_width: int = 80, max_columns: int = None, preserve_order: bool = False):
def __init__(self, title: str = "", title_width: int = 80, max_columns: Optional[int] = None, preserve_order: bool = False):
"""Initialize a result table.
Args:
@@ -290,6 +295,8 @@ class ResultTable:
new_table.source_args = list(self.source_args) if self.source_args else []
new_table.input_options = dict(self.input_options) if self.input_options else {}
new_table.no_choice = self.no_choice
new_table.table = self.table
new_table.header_lines = list(self.header_lines) if self.header_lines else []
return new_table
def set_row_selection_args(self, row_index: int, selection_args: List[str]) -> None:
@@ -339,8 +346,8 @@ class ResultTable:
Looks for columns named 'Title', 'Name', or 'Tag' (in that order).
Case-insensitive sort. Returns self for chaining.
IMPORTANT: Updates source_index to match new sorted positions so that
@N selections continue to work correctly after sorting.
NOTE: This only affects display order. Each row keeps its original
`source_index` (insertion order) for callers that need stable mapping.
"""
if getattr(self, "preserve_order", False):
return self
@@ -508,7 +515,7 @@ class ResultTable:
Shows the Tag column with the tag name and Source column to identify
which storage backend the tag values come from (Hydrus, local, etc.).
All data preserved in TagItem for piping and operations.
Use @1 to select a tag, @{1,3,5} to select multiple.
Tag row selection is handled by the CLI pipeline (e.g. `@N | ...`).
"""
# Tag name (truncate if too long)
if hasattr(item, 'tag_name') and item.tag_name:
@@ -566,21 +573,18 @@ class ResultTable:
instead of treating it as a regular field. This allows dynamic column definitions
from search providers.
Priority field groups (uses first match within each group):
Priority field groups (first match per group):
- title | name | filename
- ext
- size | size_bytes
- store | table | source
- type | media_kind | kind
- target | path | url
- hash | hash_hex | file_hash
- tag | tag_summary
- detail | description
"""
# Helper to determine if a field should be hidden from display
def is_hidden_field(field_name: Any) -> bool:
# Hide internal/metadata fields
hidden_fields = {
'__', 'id', 'action', 'parent_id', 'is_temp', 'path', 'extra',
'target', 'hash', 'hash_hex', 'file_hash', 'tag', 'tag_summary', 'name'
'target', 'hash', 'hash_hex', 'file_hash', 'tag', 'tag_summary'
}
if isinstance(field_name, str):
if field_name.startswith('__'):
@@ -665,7 +669,7 @@ class ResultTable:
if column_count == 0:
# Explicitly set which columns to display in order
priority_groups = [
('title', ['title']),
('title', ['title', 'name', 'filename']),
('ext', ['ext']),
('size', ['size', 'size_bytes']),
('store', ['store', 'table', 'source']),
@@ -691,6 +695,8 @@ class ResultTable:
col_name = "Store"
elif field in ['size', 'size_bytes']:
col_name = "Size (Mb)"
elif field in ['title', 'name', 'filename']:
col_name = "Title"
else:
col_name = field.replace('_', ' ').title()
@@ -794,25 +800,13 @@ class ResultTable:
# Title block
if self.title:
lines.append("|" + "=" * (table_width - 2) + "|")
safe_title = (
str(self.title)
.replace("\r\n", " ")
.replace("\n", " ")
.replace("\r", " ")
.replace("\t", " ")
)
safe_title = _sanitize_cell_text(self.title)
lines.append(wrap(safe_title.ljust(table_width - 2)))
lines.append("|" + "=" * (table_width - 2) + "|")
# Optional header metadata lines
for meta in self.header_lines:
safe_meta = (
str(meta)
.replace("\r\n", " ")
.replace("\n", " ")
.replace("\r", " ")
.replace("\t", " ")
)
safe_meta = _sanitize_cell_text(meta)
lines.append(wrap(safe_meta))
# Add header with # column
@@ -832,14 +826,7 @@ class ResultTable:
for col_name in column_names:
width = capped_width(col_name)
col_value = row.get_column(col_name) or ""
if col_value:
col_value = (
col_value
.replace("\r\n", " ")
.replace("\n", " ")
.replace("\r", " ")
.replace("\t", " ")
)
col_value = _sanitize_cell_text(col_value)
if len(col_value) > width:
col_value = col_value[: width - 3] + "..."
row_parts.append(col_value.ljust(width))
@@ -1190,7 +1177,7 @@ class ResultTable:
Dictionary mapping option names to selected values
"""
result = {}
for name, option in self.input_options.items():
for name, _option in self.input_options.items():
value = self.select_option(name)
if value is not None:
result[name] = value
@@ -1310,7 +1297,7 @@ class ResultTable:
if not TEXTUAL_AVAILABLE:
raise ImportError("Textual not available for tree building")
tree_widget.reset()
tree_widget.reset(self.title or "Results")
root = tree_widget.root
# Add each row as a top-level node
@@ -1325,43 +1312,6 @@ class ResultTable:
row_node.add_leaf(f"[cyan]{col.name}[/cyan]: {value_str}")
def _format_duration(duration: Any) -> str:
"""Format duration value as human-readable string.
Args:
duration: Duration in seconds, milliseconds, or already formatted string
Returns:
Formatted duration string (e.g., "2h 18m 5s", "5m 30s")
"""
if isinstance(duration, str):
return duration if duration else ""
try:
# Convert to seconds if needed
if isinstance(duration, (int, float)):
seconds = int(duration)
if seconds < 1000: # Likely already in seconds
pass
else: # Likely in milliseconds
seconds = seconds // 1000
else:
return ""
hours = seconds // 3600
minutes = (seconds % 3600) // 60
secs = seconds % 60
if hours > 0:
return f"{hours}h {minutes}m {secs}s"
elif minutes > 0:
return f"{minutes}m {secs}s"
else:
return f"{secs}s"
except (ValueError, TypeError):
return ""
def _format_size(size: Any, integer_only: bool = False) -> str:
"""Format file size as human-readable string.