This commit is contained in:
nose
2025-12-20 02:12:45 -08:00
parent b0b198df95
commit b75faa49a2
27 changed files with 2883 additions and 3329 deletions

View File

@@ -109,19 +109,15 @@ class SharedArgs:
summary="Does something",
usage="my-cmdlet",
args=[
SharedArgs.HASH, # Use predefined shared arg
SharedArgs.QUERY, # Use predefined shared arg (e.g., -query "hash:<sha256>")
SharedArgs.LOCATION, # Use another shared arg
CmdletArg(...), # Mix with custom args
]
)
"""
# File/Hash arguments
HASH = CmdletArg(
name="hash",
type="string",
description="File hash (SHA256, 64-char hex string)",
)
# NOTE: This project no longer exposes a dedicated -hash flag.
# Use SharedArgs.QUERY with `hash:` syntax instead (e.g., -query "hash:<sha256>").
STORE = CmdletArg(
name="store",
@@ -248,7 +244,7 @@ class SharedArgs:
QUERY = CmdletArg(
"query",
type="string",
description="Search query string."
description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>})."
)
REASON = CmdletArg(
@@ -321,7 +317,7 @@ class SharedArgs:
CmdletArg if found, None otherwise
Example:
arg = SharedArgs.get('HASH') # Returns SharedArgs.HASH
arg = SharedArgs.get('QUERY') # Returns SharedArgs.QUERY
"""
try:
return getattr(cls, name.upper())
@@ -527,6 +523,16 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
while i < len(args):
token = str(args[i])
token_lower = token.lower()
# Legacy guidance: -hash/--hash was removed in favor of -query "hash:...".
# We don't error hard here because some cmdlets also accept free-form args.
if token_lower in {"-hash", "--hash"}:
try:
log("Legacy flag -hash is no longer supported. Use: -query \"hash:<sha256>\"", file=sys.stderr)
except Exception:
pass
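# Only the flag token is consumed here; a following value (the old hash)
# falls through and may be treated as a free-form positional argument.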
i += 1
continue
# Check if this token is a known flagged argument
if token_lower in arg_spec_map:
@@ -608,6 +614,53 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
return text
def parse_hash_query(query: Optional[str]) -> List[str]:
"""Parse a unified query string for `hash:` into normalized SHA256 hashes.
Supported examples:
- hash:<h1>
- hash:<h1>,<h2>,<h3>
- Hash: <h1> <h2> <h3>
- hash:{<h1>, <h2>}
Returns:
List of unique normalized 64-hex SHA256 hashes.
"""
import re
q = str(query or "").strip()
if not q:
return []
m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
if not m:
return []
rest = (m.group(1) or "").strip()
if rest.startswith("{") and rest.endswith("}"):
rest = rest[1:-1].strip()
if rest.startswith("[") and rest.endswith("]"):
rest = rest[1:-1].strip()
raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
out: List[str] = []
for part in raw_parts:
h = normalize_hash(part)
if not h:
continue
if h not in out:
out.append(h)
return out
def parse_single_hash_query(query: Optional[str]) -> Optional[str]:
"""Parse `hash:` query and require exactly one hash."""
hashes = parse_hash_query(query)
if len(hashes) != 1:
return None
return hashes[0]
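# A quick usage sketch of the two parsers above (illustrative hashes; assumes
# normalize_hash passes lowercase 64-hex input through unchanged):
#
#   h1 = "a" * 64
#   h2 = "b" * 64
#   assert parse_hash_query(f"hash:{h1}") == [h1]
#   assert parse_hash_query(f"hash:{{{h1}, {h2}}}") == [h1, h2]      # braces, comma/space separators
#   assert parse_hash_query(f"Hashes: {h1} {h2} {h1}") == [h1, h2]   # case-insensitive prefix, deduped
#   assert parse_hash_query("title:foo") == []                       # non-hash queries yield nothing
#   assert parse_single_hash_query(f"hash:{h1}") == h1
#   assert parse_single_hash_query(f"hash:{h1},{h2}") is None        # exactly one hash required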
def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]:
"""Get normalized hash from override or result object, consolidating common pattern.

View File

@@ -48,7 +48,6 @@ class Add_File(Cmdlet):
arg=[
SharedArgs.PATH,
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg(name="provider", type="string", required=False, description="File hosting provider (e.g., 0x0)", alias="prov"),
CmdletArg(
name="room",
@@ -1746,6 +1745,62 @@ class Add_File(Cmdlet):
# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
# If we're moving/copying from one store to another, also copy the source store's
# existing associated URLs so they aren't lost.
try:
from metadata import normalize_urls
source_store = None
source_hash = None
if isinstance(result, dict):
source_store = result.get("store")
source_hash = result.get("hash")
if not source_store:
source_store = getattr(pipe_obj, "store", None)
if not source_hash:
source_hash = getattr(pipe_obj, "hash", None)
if (not source_hash) and isinstance(pipe_obj.extra, dict):
source_hash = pipe_obj.extra.get("hash")
source_store = str(source_store or "").strip()
source_hash = str(source_hash or "").strip().lower()
if (
source_store
and source_hash
and len(source_hash) == 64
and source_store.lower() != str(backend_name or "").strip().lower()
):
source_backend = None
try:
if source_store in store.list_backends():
source_backend = store[source_store]
except Exception:
source_backend = None
if source_backend is not None:
try:
src_urls = normalize_urls(source_backend.get_url(source_hash) or [])
except Exception:
src_urls = []
try:
dst_urls = normalize_urls(url or [])
except Exception:
dst_urls = []
merged: list[str] = []
seen: set[str] = set()
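# Destination URLs keep priority: iterate dst first, then source, deduping while preserving order.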
for u in list(dst_urls or []) + list(src_urls or []):
if not u:
continue
if u in seen:
continue
seen.add(u)
merged.append(u)
url = merged
except Exception:
pass
# Collect relationship pairs for post-ingest DB/API persistence.
if collect_relationship_pairs is not None:
rels = Add_File._get_relationships(result, pipe_obj)

View File

@@ -25,11 +25,11 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note -store <store> [-hash <sha256>] <name> <text...>",
usage="add-note -store <store> [-query \"hash:<sha256>\"] <name> <text...>",
alias=[""],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
],
@@ -72,7 +72,10 @@ class Add_Note(Cmdlet):
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[add_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
note_name = str(parsed.get("name") or "").strip()
text_parts = parsed.get("text")
@@ -91,10 +94,10 @@ class Add_Note(Cmdlet):
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
log("[add_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
return 1
store_registry = Store(config)
@@ -161,7 +164,7 @@ class Add_Note(Cmdlet):
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
override_hash=str(query_hash) if query_hash else None,
)
if not resolved_hash:
log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)

View File

@@ -31,7 +31,7 @@ CMDLET = Cmdlet(
arg=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-alt", type="string", description="Explicitly select alt item(s) by @ selection or hash list (e.g., -alt @3-5 or -alt <hash>,<hash>)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
@@ -372,7 +372,7 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
refresh_args.extend(["-query", f"hash:{target_hash}"])
get_relationship(subject, refresh_args, config)
except Exception:
pass
@@ -396,7 +396,10 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
override_store = parsed.get("store")
override_hash = parsed.get("hash")
override_hashes = sh.parse_hash_query(parsed.get("query"))
if parsed.get("query") and not override_hashes:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
king_arg = parsed.get("king")
alt_arg = parsed.get("alt")
rel_type = parsed.get("type", "alt")
@@ -436,20 +439,12 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
resolved_alt_items = [{"hash": h, "store": str(override_store)} for h in hashes]
items_to_process = normalize_result_input(resolved_alt_items)
# Allow explicit -hash operation (store/hash-first)
if (not items_to_process) and override_hash:
# Support comma-separated hashes
raw = str(override_hash)
parts = [p.strip() for p in raw.replace(";", ",").split(",")]
hashes = [h for h in (_normalise_hash_hex(p) for p in parts) if h]
if not hashes:
log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr)
return 1
# Use the selected/override store; required in this mode
# Allow explicit store/hash-first operation via -query "hash:<sha256>" (supports multiple hash: tokens)
if (not items_to_process) and override_hashes:
if not override_store:
log("-store is required when using -hash without piped items", file=sys.stderr)
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
items_to_process = [{"hash": h, "store": str(override_store)} for h in hashes]
items_to_process = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)

View File

@@ -205,7 +205,7 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
if not target_hash or not store_name:
return
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
refresh_args: List[str] = ["-query", f"hash:{target_hash}", "-store", store_name]
get_tag = None
try:
@@ -237,10 +237,10 @@ class Add_Tag(Cmdlet):
super().__init__(
name="add-tag",
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
usage="add-tag -store <store> [-query \"hash:<sha256>\"] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
@@ -249,7 +249,7 @@ class Add_Tag(Cmdlet):
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
"- Requires a store backend: use -store or pipe items that include store.",
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
"- If -query is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tag can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
@@ -258,7 +258,6 @@ class Add_Tag(Cmdlet):
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
],
exec=self.run,
)
@@ -273,6 +272,11 @@ class Add_Tag(Cmdlet):
# Parse arguments
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[add_tag] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
# If add-tag is in the middle of a pipeline (has downstream stages), default to
# including temp files. This enables common flows like:
# @N | download-media | add-tag ... | add-file ...
@@ -337,24 +341,12 @@ class Add_Tag(Cmdlet):
tag_to_add = parse_tag_arguments(raw_tag)
tag_to_add = expand_tag_groups(tag_to_add)
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
extracted_hash = None
filtered_tag: List[str] = []
for tag in tag_to_add:
if isinstance(tag, str) and tag.lower().startswith("hash:"):
_, _, hash_val = tag.partition(":")
if hash_val:
extracted_hash = normalize_hash(hash_val.strip())
continue
filtered_tag.append(tag)
tag_to_add = filtered_tag
if not tag_to_add:
log("No tag provided to add", file=sys.stderr)
return 1
# Get other flags (hash override can come from -hash or hash: token)
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
# Get other flags
hash_override = normalize_hash(query_hash) if query_hash else None
duplicate_arg = parsed.get("duplicate")
# tags are provided - apply them to each store-backed result

View File

@@ -18,7 +18,7 @@ class Add_Url(sh.Cmdlet):
summary="Associate a URL with a file",
usage="@1 | add-url <url>",
arg=[
sh.SharedArgs.HASH,
sh.SharedArgs.QUERY,
sh.SharedArgs.STORE,
sh.CmdletArg("url", required=True, description="URL to associate"),
],
@@ -33,14 +33,19 @@ class Add_Url(sh.Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
parsed = sh.parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -20,10 +20,10 @@ class Delete_File(sh.Cmdlet):
super().__init__(
name="delete-file",
summary="Delete a file locally and/or from Hydrus, including database entries.",
usage="delete-file [-hash <sha256>] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
usage="delete-file [-query \"hash:<sha256>\"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]",
alias=["del-file"],
arg=[
sh.CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
sh.SharedArgs.QUERY,
sh.CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."),
sh.CmdletArg("lib-root", description="Path to local library root for database cleanup."),
sh.CmdletArg("reason", description="Optional reason for deletion (free text)."),
@@ -196,6 +196,7 @@ class Delete_File(sh.Cmdlet):
return 0
# Parse arguments
override_query: str | None = None
override_hash: str | None = None
conserve: str | None = None
lib_root: str | None = None
@@ -205,8 +206,8 @@ class Delete_File(sh.Cmdlet):
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
if low in {"-query", "--query", "query"} and i + 1 < len(args):
override_query = str(args[i + 1]).strip()
i += 2
continue
if low in {"-conserve", "--conserve"} and i + 1 < len(args):
@@ -222,6 +223,11 @@ class Delete_File(sh.Cmdlet):
reason_tokens.append(token)
i += 1
override_hash = sh.parse_single_hash_query(override_query) if override_query else None
if override_query and not override_hash:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
# If no lib_root provided, try to get the first folder store from config
if not lib_root:
try:

View File

@@ -26,11 +26,11 @@ class Delete_Note(Cmdlet):
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-hash <sha256>] <name>",
usage="delete-note -store <store> [-query \"hash:<sha256>\"] <name>",
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
],
detail=[
@@ -68,7 +68,10 @@ class Delete_Note(Cmdlet):
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[delete_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
note_name_override = str(parsed.get("name") or "").strip()
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
@@ -78,10 +81,10 @@ class Delete_Note(Cmdlet):
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
log("[delete_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
return 1
store_registry = Store(config)
@@ -109,7 +112,7 @@ class Delete_Note(Cmdlet):
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
override_hash=str(query_hash) if query_hash else None,
)
if not resolved_hash:
ctx.emit(res)

View File

@@ -117,7 +117,7 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
refresh_args.extend(["-query", f"hash:{target_hash}"])
cmd = get_cmdlet("get-relationship")
if not cmd:
@@ -148,24 +148,21 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
delete_all_flag = parsed_args.get("all", False)
rel_type_filter = parsed_args.get("type")
override_store = parsed_args.get("store")
override_hash = parsed_args.get("hash")
override_hashes = sh.parse_hash_query(parsed_args.get("query"))
if parsed_args.get("query") and not override_hashes:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
raw_path = parsed_args.get("path")
# Normalize input
results = normalize_result_input(result)
# Allow store/hash-first usage when no pipeline items were provided
if (not results) and override_hash:
raw = str(override_hash)
parts = [p.strip() for p in raw.replace(";", ",").split(",") if p.strip()]
hashes = [h for h in (normalize_hash(p) for p in parts) if h]
if not hashes:
log("Invalid -hash value (expected 64-hex sha256)", file=sys.stderr)
return 1
if (not results) and override_hashes:
if not override_store:
log("-store is required when using -hash without piped items", file=sys.stderr)
log("-store is required when using -query without piped items", file=sys.stderr)
return 1
results = [{"hash": h, "store": str(override_store)} for h in hashes]
results = [{"hash": h, "store": str(override_store)} for h in override_hashes]
if not results:
# Legacy -path mode below may still apply
@@ -228,7 +225,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
file_hash = None
if not file_hash:
log("Could not extract file hash for deletion (use -hash or ensure pipeline includes hash)", file=sys.stderr)
log("Could not extract file hash for deletion (use -query \"hash:<sha256>\" or ensure pipeline includes hash)", file=sys.stderr)
return 1
meta = db.get_metadata(file_hash) or {}
@@ -380,7 +377,7 @@ CMDLET = Cmdlet(
arg=[
CmdletArg("path", type="string", description="Specify the local file path (legacy mode, if not piping a result)."),
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
],

View File

@@ -65,7 +65,7 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
refresh_args: list[str] = []
if file_hash:
refresh_args.extend(["-hash", file_hash])
refresh_args.extend(["-query", f"hash:{file_hash}"])
if store_name:
refresh_args.extend(["-store", store_name])
get_tag(subject, refresh_args, config)
@@ -76,14 +76,14 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
CMDLET = Cmdlet(
name="delete-tag",
summary="Remove tags from a file in a store.",
usage="delete-tag -store <store> [-hash <sha256>] <tag>[,<tag>...]",
usage="delete-tag -store <store> [-query \"hash:<sha256>\"] <tag>[,<tag>...]",
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("<tag>[,<tag>...]", required=True, description="One or more tags to remove. Comma- or space-separated."),
],
detail=[
"- Requires a Hydrus file (hash present) or explicit -hash override.",
"- Requires a Hydrus file (hash present) or explicit -query override.",
"- Multiple tags can be comma-separated or space-separated.",
],
)
@@ -111,11 +111,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
has_piped_tag = _looks_like_tag_row(result)
has_piped_tag_list = isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
if not args and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Parse -hash override and collect tags from remaining args
# Parse -query/-store overrides and collect remaining args.
override_query: str | None = None
override_hash: str | None = None
override_store: str | None = None
rest: list[str] = []
@@ -123,8 +120,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
if low in {"-query", "--query", "query"} and i + 1 < len(args):
override_query = str(args[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args):
@@ -133,64 +130,37 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
continue
rest.append(a)
i += 1
# Check if first argument is @ syntax (result table selection)
# @5 or @{2,5,8} to delete tags from ResultTable by index
tags_from_at_syntax = []
hash_from_at_syntax = None
path_from_at_syntax = None
store_from_at_syntax = None
if rest and str(rest[0]).startswith("@"):
selector_arg = str(rest[0])
pipe_selector = selector_arg[1:].strip()
# Parse @N or @{N,M,K} syntax
if pipe_selector.startswith("{") and pipe_selector.endswith("}"):
# @{2,5,8}
pipe_selector = pipe_selector[1:-1]
try:
indices = [int(tok.strip()) for tok in pipe_selector.split(',') if tok.strip()]
except ValueError:
log("Invalid selection syntax. Use @2 or @{2,5,8}")
return 1
# Get the last ResultTable from pipeline context
try:
last_table = ctx._LAST_RESULT_TABLE
if last_table:
# Extract tags from selected rows
for idx in indices:
if 1 <= idx <= len(last_table.rows):
# Look for a TagItem in _LAST_RESULT_ITEMS by index
if idx - 1 < len(ctx._LAST_RESULT_ITEMS):
item = ctx._LAST_RESULT_ITEMS[idx - 1]
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
tag_name = get_field(item, 'tag_name')
if tag_name:
log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}")
tags_from_at_syntax.append(tag_name)
# Also get hash from first item for consistency
if not hash_from_at_syntax:
hash_from_at_syntax = get_field(item, 'hash')
if not path_from_at_syntax:
path_from_at_syntax = get_field(item, 'path')
if not store_from_at_syntax:
store_from_at_syntax = get_field(item, 'store')
if not tags_from_at_syntax:
log(f"No tags found at indices: {indices}")
return 1
else:
log("No ResultTable in pipeline (use @ after running get-tag)")
return 1
except Exception as exc:
log(f"Error processing @ selection: {exc}", file=__import__('sys').stderr)
return 1
# Handle @N selection which creates a list - extract the first item
# If we have a list of TagItems, we want to process ALL of them if no args provided
# This handles: delete-tag @1 (where @1 expands to a list containing one TagItem)
# Also handles: delete-tag @1,2 (where we want to delete tags from multiple files)
override_hash = sh.parse_single_hash_query(override_query) if override_query else None
if override_query and not override_hash:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
# Selection syntax (@...) is handled by the pipeline runner, not by this cmdlet.
# If @ reaches here as a literal argument, it's almost certainly user error.
if rest and str(rest[0]).startswith("@") and not (has_piped_tag or has_piped_tag_list):
log("Selection syntax is only supported via piping. Use: @N | delete-tag")
return 1
# Special case: grouped tag selection created by the pipeline runner.
# This represents "delete these selected tags" (not "delete tags from this file").
grouped_table = ""
try:
grouped_table = str(get_field(result, "table") or "").strip().lower()
except Exception:
grouped_table = ""
grouped_tags = get_field(result, "tag") if result is not None else None
tags_arg = parse_tag_arguments(rest)
if grouped_table == "tag.selection" and isinstance(grouped_tags, list) and grouped_tags and not tags_arg:
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash"))
store_name = override_store or get_field(result, "store")
path = get_field(result, "path") or get_field(result, "target")
tags = [str(t) for t in grouped_tags if t]
return 0 if _process_deletion(tags, file_hash, path, store_name, config) else 1
if not tags_arg and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
return 1
# Normalize result to a list for processing
items_to_process = []
@@ -198,6 +168,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
items_to_process = result
elif result:
items_to_process = [result]
# Process each item
success_count = 0
# If we have TagItems and no args, we are deleting the tags themselves
# If we have Files (or other objects) and args, we are deleting tags FROM those files
@@ -206,81 +179,66 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
is_tag_item_mode = bool(items_to_process) and _looks_like_tag_row(items_to_process[0])
if is_tag_item_mode:
# Collect all tags to delete from the TagItems
# Group by hash/file_path to batch operations if needed, or just process one by one
# For simplicity, we'll process one by one or group by file
pass
# Collect all tags to delete from the TagItems and batch per file.
# This keeps delete-tag efficient (one backend call per file).
groups: Dict[tuple[str, str, str], list[str]] = {}
for item in items_to_process:
tag_name = get_field(item, "tag_name")
if not tag_name:
continue
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_store = override_store or get_field(item, "store")
item_path = get_field(item, "path") or get_field(item, "target")
key = (str(item_hash or ""), str(item_store or ""), str(item_path or ""))
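# Missing fields normalize to "" so rows for the same file land in one group.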
groups.setdefault(key, []).append(str(tag_name))
for (h, s, p), tag_list in groups.items():
if not tag_list:
continue
if _process_deletion(tag_list, h or None, p or None, s or None, config):
success_count += 1
return 0 if success_count > 0 else 1
else:
# "Delete tags from files" mode
# We need args (tags to delete)
if not args and not tags_from_at_syntax:
if not tags_arg:
log("Requires at least one tag argument when deleting from files")
return 1
# Process each item
success_count = 0
# If we have tags from @ syntax (e.g. delete-tag @{1,2}), we ignore the piped result for tag selection
# but we might need the piped result for the file context if @ selection was from a Tag table
# Actually, the @ selection logic above already extracted tags.
if tags_from_at_syntax:
# Special case: @ selection of tags.
# We already extracted tags and hash/path.
# Just run the deletion once using the extracted info.
# This preserves the existing logic for @ selection.
tags = tags_from_at_syntax
file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax)
path = path_from_at_syntax
store_name = override_store or store_from_at_syntax
if _process_deletion(tags, file_hash, path, store_name, config):
success_count += 1
else:
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
tags_arg = parse_tag_arguments(rest)
for item in items_to_process:
tags_to_delete = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
)
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
# It's a tag row (TagItem or PipeObject/dict with tag_name)
if tags_arg:
# User provided tags to delete FROM this file (ignoring the tag name in the item?)
# Or maybe they want to delete the tag in the item AND the args?
# Usually if piping TagItems, we delete THOSE tags.
# If args are present, maybe we should warn?
# For now, if args are present, assume they override or add to the tag item?
# Let's assume if args are present, we use args. If not, we use the tag name.
tags_to_delete = tags_arg
else:
tag_name = get_field(item, 'tag_name')
if tag_name:
tags_to_delete = [tag_name]
# Process items from pipe (or single result)
# If args are provided, they are the tags to delete from EACH item
# If items are TagItems and no args, the tag to delete is the item itself
for item in items_to_process:
tags_to_delete: list[str] = []
item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash"))
item_path = (
get_field(item, "path")
or get_field(item, "target")
)
item_store = override_store or get_field(item, "store")
if _looks_like_tag_row(item):
if tags_arg:
tags_to_delete = tags_arg
else:
# It's a File or other object
if tags_arg:
tags_to_delete = tags_arg
else:
# No tags provided for a file object - skip or error?
# We already logged an error if no args and not TagItem mode globally,
# but inside the loop we might have mixed items? Unlikely.
continue
if tags_to_delete:
if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config):
success_count += 1
tag_name = get_field(item, 'tag_name')
if tag_name:
tags_to_delete = [str(tag_name)]
else:
if tags_arg:
tags_to_delete = tags_arg
else:
continue
if tags_to_delete:
if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config):
success_count += 1
if success_count > 0:
return 0

View File

@@ -27,7 +27,7 @@ class Delete_Url(Cmdlet):
summary="Remove a URL association from a file",
usage="@1 | delete-url <url>",
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to remove"),
],
@@ -42,14 +42,19 @@ class Delete_Url(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete URL from file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -1124,7 +1124,8 @@ def download_media(
# If we downloaded sections, look for files with the session_id pattern
if opts.clip_sections and session_id:
# Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
# Also includes sidecars like "{session_id}_1.en.vtt".
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)')
matching_files = [f for f in files if section_pattern.search(f.name)]
if matching_files:
@@ -1136,38 +1137,116 @@ def download_media(
matching_files.sort(key=extract_section_num)
debug(f"Found {len(matching_files)} section file(s) matching pattern")
# Now rename section files to use hash-based names
# This ensures unique filenames for each section content
renamed_files = []
for idx, section_file in enumerate(matching_files, 1):
# Now rename section *media* files to use hash-based names.
# Sidecars (subtitles) are renamed to match the media hash so they can be
# attached as notes later (and not emitted as separate pipeline items).
by_index: Dict[int, List[Path]] = {}
for f in matching_files:
m = section_pattern.search(f.name)
if not m:
continue
try:
# Calculate hash for the file
file_hash = sha256_file(section_file)
ext = section_file.suffix
new_name = f"{file_hash}{ext}"
new_path = opts.output_dir / new_name
if new_path.exists() and new_path != section_file:
# If file with same hash exists, use it and delete the temp one
debug(f"File with hash {file_hash} already exists, using existing file.")
n = int(m.group(1))
except Exception:
continue
by_index.setdefault(n, []).append(f)
renamed_media_files: List[Path] = []
for sec_num in sorted(by_index.keys()):
group = by_index.get(sec_num) or []
if not group:
continue
def _is_subtitle(p: Path) -> bool:
try:
return p.suffix.lower() in _SUBTITLE_EXTS
except Exception:
return False
media_candidates = [p for p in group if not _is_subtitle(p)]
subtitle_candidates = [p for p in group if _is_subtitle(p)]
# Pick the primary media file for this section.
# Prefer non-json, non-info sidecars.
media_file: Optional[Path] = None
for cand in media_candidates:
try:
if cand.suffix.lower() in {".json", ".info.json"}:
continue
except Exception:
pass
media_file = cand
break
if media_file is None and media_candidates:
media_file = media_candidates[0]
if media_file is None:
# No media file found for this section; skip.
continue
try:
media_hash = sha256_file(media_file)
except Exception as e:
debug(f"Failed to hash section media file {media_file.name}: {e}")
renamed_media_files.append(media_file)
continue
# Preserve any suffix tail after the section index so language tags survive.
# Example: <session>_1.en.vtt -> <hash>.en.vtt
prefix = f"{session_id}_{sec_num}"
def _tail(name: str) -> str:
try:
if name.startswith(prefix):
return name[len(prefix):]
except Exception:
pass
# Fallback: keep just the last suffix.
try:
return Path(name).suffix
except Exception:
return ""
# Rename media file to <hash><tail> (tail typically like .mkv).
try:
new_media_name = f"{media_hash}{_tail(media_file.name)}"
new_media_path = opts.output_dir / new_media_name
if new_media_path.exists() and new_media_path != media_file:
debug(f"File with hash {media_hash} already exists, using existing file.")
try:
section_file.unlink()
media_file.unlink()
except OSError:
pass
renamed_files.append(new_path)
else:
section_file.rename(new_path)
debug(f"Renamed section file: {section_file.name} {new_name}")
renamed_files.append(new_path)
media_file.rename(new_media_path)
debug(f"Renamed section file: {media_file.name} -> {new_media_name}")
renamed_media_files.append(new_media_path)
except Exception as e:
debug(f"Failed to process section file {section_file.name}: {e}")
renamed_files.append(section_file)
media_path = renamed_files[0]
media_paths = renamed_files
debug(f"Failed to rename section media file {media_file.name}: {e}")
renamed_media_files.append(media_file)
new_media_path = media_file
# Rename subtitle sidecars to match media hash for later note attachment.
for sub_file in subtitle_candidates:
try:
new_sub_name = f"{media_hash}{_tail(sub_file.name)}"
new_sub_path = opts.output_dir / new_sub_name
if new_sub_path.exists() and new_sub_path != sub_file:
try:
sub_file.unlink()
except OSError:
pass
else:
sub_file.rename(new_sub_path)
debug(f"Renamed section file: {sub_file.name} -> {new_sub_name}")
except Exception as e:
debug(f"Failed to rename section subtitle file {sub_file.name}: {e}")
media_path = renamed_media_files[0] if renamed_media_files else matching_files[0]
media_paths = renamed_media_files if renamed_media_files else None
if not opts.quiet:
debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
count = len(media_paths) if isinstance(media_paths, list) else 1
debug(f"✓ Downloaded {count} section media file(s) (session: {session_id})")
else:
# Fallback to most recent file if pattern not found
media_path = files[0]
@@ -1398,9 +1477,14 @@ class Download_Media(Cmdlet):
alias=[""],
arg=[
SharedArgs.URL,
SharedArgs.QUERY,
CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
CmdletArg(
name="clip",
type="string",
description="Extract time range(s) or keyed spec (e.g., clip:3m4s-3m14s,item:2-3)",
),
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
SharedArgs.PATH
],
@@ -1483,6 +1567,34 @@ class Download_Media(Cmdlet):
# Get other options
clip_spec = parsed.get("clip")
query_spec = parsed.get("query")
# download-media supports a small keyed spec language inside -query.
# Examples:
# -query "hash:<sha256>"
# -query "clip:1m-1m15s,2m1s-2m11s"
# -query "hash:<sha256>,clip:1m-1m15s,item:2-3"
query_keyed: Dict[str, List[str]] = {}
if query_spec:
try:
query_keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
except Exception:
query_keyed = {}
# Optional: allow an explicit hash via -query "hash:<sha256>".
# This is used as the preferred king hash for multi-clip relationships.
query_hash_override: Optional[str] = None
try:
hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else []
hash_candidate = (hash_values[-1] if hash_values else None)
if hash_candidate:
# Re-wrap for the shared parser which expects the `hash:` prefix.
query_hash_override = sh.parse_single_hash_query(f"hash:{hash_candidate}")
else:
# Backwards-compatible: treat a non-keyed query as a hash query.
query_hash_override = sh.parse_single_hash_query(str(query_spec)) if query_spec else None
except Exception:
query_hash_override = None
# Always enable chapters + subtitles so downstream pipes (e.g. mpv) can consume them.
embed_chapters = True
@@ -1492,12 +1604,38 @@ class Download_Media(Cmdlet):
# Parse clip range(s) if specified
clip_ranges: Optional[List[tuple[int, int]]] = None
clip_values: List[str] = []
item_values: List[str] = []
if clip_spec:
clip_ranges = self._parse_time_ranges(str(clip_spec))
# Support keyed clip syntax:
# -clip "clip:3m4s-3m14s,1h22m-1h33m,item:2-3"
keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip")
clip_values.extend(keyed.get("clip", []) or [])
item_values.extend(keyed.get("item", []) or [])
# Allow the same keyed spec language inside -query so users can do:
# download-media <url> -query "clip:1m-1m15s,2m1s-2m11s"
if query_keyed:
clip_values.extend(query_keyed.get("clip", []) or [])
item_values.extend(query_keyed.get("item", []) or [])
if item_values and not parsed.get("item"):
parsed["item"] = ",".join([v for v in item_values if v])
if clip_values:
clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v]))
if not clip_ranges:
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
bad_spec = clip_spec or query_spec
log(f"Invalid clip format: {bad_spec}", file=sys.stderr)
return 1
if clip_ranges:
try:
debug(f"Clip ranges: {clip_ranges}")
except Exception:
pass
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
storage = None
@@ -1886,56 +2024,37 @@ class Download_Media(Cmdlet):
formats = list_formats(url, no_playlist=False)
if formats and len(formats) > 1:
# Filter formats: multiple videos (640x+, one per resolution tier) + 1 best audio
video_formats = []
audio_formats = []
for fmt in formats:
width = fmt.get("width") or 0
height = fmt.get("height") or 0
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
# Classify as video or audio
if vcodec != "none" and acodec == "none" and width >= 640:
video_formats.append(fmt)
elif acodec != "none" and vcodec == "none":
audio_formats.append(fmt)
# Group videos by resolution and select best format per resolution
filtered_formats = []
if video_formats:
# Group by height (resolution tier)
from collections import defaultdict
by_resolution = defaultdict(list)
for f in video_formats:
height = f.get("height") or 0
by_resolution[height].append(f)
# For each resolution, prefer AV1, then highest bitrate
for height in sorted(by_resolution.keys(), reverse=True):
candidates = by_resolution[height]
av1_formats = [f for f in candidates if "av01" in f.get("vcodec", "")]
if av1_formats:
best = max(av1_formats, key=lambda f: f.get("tbr") or 0)
else:
best = max(candidates, key=lambda f: f.get("tbr") or 0)
filtered_formats.append(best)
# Select best audio: highest bitrate (any format)
if audio_formats:
best_audio = max(audio_formats, key=lambda f: f.get("tbr") or f.get("abr") or 0)
filtered_formats.append(best_audio)
if not filtered_formats:
# Fallback to all formats if filtering resulted in nothing
filtered_formats = formats
debug(f"Filtered to {len(filtered_formats)} formats from {len(formats)} total")
# Show format selection table
log(f"Available formats for {url}:", file=sys.stderr)
log("", file=sys.stderr)
# Formatlist filtering
#
# Goal:
# - Keep the list useful (hide non-media entries like storyboards)
# - But NEVER filter down so far that the user can't browse/pick formats.
#
# The old filtering was too aggressive (e.g. width>=640, one per resolution),
# which often hid most YouTube formats.
def _is_browseable_format(fmt: Any) -> bool:
if not isinstance(fmt, dict):
return False
format_id = str(fmt.get("format_id") or "").strip()
if not format_id:
return False
ext = str(fmt.get("ext") or "").strip().lower()
if ext in {"mhtml", "json"}:
return False
note = str(fmt.get("format_note") or "").lower()
if "storyboard" in note:
return False
if format_id.lower().startswith("sb"):
return False
vcodec = str(fmt.get("vcodec", "none"))
acodec = str(fmt.get("acodec", "none"))
# Keep anything with at least one stream.
return not (vcodec == "none" and acodec == "none")
candidate_formats = [f for f in formats if _is_browseable_format(f)]
filtered_formats = candidate_formats if candidate_formats else list(formats)
debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})")
# Build the base command that will be replayed with @N selection
# Include any additional args from the original command
@@ -1946,8 +2065,10 @@ class Download_Media(Cmdlet):
base_cmd += ' ' + ' '.join(remaining_args)
# Create result table for display
table = ResultTable()
table.title = f"Available formats for {url}"
# NOTE: ResultTable defaults to max_columns=5; for formatlist we want more columns
# (including Size) so the user can compare formats.
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-media", [url])
# Collect results for table
@@ -1960,6 +2081,7 @@ class Download_Media(Cmdlet):
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
filesize_approx = fmt.get("filesize_approx")
format_id = fmt.get("format_id", "")
# If the chosen format is video-only (no audio stream), automatically
@@ -1971,11 +2093,20 @@ class Download_Media(Cmdlet):
except Exception:
selection_format_id = format_id
# Format size
# Format size (prefer exact filesize; fall back to filesize_approx)
size_str = ""
if filesize:
size_mb = filesize / (1024 * 1024)
size_str = f"{size_mb:.1f}MB"
size_prefix = ""
size_bytes = filesize
if not size_bytes:
size_bytes = filesize_approx
if size_bytes:
size_prefix = "~"
try:
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
size_mb = float(size_bytes) / (1024 * 1024)
size_str = f"{size_prefix}{size_mb:.1f}MB"
except Exception:
size_str = ""
# Build format description
desc_parts = []
@@ -2002,42 +2133,67 @@ class Download_Media(Cmdlet):
"annotations": [ext, resolution] if resolution else [ext],
"media_kind": "format",
"cmd": base_cmd,
# Put Size early so it's visible even with smaller column caps.
"columns": [
("#", str(idx)),
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),
("Video", vcodec),
("Audio", acodec),
("Size", size_str or "N/A"),
],
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", selection_format_id]
"_selection_args": None,
}
# Preserve clip settings across @N selection.
# Some runners only append row selection args; make sure clip intent
# survives even when it was provided via -query "clip:...".
selection_args: List[str] = ["-format", selection_format_id]
try:
if (not clip_spec) and clip_values:
selection_args.extend(["-clip", ",".join([v for v in clip_values if v])])
except Exception:
pass
format_dict["_selection_args"] = selection_args
# Add to results list and table (don't emit - formats should wait for @N selection)
results_list.append(format_dict)
table.add_result(format_dict)
# Render and display the table
# Table is displayed by pipeline runner via set_current_stage_table
# Some runners (e.g. cmdnat) do not automatically render stage tables.
# Since this branch is explicitly interactive (user must pick @N), always
# print the table here and mark it as already rendered to avoid duplicates
# in runners that also print tables (e.g. CLI.py).
try:
sys.stderr.write(table.format_plain() + "\n")
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
# Set the result table so it displays and is available for @N selection
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
log(f"", file=sys.stderr)
log(f"Use: @N | download-media to select and download format", file=sys.stderr)
log(f"Use: @N to select and download format", file=sys.stderr)
return 0
# Download each URL
downloaded_count = 0
clip_sections_spec = self._build_clip_sections_spec(clip_ranges)
if clip_sections_spec:
try:
debug(f"Clip sections spec: {clip_sections_spec}")
except Exception:
pass
for url in supported_url:
try:
debug(f"Processing: {url}")
@@ -2136,6 +2292,13 @@ class Download_Media(Cmdlet):
p_path = Path(p)
except Exception:
continue
# Sidecars (subtitles) should never be piped as standalone items.
# They are handled separately and attached to notes.
try:
if p_path.suffix.lower() in _SUBTITLE_EXTS:
continue
except Exception:
pass
if not p_path.exists() or p_path.is_dir():
continue
try:
@@ -2189,6 +2352,12 @@ class Download_Media(Cmdlet):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
# We keep subtitles as notes; do not leave a sidecar that later stages
# might try to ingest as a file.
try:
sub_path.unlink()
except Exception:
pass
pipe_objects.append(po)
@@ -2196,7 +2365,7 @@ class Download_Media(Cmdlet):
# Relationship tags are only added when multiple clips exist.
try:
if clip_ranges and len(pipe_objects) == len(clip_ranges):
source_hash = self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
source_hash = query_hash_override or self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
except Exception:
pass
@@ -2234,8 +2403,8 @@ class Download_Media(Cmdlet):
if formats:
formats_to_show = formats
table = ResultTable()
table.title = f"Available formats for {url}"
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-media", [str(a) for a in (args or [])])
results_list: List[Dict[str, Any]] = []
@@ -2245,6 +2414,7 @@ class Download_Media(Cmdlet):
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
filesize_approx = fmt.get("filesize_approx")
format_id = fmt.get("format_id", "")
selection_format_id = format_id
@@ -2255,12 +2425,18 @@ class Download_Media(Cmdlet):
selection_format_id = format_id
size_str = ""
if filesize:
try:
size_mb = float(filesize) / (1024 * 1024)
size_str = f"{size_mb:.1f}MB"
except Exception:
size_str = ""
size_prefix = ""
size_bytes = filesize
if not size_bytes:
size_bytes = filesize_approx
if size_bytes:
size_prefix = "~"
try:
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
size_mb = float(size_bytes) / (1024 * 1024)
size_str = f"{size_prefix}{size_mb:.1f}MB"
except Exception:
size_str = ""
desc_parts: List[str] = []
if resolution and resolution != "audio only":
@@ -2283,13 +2459,12 @@ class Download_Media(Cmdlet):
"detail": format_desc,
"media_kind": "format",
"columns": [
("#", str(idx)),
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),
("Video", vcodec),
("Audio", acodec),
("Size", size_str or "N/A"),
],
"full_metadata": {
"format_id": format_id,
@@ -2305,6 +2480,13 @@ class Download_Media(Cmdlet):
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
# See comment in the main formatlist path: always print for interactive selection.
try:
sys.stderr.write(table.format_plain() + "\n")
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
# Returning 0 with no emits lets the CLI pause the pipeline for @N selection.
log("Requested format is not available; select a working format with @N", file=sys.stderr)
return 0
@@ -2387,6 +2569,25 @@ class Download_Media(Cmdlet):
if not ts:
return None
# Support compact units like 3m4s, 1h22m, 1h2m3s
# (case-insensitive; seconds may be fractional but are truncated to int)
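# e.g. "1h2m3s" -> 3723, "3m4s" -> 184, "90s" -> 90, "1h" -> 3600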
try:
unit_match = re.fullmatch(
r"(?i)\s*(?:(?P<h>\d+)h)?\s*(?:(?P<m>\d+)m)?\s*(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
ts,
)
except Exception:
unit_match = None
if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")):
try:
hours = int(unit_match.group("h") or 0)
minutes = int(unit_match.group("m") or 0)
seconds = float(unit_match.group("s") or 0)
total = (hours * 3600) + (minutes * 60) + seconds
return int(total)
except Exception:
return None
if ":" in ts:
parts = [p.strip() for p in ts.split(":")]
if len(parts) == 2:
@@ -2430,6 +2631,46 @@ class Download_Media(Cmdlet):
return ranges
@staticmethod
def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]:
"""Parse comma-separated values with optional sticky `key:` prefixes.
Example:
clip:3m4s-3m14s,1h22m-1h33m,item:2-3
Rules:
- Items are split on commas.
- If an item begins with `key:` then key becomes active for subsequent items.
- If an item has no `key:` prefix, it belongs to the last active key.
- If no key has been set yet, values belong to default_key.
"""
out: Dict[str, List[str]] = {}
if not isinstance(spec, str):
spec = str(spec)
text = spec.strip()
if not text:
return out
active = (default_key or "").strip().lower() or "clip"
key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$")
for raw_piece in text.split(","):
piece = raw_piece.strip()
if not piece:
continue
m = key_pattern.match(piece)
if m:
active = (m.group(1) or "").strip().lower() or active
value = (m.group(2) or "").strip()
if value:
out.setdefault(active, []).append(value)
continue
out.setdefault(active, []).append(piece)
return out
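# A short sketch of the sticky-key behavior above (hypothetical direct call
# to the staticmethod, values illustrative):
#
#   spec = "clip:3m4s-3m14s,1h22m-1h33m,item:2-3,4"
#   Download_Media._parse_keyed_csv_spec(spec, default_key="clip")
#   # -> {"clip": ["3m4s-3m14s", "1h22m-1h33m"], "item": ["2-3", "4"]}
#   Download_Media._parse_keyed_csv_spec("1m-2m", default_key="clip")
#   # -> {"clip": ["1m-2m"]}  (unprefixed values fall under default_key)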
def _build_clip_sections_spec(
self,
clip_ranges: Optional[List[tuple[int, int]]],

View File

@@ -25,14 +25,14 @@ class Get_File(sh.Cmdlet):
summary="Export file to local path",
usage="@1 | get-file -path C:\\Downloads",
arg=[
sh.SharedArgs.HASH,
sh.SharedArgs.QUERY,
sh.SharedArgs.STORE,
sh.SharedArgs.PATH,
sh.CmdletArg("name", description="Output filename (default: from metadata title)"),
],
detail=[
"- Exports file from storage backend to local path",
"- Uses hash+store to retrieve file",
"- Uses selected item's hash, or -query \"hash:<sha256>\"",
"- Preserves file extension and metadata",
],
exec=self.run,
@@ -44,9 +44,14 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] run() called with result type: {type(result)}")
parsed = sh.parse_cmdlet_args(args, self)
debug(f"[get-file] parsed args: {parsed}")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or sh.get_field(result, "hash")
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
@@ -54,7 +59,7 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... store_name={store_name}")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -26,16 +26,16 @@ class Get_Metadata(Cmdlet):
super().__init__(
name="get-metadata",
summary="Print metadata for files by hash and storage backend.",
usage="get-metadata [-hash <sha256>] [-store <backend>]",
usage="get-metadata [-query \"hash:<sha256>\"] [-store <backend>]",
alias=["meta"],
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
],
detail=[
"- Retrieves metadata from storage backend using file hash as identifier.",
"- Shows hash, MIME type, size, duration/pages, known url, and import timestamp.",
"- Hash and store are taken from piped result or can be overridden with -hash/-store flags.",
"- Hash and store are taken from piped result or can be overridden with -query/-store flags.",
"- All metadata is retrieved from the storage backend's database (single source of truth).",
],
exec=self.run,
@@ -153,13 +153,18 @@ class Get_Metadata(Cmdlet):
"""Main execution entry point."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
return 1
# Get hash and store from parsed args or result
file_hash = parsed.get("hash") or get_field(result, "hash")
file_hash = query_hash or get_field(result, "hash")
storage_source = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available - use -hash to specify", file=sys.stderr)
log("No hash available - use -query \"hash:<sha256>\"", file=sys.stderr)
return 1
if not storage_source:

View File

@@ -25,11 +25,11 @@ class Get_Note(Cmdlet):
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-hash <sha256>]",
usage="get-note -store <store> [-query \"hash:<sha256>\"]",
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
SharedArgs.QUERY,
],
detail=[
"- Notes are retrieved via the selected store backend.",
@@ -66,14 +66,17 @@ class Get_Note(Cmdlet):
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("[get_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
return 1
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
if store_override and query_hash:
results = [{"store": str(store_override), "hash": query_hash}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
log("[get_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
return 1
store_registry = Store(config)
@@ -94,7 +97,7 @@ class Get_Note(Cmdlet):
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
override_hash=str(query_hash) if query_hash else None,
)
if not resolved_hash:
continue
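
When nothing is piped, get-note now synthesizes a single result from the two flags. A sketch of that fallback:

# Sketch: get-note's no-pipe fallback builds one synthetic result from flags.
def fallback_results(store_override, query_hash):
    # Mirrors the guard above: both -store and a parsed hash are required.
    if store_override and query_hash:
        return [{"store": str(store_override), "hash": query_hash}]
    return []  # caller logs the usage error and returns 1

assert fallback_results("home", "e" * 64) == [{"store": "home", "hash": "e" * 64}]
assert fallback_results(None, "e" * 64) == []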

View File

@@ -29,12 +29,12 @@ from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-hash <sha256>]",
usage="get-relationship [-query \"hash:<sha256>\"]",
alias=[
"get-rel",
],
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
],
detail=[
@@ -48,20 +48,28 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse -hash and -store override
override_hash: str | None = None
# Parse -query and -store override
override_query: str | None = None
override_store: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
if low in {"-query", "--query", "query"} and i + 1 < len(args_list):
override_query = str(args_list[i + 1]).strip()
i += 2
continue
if low in {"-store", "--store", "store"} and i + 1 < len(args_list):
override_store = str(args_list[i + 1]).strip()
i += 2
continue
i += 1
override_hash: str | None = sh.parse_single_hash_query(override_query) if override_query else None
if override_query and not override_hash:
log("get-relationship requires -query \"hash:<sha256>\"", file=sys.stderr)
return 1
# Handle @N selection which creates a list
# This cmdlet is single-subject; require disambiguation when multiple items are provided.
@@ -69,7 +77,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -hash", file=sys.stderr)
log("get-relationship expects a single item; select one row (e.g. @1) or pass -query \"hash:<sha256>\"", file=sys.stderr)
return 1
else:
result = result[0]
@@ -439,8 +447,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
pipeline_results.append(res_obj)
# Set selection args
# If it has a path, we can use it directly. If hash, maybe get-file -query "hash:..."?
table.set_row_selection_args(i, ["-store", str(item['store']), "-hash", item['hash']])
table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"])
ctx.set_last_result_table(table, pipeline_results)
print(table)
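
A compact standalone version of the override-parsing loop above; flag spellings match the diff, and the raw query value is validated afterwards by parse_single_hash_query:

# Sketch: manual -query/-store scan as used by get-relationship.
def parse_overrides(args_list):
    override_query = override_store = None
    i = 0
    while i < len(args_list):
        low = str(args_list[i]).lower()
        if low in {"-query", "--query", "query"} and i + 1 < len(args_list):
            override_query = str(args_list[i + 1]).strip()
            i += 2
            continue
        if low in {"-store", "--store", "store"} and i + 1 < len(args_list):
            override_store = str(args_list[i + 1]).strip()
            i += 2
            continue
        i += 1
    return override_query, override_store

assert parse_overrides(["-store", "home", "-query", "hash:" + "f" * 64]) == \
    ("hash:" + "f" * 64, "home")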

View File

@@ -801,11 +801,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get tags from Hydrus, local sidecar, or URL metadata.
Usage:
get-tag [-hash <sha256>] [--store <key>] [--emit]
get-tag [-query "hash:<sha256>"] [--store <key>] [--emit]
get-tag -scrape <url|provider>
Options:
-hash <sha256>: Override hash to use instead of result's hash
-query "hash:<sha256>": Override hash to use instead of result's hash
--store <key>: Store result to this key for pipeline
--emit: Emit result without interactive prompt (quiet mode)
-scrape <url|provider>: Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks)
@@ -843,22 +843,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
scrape_flag_present = any(str(arg).lower() in {"-scrape", "--scrape"} for arg in args_list)
# Extract values
hash_override_raw = parsed_args.get("hash")
hash_override = normalize_hash(hash_override_raw)
query_raw = parsed_args.get("query")
hash_override = sh.parse_single_hash_query(query_raw)
if query_raw and not hash_override:
log("Invalid -query value (expected hash:<sha256>)", file=sys.stderr)
return 1
store_key = parsed_args.get("store")
emit_requested = parsed_args.get("emit", False)
scrape_url = parsed_args.get("scrape")
scrape_requested = scrape_flag_present or scrape_url is not None
explicit_hash_flag = any(str(arg).lower() in {"-hash", "--hash"} for arg in raw_args)
if hash_override_raw is not None:
if not hash_override or not looks_like_hash(hash_override):
debug(f"[get_tag] Ignoring invalid hash override '{hash_override_raw}' (explicit_flag={explicit_hash_flag})")
if explicit_hash_flag:
log("Invalid hash format: expected 64 hex characters", file=sys.stderr)
return 1
hash_override = None
if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
log("-scrape requires a URL or provider name", file=sys.stderr)
return 1
@@ -1182,10 +1176,10 @@ class Get_Tag(Cmdlet):
super().__init__(
name="get-tag",
summary="Get tag values from Hydrus or local sidecar metadata",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
usage="get-tag [-query \"hash:<sha256>\"] [--store <key>] [--emit] [-scrape <url|provider>]",
alias=[],
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
CmdletArg(
name="-store",
type="string",
@@ -1211,7 +1205,7 @@ class Get_Tag(Cmdlet):
" Hydrus: Using file hash if available",
" Local: From sidecar files or local library database",
"- Options:",
" -hash: Override hash to look up in Hydrus",
" -query: Override hash to look up in Hydrus (use: -query \"hash:<sha256>\")",
" -store: Store result to key for downstream pipeline",
" -emit: Quiet mode (no interactive selection)",
" -scrape: Scrape metadata from URL or metadata provider",

View File

@@ -34,7 +34,7 @@ class Get_Url(Cmdlet):
summary="List url associated with a file",
usage="@1 | get-url",
arg=[
SharedArgs.HASH,
SharedArgs.QUERY,
SharedArgs.STORE,
],
detail=[
@@ -47,13 +47,18 @@ class Get_Url(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Get url for file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
return 1
# Extract hash and store from result or args
file_hash = parsed.get("hash") or get_field(result, "hash")
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("Error: No file hash provided")
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:

View File

@@ -12,7 +12,7 @@ from SYS.logger import log, debug
from . import _shared as sh
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag = (
Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag, parse_hash_query = (
sh.Cmdlet,
sh.CmdletArg,
sh.SharedArgs,
@@ -20,6 +20,7 @@ Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, firs
sh.should_show_help,
sh.normalize_hash,
sh.first_title_tag,
sh.parse_hash_query,
)
import pipeline as ctx
@@ -34,7 +35,7 @@ class Search_Store(Cmdlet):
super().__init__(
name="search-store",
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [query] [-store BACKEND] [-limit N]",
usage="search-store [-query <query>] [-store BACKEND] [-limit N]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
@@ -46,51 +47,18 @@ class Search_Store(Cmdlet):
"URL search: url:* (any URL) or url:<value> (URL substring)",
"Results include hash for downstream commands (get-file, add-tag, etc.)",
"Examples:",
"search-store foo # Search all storage backends",
"search-store -store home '*' # Search 'home' Hydrus instance",
"search-store -store test 'video' # Search 'test' folder store",
"search-store 'url:*' # Files that have any URL",
"search-store 'url:youtube.com' # Files whose URL contains substring",
"search-store -query foo # Search all storage backends",
"search-store -store home -query '*' # Search 'home' Hydrus instance",
"search-store -store test -query 'video' # Search 'test' folder store",
"search-store -query 'hash:deadbeef...' # Search by SHA256 hash",
"search-store -query 'url:*' # Files that have any URL",
"search-store -query 'url:youtube.com' # Files whose URL contains substring",
],
exec=self.run,
)
self.register()
# --- Helper methods -------------------------------------------------
@staticmethod
def _parse_hash_query(query: str) -> List[str]:
"""Parse a `hash:` query into a list of normalized 64-hex SHA256 hashes.
Supported examples:
- hash:<h1>,<h2>,<h3>
- Hash: <h1> <h2> <h3>
- hash:{<h1>, <h2>}
"""
q = str(query or "").strip()
if not q:
return []
m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
if not m:
return []
rest = (m.group(1) or "").strip()
if rest.startswith("{") and rest.endswith("}"):
rest = rest[1:-1].strip()
if rest.startswith("[") and rest.endswith("]"):
rest = rest[1:-1].strip()
# Split on commas and whitespace.
raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
out: List[str] = []
for part in raw_parts:
h = normalize_hash(part)
if not h:
continue
if h not in out:
out.append(h)
return out
@staticmethod
def _normalize_extension(ext_value: Any) -> str:
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
@@ -142,6 +110,7 @@ class Search_Store(Cmdlet):
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
flag_registry = self.build_flag_registry()
query_flags = {f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})}
store_flags = {f.lower() for f in (flag_registry.get("store") or {"-store", "--store"})}
limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
@@ -155,6 +124,11 @@ class Search_Store(Cmdlet):
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in query_flags and i + 1 < len(args_list):
chunk = args_list[i + 1]
query = f"{query} {chunk}".strip() if query else chunk
i += 2
continue
if low in store_flags and i + 1 < len(args_list):
storage_backend = args_list[i + 1]
i += 2
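
Because the loop above folds each -query value into the running query, repeated flags concatenate rather than overwrite. A sketch of that accumulation:

# Sketch: repeated -query flags accumulate into one space-joined query string.
query = ""
for chunk in ["hash:" + "a" * 64, "url:*"]:  # as if passed via two -query flags
    query = f"{query} {chunk}".strip() if query else chunk
assert query == "hash:" + "a" * 64 + " url:*"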
@@ -182,7 +156,7 @@ class Search_Store(Cmdlet):
if store_filter and not storage_backend:
storage_backend = store_filter
hash_query = self._parse_hash_query(query)
hash_query = parse_hash_query(query)
if not query:
log("Provide a search query", file=sys.stderr)