dfd
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-27 21:24:27 -08:00
parent fcdd507d00
commit 8288ea8c66
16 changed files with 530 additions and 339 deletions

View File

@@ -39,6 +39,16 @@ class CmdletArg:
requires_db: bool = False
"""Whether this argument requires the local DB/library root to be configured."""
# Query-mapping support:
# Some cmdlets use a unified `-query` string. When configured, individual args
# can be populated from fields inside `-query` (e.g., -query "hash:<sha256>").
query_key: Optional[str] = None
"""Field name inside -query that maps to this argument (e.g., 'hash')."""
query_aliases: List[str] = field(default_factory=list)
"""Additional field names inside -query that map to this argument."""
query_only: bool = False
"""When True, do not accept a dedicated CLI flag for this arg; only map from -query."""
def resolve(self, value: Any) -> Any:
"""Resolve/process the argument value using the handler if available.
@@ -95,6 +105,37 @@ class CmdletArg:
return tuple(flags)
def QueryArg(
    name: str,
    *,
    key: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    type: str = "string",
    required: bool = False,
    description: str = "",
    choices: Optional[Sequence[str]] = None,
    handler: Optional[Any] = None,
    query_only: bool = True,
) -> CmdletArg:
    """Build a CmdletArg whose value can come from fields inside `-query`.

    With the default query_only=True no dedicated CLI flag is created, which
    lets deprecated flags like `-hash` live on as documented, reusable
    `hash:` fields inside the unified `-query` string.
    """
    # Normalize the primary query field name; fall back to the arg name.
    raw_key = str(key or name).strip()
    # Clean alias candidates: drop blanks, lowercase the rest.
    normalized_aliases: List[str] = []
    for alias in aliases or []:
        cleaned = str(alias).strip()
        if cleaned:
            normalized_aliases.append(cleaned.lower())
    return CmdletArg(
        name=str(name),
        type=str(type or "string"),
        required=bool(required),
        description=str(description or ""),
        choices=list(choices or []),
        handler=handler,
        query_key=raw_key.lower() if raw_key else None,
        query_aliases=normalized_aliases,
        query_only=bool(query_only),
    )
# ============================================================================
# SHARED ARGUMENTS - Reusable argument definitions across cmdlet
# ============================================================================
@@ -127,6 +168,7 @@ class SharedArgs:
type="enum",
choices=[], # Dynamically populated via get_store_choices()
description="Selects store",
query_key="store",
)
PATH = CmdletArg(
@@ -497,6 +539,7 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
arg_specs: List[CmdletArg] = cmdlet_spec.arg
positional_args: List[CmdletArg] = [] # args without prefix in definition
flagged_args: List[CmdletArg] = [] # args with prefix in definition
query_mapped_args: List[CmdletArg] = []
arg_spec_map: Dict[str, str] = {} # prefix variant -> canonical name (without prefix)
@@ -504,9 +547,23 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
name = spec.name
if not name:
continue
# Track args that can be populated from -query.
try:
if getattr(spec, "query_key", None):
query_mapped_args.append(spec)
except Exception:
pass
name_str = str(name)
canonical_name = name_str.lstrip("-")
# Query-only args do not register dedicated flags/positionals.
try:
if bool(getattr(spec, "query_only", False)):
continue
except Exception:
pass
# Determine if this is positional (no dashes in original definition)
if "-" not in name_str:
@@ -592,7 +649,49 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
else:
# Unknown token, skip it
i += 1
# Populate query-mapped args from the unified -query string.
try:
raw_query = result.get("query")
except Exception:
raw_query = None
if query_mapped_args and raw_query is not None:
try:
from cli_syntax import parse_query as _parse_query
parsed_query = _parse_query(str(raw_query))
fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {}
norm_fields = {str(k).strip().lower(): v for k, v in fields.items()} if isinstance(fields, dict) else {}
except Exception:
norm_fields = {}
for spec in query_mapped_args:
canonical_name = str(getattr(spec, "name", "") or "").lstrip("-")
if not canonical_name:
continue
# Do not override explicit flags.
if canonical_name in result and result.get(canonical_name) not in (None, ""):
continue
try:
key = str(getattr(spec, "query_key", "") or "").strip().lower()
aliases = getattr(spec, "query_aliases", None)
alias_list = [str(a).strip().lower() for a in (aliases or []) if str(a).strip()]
except Exception:
key = ""
alias_list = []
candidates = [k for k in [key, canonical_name] + alias_list if k]
val = None
for k in candidates:
if k in norm_fields:
val = norm_fields.get(k)
break
if val is None:
continue
try:
result[canonical_name] = spec.resolve(val)
except Exception:
result[canonical_name] = val
return result

View File

@@ -703,7 +703,7 @@ class Add_File(Cmdlet):
continue
# No destination specified: keep legacy behavior (download-media only).
code = self._delegate_to_download_data(item, url_str, location, provider_name, args, config)
code = self._delegate_to_download_media(item, url_str, location, provider_name, args, config)
if code == 0:
successes += 1
else:
@@ -1509,7 +1509,7 @@ class Add_File(Cmdlet):
pass
return None
def _delegate_to_download_data(
def _delegate_to_download_media(
self,
result: Any,
url_str: str,

View File

@@ -12,6 +12,7 @@ from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
QueryArg = sh.QueryArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
@@ -26,11 +27,21 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note (-query \"title:<title>,text:<text>\") [ -store <store> -hash <sha256> | <piped> ]",
usage="add-note (-query \"title:<title>,text:<text>[,store:<store>][,hash:<sha256>]\") [ -store <store> | <piped> ]",
alias=[""],
arg=[
SharedArgs.STORE,
CmdletArg("hash", type="string", required=False, description="Target file hash (sha256). When omitted, uses piped item hash."),
QueryArg(
"hash",
key="hash",
aliases=["sha256"],
type="string",
required=False,
handler=normalize_hash,
description="(Optional) Specific file hash target, provided via -query as hash:<sha256>. When omitted, uses piped item hash.",
query_only=True,
),
SharedArgs.QUERY,
],
detail=[
@@ -141,7 +152,7 @@ class Add_Note(Cmdlet):
return 1
if hash_override and not store_override:
log("[add_note] Error: -hash requires -store <store>", file=sys.stderr)
log("[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>", file=sys.stderr)
return 1
explicit_target = bool(hash_override and store_override)
@@ -169,7 +180,7 @@ class Add_Note(Cmdlet):
# Allow standalone use (no piped input) and enable piping the target forward.
results = [{"store": str(store_override), "hash": hash_override}]
else:
log("[add_note] Error: Requires piped item(s) from add-file, or explicit -store <store> and -hash <sha256>", file=sys.stderr)
log("[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query \"store:<store> hash:<sha256> ...\")", file=sys.stderr)
return 1
store_registry = Store(config)

View File

@@ -375,7 +375,7 @@ CMDLET = Cmdlet(
summary="Remove relationships from files.",
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (legacy mode, if not piping a result)."),
SharedArgs.PATH,
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),

View File

@@ -1,267 +0,0 @@
"""Smart downloader front-door.
Currently focused on Internet Archive item pages:
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
- Selecting a row via @N expands to download-file <direct-url>
This enables:
search-provider -provider internetarchive "..."
@3 # shows formats table
@2 | add-file ... # downloads selected file then pipes to add-file
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Sequence, cast
from urllib.parse import quote
from SYS.logger import log, debug
import pipeline as pipeline_context
from result_table import ResultTable
from . import _shared as sh
Cmdlet = sh.Cmdlet
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
def _extract_ia_identifier(text: str) -> str:
s = str(text or "").strip()
if not s:
return ""
# https://archive.org/details/<identifier>
m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
if m:
return str(m.group(1) or "").strip()
# internetarchive:<identifier>
if s.lower().startswith("internetarchive:"):
return s.split(":", 1)[-1].strip()
return ""
class Download_Data(Cmdlet):
    """Cmdlet that lists downloadable files/formats for a provider item.

    Currently only Internet Archive item pages are supported: run() resolves
    an archive.org identifier from piped input or a URL, then
    _run_internetarchive() renders a selectable ResultTable whose rows
    expand to ``download-file <direct-url>`` when picked with @N.
    """

    def __init__(self) -> None:
        # Declare cmdlet metadata (name/usage/args) via the shared base class.
        super().__init__(
            name="download-data",
            summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
            usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
            alias=[],
            arg=[SharedArgs.URL],
            detail=[
                "For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
                "Select a file row with @N to run download-file on that direct URL.",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Resolve the target item/URL and dispatch to the provider handler.

        Returns 0 on success; 1 when the target is unsupported or lookup fails.
        Note: `config` is accepted for the cmdlet exec signature but not read here.
        """
        try:
            # parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
            parsed = parse_cmdlet_args(args, cast(Any, self))
        except Exception:
            parsed = {}
        raw_urls = parsed.get("url", [])
        if isinstance(raw_urls, str):
            raw_urls = [raw_urls]
        url_arg = str(raw_urls[0]).strip() if raw_urls else ""
        # Normalize piped input to a list (a single item or an item list).
        piped_items: List[Any] = []
        if isinstance(result, list):
            piped_items = list(result)
        elif result is not None:
            piped_items = [result]
        # Prefer piped item target if present.
        target = ""
        if piped_items:
            target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
        if not target:
            target = url_arg
        table_name = ""
        try:
            table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
        except Exception:
            table_name = ""
        # Identifier resolution order: full_metadata["identifier"] first, then URL parsing.
        identifier = ""
        if piped_items:
            md = get_field(piped_items[0], "full_metadata")
            if isinstance(md, dict):
                identifier = str(md.get("identifier") or "").strip()
        if not identifier:
            identifier = _extract_ia_identifier(target)
        if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
            return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)
        log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
        return 1

    @staticmethod
    def _run_internetarchive(item: Any, *, identifier: str) -> int:
        """Build and register a selectable file/format table for an IA item.

        Looks the item up via the lazily loaded `internetarchive` module,
        filters out metadata/housekeeping files, and registers the table with
        the pipeline so @N selection expands to `download-file <direct-url>`.
        Returns 0 on success, 1 on any lookup/availability failure.
        """
        try:
            from Provider.internetarchive import _ia as _ia_loader
        except Exception as exc:
            log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
            return 1

        def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
            # IA housekeeping entries (metadata/torrent/thumbnail) should not
            # appear in the download picker.
            try:
                source = str(f.get("source") or "").strip().lower()
                fmt = str(f.get("format") or "").strip().lower()
            except Exception:
                source = ""
                fmt = ""
            if source == "metadata":
                return True
            if fmt in {"metadata", "archive bittorrent"}:
                return True
            if fmt.startswith("thumbnail"):
                return True
            return False

        ia = None
        try:
            ia = _ia_loader()
        except Exception as exc:
            log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
            return 1
        try:
            get_item = getattr(ia, "get_item", None)
            if not callable(get_item):
                raise Exception("internetarchive.get_item is not available")
            ia_item = cast(Any, get_item(str(identifier)))
        except Exception as exc:
            log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
            return 1
        # Collect file dicts: prefer the item's prefetched `files` attribute.
        files: List[Dict[str, Any]] = []
        try:
            raw_files = getattr(ia_item, "files", None)
            if isinstance(raw_files, list):
                for f in raw_files:
                    if isinstance(f, dict):
                        files.append(f)
        except Exception:
            files = []
        if not files:
            # Fallback: iterate get_files(), which may yield objects or dicts.
            try:
                for f in ia_item.get_files():
                    name = getattr(f, "name", None)
                    if not name and isinstance(f, dict):
                        name = f.get("name")
                    if not name:
                        continue
                    files.append(
                        {
                            "name": str(name),
                            "size": getattr(f, "size", None),
                            "format": getattr(f, "format", None),
                            "source": getattr(f, "source", None),
                        }
                    )
            except Exception:
                files = []
        if not files:
            log("download-data: Internet Archive item has no files", file=sys.stderr)
            return 1
        # Prefer non-metadata files for the picker.
        candidates = [f for f in files if not _is_ia_metadata_file(f)]
        if not candidates:
            candidates = list(files)

        def _key(f: Dict[str, Any]) -> tuple[str, str]:
            # Sort rows by (format, name), case-insensitively.
            fmt = str(f.get("format") or "").strip().lower()
            name = str(f.get("name") or "").strip().lower()
            return (fmt, name)

        candidates.sort(key=_key)
        title = ""
        try:
            title = str(get_field(item, "title") or "").strip()
        except Exception:
            title = ""
        # Fall back to the raw identifier when the item carries no title.
        table_title = f"Internet Archive: {title}".strip().rstrip(":")
        if not title:
            table_title = f"Internet Archive: {identifier}".strip().rstrip(":")
        table = ResultTable(table_title).set_preserve_order(True)
        table.set_table("internetarchive.formats")
        # Selecting a row should expand to `download-file <direct-url>`.
        table.set_source_command("download-file", [])
        rows: List[Dict[str, Any]] = []
        for f in candidates:
            name = str(f.get("name") or "").strip()
            if not name:
                continue
            fmt = str(f.get("format") or "").strip()
            src = str(f.get("source") or "").strip()
            size_val: Any = f.get("size")
            try:
                size_val = int(size_val) if size_val not in (None, "") else ""
            except Exception:
                # Keep as-is; ResultTable will stringify.
                pass
            direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"
            row_item: Dict[str, Any] = {
                "table": "internetarchive",
                "title": fmt or name,
                "path": direct_url,
                "url": direct_url,
                "columns": [
                    ("Format", fmt),
                    ("Name", name),
                    ("Size", size_val),
                    ("Source", src),
                ],
                # Used by @N expansion: download-file <direct-url>
                "_selection_args": [direct_url],
                "full_metadata": {
                    "identifier": identifier,
                    "name": name,
                    "format": fmt,
                    "source": src,
                    "size": f.get("size"),
                },
            }
            rows.append(row_item)
            table.add_result(row_item)
        if not rows:
            log("download-data: no downloadable files found for this item", file=sys.stderr)
            return 1
        try:
            pipeline_context.set_last_result_table(table, rows, subject=item)
            pipeline_context.set_current_stage_table(table)
        except Exception as exc:
            debug(f"[download-data] Failed to register result table: {exc}")
        return 0


# Instantiated at import time; Download_Data.__init__ calls self.register().
CMDLET = Download_Data()

View File

@@ -47,7 +47,11 @@ class Download_File(Cmdlet):
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
],
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
"For streaming sites, use download-media.",
"For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.",
],
exec=self.run,
)
self.register()
@@ -121,6 +125,7 @@ class Download_File(Cmdlet):
"match_provider_name_for_url": _match_provider_name_for_url,
"SearchResult": _SearchResult,
}
except Exception:
return {
"get_search_provider": None,
@@ -129,6 +134,154 @@ class Download_File(Cmdlet):
"SearchResult": None,
}
@staticmethod
def _maybe_show_internetarchive_formats(
    *,
    raw_urls: Sequence[str],
    piped_items: Sequence[Any],
    parsed: Dict[str, Any],
    config: Dict[str, Any],
    quiet_mode: bool,
) -> Optional[int]:
    """If the input is an IA item page, show a selectable formats table.

    Returns an exit code when handled; otherwise None (caller proceeds with
    the normal download path). Only engages for exactly one input that is an
    archive.org details page; direct download URLs are left alone so they
    download immediately. Note: `config` is accepted but not read here.
    """
    # Interactive picker makes no sense in quiet/background mode.
    if quiet_mode:
        return None
    try:
        total_inputs = int(len(raw_urls or []) + len(piped_items or []))
    except Exception:
        total_inputs = 0
    # Only engage for a single target; batches should download directly.
    if total_inputs != 1:
        return None
    item = piped_items[0] if piped_items else None
    # Target resolution: piped item's path/url first, then the raw URL arg.
    target = ""
    if item is not None:
        try:
            target = str(get_field(item, "path") or get_field(item, "url") or "").strip()
        except Exception:
            target = ""
    if not target and raw_urls:
        target = str(raw_urls[0]).strip()
    if not target:
        return None
    try:
        from Provider import internetarchive as ia
    except Exception:
        # Provider module unavailable: silently fall through to normal download.
        return None
    # Identifier resolution: full_metadata["identifier"], then URL parsing.
    identifier = ""
    try:
        md = get_field(item, "full_metadata") if item is not None else None
        if isinstance(md, dict):
            identifier = str(md.get("identifier") or "").strip()
    except Exception:
        identifier = ""
    if not identifier:
        try:
            identifier = str(ia.extract_identifier(target) or "").strip()
        except Exception:
            identifier = ""
    if not identifier:
        return None
    # Only show picker for item pages (details); direct download URLs should download immediately.
    try:
        if not ia.is_details_url(target):
            return None
    except Exception:
        return None
    try:
        files = ia.list_download_files(identifier)
    except Exception as exc:
        log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr)
        return 1
    if not files:
        log("download-file: Internet Archive item has no downloadable files", file=sys.stderr)
        return 1
    title = ""
    try:
        title = str(get_field(item, "title") or "").strip() if item is not None else ""
    except Exception:
        title = ""
    # Fall back to the identifier when the item has no usable title.
    table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
    try:
        from result_table import ResultTable
    except Exception as exc:
        log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr)
        return 1
    # Preserve any output-dir option so @N re-invocation uses the same -path.
    base_args: List[str] = []
    out_arg = parsed.get("path") or parsed.get("output")
    if out_arg:
        base_args.extend(["-path", str(out_arg)])
    table = ResultTable(table_title).set_preserve_order(True)
    table.set_table("internetarchive.formats")
    table.set_source_command("download-file", base_args)
    rows: List[Dict[str, Any]] = []
    for f in files:
        name = str(f.get("name") or "").strip()
        if not name:
            continue
        fmt = str(f.get("format") or "").strip()
        src = str(f.get("source") or "").strip()
        # Rows without a direct URL cannot be downloaded; skip them.
        direct_url = str(f.get("direct_url") or "").strip()
        if not direct_url:
            continue
        size_val: Any = f.get("size")
        try:
            size_val = int(size_val) if size_val not in (None, "") else ""
        except Exception:
            # Keep non-numeric sizes as-is; the table will stringify.
            pass
        row_item: Dict[str, Any] = {
            "table": "internetarchive",
            "title": fmt or name,
            "path": direct_url,
            "url": direct_url,
            "columns": [
                ("Format", fmt),
                ("Name", name),
                ("Size", size_val),
                ("Source", src),
            ],
            # Used by @N expansion: download-file <direct-url>
            "_selection_args": [direct_url],
            "full_metadata": {
                "identifier": identifier,
                "name": name,
                "format": fmt,
                "source": src,
                "size": f.get("size"),
            },
        }
        rows.append(row_item)
        table.add_result(row_item)
    if not rows:
        log("download-file: no downloadable files found for this item", file=sys.stderr)
        return 1
    try:
        pipeline_context.set_last_result_table(table, rows, subject=item)
        pipeline_context.set_current_stage_table(table)
    except Exception:
        # Best-effort registration; the prompt below still informs the user.
        pass
    log("Internet Archive item detected: select a file with @N to download", file=sys.stderr)
    return 0
@staticmethod
def _openlibrary_edition_id_from_url(u: str) -> str:
try:
@@ -284,11 +437,11 @@ class Download_File(Cmdlet):
post = None
title_hint = None
tags_hint: List[str] = []
tg_tags: List[str] = []
if channel:
tags_hint.append(f"channel:{channel}")
tg_tags.append(f"channel:{channel}")
if post is not None:
tags_hint.append(f"post:{post}")
tg_tags.append(f"post:{post}")
if channel and post is not None:
title_hint = f"{channel} {post}"
elif post is not None:
@@ -300,7 +453,7 @@ class Download_File(Cmdlet):
downloaded_path=downloaded_path,
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=tg_tags,
media_kind_hint="file",
full_metadata=telegram_info,
provider_hint="telegram",
@@ -481,14 +634,15 @@ class Download_File(Cmdlet):
# Otherwise, try provider.download(SearchResult) with the URL as the target.
if provider is not None:
sr_obj = None
try:
sr = SearchResult(
sr_obj = SearchResult(
table=str(provider_name),
title=str(url),
path=str(url),
full_metadata={},
)
downloaded_path = provider.download(sr, final_output_dir) # type: ignore[call-arg]
downloaded_path = provider.download(sr_obj, final_output_dir) # type: ignore[call-arg]
except Exception:
downloaded_path = None
@@ -498,24 +652,25 @@ class Download_File(Cmdlet):
raise DownloadError("LibGen URL did not resolve to a downloadable file")
if downloaded_path:
tags_hint: Optional[List[str]] = None
emit_tags: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name).lower() == "libgen":
if str(provider_name).lower() == "libgen" and sr_obj is not None:
media_kind_hint = "book"
try:
sr_tags = getattr(sr, "tag", None)
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
emit_tags = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
emit_tags = None
try:
if isinstance(getattr(sr, "full_metadata", None), dict):
full_md = sr.full_metadata
t = str(full_md.get("title") or "").strip()
sr_full_md = getattr(sr_obj, "full_metadata", None)
if isinstance(sr_full_md, dict):
full_md = sr_full_md
t = str(sr_full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
@@ -525,7 +680,7 @@ class Download_File(Cmdlet):
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=emit_tags,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
@@ -802,6 +957,17 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
quiet_mode=quiet_mode,
)
if ia_picker_exit is not None:
return int(ia_picker_exit)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
if not final_output_dir:
@@ -817,7 +983,6 @@ class Download_File(Cmdlet):
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
registry = self._load_provider_registry()
downloaded_count = 0

View File

@@ -30,9 +30,7 @@ CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-query \"hash:<sha256>\"]",
alias=[
"get-rel",
],
alias=[],
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,

View File

@@ -1054,7 +1054,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
usage="screen-shot <url> [options]",
alias=["screenshot", "ss"],
arg=[
SharedArgs.URL,

View File

@@ -65,7 +65,7 @@ class Search_Provider(Cmdlet):
"- internetarchive: Archive.org advancedsearch query syntax",
"",
"Results can be piped to other cmdlet:",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-data",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-file",
],
exec=self.run
)