Route Internet Archive downloads through download-file, retiring the download-data picker; add QueryArg so -query fields can populate cmdlet args

commit 8288ea8c66 (parent fcdd507d00)
2025-12-27 21:24:27 -08:00
16 changed files with 530 additions and 339 deletions

CLI.py
View File

@@ -960,6 +960,8 @@ class CmdletExecutor:
"search_file": "Results",
"download-data": "Downloads",
"download_data": "Downloads",
"download-file": "Downloads",
"download_file": "Downloads",
"get-tag": "Tags",
"get_tag": "Tags",
"get-file": "Results",
@@ -1329,9 +1331,11 @@ class CmdletExecutor:
"search-file",
"download-data",
"download-media",
"download-file",
"search_file",
"download_data",
"download_media",
"download_file",
".config",
".worker",
}
@@ -2043,8 +2047,8 @@ class PipelineExecutor:
print("Auto-running Bandcamp selection via download-media")
stages.append(["download-media"])
elif table_type == "internetarchive":
print("Auto-loading Internet Archive item via download-data")
stages.append(["download-data"])
print("Auto-loading Internet Archive item via download-file")
stages.append(["download-file"])
elif table_type in {"soulseek", "openlibrary", "libgen"}:
print("Auto-piping selection to download-file")
stages.append(["download-file"])
@@ -2075,15 +2079,13 @@ class PipelineExecutor:
print("Auto-inserting download-media after Bandcamp selection")
stages.insert(0, ["download-media"])
if table_type == "internetarchive" and first_cmd not in (
"download-data",
"download_data",
"download-file",
"download-media",
"download_media",
".pipe",
):
debug("Auto-inserting download-data after Internet Archive selection")
stages.insert(0, ["download-data"])
debug("Auto-inserting download-file after Internet Archive selection")
stages.insert(0, ["download-file"])
if table_type == "libgen" and first_cmd not in (
"download-file",
"download-media",

View File

@@ -7,6 +7,8 @@ import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlparse
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.logger import log
@@ -71,6 +73,143 @@ def _extract_identifier_from_any(value: str) -> str:
return raw
def extract_identifier(value: str) -> str:
"""Public wrapper for extracting an IA identifier from URLs/tags/bare ids."""
return _extract_identifier_from_any(value)
def is_details_url(url: str) -> bool:
raw = str(url or "").strip()
if not raw:
return False
if not (raw.startswith("http://") or raw.startswith("https://")):
return False
try:
p = urlparse(raw)
host = (p.hostname or "").lower().strip()
parts = [x for x in (p.path or "").split("/") if x]
except Exception:
return False
if not host.endswith("archive.org"):
return False
return len(parts) >= 2 and parts[0].lower() == "details" and bool(parts[1].strip())
def is_download_file_url(url: str) -> bool:
raw = str(url or "").strip()
if not raw:
return False
if not (raw.startswith("http://") or raw.startswith("https://")):
return False
try:
p = urlparse(raw)
host = (p.hostname or "").lower().strip()
parts = [x for x in (p.path or "").split("/") if x]
except Exception:
return False
if not host.endswith("archive.org"):
return False
# /download/<identifier>/<filename>
return len(parts) >= 3 and parts[0].lower() == "download" and bool(parts[1].strip()) and bool(parts[2].strip())
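Together, these two predicates split archive.org URLs into item pages (/details/<identifier>) and direct file links (/download/<identifier>/<filename>), which is how download-file later decides between showing a picker and downloading immediately. A minimal sketch of the expected behavior; the sample URLs are illustrative, not taken from this commit:
assert is_details_url("https://archive.org/details/some-item")
assert not is_details_url("https://archive.org/download/some-item/file.pdf")
assert is_download_file_url("https://archive.org/download/some-item/file.pdf")
# Non-archive.org hosts are rejected by both checks.
assert not is_download_file_url("https://example.com/download/some-item/file.pdf")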
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
"""Return a sorted list of downloadable files for an IA identifier.
Each entry includes: name, size, format, source, direct_url.
"""
ident = str(identifier or "").strip()
if not ident:
return []
ia = _ia()
get_item = getattr(ia, "get_item", None)
if not callable(get_item):
raise Exception("internetarchive.get_item is not available")
try:
item: Any = get_item(str(ident))
except Exception as exc:
raise Exception(f"Internet Archive item lookup failed: {exc}")
files: List[Dict[str, Any]] = []
try:
raw_files = getattr(item, "files", None)
if isinstance(raw_files, list):
for f in raw_files:
if isinstance(f, dict):
files.append(f)
except Exception:
files = []
if not files:
try:
for f in item.get_files():
name = getattr(f, "name", None)
if not name and isinstance(f, dict):
name = f.get("name")
if not name:
continue
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
}
)
except Exception:
files = []
if not files:
return []
def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
try:
source = str(f.get("source") or "").strip().lower()
fmt = str(f.get("format") or "").strip().lower()
except Exception:
source = ""
fmt = ""
if source == "metadata":
return True
if fmt in {"metadata", "archive bittorrent"}:
return True
if fmt.startswith("thumbnail"):
return True
return False
candidates = [f for f in files if isinstance(f, dict) and not _is_ia_metadata_file(f)]
if not candidates:
candidates = [f for f in files if isinstance(f, dict)]
out: List[Dict[str, Any]] = []
for f in candidates:
name = str(f.get("name") or "").strip()
if not name:
continue
direct_url = f"https://archive.org/download/{ident}/{quote(name, safe='')}"
out.append(
{
"name": name,
"size": f.get("size"),
"format": f.get("format"),
"source": f.get("source"),
"direct_url": direct_url,
}
)
def _key(f: Dict[str, Any]) -> tuple[str, str]:
fmt = str(f.get("format") or "").strip().lower()
name = str(f.get("name") or "").strip().lower()
return (fmt, name)
out.sort(key=_key)
return out
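list_download_files is the reusable core behind the new picker: it filters out IA bookkeeping entries (source == "metadata", "Archive BitTorrent", thumbnails) and returns rows that already carry a direct_url. A hedged usage sketch; the identifier is a placeholder and the call performs a live lookup through the internetarchive package:
for f in list_download_files("some-identifier"):
    print(f["format"], f["name"], f["size"], f["direct_url"])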
def _extract_download_filename_from_url(url: str) -> str:
raw = str(url or "").strip()
if not raw:

View File

@@ -29,13 +29,13 @@ class PipelinePreset:
PIPELINE_PRESETS: List[PipelinePreset] = [
PipelinePreset(
label="Download → Merge → Local",
description="Use download-data with playlist auto-selection, merge the pieces, tag, then import into local storage.",
pipeline='download-data "<url>" | merge-file | add-tags -store local | add-file -storage local',
description="Use download-media with playlist auto-selection, merge the pieces, tag, then import into local storage.",
pipeline='download-media "<url>" | merge-file | add-tags -store local | add-file -storage local',
),
PipelinePreset(
label="Download → Hydrus",
description="Fetch media, auto-tag, and push directly into Hydrus.",
pipeline='download-data "<url>" | merge-file | add-tags -store hydrus | add-file -storage hydrus',
pipeline='download-media "<url>" | merge-file | add-tags -store hydrus | add-file -storage hydrus',
),
PipelinePreset(
label="Search Local Library",

View File

@@ -363,23 +363,24 @@ class DownloadModal(ModalScreen):
self.app.call_from_thread(self._hide_progress)
return
# Stage 1: Download data if enabled
# Stage 1: Download if enabled
download_succeeded = False
download_stderr_text = "" # Store for merge stage
if download_enabled:
download_cmdlet = get_cmdlet("download-data")
download_cmdlet_name = "download-media" if self.is_playlist else "download-file"
download_cmdlet = get_cmdlet(download_cmdlet_name)
if download_cmdlet:
logger.info("📥 Executing download-data stage")
logger.info(f"📥 Executing {download_cmdlet_name} stage")
logger.info(f"download_cmdlet object: {download_cmdlet}")
logger.info(f"result_obj: {result_obj}")
# Log step to worker
if worker:
worker.log_step("Starting download-data stage...")
worker.log_step(f"Starting {download_cmdlet_name} stage...")
# Build arguments for download-data
# Build arguments for download-media (yt-dlp) playlists; download-file takes no yt-dlp args.
cmdlet_args = []
if self.is_playlist:
if download_cmdlet_name == "download-media" and self.is_playlist:
# Always use yt-dlp's native --playlist-items for playlists
if playlist_selection:
# User provided specific selection
@@ -415,7 +416,10 @@ class DownloadModal(ModalScreen):
try:
with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
logger.info(f"Calling download_cmdlet...")
returncode = download_cmdlet(result_obj, cmdlet_args, self.config)
cmd_config = dict(self.config) if isinstance(self.config, dict) else self.config
if isinstance(cmd_config, dict):
cmd_config["_quiet_background_output"] = True
returncode = download_cmdlet(result_obj, cmdlet_args, cmd_config)
logger.info(f"download_cmdlet returned: {returncode}")
except Exception as cmdlet_error:
# If cmdlet throws an exception, log it
@@ -437,17 +441,17 @@ class DownloadModal(ModalScreen):
# Always append output to worker for debugging
if worker:
if stdout_text:
worker.append_stdout(f"[download-data stdout]\n{stdout_text}\n")
worker.append_stdout(f"[{download_cmdlet_name} stdout]\n{stdout_text}\n")
if stderr_text:
worker.append_stdout(f"[download-data stderr]\n{stderr_text}\n")
worker.append_stdout(f"[{download_cmdlet_name} stderr]\n{stderr_text}\n")
# Log the output so it gets captured by WorkerLoggingHandler
if stdout_text:
logger.info(f"[download-data output]\n{stdout_text}")
logger.info(f"[{download_cmdlet_name} output]\n{stdout_text}")
if stderr_text:
logger.info(f"[download-data stderr]\n{stderr_text}")
logger.info(f"[{download_cmdlet_name} stderr]\n{stderr_text}")
if returncode != 0:
download_failed_msg = f"❌ download-data stage failed with code {returncode}\nstdout: {stdout_text}\nstderr: {stderr_text}"
download_failed_msg = f"{download_cmdlet_name} stage failed with code {returncode}\nstdout: {stdout_text}\nstderr: {stderr_text}"
logger.error(download_failed_msg)
if worker:
worker.append_stdout(f"\n{download_failed_msg}\n")
@@ -545,11 +549,11 @@ class DownloadModal(ModalScreen):
else:
download_succeeded = True
# Always log output at INFO level so we can see what happened
logger.info(f"download-data stage completed successfully")
logger.info(f"{download_cmdlet_name} stage completed successfully")
if stdout_text:
logger.info(f"download-data stdout:\n{stdout_text}")
logger.info(f"{download_cmdlet_name} stdout:\n{stdout_text}")
if stderr_text:
logger.info(f"download-data stderr:\n{stderr_text}")
logger.info(f"{download_cmdlet_name} stderr:\n{stderr_text}")
# Log step to worker
if worker:
@@ -630,9 +634,9 @@ class DownloadModal(ModalScreen):
logger.info(f"Merge enabled - will merge {len(downloaded_files)} files before tagging")
download_stderr_text = f"DOWNLOADED_FILES:{','.join(downloaded_files)}\n" + download_stderr_text
logger.info("download-data stage completed successfully")
logger.info(f"{download_cmdlet_name} stage completed successfully")
except Exception as e:
logger.error(f"download-data execution error: {e}", exc_info=True)
logger.error(f"{download_cmdlet_name} execution error: {e}", exc_info=True)
self.app.call_from_thread(
self.app.notify,
f"Download error: {e}",
@@ -1577,9 +1581,10 @@ class DownloadModal(ModalScreen):
# Stage 1: Download data if enabled
if download_enabled:
download_cmdlet = get_cmdlet("download-data")
download_cmdlet_name = "download-file"
download_cmdlet = get_cmdlet(download_cmdlet_name)
if download_cmdlet:
stage_msg = "📥 Executing download-data stage"
stage_msg = f"📥 Executing {download_cmdlet_name} stage"
logger.info(stage_msg)
if worker:
worker.append_stdout(f"{stage_msg}\n")
@@ -1591,23 +1596,26 @@ class DownloadModal(ModalScreen):
stderr_buf = io.StringIO()
with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
returncode = download_cmdlet(result_obj, [], self.config)
cmd_config = dict(self.config) if isinstance(self.config, dict) else self.config
if isinstance(cmd_config, dict):
cmd_config["_quiet_background_output"] = True
returncode = download_cmdlet(result_obj, [], cmd_config)
stdout_text = stdout_buf.getvalue()
stderr_text = stderr_buf.getvalue()
if stdout_text:
logger.debug(f"download-data stdout: {stdout_text}")
logger.debug(f"{download_cmdlet_name} stdout: {stdout_text}")
if worker:
worker.append_stdout(stdout_text)
if stderr_text:
logger.debug(f"download-data stderr: {stderr_text}")
logger.debug(f"{download_cmdlet_name} stderr: {stderr_text}")
if worker:
worker.append_stdout(f"⚠️ stderr: {stderr_text}\n")
if returncode != 0:
error_msg = f"❌ download-data stage failed with code {returncode}\nstderr: {stderr_text}"
error_msg = f"{download_cmdlet_name} stage failed with code {returncode}\nstderr: {stderr_text}"
logger.error(error_msg)
if worker:
worker.append_stdout(f"{error_msg}\n")
@@ -1619,12 +1627,12 @@ class DownloadModal(ModalScreen):
)
return
else:
success_msg = "download-data completed successfully"
success_msg = f"{download_cmdlet_name} completed successfully"
logger.info(success_msg)
if worker:
worker.append_stdout(f"{success_msg}\n")
except Exception as e:
error_msg = f"❌ download-data error: {e}"
error_msg = f"{download_cmdlet_name} error: {e}"
logger.error(error_msg, exc_info=True)
if worker:
worker.append_stdout(f"{error_msg}\nTraceback:\n{__import__('traceback').format_exc()}\n")

View File

@@ -86,6 +86,29 @@ def _has_flag(tokens: list[str], *flags: str) -> bool:
return False
def _get_flag_value(tokens: list[str], *flags: str) -> Optional[str]:
"""Return the value for a flag from tokenized args.
Supports:
- -flag value
- --flag value
- -flag=value
- --flag=value
"""
want = {str(f).strip().lower() for f in flags if str(f).strip()}
if not want:
return None
for idx, tok in enumerate(tokens):
low = str(tok).strip().lower()
if "=" in low:
head, val = low.split("=", 1)
if head.strip() in want:
return tok.split("=", 1)[1]
if low in want and idx + 1 < len(tokens):
return tokens[idx + 1]
return None
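All four accepted spellings resolve to the same value, and for the = form the value is taken from the original (non-lowercased) token. A quick sketch, assuming the tokenizer has already kept each quoted argument as a single token:
tokens = ["add-note", "-query", "store:local hash:abc title:Note,text:Hi"]
assert _get_flag_value(tokens, "-query", "--query") == "store:local hash:abc title:Note,text:Hi"
assert _get_flag_value(["--query=store:local"], "-query", "--query") == "store:local"
assert _get_flag_value(["-other", "x"], "-query") is None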
def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxErrorDetail]:
"""Enforce: add-note in piped mode must occur after add-file.
@@ -116,12 +139,25 @@ def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxError
if any(pos > i for pos in add_file_positions):
has_hash = _has_flag(tokens, "-hash", "--hash")
has_store = _has_flag(tokens, "-store", "--store")
if has_hash and has_store:
# Also accept explicit targeting via -query "store:<store> hash:<sha256> ...".
query_val = _get_flag_value(tokens, "-query", "--query")
has_store_hash_in_query = False
if query_val:
try:
parsed_q = parse_query(str(query_val))
q_hash = get_field(parsed_q, "hash") or get_field(parsed_q, "sha256")
q_store = get_field(parsed_q, "store")
has_store_hash_in_query = bool(str(q_hash or "").strip() and str(q_store or "").strip())
except Exception:
has_store_hash_in_query = False
if (has_hash and has_store) or has_store_hash_in_query:
continue
return SyntaxErrorDetail(
"Pipeline error: 'add-note' must come after 'add-file' when used with piped input. "
"Move 'add-note' after 'add-file', or call it with explicit targeting: "
"add-note -store <store> -hash <sha256> -query \"title:<title>,text:<text>\"."
"add-note -query \"store:<store> hash:<sha256> title:<title>,text:<text>\"."
)
return None
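Under the relaxed rule, either the -hash/-store flag pair or store:/hash: fields inside -query count as explicit targeting. A hedged sketch of the validator's contract; the hash value is a placeholder and this assumes the raw pipeline string is tokenized as the surrounding code suggests:
# add-note before add-file, but explicitly targeted via -query: accepted.
assert _validate_add_note_requires_add_file_order(
    'add-note -query "store:local hash:abc title:Note,text:Hi" | add-file -storage local'
) is None
# add-note before add-file with no store/hash targeting: rejected.
assert _validate_add_note_requires_add_file_order(
    'add-note -query "title:Note,text:Hi" | add-file -storage local'
) is not None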

View File

@@ -39,6 +39,16 @@ class CmdletArg:
requires_db: bool = False
"""Whether this argument requires the local DB/library root to be configured."""
# Query-mapping support:
# Some cmdlets use a unified `-query` string. When configured, individual args
# can be populated from fields inside `-query` (e.g., -query "hash:<sha256>").
query_key: Optional[str] = None
"""Field name inside -query that maps to this argument (e.g., 'hash')."""
query_aliases: List[str] = field(default_factory=list)
"""Additional field names inside -query that map to this argument."""
query_only: bool = False
"""When True, do not accept a dedicated CLI flag for this arg; only map from -query."""
def resolve(self, value: Any) -> Any:
"""Resolve/process the argument value using the handler if available.
@@ -95,6 +105,37 @@ class CmdletArg:
return tuple(flags)
def QueryArg(
name: str,
*,
key: Optional[str] = None,
aliases: Optional[Sequence[str]] = None,
type: str = "string",
required: bool = False,
description: str = "",
choices: Optional[Sequence[str]] = None,
handler: Optional[Any] = None,
query_only: bool = True,
) -> CmdletArg:
"""Create an argument that can be populated from `-query` fields.
By default, this does NOT create a dedicated flag (query_only=True). This is
useful for deprecating bloat flags like `-hash` while still making `hash:` a
first-class, documented, reusable field.
"""
return CmdletArg(
name=str(name),
type=str(type or "string"),
required=bool(required),
description=str(description or ""),
choices=list(choices or []),
handler=handler,
query_key=str(key or name).strip().lower() if str(key or name).strip() else None,
query_aliases=[str(a).strip().lower() for a in (aliases or []) if str(a).strip()],
query_only=bool(query_only),
)
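For example, the add-note cmdlet further down in this commit declares its hash target this way; a trimmed sketch of the factory in use:
hash_arg = QueryArg(
    "hash",
    key="hash",
    aliases=["sha256"],
    description="Target file hash, supplied via -query as hash:<sha256>.",
)
# query_only defaults to True, so no standalone -hash flag is registered.
assert hash_arg.query_only and hash_arg.query_key == "hash"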
# ============================================================================
# SHARED ARGUMENTS - Reusable argument definitions across cmdlet
# ============================================================================
@@ -127,6 +168,7 @@ class SharedArgs:
type="enum",
choices=[], # Dynamically populated via get_store_choices()
description="Selects store",
query_key="store",
)
PATH = CmdletArg(
@@ -497,6 +539,7 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
arg_specs: List[CmdletArg] = cmdlet_spec.arg
positional_args: List[CmdletArg] = [] # args without prefix in definition
flagged_args: List[CmdletArg] = [] # args with prefix in definition
query_mapped_args: List[CmdletArg] = []
arg_spec_map: Dict[str, str] = {} # prefix variant -> canonical name (without prefix)
@@ -504,9 +547,23 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
name = spec.name
if not name:
continue
# Track args that can be populated from -query.
try:
if getattr(spec, "query_key", None):
query_mapped_args.append(spec)
except Exception:
pass
name_str = str(name)
canonical_name = name_str.lstrip("-")
# Query-only args do not register dedicated flags/positionals.
try:
if bool(getattr(spec, "query_only", False)):
continue
except Exception:
pass
# Determine if this is positional (no dashes in original definition)
if "-" not in name_str:
@@ -592,7 +649,49 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
else:
# Unknown token, skip it
i += 1
# Populate query-mapped args from the unified -query string.
try:
raw_query = result.get("query")
except Exception:
raw_query = None
if query_mapped_args and raw_query is not None:
try:
from cli_syntax import parse_query as _parse_query
parsed_query = _parse_query(str(raw_query))
fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {}
norm_fields = {str(k).strip().lower(): v for k, v in fields.items()} if isinstance(fields, dict) else {}
except Exception:
norm_fields = {}
for spec in query_mapped_args:
canonical_name = str(getattr(spec, "name", "") or "").lstrip("-")
if not canonical_name:
continue
# Do not override explicit flags.
if canonical_name in result and result.get(canonical_name) not in (None, ""):
continue
try:
key = str(getattr(spec, "query_key", "") or "").strip().lower()
aliases = getattr(spec, "query_aliases", None)
alias_list = [str(a).strip().lower() for a in (aliases or []) if str(a).strip()]
except Exception:
key = ""
alias_list = []
candidates = [k for k in [key, canonical_name] + alias_list if k]
val = None
for k in candidates:
if k in norm_fields:
val = norm_fields.get(k)
break
if val is None:
continue
try:
result[canonical_name] = spec.resolve(val)
except Exception:
result[canonical_name] = val
return result
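End to end, a cmdlet that declares a query-mapped hash argument can now receive it folded into the unified -query string. An illustrative sketch; the spec object and values are placeholders, not code from this commit:
# spec: a Cmdlet whose arg list includes SharedArgs.QUERY and the
# QueryArg("hash", ...) shown above.
parsed = parse_cmdlet_args(["-query", "store:local hash:abc123 title:Note,text:Hi"], spec)
# "hash" is lifted out of -query into its own slot (via spec.resolve, so a
# normalize_hash handler applies), unless an explicit flag already set it.
print(parsed.get("hash"))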

View File

@@ -703,7 +703,7 @@ class Add_File(Cmdlet):
continue
# No destination specified: keep legacy behavior (download-media only).
code = self._delegate_to_download_data(item, url_str, location, provider_name, args, config)
code = self._delegate_to_download_media(item, url_str, location, provider_name, args, config)
if code == 0:
successes += 1
else:
@@ -1509,7 +1509,7 @@ class Add_File(Cmdlet):
pass
return None
def _delegate_to_download_data(
def _delegate_to_download_media(
self,
result: Any,
url_str: str,

View File

@@ -12,6 +12,7 @@ from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
QueryArg = sh.QueryArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
@@ -26,11 +27,21 @@ class Add_Note(Cmdlet):
super().__init__(
name="add-note",
summary="Add file store note",
usage="add-note (-query \"title:<title>,text:<text>\") [ -store <store> -hash <sha256> | <piped> ]",
usage="add-note (-query \"title:<title>,text:<text>[,store:<store>][,hash:<sha256>]\") [ -store <store> | <piped> ]",
alias=[""],
arg=[
SharedArgs.STORE,
CmdletArg("hash", type="string", required=False, description="Target file hash (sha256). When omitted, uses piped item hash."),
QueryArg(
"hash",
key="hash",
aliases=["sha256"],
type="string",
required=False,
handler=normalize_hash,
description="(Optional) Specific file hash target, provided via -query as hash:<sha256>. When omitted, uses piped item hash.",
query_only=True,
),
SharedArgs.QUERY,
],
detail=[
@@ -141,7 +152,7 @@ class Add_Note(Cmdlet):
return 1
if hash_override and not store_override:
log("[add_note] Error: -hash requires -store <store>", file=sys.stderr)
log("[add_note] Error: hash:<sha256> requires store:<store> in -query or -store <store>", file=sys.stderr)
return 1
explicit_target = bool(hash_override and store_override)
@@ -169,7 +180,7 @@ class Add_Note(Cmdlet):
# Allow standalone use (no piped input) and enable piping the target forward.
results = [{"store": str(store_override), "hash": hash_override}]
else:
log("[add_note] Error: Requires piped item(s) from add-file, or explicit -store <store> and -hash <sha256>", file=sys.stderr)
log("[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query \"store:<store> hash:<sha256> ...\")", file=sys.stderr)
return 1
store_registry = Store(config)
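With hash addressable through -query, the usage string above corresponds to invocations like the following (store names and hashes are placeholders):
@1 | add-note -query "title:Note,text:Hello"                       # target taken from the piped item
add-note -query "store:local hash:<sha256> title:Note,text:Hello"  # explicit target, no pipe needed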

View File

@@ -375,7 +375,7 @@ CMDLET = Cmdlet(
summary="Remove relationships from files.",
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
arg=[
CmdletArg("path", type="string", description="Specify the local file path (legacy mode, if not piping a result)."),
SharedArgs.PATH,
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),

View File

@@ -1,267 +0,0 @@
"""Smart downloader front-door.
Currently focused on Internet Archive item pages:
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
- Selecting a row via @N expands to download-file <direct-url>
This enables:
search-provider -provider internetarchive "..."
@3 # shows formats table
@2 | add-file ... # downloads selected file then pipes to add-file
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Sequence, cast
from urllib.parse import quote
from SYS.logger import log, debug
import pipeline as pipeline_context
from result_table import ResultTable
from . import _shared as sh
Cmdlet = sh.Cmdlet
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
def _extract_ia_identifier(text: str) -> str:
s = str(text or "").strip()
if not s:
return ""
# https://archive.org/details/<identifier>
m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
if m:
return str(m.group(1) or "").strip()
# internetarchive:<identifier>
if s.lower().startswith("internetarchive:"):
return s.split(":", 1)[-1].strip()
return ""
class Download_Data(Cmdlet):
def __init__(self) -> None:
super().__init__(
name="download-data",
summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
alias=[],
arg=[SharedArgs.URL],
detail=[
"For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
"Select a file row with @N to run download-file on that direct URL.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
# parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
parsed = parse_cmdlet_args(args, cast(Any, self))
except Exception:
parsed = {}
raw_urls = parsed.get("url", [])
if isinstance(raw_urls, str):
raw_urls = [raw_urls]
url_arg = str(raw_urls[0]).strip() if raw_urls else ""
piped_items: List[Any] = []
if isinstance(result, list):
piped_items = list(result)
elif result is not None:
piped_items = [result]
# Prefer piped item target if present.
target = ""
if piped_items:
target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
if not target:
target = url_arg
table_name = ""
try:
table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
except Exception:
table_name = ""
identifier = ""
if piped_items:
md = get_field(piped_items[0], "full_metadata")
if isinstance(md, dict):
identifier = str(md.get("identifier") or "").strip()
if not identifier:
identifier = _extract_ia_identifier(target)
if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)
log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
return 1
@staticmethod
def _run_internetarchive(item: Any, *, identifier: str) -> int:
try:
from Provider.internetarchive import _ia as _ia_loader
except Exception as exc:
log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
return 1
def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
try:
source = str(f.get("source") or "").strip().lower()
fmt = str(f.get("format") or "").strip().lower()
except Exception:
source = ""
fmt = ""
if source == "metadata":
return True
if fmt in {"metadata", "archive bittorrent"}:
return True
if fmt.startswith("thumbnail"):
return True
return False
ia = None
try:
ia = _ia_loader()
except Exception as exc:
log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
return 1
try:
get_item = getattr(ia, "get_item", None)
if not callable(get_item):
raise Exception("internetarchive.get_item is not available")
ia_item = cast(Any, get_item(str(identifier)))
except Exception as exc:
log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
return 1
files: List[Dict[str, Any]] = []
try:
raw_files = getattr(ia_item, "files", None)
if isinstance(raw_files, list):
for f in raw_files:
if isinstance(f, dict):
files.append(f)
except Exception:
files = []
if not files:
try:
for f in ia_item.get_files():
name = getattr(f, "name", None)
if not name and isinstance(f, dict):
name = f.get("name")
if not name:
continue
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
}
)
except Exception:
files = []
if not files:
log("download-data: Internet Archive item has no files", file=sys.stderr)
return 1
# Prefer non-metadata files for the picker.
candidates = [f for f in files if not _is_ia_metadata_file(f)]
if not candidates:
candidates = list(files)
def _key(f: Dict[str, Any]) -> tuple[str, str]:
fmt = str(f.get("format") or "").strip().lower()
name = str(f.get("name") or "").strip().lower()
return (fmt, name)
candidates.sort(key=_key)
title = ""
try:
title = str(get_field(item, "title") or "").strip()
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":")
if not title:
table_title = f"Internet Archive: {identifier}".strip().rstrip(":")
table = ResultTable(table_title).set_preserve_order(True)
table.set_table("internetarchive.formats")
# Selecting a row should expand to `download-file <direct-url>`.
table.set_source_command("download-file", [])
rows: List[Dict[str, Any]] = []
for f in candidates:
name = str(f.get("name") or "").strip()
if not name:
continue
fmt = str(f.get("format") or "").strip()
src = str(f.get("source") or "").strip()
size_val: Any = f.get("size")
try:
size_val = int(size_val) if size_val not in (None, "") else ""
except Exception:
# Keep as-is; ResultTable will stringify.
pass
direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"
row_item: Dict[str, Any] = {
"table": "internetarchive",
"title": fmt or name,
"path": direct_url,
"url": direct_url,
"columns": [
("Format", fmt),
("Name", name),
("Size", size_val),
("Source", src),
],
# Used by @N expansion: download-file <direct-url>
"_selection_args": [direct_url],
"full_metadata": {
"identifier": identifier,
"name": name,
"format": fmt,
"source": src,
"size": f.get("size"),
},
}
rows.append(row_item)
table.add_result(row_item)
if not rows:
log("download-data: no downloadable files found for this item", file=sys.stderr)
return 1
try:
pipeline_context.set_last_result_table(table, rows, subject=item)
pipeline_context.set_current_stage_table(table)
except Exception as exc:
debug(f"[download-data] Failed to register result table: {exc}")
return 0
CMDLET = Download_Data()

View File

@@ -47,7 +47,11 @@ class Download_File(Cmdlet):
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
],
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
"For streaming sites, use download-media.",
"For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.",
],
exec=self.run,
)
self.register()
@@ -121,6 +125,7 @@ class Download_File(Cmdlet):
"match_provider_name_for_url": _match_provider_name_for_url,
"SearchResult": _SearchResult,
}
except Exception:
return {
"get_search_provider": None,
@@ -129,6 +134,154 @@ class Download_File(Cmdlet):
"SearchResult": None,
}
@staticmethod
def _maybe_show_internetarchive_formats(
*,
raw_urls: Sequence[str],
piped_items: Sequence[Any],
parsed: Dict[str, Any],
config: Dict[str, Any],
quiet_mode: bool,
) -> Optional[int]:
"""If the input is an IA item page, show a selectable formats table.
Returns an exit code when handled; otherwise None.
"""
if quiet_mode:
return None
try:
total_inputs = int(len(raw_urls or []) + len(piped_items or []))
except Exception:
total_inputs = 0
if total_inputs != 1:
return None
item = piped_items[0] if piped_items else None
target = ""
if item is not None:
try:
target = str(get_field(item, "path") or get_field(item, "url") or "").strip()
except Exception:
target = ""
if not target and raw_urls:
target = str(raw_urls[0]).strip()
if not target:
return None
try:
from Provider import internetarchive as ia
except Exception:
return None
identifier = ""
try:
md = get_field(item, "full_metadata") if item is not None else None
if isinstance(md, dict):
identifier = str(md.get("identifier") or "").strip()
except Exception:
identifier = ""
if not identifier:
try:
identifier = str(ia.extract_identifier(target) or "").strip()
except Exception:
identifier = ""
if not identifier:
return None
# Only show picker for item pages (details); direct download URLs should download immediately.
try:
if not ia.is_details_url(target):
return None
except Exception:
return None
try:
files = ia.list_download_files(identifier)
except Exception as exc:
log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr)
return 1
if not files:
log("download-file: Internet Archive item has no downloadable files", file=sys.stderr)
return 1
title = ""
try:
title = str(get_field(item, "title") or "").strip() if item is not None else ""
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
try:
from result_table import ResultTable
except Exception as exc:
log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr)
return 1
base_args: List[str] = []
out_arg = parsed.get("path") or parsed.get("output")
if out_arg:
base_args.extend(["-path", str(out_arg)])
table = ResultTable(table_title).set_preserve_order(True)
table.set_table("internetarchive.formats")
table.set_source_command("download-file", base_args)
rows: List[Dict[str, Any]] = []
for f in files:
name = str(f.get("name") or "").strip()
if not name:
continue
fmt = str(f.get("format") or "").strip()
src = str(f.get("source") or "").strip()
direct_url = str(f.get("direct_url") or "").strip()
if not direct_url:
continue
size_val: Any = f.get("size")
try:
size_val = int(size_val) if size_val not in (None, "") else ""
except Exception:
pass
row_item: Dict[str, Any] = {
"table": "internetarchive",
"title": fmt or name,
"path": direct_url,
"url": direct_url,
"columns": [
("Format", fmt),
("Name", name),
("Size", size_val),
("Source", src),
],
"_selection_args": [direct_url],
"full_metadata": {
"identifier": identifier,
"name": name,
"format": fmt,
"source": src,
"size": f.get("size"),
},
}
rows.append(row_item)
table.add_result(row_item)
if not rows:
log("download-file: no downloadable files found for this item", file=sys.stderr)
return 1
try:
pipeline_context.set_last_result_table(table, rows, subject=item)
pipeline_context.set_current_stage_table(table)
except Exception:
pass
log("Internet Archive item detected: select a file with @N to download", file=sys.stderr)
return 0
@staticmethod
def _openlibrary_edition_id_from_url(u: str) -> str:
try:
@@ -284,11 +437,11 @@ class Download_File(Cmdlet):
post = None
title_hint = None
tags_hint: List[str] = []
tg_tags: List[str] = []
if channel:
tags_hint.append(f"channel:{channel}")
tg_tags.append(f"channel:{channel}")
if post is not None:
tags_hint.append(f"post:{post}")
tg_tags.append(f"post:{post}")
if channel and post is not None:
title_hint = f"{channel} {post}"
elif post is not None:
@@ -300,7 +453,7 @@ class Download_File(Cmdlet):
downloaded_path=downloaded_path,
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=tg_tags,
media_kind_hint="file",
full_metadata=telegram_info,
provider_hint="telegram",
@@ -481,14 +634,15 @@ class Download_File(Cmdlet):
# Otherwise, try provider.download(SearchResult) with the URL as the target.
if provider is not None:
sr_obj = None
try:
sr = SearchResult(
sr_obj = SearchResult(
table=str(provider_name),
title=str(url),
path=str(url),
full_metadata={},
)
downloaded_path = provider.download(sr, final_output_dir) # type: ignore[call-arg]
downloaded_path = provider.download(sr_obj, final_output_dir) # type: ignore[call-arg]
except Exception:
downloaded_path = None
@@ -498,24 +652,25 @@ class Download_File(Cmdlet):
raise DownloadError("LibGen URL did not resolve to a downloadable file")
if downloaded_path:
tags_hint: Optional[List[str]] = None
emit_tags: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name).lower() == "libgen":
if str(provider_name).lower() == "libgen" and sr_obj is not None:
media_kind_hint = "book"
try:
sr_tags = getattr(sr, "tag", None)
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
emit_tags = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
emit_tags = None
try:
if isinstance(getattr(sr, "full_metadata", None), dict):
full_md = sr.full_metadata
t = str(full_md.get("title") or "").strip()
sr_full_md = getattr(sr_obj, "full_metadata", None)
if isinstance(sr_full_md, dict):
full_md = sr_full_md
t = str(sr_full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
@@ -525,7 +680,7 @@ class Download_File(Cmdlet):
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=emit_tags,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
@@ -802,6 +957,17 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
quiet_mode=quiet_mode,
)
if ia_picker_exit is not None:
return int(ia_picker_exit)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
if not final_output_dir:
@@ -817,7 +983,6 @@ class Download_File(Cmdlet):
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
registry = self._load_provider_registry()
downloaded_count = 0
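With the picker folded into download-file, the flow the deleted download-data cmdlet used to describe now runs through a single command. An illustrative session (the query and row numbers are placeholders):
search-provider -provider internetarchive "<query>"
@3                               # IA item page -> selectable formats table
@2 | add-file -storage local     # download the chosen file, then import it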

View File

@@ -30,9 +30,7 @@ CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-query \"hash:<sha256>\"]",
alias=[
"get-rel",
],
alias=[],
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,

View File

@@ -1054,7 +1054,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a website screenshot",
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
usage="screen-shot <url> [options]",
alias=["screenshot", "ss"],
arg=[
SharedArgs.URL,

View File

@@ -65,7 +65,7 @@ class Search_Provider(Cmdlet):
"- internetarchive: Archive.org advancedsearch query syntax",
"",
"Results can be piped to other cmdlet:",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-data",
" search-provider -provider bandcamp \"artist:grace\" | @1 | download-file",
],
exec=self.run
)

View File

@@ -606,7 +606,7 @@ def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any]
rows with @2, @2-5, @{1,3,5} syntax in subsequent commands.
Also maintains a history stack for @.. navigation (restore previous result table).
Only selectable commands (search-file, download-data) should call this to create history.
Only selectable commands (search-file, download-file when listing options) should call this to create history.
For action commands (delete-tag, add-tags, etc), use set_last_result_table_preserve_history() instead.
Args:
@@ -878,7 +878,7 @@ def get_last_result_table_source_command() -> Optional[str]:
"""Get the source command from the last displayed result table.
Returns:
Command name (e.g., 'download-data') or None if not set
Command name (e.g., 'download-file') or None if not set
"""
if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'source_command'):
return _LAST_RESULT_TABLE.source_command
@@ -916,7 +916,7 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
def set_current_stage_table(result_table: Optional[Any]) -> None:
"""Store the current pipeline stage table for @N expansion.
Used by cmdlet that display tabular results (e.g., download-data with formats)
Used by cmdlet that display tabular results (e.g., download-file listing formats)
to make their result table available for @N expansion logic.
Does NOT push to history - purely for command expansion in the current pipeline.
@@ -937,7 +937,7 @@ def get_current_stage_table_source_command() -> Optional[str]:
"""Get the source command from the current pipeline stage table.
Returns:
Command name (e.g., 'download-data') or None
Command name (e.g., 'download-file') or None
"""
if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'source_command'):
return _CURRENT_STAGE_TABLE.source_command

View File

@@ -415,7 +415,7 @@ class ResultTable:
self.input_options: Dict[str, InputOption] = {}
"""Options available for user input (cmdlet arguments)"""
self.source_command: Optional[str] = None
"""Command that generated this table (e.g., 'download-data URL')"""
"""Command that generated this table (e.g., 'download-file URL')"""
self.source_args: List[str] = []
"""Base arguments for the source command"""
self.header_lines: List[str] = []
@@ -476,7 +476,7 @@ class ResultTable:
source_command + source_args + row_selection_args | next-cmd
Args:
command: Command name (e.g., 'download-data')
command: Command name (e.g., 'download-file')
args: Base arguments for the command (e.g., ['URL'])
Returns: