dfd
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-27 21:24:27 -08:00
parent fcdd507d00
commit 8288ea8c66
16 changed files with 530 additions and 339 deletions

View File

@@ -47,7 +47,11 @@ class Download_File(Cmdlet):
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
],
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
"For streaming sites, use download-media.",
"For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.",
],
exec=self.run,
)
self.register()
@@ -121,6 +125,7 @@ class Download_File(Cmdlet):
"match_provider_name_for_url": _match_provider_name_for_url,
"SearchResult": _SearchResult,
}
except Exception:
return {
"get_search_provider": None,
@@ -129,6 +134,154 @@ class Download_File(Cmdlet):
"SearchResult": None,
}
@staticmethod
def _maybe_show_internetarchive_formats(
    *,
    raw_urls: Sequence[str],
    piped_items: Sequence[Any],
    parsed: Dict[str, Any],
    config: Dict[str, Any],  # not read by this method; kept for call-site symmetry — NOTE(review): confirm it stays unused
    quiet_mode: bool,
) -> Optional[int]:
    """If the input is an IA item page, show a selectable formats table.

    Returns an exit code when handled (0 = picker shown, 1 = lookup/table
    failure); otherwise ``None``, meaning the caller should continue with
    the normal download path.

    The picker is only presented when ALL of the following hold:
      * not running in quiet/background mode,
      * exactly one input (raw URL or piped item) was supplied,
      * an archive.org identifier can be resolved from the input, and
      * the target is an item *details* page (direct file URLs are left
        to download immediately).
    """
    # Background/quiet runs must never block on an interactive table.
    if quiet_mode:
        return None
    # Count combined inputs defensively; any oddity disables the picker.
    try:
        total_inputs = int(len(raw_urls or []) + len(piped_items or []))
    except Exception:
        total_inputs = 0
    # A multi-input invocation is a batch download, not a browse session.
    if total_inputs != 1:
        return None
    item = piped_items[0] if piped_items else None
    # Resolve the target URL: prefer the piped item's path/url fields,
    # then fall back to the raw command-line URL.
    target = ""
    if item is not None:
        try:
            target = str(get_field(item, "path") or get_field(item, "url") or "").strip()
        except Exception:
            target = ""
    if not target and raw_urls:
        target = str(raw_urls[0]).strip()
    if not target:
        return None
    # Lazy import: if the IA provider is unavailable, silently fall
    # through to the generic download path.
    try:
        from Provider import internetarchive as ia
    except Exception:
        return None
    # Resolve the IA identifier: piped metadata first, then URL parsing.
    identifier = ""
    try:
        md = get_field(item, "full_metadata") if item is not None else None
        if isinstance(md, dict):
            identifier = str(md.get("identifier") or "").strip()
    except Exception:
        identifier = ""
    if not identifier:
        try:
            identifier = str(ia.extract_identifier(target) or "").strip()
        except Exception:
            identifier = ""
    if not identifier:
        return None
    # Only show picker for item pages (details); direct download URLs should download immediately.
    try:
        if not ia.is_details_url(target):
            return None
    except Exception:
        return None
    # From here on, failures are reported to the user (exit 1) rather
    # than silently falling back — the user explicitly gave an IA page.
    try:
        files = ia.list_download_files(identifier)
    except Exception as exc:
        log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr)
        return 1
    if not files:
        log("download-file: Internet Archive item has no downloadable files", file=sys.stderr)
        return 1
    # Prefer the item's human-readable title for the table heading;
    # fall back to the bare identifier.
    title = ""
    try:
        title = str(get_field(item, "title") or "").strip() if item is not None else ""
    except Exception:
        title = ""
    table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
    try:
        from result_table import ResultTable
    except Exception as exc:
        log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr)
        return 1
    # Carry the user's output-dir choice into the @N re-invocation so the
    # selected file lands where the original command asked.
    base_args: List[str] = []
    out_arg = parsed.get("path") or parsed.get("output")
    if out_arg:
        base_args.extend(["-path", str(out_arg)])
    table = ResultTable(table_title).set_preserve_order(True)
    table.set_table("internetarchive.formats")
    table.set_source_command("download-file", base_args)
    rows: List[Dict[str, Any]] = []
    for f in files:
        # Skip entries without a name or a resolvable direct URL —
        # neither can be selected for download.
        name = str(f.get("name") or "").strip()
        if not name:
            continue
        fmt = str(f.get("format") or "").strip()
        src = str(f.get("source") or "").strip()
        direct_url = str(f.get("direct_url") or "").strip()
        if not direct_url:
            continue
        # Normalize size to int for display; blank when missing/unparsable.
        size_val: Any = f.get("size")
        try:
            size_val = int(size_val) if size_val not in (None, "") else ""
        except Exception:
            pass
        row_item: Dict[str, Any] = {
            "table": "internetarchive",
            "title": fmt or name,
            "path": direct_url,
            "url": direct_url,
            "columns": [
                ("Format", fmt),
                ("Name", name),
                ("Size", size_val),
                ("Source", src),
            ],
            # @N selection re-runs download-file with the direct URL,
            # which bypasses this picker (it is not a details URL).
            "_selection_args": [direct_url],
            "full_metadata": {
                "identifier": identifier,
                "name": name,
                "format": fmt,
                "source": src,
                "size": f.get("size"),  # raw size preserved alongside the display value
            },
        }
        rows.append(row_item)
        table.add_result(row_item)
    # files was non-empty, but every entry may have been filtered out above.
    if not rows:
        log("download-file: no downloadable files found for this item", file=sys.stderr)
        return 1
    # Publishing the table is best-effort: even if the pipeline context
    # rejects it, the user still sees the rendered list.
    try:
        pipeline_context.set_last_result_table(table, rows, subject=item)
        pipeline_context.set_current_stage_table(table)
    except Exception:
        pass
    log("Internet Archive item detected: select a file with @N to download", file=sys.stderr)
    return 0
@staticmethod
def _openlibrary_edition_id_from_url(u: str) -> str:
try:
@@ -284,11 +437,11 @@ class Download_File(Cmdlet):
post = None
title_hint = None
tags_hint: List[str] = []
tg_tags: List[str] = []
if channel:
tags_hint.append(f"channel:{channel}")
tg_tags.append(f"channel:{channel}")
if post is not None:
tags_hint.append(f"post:{post}")
tg_tags.append(f"post:{post}")
if channel and post is not None:
title_hint = f"{channel} {post}"
elif post is not None:
@@ -300,7 +453,7 @@ class Download_File(Cmdlet):
downloaded_path=downloaded_path,
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=tg_tags,
media_kind_hint="file",
full_metadata=telegram_info,
provider_hint="telegram",
@@ -481,14 +634,15 @@ class Download_File(Cmdlet):
# Otherwise, try provider.download(SearchResult) with the URL as the target.
if provider is not None:
sr_obj = None
try:
sr = SearchResult(
sr_obj = SearchResult(
table=str(provider_name),
title=str(url),
path=str(url),
full_metadata={},
)
downloaded_path = provider.download(sr, final_output_dir) # type: ignore[call-arg]
downloaded_path = provider.download(sr_obj, final_output_dir) # type: ignore[call-arg]
except Exception:
downloaded_path = None
@@ -498,24 +652,25 @@ class Download_File(Cmdlet):
raise DownloadError("LibGen URL did not resolve to a downloadable file")
if downloaded_path:
tags_hint: Optional[List[str]] = None
emit_tags: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name).lower() == "libgen":
if str(provider_name).lower() == "libgen" and sr_obj is not None:
media_kind_hint = "book"
try:
sr_tags = getattr(sr, "tag", None)
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
emit_tags = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
emit_tags = None
try:
if isinstance(getattr(sr, "full_metadata", None), dict):
full_md = sr.full_metadata
t = str(full_md.get("title") or "").strip()
sr_full_md = getattr(sr_obj, "full_metadata", None)
if isinstance(sr_full_md, dict):
full_md = sr_full_md
t = str(sr_full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
@@ -525,7 +680,7 @@ class Download_File(Cmdlet):
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=emit_tags,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
@@ -802,6 +957,17 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
quiet_mode=quiet_mode,
)
if ia_picker_exit is not None:
return int(ia_picker_exit)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
if not final_output_dir:
@@ -817,7 +983,6 @@ class Download_File(Cmdlet):
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
registry = self._load_provider_registry()
downloaded_count = 0