re
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
nose
2025-12-25 04:49:22 -08:00
parent 2542a68479
commit 43afa4e3fa
19 changed files with 2766 additions and 234 deletions

View File

@@ -319,6 +319,18 @@ class Download_File(Cmdlet):
except Exception:
provider_name = None
# Heuristic: LibGen often uses landing pages like edition.php/file.php.
# These should never be treated as direct file URLs.
if not provider_name:
try:
p = urlparse(str(url))
h = (p.hostname or "").strip().lower()
path = (p.path or "").strip().lower()
if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")):
provider_name = "libgen"
except Exception:
pass
if provider_name and get_provider is not None and SearchResult is not None:
# OpenLibrary URLs should be handled by the OpenLibrary provider.
if provider_name == "openlibrary":
@@ -391,11 +403,19 @@ class Download_File(Cmdlet):
progress.clear_status()
if downloaded_path:
tags_hint: Optional[List[str]] = None
try:
sr_tags = getattr(sr, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
self._emit_local_file(
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=None,
tags_hint=tags_hint,
media_kind_hint="book",
full_metadata=sr.full_metadata,
provider_hint="openlibrary",
@@ -472,14 +492,42 @@ class Download_File(Cmdlet):
except Exception:
downloaded_path = None
# Refuse to fall back to direct-download for LibGen landing pages.
# This prevents saving HTML (e.g. edition.php) as a bogus file.
if (not downloaded_path) and str(provider_name).lower() == "libgen":
raise DownloadError("LibGen URL did not resolve to a downloadable file")
if downloaded_path:
tags_hint: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name).lower() == "libgen":
media_kind_hint = "book"
try:
sr_tags = getattr(sr, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
try:
if isinstance(getattr(sr, "full_metadata", None), dict):
full_md = sr.full_metadata
t = str(full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
full_md = None
self._emit_local_file(
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=Path(str(downloaded_path)).stem,
tags_hint=None,
media_kind_hint="file",
full_metadata=None,
title_hint=title_hint,
tags_hint=tags_hint,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
progress=progress,
config=config,
@@ -600,6 +648,7 @@ class Download_File(Cmdlet):
# If this looks like a provider item and providers are available, prefer provider.download()
downloaded_path: Optional[Path] = None
attempted_provider_download = False
provider_sr = None
if table and get_search_provider and SearchResult:
provider = get_search_provider(str(table), config)
if provider is not None:
@@ -612,6 +661,7 @@ class Download_File(Cmdlet):
)
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
downloaded_path = provider.download(sr, final_output_dir)
provider_sr = sr
# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
@@ -693,6 +743,30 @@ class Download_File(Cmdlet):
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
continue
# Allow providers to add/enrich tags and metadata during download.
if str(table or "").lower() == "libgen" and provider_sr is not None:
try:
sr_tags = getattr(provider_sr, "tag", None)
if tags_list is None and isinstance(sr_tags, set) and sr_tags:
tags_list = sorted([str(t) for t in sr_tags if t])
except Exception:
pass
try:
sr_md = getattr(provider_sr, "full_metadata", None)
if isinstance(sr_md, dict) and sr_md:
full_metadata = sr_md
except Exception:
pass
try:
if isinstance(full_metadata, dict):
t = str(full_metadata.get("title") or "").strip()
if t:
title = t
except Exception:
pass
self._emit_local_file(
downloaded_path=downloaded_path,
source=str(target) if target else None,

View File

@@ -1593,6 +1593,36 @@ class Download_Media(Cmdlet):
debug("Preflight URL check: no matches")
return True
# If the user already answered the duplicate URL prompt for this pipeline/command,
# respect that decision and don't re-prompt for every item.
try:
current_cmd_text = pipeline_context.get_current_command_text("")
except Exception:
current_cmd_text = ""
try:
stage_ctx = pipeline_context.get_stage_context()
except Exception:
stage_ctx = None
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
if in_pipeline:
try:
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
except Exception:
cached_cmd = ""
cached_decision = None
if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
if bool(cached_decision):
return True
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
results_list: List[Dict[str, Any]] = []
for item in url_matches:
@@ -1639,8 +1669,28 @@ class Download_Media(Cmdlet):
with cm:
get_stderr_console().print(table)
setattr(table, "_rendered_by_cmdlet", True)
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
if used_suspend:
answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
# Cache decision for the duration of this pipeline/command.
if in_pipeline:
try:
existing = pipeline_context.load_value("preflight", default=None)
except Exception:
existing = None
preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
url_dup_cache = preflight_cache.get("url_duplicates")
if not isinstance(url_dup_cache, dict):
url_dup_cache = {}
url_dup_cache["command"] = str(current_cmd_text or "")
url_dup_cache["continue"] = bool(answered_yes)
preflight_cache["url_duplicates"] = url_dup_cache
try:
pipeline_context.store_value("preflight", preflight_cache)
except Exception:
pass
if not answered_yes:
if in_pipeline and used_suspend:
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
@@ -1666,6 +1716,35 @@ class Download_Media(Cmdlet):
debug("Bulk URL preflight skipped: storage unavailable")
return True
# Honor any prior duplicate URL decision for this pipeline/command.
try:
current_cmd_text = pipeline_context.get_current_command_text("")
except Exception:
current_cmd_text = ""
try:
stage_ctx = pipeline_context.get_stage_context()
except Exception:
stage_ctx = None
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
if in_pipeline:
try:
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
except Exception:
cached_cmd = ""
cached_decision = None
if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
if bool(cached_decision):
return True
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
unique_urls: List[str] = []
for u in urls or []:
s = str(u or "").strip()
@@ -1930,7 +2009,32 @@ class Download_Media(Cmdlet):
cm = nullcontext()
with cm:
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
# Cache decision for the duration of this pipeline/command.
if in_pipeline:
try:
existing = pipeline_context.load_value("preflight", default=None)
except Exception:
existing = None
preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
url_dup_cache = preflight_cache.get("url_duplicates")
if not isinstance(url_dup_cache, dict):
url_dup_cache = {}
url_dup_cache["command"] = str(current_cmd_text or "")
url_dup_cache["continue"] = bool(answered_yes)
preflight_cache["url_duplicates"] = url_dup_cache
try:
pipeline_context.store_value("preflight", preflight_cache)
except Exception:
pass
if not answered_yes:
if in_pipeline:
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
return True
@@ -1947,6 +2051,20 @@ class Download_Media(Cmdlet):
if not isinstance(entries, list) or len(entries) <= 1:
return False
# Identify a stable table type so `@* | ...` pipelines can auto-insert the
# appropriate downloader stage (e.g., Bandcamp selections should insert
# `download-media` before `merge-file`).
extractor_name = ""
try:
extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower()
except Exception:
extractor_name = ""
table_type: Optional[str] = None
if "bandcamp" in extractor_name:
table_type = "bandcamp"
elif "youtube" in extractor_name:
table_type = "youtube"
# Display table (limit rows to keep output reasonable)
max_rows = 200
display_entries = entries[:max_rows]
@@ -1976,6 +2094,11 @@ class Download_Media(Cmdlet):
table = ResultTable()
safe_url = str(url or "").strip()
table.title = f'download-media -url "{safe_url}"' if safe_url else "download-media"
if table_type:
try:
table.set_table(table_type)
except Exception:
table.table = table_type
table.set_source_command("download-media", [])
try:
table.set_preserve_order(True)
@@ -2581,7 +2704,10 @@ class Download_Media(Cmdlet):
PipelineProgress(pipeline_context).step("finalized")
stage_ctx = pipeline_context.get_stage_context()
emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
# Emit per downloaded item whenever we're running under the pipeline runner.
# Live progress advances on emit(), and suppressing emits for the last stage
# causes the pipe bar to stay at 0% even while downloads complete.
emit_enabled = bool(stage_ctx is not None)
for pipe_obj_dict in pipe_objects:
if emit_enabled:
pipeline_context.emit(pipe_obj_dict)

View File

@@ -85,7 +85,12 @@ class Get_Url(Cmdlet):
if title:
table_title = f"Title: {title}"
table = ResultTable(table_title, max_columns=1).set_preserve_order(True)
table = (
ResultTable(table_title, max_columns=1)
.set_preserve_order(True)
.set_table("url")
.set_value_case("preserve")
)
table.set_source_command("get-url", [])
items: List[UrlItem] = []