This commit is contained in:
2026-01-03 03:37:48 -08:00
parent 6e9a0c28ff
commit 73f3005393
23 changed files with 1791 additions and 442 deletions

View File

@@ -28,6 +28,7 @@ class CmdletArg:
"""Argument type: 'string', 'int', 'flag', 'enum', etc."""
required: bool = False
"""Whether this argument is required"""
description: str = ""
"""Human-readable description of the argument"""
choices: List[str] = field(default_factory=list)
@@ -424,6 +425,8 @@ class Cmdlet:
"""List of arguments accepted by this cmdlet"""
detail: List[str] = field(default_factory=list)
"""Detailed explanation lines (for help text)"""
examples: List[str] = field(default_factory=list)
"""Example invocations shown in `.help`."""
# Execution function: func(result, args, config) -> int
exec: Optional[Callable[[Any,
Sequence[str],

View File

@@ -185,6 +185,9 @@ class Add_File(Cmdlet):
" file.io: Upload to file.io for temporary hosting",
" internetarchive: Upload to archive.org (optional tag: ia:<identifier> to upload into an existing item)",
],
examples=[
'download-file "https://themathesontrust.org/papers/christianity/alcock-alphabet1.pdf" | add-file -store tutorial',
],
exec=self.run,
)
self.register()
@@ -1950,6 +1953,18 @@ class Add_File(Cmdlet):
except Exception as exc:
debug(f"[add-file] sub note write failed: {exc}")
lyric_note = Add_File._get_note_text(result, pipe_obj, "lyric")
if lyric_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
debug(
f"[add-file] Writing lyric note (len={len(str(lyric_note))}) to {backend_name}:{resolved_hash}"
)
setter(resolved_hash, "lyric", lyric_note)
except Exception as exc:
debug(f"[add-file] lyric note write failed: {exc}")
chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters")
if chapters_note:
try:

View File

@@ -15,6 +15,8 @@ from typing import Any, Dict, List, Optional, Sequence
from urllib.parse import urlparse
from contextlib import AbstractContextManager, nullcontext
import requests
from API.alldebrid import is_magnet_link
from Provider import internetarchive as ia_provider
from Provider import alldebrid as ad_provider
@@ -142,6 +144,85 @@ class Download_File(Cmdlet):
return expanded_urls
@staticmethod
def _rewrite_archive_org_urls(raw_urls: Sequence[str]) -> List[str]:
"""Rewrite Archive.org URLs using metadata JSON to pick the right flow.
- /metadata/<id>:
- if lendable (collection contains inlibrary/printdisabled/lendinglibrary) -> /borrow/<id>
- else -> /details/<id>
- /details/<id>:
- if lendable -> /borrow/<id>
This makes `download-file` do the right thing for borrow-only items.
"""
out: List[str] = []
for u in list(raw_urls or []):
s = str(u or "").strip()
if not s:
continue
try:
p = urlparse(s)
host = (p.hostname or "").strip().lower()
path = (p.path or "").strip()
except Exception:
out.append(s)
continue
if not host or (host != "archive.org" and not host.endswith(".archive.org")):
out.append(s)
continue
low_path = path.lower().strip()
if not (low_path.startswith("/metadata/") or low_path.startswith("/details/")):
out.append(s)
continue
parts = [x for x in path.split("/") if x]
if len(parts) < 2:
out.append(s)
continue
head = str(parts[0] or "").strip().lower()
archive_id = str(parts[1] or "").strip()
if head not in {"metadata", "details"} or not archive_id:
out.append(s)
continue
lendable = False
try:
meta_url = f"https://archive.org/metadata/{archive_id}"
resp = requests.get(meta_url, timeout=8)
resp.raise_for_status()
data = resp.json() if resp is not None else {}
meta = data.get("metadata", {}) if isinstance(data, dict) else {}
collection = meta.get("collection") if isinstance(meta, dict) else None
values: List[str] = []
if isinstance(collection, list):
values = [str(x).strip().lower() for x in collection if str(x).strip()]
elif isinstance(collection, str):
values = [collection.strip().lower()] if collection.strip() else []
lendable = any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values)
except Exception:
lendable = False
if lendable:
debug(f"[download-file] archive.org item '{archive_id}' looks lendable; using borrow flow")
out.append(f"https://archive.org/borrow/{archive_id}")
continue
# Non-lendable: turn metadata URLs into details URLs so IA picker can show files.
if head == "metadata":
out.append(f"https://archive.org/details/{archive_id}")
continue
out.append(s)
return out
@staticmethod
def _collect_piped_items_if_no_urls(result: Any,
raw_urls: Sequence[str]) -> List[Any]:
@@ -232,6 +313,14 @@ class Download_File(Cmdlet):
title_val = (title_hint or downloaded_path.stem
or "Unknown").strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
notes: Optional[Dict[str, str]] = None
try:
if isinstance(full_metadata, dict):
subtitles = full_metadata.get("_tidal_lyrics_subtitles")
if isinstance(subtitles, str) and subtitles.strip():
notes = {"lyric": subtitles}
except Exception:
notes = None
tag: List[str] = []
if tags_hint:
tag.extend([str(t) for t in tags_hint if t])
@@ -253,6 +342,8 @@ class Download_File(Cmdlet):
payload["provider"] = str(provider_hint)
if full_metadata:
payload["full_metadata"] = full_metadata
if notes:
payload["notes"] = notes
if source and str(source).startswith("http"):
payload["url"] = source
elif source:
@@ -890,52 +981,85 @@ class Download_File(Cmdlet):
msg += f" (availability={availability or ''} reason={reason or ''})"
log(msg, file=sys.stderr)
# Fallback: run a LibGen title search so the user can pick an alternative source.
# Fallback: show a LibGen selectable ResultTable (no emits) so the user can pick @N.
# This intentionally mirrors `search-file -provider libgen` UX: results table + selection.
try:
title_text = str(title or "").strip()
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
if title_text:
if title_text and get_search_provider and SearchResult:
log(
f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
file=sys.stderr,
)
from cmdlet.search_file import CMDLET as _SEARCH_FILE_CMDLET
fallback_query = title_text
exec_fn = getattr(_SEARCH_FILE_CMDLET, "exec", None)
if not callable(exec_fn):
libgen_provider = get_search_provider("libgen", config)
if libgen_provider is None:
log(
"[download-file] search-file cmdlet unavailable; cannot run LibGen fallback search",
"[download-file] LibGen provider unavailable; cannot run fallback search",
file=sys.stderr,
)
continue
ret = exec_fn(
None,
["-provider",
"libgen",
"-query",
fallback_query],
config,
)
# Promote the search-file table to a display overlay so it renders.
try:
table_obj = pipeline_context.get_last_result_table()
items_obj = pipeline_context.get_last_result_items()
if table_obj is not None:
pipeline_context.set_last_result_table_overlay(
table_obj,
items_obj
)
from SYS.result_table import ResultTable
except Exception:
ResultTable = None # type: ignore[assignment]
if ResultTable is None:
log(
"[download-file] ResultTable unavailable; cannot render LibGen fallback search",
file=sys.stderr,
)
continue
fallback_query = title_text
# Keep parity with search-file provider default when user didn't specify a limit.
results = libgen_provider.search(fallback_query, limit=50)
if not results:
log(
f"[download-file] LibGen: no results found for: {fallback_query}",
file=sys.stderr,
)
continue
table_title = f"Libgen: {fallback_query}".strip().rstrip(":")
table_obj = ResultTable(table_title).set_preserve_order(False)
table_obj.set_table("libgen")
try:
table_obj.set_table_metadata({"provider": "libgen"})
except Exception:
pass
# Mark as produced by download-file so the pipeline runner pauses and stores tail stages.
table_obj.set_source_command("download-file", [])
results_list: List[Dict[str, Any]] = []
for search_result in results:
item_dict = (
search_result.to_dict()
if hasattr(search_result, "to_dict")
else dict(search_result)
if isinstance(search_result, dict)
else {"title": str(search_result)}
)
if "table" not in item_dict:
item_dict["table"] = "libgen"
table_obj.add_result(search_result)
results_list.append(item_dict)
# Seed selection state for @N and pause the pipeline.
try:
return int(ret) # type: ignore[arg-type]
pipeline_context.set_last_result_table(table_obj, results_list)
except Exception:
return 1
pass
try:
pipeline_context.set_current_stage_table(table_obj)
except Exception:
pass
# Returning 0 with a selectable stage table and no emits causes the CLI to render
# the table and pause, preserving the downstream pipeline tail.
return 0
except Exception:
pass
@@ -976,6 +1100,15 @@ class Download_File(Cmdlet):
)
continue
# Prefer provider-enriched metadata (providers may mutate sr.full_metadata).
if provider_sr is not None:
try:
sr_md = getattr(provider_sr, "full_metadata", None)
if isinstance(sr_md, dict) and sr_md:
full_metadata = sr_md
except Exception:
pass
# Allow providers to add/enrich tags and metadata during download.
if str(table or "").lower() == "libgen" and provider_sr is not None:
try:
@@ -3305,6 +3438,7 @@ class Download_File(Cmdlet):
parsed = parse_cmdlet_args(args, self)
raw_url = self._normalize_urls(parsed)
raw_url = self._rewrite_archive_org_urls(raw_url)
piped_items = self._collect_piped_items_if_no_urls(result, raw_url)
had_piped_input = False
@@ -3346,6 +3480,26 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
# Internet Archive details URLs should present a downloadable file picker
# before we try any streaming/ytdlp probing.
try:
quiet_mode = (
bool(config.get("_quiet_background_output"))
if isinstance(config, dict) else False
)
except Exception:
quiet_mode = False
ia_picker_exit = ia_provider.maybe_show_formats_table(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
quiet_mode=quiet_mode,
get_field=get_field,
)
if ia_picker_exit is not None:
return int(ia_picker_exit)
streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result)
supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates)
@@ -3360,13 +3514,13 @@ class Download_File(Cmdlet):
)
if streaming_exit_code == 0:
streaming_downloaded += 1
# Only remove URLs from further processing when streaming succeeded.
raw_url = [u for u in raw_url if u not in supported_streaming]
if not raw_url and not unsupported_streaming:
piped_items = []
raw_url = [u for u in raw_url if u not in supported_streaming]
if not raw_url and not unsupported_streaming:
piped_items = []
if not raw_url and not piped_items:
return int(streaming_exit_code or 0)
if not raw_url and not piped_items:
return int(streaming_exit_code or 0)
quiet_mode = (
bool(config.get("_quiet_background_output"))

View File

@@ -110,6 +110,17 @@ class search_file(Cmdlet):
ext = "".join(ch for ch in ext if ch.isalnum())
return ext[:5]
@staticmethod
def _get_hifi_view_from_query(query: str) -> str:
text = str(query or "").strip()
if not text:
return "track"
if re.search(r"\balbum\s*:", text, flags=re.IGNORECASE):
return "album"
if re.search(r"\bartist\s*:", text, flags=re.IGNORECASE):
return "artist"
return "track"
def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Ensure storage results have the necessary fields for result_table display."""
store_value = str(payload.get("store") or "").lower()
@@ -236,9 +247,18 @@ class search_file(Cmdlet):
table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_lower in {"youtube", "openlibrary", "loc"}
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name)
table_type = provider_name
table_meta: Dict[str, Any] = {"provider": provider_name}
if provider_lower == "hifi":
view = self._get_hifi_view_from_query(query)
table_meta["view"] = view
table_type = f"hifi.{view}"
elif provider_lower == "internetarchive":
# Internet Archive search results are effectively folders (items); selecting @N
# should open a list of downloadable files for the chosen item.
table_type = "internetarchive.folder"
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(table_type)
if provider_lower == "alldebrid":
table_meta["view"] = "files" if effective_open_id is not None else "folders"
if effective_open_id is not None:
@@ -277,7 +297,7 @@ class search_file(Cmdlet):
)
if "table" not in item_dict:
item_dict["table"] = provider_name
item_dict["table"] = table_type
row_index = len(table.rows)
table.add_result(search_result)