df

2025-12-27 14:50:59 -08:00
parent 22af776ee2
commit fcdd507d00
12 changed files with 1004 additions and 66 deletions


@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
     s = s.replace("\u2011", "-") # non-breaking hyphen
     s = s.replace("\u2012", "-") # figure dash
     s = s.replace("\u2015", "-") # horizontal bar
+    # Collapse any whitespace runs (including newlines/tabs) to a single space.
+    # Some sources wrap the artist name or title across lines.
+    try:
+        s = re.sub(r"\s+", " ", s).strip()
+    except Exception:
+        s = " ".join(s.split())
     return s
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
         if ch.isspace():
             while i < len(literal) and literal[i].isspace():
                 i += 1
-            out.append(r"\\s*")
+            out.append(r"\s*")
             continue
         out.append(re.escape(ch))
         i += 1
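
The raw-string change above is the actual bug fix: in a raw string, r"\\s*" is the four characters \\s*, which the regex engine reads as an escaped backslash (matching one literal backslash) followed by "s*", not as the whitespace class. A quick standalone interpreter check (nothing here is from the repo):

import re

assert re.fullmatch(r"\s*", "   ")            # \s* matches whitespace
assert re.fullmatch(r"\\s*", "   ") is None   # \\s* does not
assert re.fullmatch(r"\\s*", "\\sss")         # it matches a literal "\" then a run of "s"
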
@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
         raise ValueError("extract template must contain at least one (field)")
     field_names: List[str] = []
-    parts: List[str] = [r"^\\s*"]
+    parts: List[str] = [r"^\s*"]
     last_end = 0
     for idx, m in enumerate(matches):
@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
             raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
         field_names.append(raw_name)
         name_lower = raw_name.lower()
-        is_last = idx == (len(matches) - 1)
-        if is_last:
-            parts.append(fr"(?P<{raw_name}>.+)")
-        else:
-            parts.append(fr"(?P<{raw_name}>.+?)")
+        # Heuristic: common numeric fields should capture full digit runs.
+        # This avoids ambiguous splits like track='2', title='3 ...'.
+        if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
+            parts.append(fr"(?P<{raw_name}>\d+)")
+        else:
+            parts.append(fr"(?P<{raw_name}>.+?)")
         last_end = m.end()
     tail = tpl[last_end:]
     if tail:
         parts.append(_literal_to_title_pattern_regex(tail))
-    parts.append(r"\\s*$")
+    parts.append(r"\s*$")
     rx = "".join(parts)
     return re.compile(rx, flags=re.IGNORECASE), field_names
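
To see what the digit-run heuristic buys, compare the patterns a hypothetical "(track) (title)" template would compile to before and after the change (regexes written out by hand here, not produced by the repo code):

import re

old = re.compile(r"^\s*(?P<track>.+?)\s*(?P<title>.+)\s*$")   # pre-change: lazy capture
new = re.compile(r"^\s*(?P<track>\d+)\s*(?P<title>.+?)\s*$")  # post-change: digit run

m = old.match("23 Some Title")
# Lazy capture stops at the first character: track='2', title='3 Some Title'.
assert (m.group("track"), m.group("title")) == ("2", "3 Some Title")

m = new.match("23 Some Title")
# \d+ is greedy, so the whole number is kept: track='23', title='Some Title'.
assert (m.group("track"), m.group("title")) == ("23", "Some Title")
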

cmdlet/archive_file.py Normal file

@@ -0,0 +1,426 @@
"""Create a single .tar.zst archive from piped file selections."""
from __future__ import annotations
import re
import sys
import tarfile
import tempfile
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Sequence, Set
from urllib.parse import parse_qs, urlparse
from SYS.logger import log
import pipeline as ctx
from config import resolve_output_dir
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
coerce_to_pipe_object = sh.coerce_to_pipe_object
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help
_SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")
def _extract_sha256_hex(item: Any) -> str:
try:
if isinstance(item, dict):
h = item.get("hash")
else:
h = getattr(item, "hash", None)
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
def _extract_store_name(item: Any) -> str:
try:
if isinstance(item, dict):
s = item.get("store")
else:
s = getattr(item, "store", None)
return str(s or "").strip()
except Exception:
return ""
def _extract_url(item: Any) -> str:
try:
u = sh.get_field(item, "url") or sh.get_field(item, "target")
if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
return u.strip()
except Exception:
pass
return ""
def _extract_hash_from_hydrus_file_url(url: str) -> str:
try:
parsed = urlparse(str(url))
if not (parsed.path or "").endswith("/get_files/file"):
return ""
qs = parse_qs(parsed.query or "")
h = (qs.get("hash") or [""])[0]
if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
return h.strip().lower()
except Exception:
pass
return ""
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
except Exception:
instances = set()
return instances
def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
"""Download a Hydrus-backed item to a local temp path (best-effort).
This is intentionally side-effect free except for writing the local temp file.
"""
try:
from config import get_hydrus_access_key, get_hydrus_url
from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
except Exception:
return None
store_name = _extract_store_name(item)
store_lower = store_name.lower()
hydrus_instances = _hydrus_instance_names(config)
store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
url = _extract_url(item)
file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
if not file_hash:
return None
# Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
is_hydrus_url = False
if url:
try:
parsed = urlparse(url)
is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
except Exception:
is_hydrus_url = False
if not (is_hydrus_url or store_hint):
return None
# Prefer store name as instance key; fall back to "home".
access_key = None
hydrus_url = None
for inst in [s for s in [store_lower, "home"] if s]:
try:
access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
if access_key and hydrus_url:
break
except Exception:
access_key = None
hydrus_url = None
if not access_key or not hydrus_url:
return None
client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
# Best-effort extension from Hydrus metadata.
suffix = ".hydrus"
try:
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
entry = entries[0]
if isinstance(entry, dict):
ext = entry.get("ext")
if isinstance(ext, str) and ext.strip():
cleaned = ext.strip()
if not cleaned.startswith("."):
cleaned = "." + cleaned.lstrip(".")
if len(cleaned) <= 12:
suffix = cleaned
except Exception:
pass
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
dest = output_dir / f"{file_hash}{suffix}"
if dest.exists():
dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
headers = {"Hydrus-Client-API-Access-Key": access_key}
download_hydrus_file(file_url, headers, dest, timeout=60.0)
try:
if dest.exists() and dest.is_file():
return dest
except Exception:
return None
return None
def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
"""Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
# 1) Direct local path
try:
po = coerce_to_pipe_object(item, None)
raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
if raw_path:
p = Path(str(raw_path)).expanduser()
if p.exists():
return p, None
except Exception:
pass
# 2) Store-backed path
file_hash = _extract_sha256_hex(item)
store_name = _extract_store_name(item)
if file_hash and store_name:
try:
from Store import Store
store = Store(config)
backend = store[store_name]
src = backend.get_file(file_hash)
if isinstance(src, Path):
if src.exists():
return src, None
elif isinstance(src, str) and src.strip():
cand = Path(src).expanduser()
if cand.exists():
return cand, None
# If the backend returns a URL (HydrusNetwork), download it.
if src.strip().lower().startswith(("http://", "https://")):
tmp_base = None
try:
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
if downloaded is not None:
return downloaded, downloaded
except Exception:
pass
# 3) Hydrus-backed items without backend.get_file path.
try:
tmp_base = config.get("temp") if isinstance(config, dict) else None
except Exception:
tmp_base = None
out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
out_dir = out_dir / "archive" / "hydrus"
downloaded = _maybe_download_hydrus_item(item, config, out_dir)
if downloaded is not None:
return downloaded, downloaded
return None, None
def _unique_arcname(name: str, seen: Set[str]) -> str:
base = str(name or "").replace("\\", "/")
base = base.lstrip("/")
if not base:
base = "file"
if base not in seen:
seen.add(base)
return base
stem = base
suffix = ""
if "/" not in base:
p = Path(base)
stem = p.stem
suffix = p.suffix
n = 2
while True:
candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
if candidate not in seen:
seen.add(candidate)
return candidate
n += 1
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
parsed = parse_cmdlet_args(args, CMDLET)
level_raw = parsed.get("level")
try:
level = int(level_raw) if level_raw is not None else 11
except Exception:
level = 11
if level < 1:
level = 1
if level > 22:
level = 22
# Output destination is controlled by the shared -path behavior in the pipeline runner.
# This cmdlet always creates the archive in the configured output directory and emits it.
# Collect piped items; archive-file is a batch command (single output).
items: List[Any] = []
if isinstance(result, list):
items = list(result)
elif result is not None:
items = [result]
if not items:
log("No piped items provided to archive-file", file=sys.stderr)
return 1
temp_downloads: List[Path] = []
try:
paths: List[Path] = []
for it in items:
p, tmp = _resolve_existing_or_fetch_path(it, config)
if p is None:
continue
paths.append(p)
if tmp is not None:
temp_downloads.append(tmp)
# Keep stable order, remove duplicates.
uniq: List[Path] = []
seen_paths: Set[str] = set()
for p in paths:
key = str(p.resolve()) if p.exists() else str(p)
if key in seen_paths:
continue
seen_paths.add(key)
uniq.append(p)
paths = uniq
if not paths:
log("No existing file paths found in piped items", file=sys.stderr)
return 1
out_dir = resolve_output_dir(config)
try:
out_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
stamp = time.strftime("%Y%m%d_%H%M%S")
out_path = out_dir / f"archive_{stamp}.tar.zst"
try:
out_path = sh._unique_destination_path(out_path) # type: ignore[attr-defined]
except Exception:
pass
try:
out_path.parent.mkdir(parents=True, exist_ok=True)
except Exception as exc:
log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
return 1
# Import zstandard lazily so the rest of the CLI still runs without it.
try:
import zstandard as zstd # type: ignore
except Exception:
log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
return 1
# Write tar stream into zstd stream.
try:
with open(out_path, "wb") as out_handle:
cctx = zstd.ZstdCompressor(level=level)
with cctx.stream_writer(out_handle) as compressor:
with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
seen_names: Set[str] = set()
for p in paths:
arcname = _unique_arcname(p.name, seen_names)
# For directories, tarfile will include contents when recursive=True.
try:
tf.add(str(p), arcname=arcname, recursive=True)
except Exception as exc:
log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
except Exception as exc:
log(f"Archive creation failed: {exc}", file=sys.stderr)
return 1
# Emit a single artifact downstream.
hash_value = None
try:
from SYS.utils import sha256_file
hash_value = sha256_file(out_path)
except Exception:
hash_value = None
pipe_obj = create_pipe_object_result(
source="archive",
identifier=out_path.stem,
file_path=str(out_path),
cmdlet_name="archive-file",
title=out_path.name,
hash_value=hash_value,
is_temp=True,
store="PATH",
extra={
"target": str(out_path),
"archive_format": "tar.zst",
"compression": "zstd",
"level": level,
"source_count": len(paths),
"source_paths": [str(p) for p in paths],
},
)
ctx.emit(pipe_obj)
return 0
finally:
# Best-effort cleanup of any temp Hydrus downloads we created.
for tmp in temp_downloads:
try:
tmp.unlink(missing_ok=True) # type: ignore[arg-type]
except TypeError:
try:
if tmp.exists():
tmp.unlink()
except Exception:
pass
except Exception:
pass
CMDLET = Cmdlet(
name="archive-file",
summary="Archive piped files into a single .tar.zst.",
usage="@N | archive-file [-level <1-22>] [-path <path>]",
arg=[
CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
SharedArgs.PATH,
],
detail=[
"- Example: @1-5 | archive-file",
"- Default zstd level is 11.",
"- Emits one output item (the archive) for downstream piping.",
],
)
CMDLET.exec = _run
CMDLET.register()
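
For reference, the core write path above composes zstandard's streaming writer with tarfile's non-seeking "w|" pipe mode. A minimal standalone sketch of the same pattern (paths and level are illustrative; requires pip install zstandard):

import tarfile
from pathlib import Path

import zstandard as zstd

def write_tar_zst(paths: list[Path], out_path: Path, level: int = 11) -> None:
    cctx = zstd.ZstdCompressor(level=level)
    with open(out_path, "wb") as fh, cctx.stream_writer(fh) as compressor:
        # "w|" streams the tar without seeking; the zstd writer is not
        # seekable, so plain "w" mode would not be safe here.
        with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
            for p in paths:
                tf.add(str(p), arcname=p.name, recursive=True)

write_tar_zst([Path("a.txt")], Path("out.tar.zst"))
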


@@ -297,51 +297,81 @@ class Delete_File(sh.Cmdlet):
             should_try_hydrus = False
         if should_try_hydrus and hash_hex:
-            client = None
-            if store:
-                # Store specified: do not fall back to a global/default Hydrus client.
-                try:
-                    registry = Store(config)
-                    backend = registry[str(store)]
-                    candidate = getattr(backend, "_client", None)
-                    if candidate is not None and hasattr(candidate, "_post"):
-                        client = candidate
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
-                        return False
-            else:
-                # No store context; use default Hydrus client.
-                try:
-                    client = hydrus_wrapper.get_client(config)
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log("Hydrus client unavailable", file=sys.stderr)
-                        return False
-            payload: Dict[str, Any] = {"hashes": [hash_hex]}
-            if reason:
-                payload["reason"] = reason
+            # Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
+            # This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
+            did_backend_delete = False
             try:
-                client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
+                if backend is not None:
+                    deleter = getattr(backend, "delete_file", None)
+                    if callable(deleter):
+                        did_backend_delete = bool(deleter(hash_hex, reason=reason))
+            except Exception:
+                did_backend_delete = False
+            if did_backend_delete:
                 hydrus_deleted = True
                 title_str = str(title_val).strip() if title_val else ""
                 if title_str:
                     debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
                 else:
                     debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
-            except Exception:
-                # If it's not in Hydrus (e.g. 404 or similar), that's fine
-                if not local_deleted:
-                    return []
+            else:
+                # Fallback to direct client calls.
+                client = None
+                if store:
+                    # Store specified: do not fall back to a global/default Hydrus client.
+                    try:
+                        registry = Store(config)
+                        backend = registry[str(store)]
+                        candidate = getattr(backend, "_client", None)
+                        if candidate is not None and hasattr(candidate, "_post"):
+                            client = candidate
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
+                            return False
+                else:
+                    # No store context; use default Hydrus client.
+                    try:
+                        client = hydrus_wrapper.get_client(config)
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log("Hydrus client unavailable", file=sys.stderr)
+                            return False
+                payload: Dict[str, Any] = {"hashes": [hash_hex]}
+                if reason:
+                    payload["reason"] = reason
+                try:
+                    client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined]
+                    # Best-effort clear deletion record if supported by this client.
+                    try:
+                        clearer = getattr(client, "clear_file_deletion_record", None)
+                        if callable(clearer):
+                            clearer([hash_hex])
+                        else:
+                            client._post("/add_files/clear_file_deletion_record", data={"hashes": [hash_hex]}) # type: ignore[attr-defined]
+                    except Exception:
+                        pass
+                    hydrus_deleted = True
+                    title_str = str(title_val).strip() if title_val else ""
+                    if title_str:
+                        debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
+                    else:
+                        debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
+                except Exception:
+                    # If it's not in Hydrus (e.g. 404 or similar), that's fine
+                    if not local_deleted:
+                        return []
         if hydrus_deleted and hash_hex:
             size_hint = None
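
Stripped of logging and guards, the restructured flow above is: try the store backend's delete_file hook first (so store-specific post-delete work runs), and only fall back to raw client calls when that fails. A condensed sketch with a hypothetical helper name, not the real method:

from typing import Any, Optional

def _delete_via_hydrus(backend: Any, client: Any, hash_hex: str, reason: Optional[str] = None) -> bool:
    # 1) Preferred: the backend hook, which also runs post-delete cleanup.
    try:
        deleter = getattr(backend, "delete_file", None)
        if callable(deleter) and deleter(hash_hex, reason=reason):
            return True
    except Exception:
        pass
    # 2) Fallback: raw API calls, with a best-effort deletion-record clear.
    payload = {"hashes": [hash_hex]}
    if reason:
        payload["reason"] = reason
    client._post("/add_files/delete_files", data=payload)
    try:
        client._post("/add_files/clear_file_deletion_record", data={"hashes": [hash_hex]})
    except Exception:
        pass
    return True
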

cmdlet/download_data.py Normal file

@@ -0,0 +1,267 @@
"""Smart downloader front-door.
Currently focused on Internet Archive item pages:
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
- Selecting a row via @N expands to download-file <direct-url>
This enables:
search-provider -provider internetarchive "..."
@3 # shows formats table
@2 | add-file ... # downloads selected file then pipes to add-file
"""
from __future__ import annotations
import re
import sys
from typing import Any, Dict, List, Sequence, cast
from urllib.parse import quote
from SYS.logger import log, debug
import pipeline as pipeline_context
from result_table import ResultTable
from . import _shared as sh
Cmdlet = sh.Cmdlet
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
def _extract_ia_identifier(text: str) -> str:
s = str(text or "").strip()
if not s:
return ""
# https://archive.org/details/<identifier>
m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
if m:
return str(m.group(1) or "").strip()
# internetarchive:<identifier>
if s.lower().startswith("internetarchive:"):
return s.split(":", 1)[-1].strip()
return ""
class Download_Data(Cmdlet):
def __init__(self) -> None:
super().__init__(
name="download-data",
summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
alias=[],
arg=[SharedArgs.URL],
detail=[
"For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
"Select a file row with @N to run download-file on that direct URL.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
# parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
parsed = parse_cmdlet_args(args, cast(Any, self))
except Exception:
parsed = {}
raw_urls = parsed.get("url", [])
if isinstance(raw_urls, str):
raw_urls = [raw_urls]
url_arg = str(raw_urls[0]).strip() if raw_urls else ""
piped_items: List[Any] = []
if isinstance(result, list):
piped_items = list(result)
elif result is not None:
piped_items = [result]
# Prefer piped item target if present.
target = ""
if piped_items:
target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
if not target:
target = url_arg
table_name = ""
try:
table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
except Exception:
table_name = ""
identifier = ""
if piped_items:
md = get_field(piped_items[0], "full_metadata")
if isinstance(md, dict):
identifier = str(md.get("identifier") or "").strip()
if not identifier:
identifier = _extract_ia_identifier(target)
if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)
log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
return 1
@staticmethod
def _run_internetarchive(item: Any, *, identifier: str) -> int:
try:
from Provider.internetarchive import _ia as _ia_loader
except Exception as exc:
log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
return 1
def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
try:
source = str(f.get("source") or "").strip().lower()
fmt = str(f.get("format") or "").strip().lower()
except Exception:
source = ""
fmt = ""
if source == "metadata":
return True
if fmt in {"metadata", "archive bittorrent"}:
return True
if fmt.startswith("thumbnail"):
return True
return False
ia = None
try:
ia = _ia_loader()
except Exception as exc:
log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
return 1
try:
get_item = getattr(ia, "get_item", None)
if not callable(get_item):
raise Exception("internetarchive.get_item is not available")
ia_item = cast(Any, get_item(str(identifier)))
except Exception as exc:
log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
return 1
files: List[Dict[str, Any]] = []
try:
raw_files = getattr(ia_item, "files", None)
if isinstance(raw_files, list):
for f in raw_files:
if isinstance(f, dict):
files.append(f)
except Exception:
files = []
if not files:
try:
for f in ia_item.get_files():
name = getattr(f, "name", None)
if not name and isinstance(f, dict):
name = f.get("name")
if not name:
continue
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
}
)
except Exception:
files = []
if not files:
log("download-data: Internet Archive item has no files", file=sys.stderr)
return 1
# Prefer non-metadata files for the picker.
candidates = [f for f in files if not _is_ia_metadata_file(f)]
if not candidates:
candidates = list(files)
def _key(f: Dict[str, Any]) -> tuple[str, str]:
fmt = str(f.get("format") or "").strip().lower()
name = str(f.get("name") or "").strip().lower()
return (fmt, name)
candidates.sort(key=_key)
title = ""
try:
title = str(get_field(item, "title") or "").strip()
except Exception:
title = ""
table_title = f"Internet Archive: {title}".strip().rstrip(":")
if not title:
table_title = f"Internet Archive: {identifier}".strip().rstrip(":")
table = ResultTable(table_title).set_preserve_order(True)
table.set_table("internetarchive.formats")
# Selecting a row should expand to `download-file <direct-url>`.
table.set_source_command("download-file", [])
rows: List[Dict[str, Any]] = []
for f in candidates:
name = str(f.get("name") or "").strip()
if not name:
continue
fmt = str(f.get("format") or "").strip()
src = str(f.get("source") or "").strip()
size_val: Any = f.get("size")
try:
size_val = int(size_val) if size_val not in (None, "") else ""
except Exception:
# Keep as-is; ResultTable will stringify.
pass
direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"
row_item: Dict[str, Any] = {
"table": "internetarchive",
"title": fmt or name,
"path": direct_url,
"url": direct_url,
"columns": [
("Format", fmt),
("Name", name),
("Size", size_val),
("Source", src),
],
# Used by @N expansion: download-file <direct-url>
"_selection_args": [direct_url],
"full_metadata": {
"identifier": identifier,
"name": name,
"format": fmt,
"source": src,
"size": f.get("size"),
},
}
rows.append(row_item)
table.add_result(row_item)
if not rows:
log("download-data: no downloadable files found for this item", file=sys.stderr)
return 1
try:
pipeline_context.set_last_result_table(table, rows, subject=item)
pipeline_context.set_current_stage_table(table)
except Exception as exc:
debug(f"[download-data] Failed to register result table: {exc}")
return 0
CMDLET = Download_Data()
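
The heavy lifting here is done by the internetarchive package: get_item() returns an Item whose .files attribute is a list of per-file metadata dicts, and direct download URLs follow the archive.org/download/<identifier>/<name> scheme. A quick standalone probe (the identifier is illustrative, not from the repo):

from urllib.parse import quote

import internetarchive

item = internetarchive.get_item("some-item-identifier")
for f in item.files[:5]:
    name = str(f.get("name") or "")
    url = f"https://archive.org/download/{item.identifier}/{quote(name, safe='')}"
    print(f.get("format"), f.get("size"), url)
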


@@ -16,6 +16,7 @@ from . import _shared as sh
 Cmdlet = sh.Cmdlet
 CmdletArg = sh.CmdletArg
 SharedArgs = sh.SharedArgs
+create_pipe_object_result = sh.create_pipe_object_result
 get_field = sh.get_field
 get_pipe_object_hash = sh.get_pipe_object_hash
@@ -37,7 +38,6 @@ except ImportError:
 try:
     from metadata import (
         read_tags_from_file,
-        dedup_tags_by_namespace,
         merge_multiple_tag_lists,
     )
     HAS_METADATA_API = True
@@ -87,7 +87,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     delete_after = parsed.get("delete", False)
     output_override: Optional[Path] = None
-    output_arg = parsed.get("output")
+    output_arg = parsed.get("path")
     if output_arg:
         try:
             output_override = Path(str(output_arg)).expanduser()
@@ -928,10 +928,10 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
 CMDLET = Cmdlet(
     name="merge-file",
     summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
-    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
+    usage="merge-file [-delete] [-path <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
     arg=[
         CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
-        CmdletArg("-output", description="Override output file path."),
+        SharedArgs.PATH,
         CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
     ],
     detail=[