df
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
     s = s.replace("\u2011", "-")  # non-breaking hyphen
     s = s.replace("\u2012", "-")  # figure dash
     s = s.replace("\u2015", "-")  # horizontal bar
+
+    # Collapse any whitespace runs (including newlines/tabs) to a single space.
+    # Some sources wrap the artist name or title across lines.
+    try:
+        s = re.sub(r"\s+", " ", s).strip()
+    except Exception:
+        s = " ".join(s.split())
     return s

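For reference, a small sketch of what the added block does to a wrapped title (illustrative only, not part of the diff; it applies just the two transformations shown above, assuming nothing else rewrites the string in between):

    import re
    s = "Artist \u2011 Name\n   Title"
    s = s.replace("\u2011", "-")        # non-breaking hyphen -> ASCII hyphen
    s = re.sub(r"\s+", " ", s).strip()  # collapse the line wrap to one space
    print(s)  # Artist - Name Title
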
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
         if ch.isspace():
             while i < len(literal) and literal[i].isspace():
                 i += 1
-            out.append(r"\\s*")
+            out.append(r"\s*")
             continue
         out.append(re.escape(ch))
         i += 1

@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
         raise ValueError("extract template must contain at least one (field)")

     field_names: List[str] = []
-    parts: List[str] = [r"^\\s*"]
+    parts: List[str] = [r"^\s*"]
     last_end = 0

     for idx, m in enumerate(matches):

@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
             raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
         field_names.append(raw_name)

+        name_lower = raw_name.lower()
         is_last = idx == (len(matches) - 1)
         if is_last:
             parts.append(fr"(?P<{raw_name}>.+)")
         else:
-            parts.append(fr"(?P<{raw_name}>.+?)")
+            # Heuristic: common numeric fields should capture full digit runs.
+            # This avoids ambiguous splits like track='2', title='3 ...'.
+            if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
+                parts.append(fr"(?P<{raw_name}>\d+)")
+            else:
+                parts.append(fr"(?P<{raw_name}>.+?)")

         last_end = m.end()

     tail = tpl[last_end:]
     if tail:
         parts.append(_literal_to_title_pattern_regex(tail))
-    parts.append(r"\\s*$")
+    parts.append(r"\s*$")

     rx = "".join(parts)
     return re.compile(rx, flags=re.IGNORECASE), field_names

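A quick sketch of the digit-run heuristic (illustrative only; assumes this module's helpers are importable and that lowercase field names like 'track' and 'title' are accepted):

    rx, fields = _compile_extract_template("(track) (title)")
    m = rx.match("23 Some Song")
    if m:
        print({name: m.group(name) for name in fields})
    # With the heuristic: {'track': '23', 'title': 'Some Song'}
    # With the old lazy .+? a split like track='2', title='3 Some Song' was possible.
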
cmdlet/archive_file.py (new file, 426 lines)
@@ -0,0 +1,426 @@
"""Create a single .tar.zst archive from piped file selections."""

from __future__ import annotations

import re
import sys
import tarfile
import tempfile
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Sequence, Set
from urllib.parse import parse_qs, urlparse

from SYS.logger import log

import pipeline as ctx
from config import resolve_output_dir
from . import _shared as sh

Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
coerce_to_pipe_object = sh.coerce_to_pipe_object
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help


_SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")

def _extract_sha256_hex(item: Any) -> str:
    try:
        if isinstance(item, dict):
            h = item.get("hash")
        else:
            h = getattr(item, "hash", None)
        if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
            return h.strip().lower()
    except Exception:
        pass
    return ""


def _extract_store_name(item: Any) -> str:
    try:
        if isinstance(item, dict):
            s = item.get("store")
        else:
            s = getattr(item, "store", None)
        return str(s or "").strip()
    except Exception:
        return ""


def _extract_url(item: Any) -> str:
    try:
        u = sh.get_field(item, "url") or sh.get_field(item, "target")
        if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
            return u.strip()
    except Exception:
        pass
    return ""


def _extract_hash_from_hydrus_file_url(url: str) -> str:
    try:
        parsed = urlparse(str(url))
        if not (parsed.path or "").endswith("/get_files/file"):
            return ""
        qs = parse_qs(parsed.query or "")
        h = (qs.get("hash") or [""])[0]
        if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
            return h.strip().lower()
    except Exception:
        pass
    return ""

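For context, the Hydrus client API serves file content from a /get_files/file endpoint with the SHA-256 in the query string, which is exactly what the helper above parses (illustrative values; 45869 is the default client API port):

    url = "http://127.0.0.1:45869/get_files/file?hash=" + "ab" * 32
    _extract_hash_from_hydrus_file_url(url)   # -> 'abab...ab' (64 hex chars, lowercased)
    _extract_hash_from_hydrus_file_url("https://example.com/file?hash=zz")  # -> ''
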
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
    instances: Set[str] = set()
    try:
        store_cfg = config.get("store") if isinstance(config, dict) else None
        if isinstance(store_cfg, dict):
            hydrus_cfg = store_cfg.get("hydrusnetwork")
            if isinstance(hydrus_cfg, dict):
                instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
    except Exception:
        instances = set()
    return instances


def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
    """Download a Hydrus-backed item to a local temp path (best-effort).

    This is intentionally side-effect free except for writing the local temp file.
    """
    try:
        from config import get_hydrus_access_key, get_hydrus_url
        from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
    except Exception:
        return None

    store_name = _extract_store_name(item)
    store_lower = store_name.lower()
    hydrus_instances = _hydrus_instance_names(config)
    store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)

    url = _extract_url(item)
    file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
    if not file_hash:
        return None

    # Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
    is_hydrus_url = False
    if url:
        try:
            parsed = urlparse(url)
            is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
        except Exception:
            is_hydrus_url = False
    if not (is_hydrus_url or store_hint):
        return None

    # Prefer store name as instance key; fall back to "home".
    access_key = None
    hydrus_url = None
    for inst in [s for s in [store_lower, "home"] if s]:
        try:
            access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
            hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
            if access_key and hydrus_url:
                break
        except Exception:
            access_key = None
            hydrus_url = None

    if not access_key or not hydrus_url:
        return None

    client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
    file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)

    # Best-effort extension from Hydrus metadata.
    suffix = ".hydrus"
    try:
        meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
        entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
        if isinstance(entries, list) and entries:
            entry = entries[0]
            if isinstance(entry, dict):
                ext = entry.get("ext")
                if isinstance(ext, str) and ext.strip():
                    cleaned = ext.strip()
                    if not cleaned.startswith("."):
                        cleaned = "." + cleaned.lstrip(".")
                    if len(cleaned) <= 12:
                        suffix = cleaned
    except Exception:
        pass

    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    dest = output_dir / f"{file_hash}{suffix}"
    if dest.exists():
        dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"

    headers = {"Hydrus-Client-API-Access-Key": access_key}
    download_hydrus_file(file_url, headers, dest, timeout=60.0)
    try:
        if dest.exists() and dest.is_file():
            return dest
    except Exception:
        return None
    return None


def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
    """Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
    # 1) Direct local path
    try:
        po = coerce_to_pipe_object(item, None)
        raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
        if raw_path:
            p = Path(str(raw_path)).expanduser()
            if p.exists():
                return p, None
    except Exception:
        pass

    # 2) Store-backed path
    file_hash = _extract_sha256_hex(item)
    store_name = _extract_store_name(item)
    if file_hash and store_name:
        try:
            from Store import Store

            store = Store(config)
            backend = store[store_name]
            src = backend.get_file(file_hash)
            if isinstance(src, Path):
                if src.exists():
                    return src, None
            elif isinstance(src, str) and src.strip():
                cand = Path(src).expanduser()
                if cand.exists():
                    return cand, None
                # If the backend returns a URL (HydrusNetwork), download it.
                if src.strip().lower().startswith(("http://", "https://")):
                    tmp_base = None
                    try:
                        tmp_base = config.get("temp") if isinstance(config, dict) else None
                    except Exception:
                        tmp_base = None
                    out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
                    out_dir = out_dir / "archive" / "hydrus"
                    downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
                    if downloaded is not None:
                        return downloaded, downloaded
        except Exception:
            pass

    # 3) Hydrus-backed items without backend.get_file path.
    try:
        tmp_base = config.get("temp") if isinstance(config, dict) else None
    except Exception:
        tmp_base = None
    out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
    out_dir = out_dir / "archive" / "hydrus"
    downloaded = _maybe_download_hydrus_item(item, config, out_dir)
    if downloaded is not None:
        return downloaded, downloaded

    return None, None


def _unique_arcname(name: str, seen: Set[str]) -> str:
    base = str(name or "").replace("\\", "/")
    base = base.lstrip("/")
    if not base:
        base = "file"
    if base not in seen:
        seen.add(base)
        return base

    stem = base
    suffix = ""
    if "/" not in base:
        p = Path(base)
        stem = p.stem
        suffix = p.suffix

    n = 2
    while True:
        candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
        if candidate not in seen:
            seen.add(candidate)
            return candidate
        n += 1

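A small illustration of the collision handling above (not part of the diff): duplicate basenames get a ' (n)' suffix inserted before the extension:

    seen: set[str] = set()
    _unique_arcname("report.pdf", seen)  # -> 'report.pdf'
    _unique_arcname("report.pdf", seen)  # -> 'report (2).pdf'
    _unique_arcname("report.pdf", seen)  # -> 'report (3).pdf'
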
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    parsed = parse_cmdlet_args(args, CMDLET)

    level_raw = parsed.get("level")
    try:
        level = int(level_raw) if level_raw is not None else 11
    except Exception:
        level = 11
    if level < 1:
        level = 1
    if level > 22:
        level = 22

    # Output destination is controlled by the shared -path behavior in the pipeline runner.
    # This cmdlet always creates the archive in the configured output directory and emits it.

    # Collect piped items; archive-file is a batch command (single output).
    items: List[Any] = []
    if isinstance(result, list):
        items = list(result)
    elif result is not None:
        items = [result]

    if not items:
        log("No piped items provided to archive-file", file=sys.stderr)
        return 1

    temp_downloads: List[Path] = []
    try:
        paths: List[Path] = []
        for it in items:
            p, tmp = _resolve_existing_or_fetch_path(it, config)
            if p is None:
                continue
            paths.append(p)
            if tmp is not None:
                temp_downloads.append(tmp)

        # Keep stable order, remove duplicates.
        uniq: List[Path] = []
        seen_paths: Set[str] = set()
        for p in paths:
            key = str(p.resolve()) if p.exists() else str(p)
            if key in seen_paths:
                continue
            seen_paths.add(key)
            uniq.append(p)
        paths = uniq

        if not paths:
            log("No existing file paths found in piped items", file=sys.stderr)
            return 1

        out_dir = resolve_output_dir(config)
        try:
            out_dir.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass
        stamp = time.strftime("%Y%m%d_%H%M%S")
        out_path = out_dir / f"archive_{stamp}.tar.zst"
        try:
            out_path = sh._unique_destination_path(out_path)  # type: ignore[attr-defined]
        except Exception:
            pass

        try:
            out_path.parent.mkdir(parents=True, exist_ok=True)
        except Exception as exc:
            log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
            return 1

        # Import zstandard lazily so the rest of the CLI still runs without it.
        try:
            import zstandard as zstd  # type: ignore
        except Exception:
            log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
            return 1

        # Write tar stream into zstd stream.
        try:
            with open(out_path, "wb") as out_handle:
                cctx = zstd.ZstdCompressor(level=level)
                with cctx.stream_writer(out_handle) as compressor:
                    with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
                        seen_names: Set[str] = set()
                        for p in paths:
                            arcname = _unique_arcname(p.name, seen_names)
                            # For directories, tarfile will include contents when recursive=True.
                            try:
                                tf.add(str(p), arcname=arcname, recursive=True)
                            except Exception as exc:
                                log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
        except Exception as exc:
            log(f"Archive creation failed: {exc}", file=sys.stderr)
            return 1

        # Emit a single artifact downstream.
        hash_value = None
        try:
            from SYS.utils import sha256_file

            hash_value = sha256_file(out_path)
        except Exception:
            hash_value = None

        pipe_obj = create_pipe_object_result(
            source="archive",
            identifier=out_path.stem,
            file_path=str(out_path),
            cmdlet_name="archive-file",
            title=out_path.name,
            hash_value=hash_value,
            is_temp=True,
            store="PATH",
            extra={
                "target": str(out_path),
                "archive_format": "tar.zst",
                "compression": "zstd",
                "level": level,
                "source_count": len(paths),
                "source_paths": [str(p) for p in paths],
            },
        )
        ctx.emit(pipe_obj)
        return 0
    finally:
        # Best-effort cleanup of any temp Hydrus downloads we created.
        for tmp in temp_downloads:
            try:
                tmp.unlink(missing_ok=True)  # type: ignore[arg-type]
            except TypeError:
                try:
                    if tmp.exists():
                        tmp.unlink()
                except Exception:
                    pass
            except Exception:
                pass


CMDLET = Cmdlet(
    name="archive-file",
    summary="Archive piped files into a single .tar.zst.",
    usage="@N | archive-file [-level <1-22>] [-path <path>]",
    arg=[
        CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
        SharedArgs.PATH,
    ],
    detail=[
        "- Example: @1-5 | archive-file",
        "- Default zstd level is 11.",
        "- Emits one output item (the archive) for downstream piping.",
    ],
)

CMDLET.exec = _run
CMDLET.register()

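For anyone verifying the output format: the archive is a plain tar stream inside a zstd frame, so it can be listed back with the same two libraries the cmdlet uses (a minimal sketch; the filename is made up to match the cmdlet's naming pattern):

    import tarfile
    import zstandard as zstd

    with open("archive_20250101_120000.tar.zst", "rb") as fh:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(fh) as reader:
            # 'r|' reads the tar as a non-seekable stream, matching how it was written.
            with tarfile.open(fileobj=reader, mode="r|") as tf:
                for member in tf:
                    print(member.name, member.size)
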
@@ -297,51 +297,81 @@ class Delete_File(sh.Cmdlet):
             should_try_hydrus = False

         if should_try_hydrus and hash_hex:
-            client = None
-            if store:
-                # Store specified: do not fall back to a global/default Hydrus client.
-                try:
-                    registry = Store(config)
-                    backend = registry[str(store)]
-                    candidate = getattr(backend, "_client", None)
-                    if candidate is not None and hasattr(candidate, "_post"):
-                        client = candidate
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
-                        return False
-            else:
-                # No store context; use default Hydrus client.
-                try:
-                    client = hydrus_wrapper.get_client(config)
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log("Hydrus client unavailable", file=sys.stderr)
-                        return False
-
-            payload: Dict[str, Any] = {"hashes": [hash_hex]}
-            if reason:
-                payload["reason"] = reason
+            # Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
+            # This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
+            did_backend_delete = False
             try:
-                client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                if backend is not None:
+                    deleter = getattr(backend, "delete_file", None)
+                    if callable(deleter):
+                        did_backend_delete = bool(deleter(hash_hex, reason=reason))
+            except Exception:
+                did_backend_delete = False
+
+            if did_backend_delete:
                 hydrus_deleted = True
                 title_str = str(title_val).strip() if title_val else ""
                 if title_str:
                     debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
                 else:
                     debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
-            except Exception:
-                # If it's not in Hydrus (e.g. 404 or similar), that's fine
-                if not local_deleted:
-                    return []
+            else:
+                # Fallback to direct client calls.
+                client = None
+                if store:
+                    # Store specified: do not fall back to a global/default Hydrus client.
+                    try:
+                        registry = Store(config)
+                        backend = registry[str(store)]
+                        candidate = getattr(backend, "_client", None)
+                        if candidate is not None and hasattr(candidate, "_post"):
+                            client = candidate
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
+                            return False
+                else:
+                    # No store context; use default Hydrus client.
+                    try:
+                        client = hydrus_wrapper.get_client(config)
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log("Hydrus client unavailable", file=sys.stderr)
+                            return False
+
+                payload: Dict[str, Any] = {"hashes": [hash_hex]}
+                if reason:
+                    payload["reason"] = reason
+                try:
+                    client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                    # Best-effort clear deletion record if supported by this client.
+                    try:
+                        clearer = getattr(client, "clear_file_deletion_record", None)
+                        if callable(clearer):
+                            clearer([hash_hex])
+                        else:
+                            client._post("/add_files/clear_file_deletion_record", data={"hashes": [hash_hex]})  # type: ignore[attr-defined]
+                    except Exception:
+                        pass
+
+                    hydrus_deleted = True
+                    title_str = str(title_val).strip() if title_val else ""
+                    if title_str:
+                        debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
+                    else:
+                        debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
+                except Exception:
+                    # If it's not in Hydrus (e.g. 404 or similar), that's fine
+                    if not local_deleted:
+                        return []

         if hydrus_deleted and hash_hex:
             size_hint = None

cmdlet/download_data.py (new file, 267 lines)
@@ -0,0 +1,267 @@
"""Smart downloader front-door.
|
||||
|
||||
Currently focused on Internet Archive item pages:
|
||||
- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
|
||||
- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
|
||||
- Selecting a row via @N expands to download-file <direct-url>
|
||||
|
||||
This enables:
|
||||
search-provider -provider internetarchive "..."
|
||||
@3 # shows formats table
|
||||
@2 | add-file ... # downloads selected file then pipes to add-file
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Sequence, cast
|
||||
from urllib.parse import quote
|
||||
|
||||
from SYS.logger import log, debug
|
||||
import pipeline as pipeline_context
|
||||
from result_table import ResultTable
|
||||
|
||||
from . import _shared as sh
|
||||
|
||||
Cmdlet = sh.Cmdlet
|
||||
SharedArgs = sh.SharedArgs
|
||||
parse_cmdlet_args = sh.parse_cmdlet_args
|
||||
get_field = sh.get_field
|
||||
|
||||
|
||||
def _extract_ia_identifier(text: str) -> str:
|
||||
s = str(text or "").strip()
|
||||
if not s:
|
||||
return ""
|
||||
|
||||
# https://archive.org/details/<identifier>
|
||||
m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
|
||||
if m:
|
||||
return str(m.group(1) or "").strip()
|
||||
|
||||
# internetarchive:<identifier>
|
||||
if s.lower().startswith("internetarchive:"):
|
||||
return s.split(":", 1)[-1].strip()
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
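A quick illustration of the identifier extraction (hypothetical inputs):

    _extract_ia_identifier("https://archive.org/details/some-item-2021")  # -> 'some-item-2021'
    _extract_ia_identifier("internetarchive:some-item-2021")              # -> 'some-item-2021'
    _extract_ia_identifier("https://example.com/details/x")               # -> ''
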
class Download_Data(Cmdlet):
    def __init__(self) -> None:
        super().__init__(
            name="download-data",
            summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
            usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
            alias=[],
            arg=[SharedArgs.URL],
            detail=[
                "For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
                "Select a file row with @N to run download-file on that direct URL.",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        try:
            # parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
            parsed = parse_cmdlet_args(args, cast(Any, self))
        except Exception:
            parsed = {}

        raw_urls = parsed.get("url", [])
        if isinstance(raw_urls, str):
            raw_urls = [raw_urls]
        url_arg = str(raw_urls[0]).strip() if raw_urls else ""

        piped_items: List[Any] = []
        if isinstance(result, list):
            piped_items = list(result)
        elif result is not None:
            piped_items = [result]

        # Prefer piped item target if present.
        target = ""
        if piped_items:
            target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
        if not target:
            target = url_arg

        table_name = ""
        try:
            table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
        except Exception:
            table_name = ""

        identifier = ""
        if piped_items:
            md = get_field(piped_items[0], "full_metadata")
            if isinstance(md, dict):
                identifier = str(md.get("identifier") or "").strip()
        if not identifier:
            identifier = _extract_ia_identifier(target)

        if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
            return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)

        log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
        return 1

    @staticmethod
    def _run_internetarchive(item: Any, *, identifier: str) -> int:
        try:
            from Provider.internetarchive import _ia as _ia_loader
        except Exception as exc:
            log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
            return 1

        def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
            try:
                source = str(f.get("source") or "").strip().lower()
                fmt = str(f.get("format") or "").strip().lower()
            except Exception:
                source = ""
                fmt = ""

            if source == "metadata":
                return True
            if fmt in {"metadata", "archive bittorrent"}:
                return True
            if fmt.startswith("thumbnail"):
                return True
            return False

        ia = None
        try:
            ia = _ia_loader()
        except Exception as exc:
            log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
            return 1

        try:
            get_item = getattr(ia, "get_item", None)
            if not callable(get_item):
                raise Exception("internetarchive.get_item is not available")
            ia_item = cast(Any, get_item(str(identifier)))
        except Exception as exc:
            log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
            return 1

        files: List[Dict[str, Any]] = []
        try:
            raw_files = getattr(ia_item, "files", None)
            if isinstance(raw_files, list):
                for f in raw_files:
                    if isinstance(f, dict):
                        files.append(f)
        except Exception:
            files = []

        if not files:
            try:
                for f in ia_item.get_files():
                    name = getattr(f, "name", None)
                    if not name and isinstance(f, dict):
                        name = f.get("name")
                    if not name:
                        continue
                    files.append(
                        {
                            "name": str(name),
                            "size": getattr(f, "size", None),
                            "format": getattr(f, "format", None),
                            "source": getattr(f, "source", None),
                        }
                    )
            except Exception:
                files = []

        if not files:
            log("download-data: Internet Archive item has no files", file=sys.stderr)
            return 1

        # Prefer non-metadata files for the picker.
        candidates = [f for f in files if not _is_ia_metadata_file(f)]
        if not candidates:
            candidates = list(files)

        def _key(f: Dict[str, Any]) -> tuple[str, str]:
            fmt = str(f.get("format") or "").strip().lower()
            name = str(f.get("name") or "").strip().lower()
            return (fmt, name)

        candidates.sort(key=_key)

        title = ""
        try:
            title = str(get_field(item, "title") or "").strip()
        except Exception:
            title = ""

        table_title = f"Internet Archive: {title}".strip().rstrip(":")
        if not title:
            table_title = f"Internet Archive: {identifier}".strip().rstrip(":")

        table = ResultTable(table_title).set_preserve_order(True)
        table.set_table("internetarchive.formats")
        # Selecting a row should expand to `download-file <direct-url>`.
        table.set_source_command("download-file", [])

        rows: List[Dict[str, Any]] = []
        for f in candidates:
            name = str(f.get("name") or "").strip()
            if not name:
                continue

            fmt = str(f.get("format") or "").strip()
            src = str(f.get("source") or "").strip()

            size_val: Any = f.get("size")
            try:
                size_val = int(size_val) if size_val not in (None, "") else ""
            except Exception:
                # Keep as-is; ResultTable will stringify.
                pass

            direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"

            row_item: Dict[str, Any] = {
                "table": "internetarchive",
                "title": fmt or name,
                "path": direct_url,
                "url": direct_url,
                "columns": [
                    ("Format", fmt),
                    ("Name", name),
                    ("Size", size_val),
                    ("Source", src),
                ],
                # Used by @N expansion: download-file <direct-url>
                "_selection_args": [direct_url],
                "full_metadata": {
                    "identifier": identifier,
                    "name": name,
                    "format": fmt,
                    "source": src,
                    "size": f.get("size"),
                },
            }

            rows.append(row_item)
            table.add_result(row_item)

        if not rows:
            log("download-data: no downloadable files found for this item", file=sys.stderr)
            return 1

        try:
            pipeline_context.set_last_result_table(table, rows, subject=item)
            pipeline_context.set_current_stage_table(table)
        except Exception as exc:
            debug(f"[download-data] Failed to register result table: {exc}")

        return 0


CMDLET = Download_Data()

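Note on the direct-URL construction above: quote(name, safe='') percent-encodes every reserved character, including '/', so a file name with spaces or slashes still forms a single path segment (illustrative name):

    from urllib.parse import quote
    identifier = "some-item"
    name = "disc 1/track 01.flac"
    print(f"https://archive.org/download/{identifier}/{quote(name, safe='')}")
    # https://archive.org/download/some-item/disc%201%2Ftrack%2001.flac
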
@@ -16,6 +16,7 @@ from . import _shared as sh

 Cmdlet = sh.Cmdlet
 CmdletArg = sh.CmdletArg
+SharedArgs = sh.SharedArgs
 create_pipe_object_result = sh.create_pipe_object_result
 get_field = sh.get_field
 get_pipe_object_hash = sh.get_pipe_object_hash
@@ -37,7 +38,6 @@ except ImportError:
 try:
     from metadata import (
         read_tags_from_file,
         dedup_tags_by_namespace,
         merge_multiple_tag_lists,
     )
     HAS_METADATA_API = True
@@ -87,7 +87,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     delete_after = parsed.get("delete", False)

     output_override: Optional[Path] = None
-    output_arg = parsed.get("output")
+    output_arg = parsed.get("path")
     if output_arg:
         try:
             output_override = Path(str(output_arg)).expanduser()
@@ -928,10 +928,10 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
 CMDLET = Cmdlet(
     name="merge-file",
     summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
-    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
+    usage="merge-file [-delete] [-path <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
     arg=[
         CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
-        CmdletArg("-output", description="Override output file path."),
+        SharedArgs.PATH,
         CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
     ],
     detail=[
Block a user