dfd
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-27 21:24:27 -08:00
parent fcdd507d00
commit 8288ea8c66
16 changed files with 530 additions and 339 deletions

View File

@@ -7,6 +7,8 @@ import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlparse
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.logger import log
@@ -71,6 +73,143 @@ def _extract_identifier_from_any(value: str) -> str:
return raw
def extract_identifier(value: str) -> str:
    """Extract an IA identifier from a URL, tag, or bare id.

    Public entry point; the work is delegated unchanged to
    ``_extract_identifier_from_any``.
    """
    identifier = _extract_identifier_from_any(value)
    return identifier
def is_details_url(url: str) -> bool:
    """Return True when *url* is an archive.org ``/details/<identifier>`` URL.

    Args:
        url: Candidate URL; ``None`` and blank strings are treated as
            non-matching.

    Returns:
        True only for ``http://`` or ``https://`` URLs whose host is
        ``archive.org`` or one of its subdomains and whose path starts with
        ``/details/`` followed by a non-blank identifier segment.
    """
    raw = str(url or "").strip()
    if not raw.startswith(("http://", "https://")):
        return False

    try:
        parsed = urlparse(raw)
        host = (parsed.hostname or "").lower().strip()
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False

    # Require a label boundary before "archive.org": a bare suffix match
    # would also accept unrelated hosts such as "evilarchive.org".
    if host != "archive.org" and not host.endswith(".archive.org"):
        return False

    parts = [segment for segment in (parsed.path or "").split("/") if segment]
    return (
        len(parts) >= 2
        and parts[0].lower() == "details"
        and bool(parts[1].strip())
    )
def is_download_file_url(url: str) -> bool:
    """Return True when *url* is an archive.org direct file download URL.

    The expected path shape is ``/download/<identifier>/<filename>``; extra
    trailing segments are allowed.

    Args:
        url: Candidate URL; ``None`` and blank strings are treated as
            non-matching.

    Returns:
        True only for ``http://`` or ``https://`` URLs whose host is
        ``archive.org`` or one of its subdomains and whose path starts with
        ``/download/`` followed by non-blank identifier and filename
        segments.
    """
    raw = str(url or "").strip()
    if not raw.startswith(("http://", "https://")):
        return False

    try:
        parsed = urlparse(raw)
        host = (parsed.hostname or "").lower().strip()
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False

    # Require a label boundary before "archive.org": a bare suffix match
    # would also accept unrelated hosts such as "notarchive.org".
    if host != "archive.org" and not host.endswith(".archive.org"):
        return False

    parts = [segment for segment in (parsed.path or "").split("/") if segment]
    # /download/<identifier>/<filename>
    return (
        len(parts) >= 3
        and parts[0].lower() == "download"
        and bool(parts[1].strip())
        and bool(parts[2].strip())
    )
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
    """Return a sorted list of downloadable files for an IA identifier.

    The item's ``files`` attribute is used when it is a list of dicts;
    otherwise the entries yielded by ``item.get_files()`` are converted to
    dicts. Entries that look like IA bookkeeping (metadata, torrent,
    thumbnails) are dropped unless that would leave nothing to return.

    Args:
        identifier: Internet Archive item identifier; surrounding whitespace
            is ignored.

    Returns:
        A list of dicts with the keys ``name``, ``size``, ``format``,
        ``source`` and ``direct_url``, sorted case-insensitively by format
        and then by name. Empty when the identifier is blank or no file
        listing could be read.

    Raises:
        Exception: If the object returned by ``_ia()`` has no callable
            ``get_item``, or if the item lookup itself fails.
    """
    ident = str(identifier or "").strip()
    if not ident:
        return []

    ia = _ia()
    get_item = getattr(ia, "get_item", None)
    if not callable(get_item):
        raise Exception("internetarchive.get_item is not available")

    try:
        item: Any = get_item(ident)
    except Exception as exc:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Internet Archive item lookup failed: {exc}") from exc

    def _field(entry: Any, key: str) -> Any:
        # Entries may be file objects or plain dicts; read either shape so
        # dict entries do not lose their size/format/source.
        value = getattr(entry, key, None)
        if value is None and isinstance(entry, dict):
            value = entry.get(key)
        return value

    # Preferred path: the raw list of file dicts exposed by the item.
    files: List[Dict[str, Any]] = []
    try:
        raw_files = getattr(item, "files", None)
        if isinstance(raw_files, list):
            files = [f for f in raw_files if isinstance(f, dict)]
    except Exception:
        files = []

    # Fallback path: build dicts from whatever get_files() yields.
    if not files:
        try:
            for f in item.get_files():
                name = _field(f, "name")
                if not name:
                    continue
                files.append(
                    {
                        "name": str(name),
                        "size": _field(f, "size"),
                        "format": _field(f, "format"),
                        "source": _field(f, "source"),
                    }
                )
        except Exception:
            # Best effort: a partially read listing is discarded.
            files = []

    if not files:
        return []

    def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
        try:
            source = str(f.get("source") or "").strip().lower()
            fmt = str(f.get("format") or "").strip().lower()
        except Exception:
            source = ""
            fmt = ""

        if source == "metadata":
            return True
        if fmt in {"metadata", "archive bittorrent"}:
            return True
        return fmt.startswith("thumbnail")

    # If filtering removes everything, fall back to the unfiltered listing.
    candidates = [f for f in files if not _is_ia_metadata_file(f)] or files

    out: List[Dict[str, Any]] = []
    for f in candidates:
        name = str(f.get("name") or "").strip()
        if not name:
            continue
        direct_url = f"https://archive.org/download/{ident}/{quote(name, safe='')}"
        out.append(
            {
                "name": name,
                "size": f.get("size"),
                "format": f.get("format"),
                "source": f.get("source"),
                "direct_url": direct_url,
            }
        )

    def _key(f: Dict[str, Any]) -> tuple[str, str]:
        fmt = str(f.get("format") or "").strip().lower()
        name = str(f.get("name") or "").strip().lower()
        return (fmt, name)

    out.sort(key=_key)
    return out
def _extract_download_filename_from_url(url: str) -> str:
raw = str(url or "").strip()
if not raw: