dfd
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
This commit is contained in:
@@ -7,6 +7,8 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from urllib.parse import quote, urlparse
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from ProviderCore.download import sanitize_filename
|
||||
from SYS.logger import log
|
||||
@@ -71,6 +73,143 @@ def _extract_identifier_from_any(value: str) -> str:
|
||||
return raw
|
||||
|
||||
|
||||
def extract_identifier(value: str) -> str:
    """Resolve a URL, tag, or bare id to an IA identifier.

    Public entry point; the actual parsing is delegated to the private
    ``_extract_identifier_from_any`` helper.
    """
    identifier = _extract_identifier_from_any(value)
    return identifier
|
||||
|
||||
|
||||
def is_details_url(url: str) -> bool:
    """Return True if *url* is an archive.org ``/details/<identifier>`` URL.

    Accepts only ``http://`` / ``https://`` URLs whose host is ``archive.org``
    or one of its subdomains and whose path begins with ``/details/`` followed
    by a non-empty identifier segment. Falsy input (``None``, ``""``) and
    unparseable URLs yield False.
    """
    raw = str(url or "").strip()
    if not raw.startswith(("http://", "https://")):
        return False

    try:
        parsed = urlparse(raw)
        host = (parsed.hostname or "").lower().strip()
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False

    # Require an exact match or a dot-delimited subdomain; a bare
    # ``endswith("archive.org")`` would also accept look-alike hosts such as
    # ``evilarchive.org``.
    if host != "archive.org" and not host.endswith(".archive.org"):
        return False

    parts = [segment for segment in (parsed.path or "").split("/") if segment]
    return (
        len(parts) >= 2
        and parts[0].lower() == "details"
        and bool(parts[1].strip())
    )
|
||||
|
||||
|
||||
def is_download_file_url(url: str) -> bool:
    """Return True if *url* is an archive.org direct file download URL.

    The expected shape is ``/download/<identifier>/<filename>`` on
    ``archive.org`` or one of its subdomains, over ``http://`` or
    ``https://``. Falsy input (``None``, ``""``) and unparseable URLs yield
    False.
    """
    raw = str(url or "").strip()
    if not raw.startswith(("http://", "https://")):
        return False

    try:
        parsed = urlparse(raw)
        host = (parsed.hostname or "").lower().strip()
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False

    # Require an exact match or a dot-delimited subdomain; a bare
    # ``endswith("archive.org")`` would also accept look-alike hosts such as
    # ``notarchive.org``.
    if host != "archive.org" and not host.endswith(".archive.org"):
        return False

    # /download/<identifier>/<filename>
    parts = [segment for segment in (parsed.path or "").split("/") if segment]
    return (
        len(parts) >= 3
        and parts[0].lower() == "download"
        and bool(parts[1].strip())
        and bool(parts[2].strip())
    )
|
||||
|
||||
|
||||
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
    """Return a sorted list of downloadable files for an IA identifier.

    Each entry includes: name, size, format, source, direct_url.

    The item's ``files`` list is used when it is a list of dicts; otherwise
    ``item.get_files()`` is iterated as a fallback. IA-generated metadata,
    torrent and thumbnail entries are filtered out unless that would leave
    nothing, in which case every file is returned. Results are ordered by
    (format, name), case-insensitively.

    Returns an empty list for a blank identifier or when no files can be
    read from the item.

    Raises:
        Exception: if the object returned by ``_ia()`` has no callable
            ``get_item``, or if the item lookup itself fails (the original
            error is chained as ``__cause__``).
    """
    ident = str(identifier or "").strip()
    if not ident:
        return []

    # NOTE(review): _ia() is defined elsewhere in this file; presumably it
    # returns the `internetarchive` module - confirm.
    ia = _ia()
    get_item = getattr(ia, "get_item", None)
    if not callable(get_item):
        raise Exception("internetarchive.get_item is not available")

    try:
        item: Any = get_item(ident)
    except Exception as exc:
        # Chain the cause so the underlying traceback is not lost.
        raise Exception(f"Internet Archive item lookup failed: {exc}") from exc

    def _field(entry: Any, key: str) -> Any:
        """Read *key* from a file record that is either a dict or an object."""
        if isinstance(entry, dict):
            return entry.get(key)
        return getattr(entry, key, None)

    # Preferred source: the raw list of file dicts exposed on the item.
    files: List[Dict[str, Any]] = []
    try:
        raw_files = getattr(item, "files", None)
        if isinstance(raw_files, list):
            files = [f for f in raw_files if isinstance(f, dict)]
    except Exception:
        # Best-effort: fall through to get_files() below.
        files = []

    # Fallback: iterate get_files(). Entries may be objects or dicts, so all
    # fields go through _field(); previously dict entries kept only their
    # name and lost size/format/source.
    if not files:
        try:
            for f in item.get_files():
                name = _field(f, "name")
                if not name:
                    continue
                files.append(
                    {
                        "name": str(name),
                        "size": _field(f, "size"),
                        "format": _field(f, "format"),
                        "source": _field(f, "source"),
                    }
                )
        except Exception:
            # Best-effort: an unreadable listing is reported as "no files".
            files = []

    if not files:
        return []

    def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
        """Return True for metadata, torrent and thumbnail entries."""
        try:
            source = str(f.get("source") or "").strip().lower()
            fmt = str(f.get("format") or "").strip().lower()
        except Exception:
            source = ""
            fmt = ""

        if source == "metadata":
            return True
        if fmt in {"metadata", "archive bittorrent"}:
            return True
        return fmt.startswith("thumbnail")

    candidates = [f for f in files if not _is_ia_metadata_file(f)]
    if not candidates:
        # Everything looked like metadata; show all files rather than none.
        candidates = files

    out: List[Dict[str, Any]] = []
    for f in candidates:
        name = str(f.get("name") or "").strip()
        if not name:
            continue
        out.append(
            {
                "name": name,
                "size": f.get("size"),
                "format": f.get("format"),
                "source": f.get("source"),
                # safe='' percent-encodes every reserved character in the name.
                "direct_url": f"https://archive.org/download/{ident}/{quote(name, safe='')}",
            }
        )

    def _key(f: Dict[str, Any]) -> tuple[str, str]:
        fmt = str(f.get("format") or "").strip().lower()
        name = str(f.get("name") or "").strip().lower()
        return (fmt, name)

    out.sort(key=_key)
    return out
|
||||
|
||||
|
||||
def _extract_download_filename_from_url(url: str) -> str:
|
||||
raw = str(url or "").strip()
|
||||
if not raw:
|
||||
|
||||
Reference in New Issue
Block a user