fdf
This commit is contained in:
@@ -361,6 +361,44 @@ def is_download_file_url(url: str) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _archive_item_access(identifier: str) -> Dict[str, Any]:
|
||||
ident = str(identifier or "").strip()
|
||||
if not ident:
|
||||
return {"mediatype": "", "lendable": False, "collection": []}
|
||||
|
||||
session = requests.Session()
|
||||
try:
|
||||
response = session.get(f"https://archive.org/metadata/{ident}", timeout=8)
|
||||
response.raise_for_status()
|
||||
data = response.json() if response is not None else {}
|
||||
except Exception:
|
||||
return {"mediatype": "", "lendable": False, "collection": []}
|
||||
finally:
|
||||
try:
|
||||
session.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
meta = data.get("metadata", {}) if isinstance(data, dict) else {}
|
||||
if not isinstance(meta, dict):
|
||||
meta = {}
|
||||
|
||||
mediatype = str(meta.get("mediatype") or "").strip().lower()
|
||||
collection = meta.get("collection")
|
||||
values: List[str] = []
|
||||
if isinstance(collection, list):
|
||||
values = [str(x).strip().lower() for x in collection if str(x).strip()]
|
||||
elif isinstance(collection, str) and collection.strip():
|
||||
values = [collection.strip().lower()]
|
||||
|
||||
lendable = any(v in {"inlibrary", "lendinglibrary"} for v in values)
|
||||
return {
|
||||
"mediatype": mediatype,
|
||||
"lendable": lendable,
|
||||
"collection": values,
|
||||
}
|
||||
|
||||
|
||||
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
|
||||
"""Return a sorted list of downloadable files for an IA identifier.
|
||||
|
||||
@@ -620,6 +658,11 @@ class InternetArchive(Provider):
|
||||
quiet_mode: bool,
|
||||
) -> Optional[int]:
|
||||
"""Generic hook for download-file to show a selection table for IA items."""
|
||||
try:
|
||||
if self._should_delegate_borrow(str(url or "")):
|
||||
return None
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.field_access import get_field as sh_get_field
|
||||
return maybe_show_formats_table(
|
||||
raw_urls=[url] if url else [],
|
||||
@@ -638,6 +681,72 @@ class InternetArchive(Provider):
|
||||
self._collection = conf.get("collection") or conf.get("default_collection")
|
||||
self._mediatype = conf.get("mediatype") or conf.get("default_mediatype")
|
||||
|
||||
@staticmethod
def _should_delegate_borrow(url: str) -> bool:
    """Decide whether *url* should be handed off to the borrow flow.

    Returns True only when *url* passes ``is_details_url``, an identifier
    can be extracted from it, and ``_archive_item_access`` reports the item
    as lendable with mediatype ``"texts"``. Every other case returns False.
    """
    candidate = str(url or "").strip()

    # Only details URLs are considered; anything else never yields an identifier.
    identifier = extract_identifier(candidate) if is_details_url(candidate) else None
    if not identifier:
        return False

    info = _archive_item_access(identifier)
    if not info.get("lendable"):
        return False
    return str(info.get("mediatype") or "") == "texts"
|
||||
|
||||
def _download_via_openlibrary(self, url: str, output_dir: Path) -> Optional[Dict[str, Any]]:
    """Download *url* through the OpenLibrary provider and normalize the result.

    Returns None when the OpenLibrary provider cannot be imported. When the
    provider's ``download_url`` returns something other than a dict, that
    value is passed through unchanged. Otherwise a new dict is built with:

      - ``"path"``: the ``path`` entry of the provider result;
      - ``"metadata"`` / ``"full_metadata"``: the search result's
        ``full_metadata`` (only when non-empty);
      - ``"title"``: the search result's stripped title (only when non-empty);
      - ``"tags"``: the search result's ``tag`` values as strings (only when
        non-empty; a set is sorted first);
      - ``"media_kind"``: always ``"book"``;
      - ``"provider_action"``: always ``"borrow"``.
    """
    try:
        from Provider.openlibrary import OpenLibrary
    except Exception as exc:
        log(f"[internetarchive] OpenLibrary borrow helper unavailable: {exc}", file=sys.stderr)
        return None

    borrower = OpenLibrary(self.config)
    try:
        outcome = borrower.download_url(url, output_dir)
    finally:
        # Best-effort cleanup of the provider's session, if it has one.
        try:
            http_session = getattr(borrower, "_session", None)
            if http_session is not None:
                http_session.close()
        except Exception:
            pass

    if not isinstance(outcome, dict):
        return outcome

    hit = outcome.get("search_result")
    book_title = None
    book_meta: Dict[str, Any] = {}
    book_tags: List[str] = []

    if hit is not None:
        # Each attribute is read independently so one bad field does not
        # discard the others.
        try:
            book_title = str(getattr(hit, "title", "") or "").strip() or None
        except Exception:
            book_title = None

        try:
            book_meta = dict(getattr(hit, "full_metadata", {}) or {})
        except Exception:
            book_meta = {}

        try:
            raw_tags = getattr(hit, "tag", None)
            if isinstance(raw_tags, (set, list)):
                # Sets are sorted; lists keep their order.
                ordered = sorted(raw_tags) if isinstance(raw_tags, set) else raw_tags
                book_tags = [str(t) for t in ordered if t]
        except Exception:
            book_tags = []

    payload: Dict[str, Any] = {"path": outcome.get("path")}
    if book_meta:
        payload.update(metadata=book_meta, full_metadata=book_meta)
    if book_title:
        payload["title"] = book_title
    if book_tags:
        payload["tags"] = book_tags
    payload.update(media_kind="book", provider_action="borrow")
    return payload
|
||||
|
||||
def validate(self) -> bool:
|
||||
try:
|
||||
_ia()
|
||||
@@ -824,13 +933,18 @@ class InternetArchive(Provider):
|
||||
|
||||
return out
|
||||
|
||||
def download_url(self, url: str, output_dir: Path) -> Optional[Path]:
|
||||
def download_url(self, url: str, output_dir: Path) -> Optional[Any]:
|
||||
"""Download an Internet Archive URL.
|
||||
|
||||
Supports:
|
||||
- https://archive.org/details/<identifier>
|
||||
- https://archive.org/download/<identifier>/<filename>
|
||||
"""
|
||||
if self._should_delegate_borrow(url):
|
||||
delegated = self._download_via_openlibrary(url, output_dir)
|
||||
if delegated is not None:
|
||||
return delegated
|
||||
|
||||
sr = SearchResult(
|
||||
table="internetarchive",
|
||||
title=str(url),
|
||||
@@ -842,6 +956,15 @@ class InternetArchive(Provider):
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
raw_path = str(getattr(result, "path", "") or "").strip()
|
||||
|
||||
if self._should_delegate_borrow(raw_path):
|
||||
delegated = self._download_via_openlibrary(raw_path, output_dir)
|
||||
if isinstance(delegated, dict):
|
||||
delegated_path = delegated.get("path")
|
||||
if delegated_path:
|
||||
return Path(str(delegated_path))
|
||||
if isinstance(delegated, (str, Path)):
|
||||
return Path(str(delegated))
|
||||
|
||||
# Fast path for explicit IA file URLs.
|
||||
# This uses the shared direct downloader, which already integrates with
|
||||
# pipeline transfer progress bars.
|
||||
|
||||
Reference in New Issue
Block a user