This commit is contained in:
2026-02-19 20:38:54 -08:00
parent 615a4fd1a4
commit 39a84b3274
5 changed files with 1475 additions and 69 deletions

View File

@@ -4,6 +4,7 @@ import importlib
import os
import re
import sys
import requests
from pathlib import Path
from typing import Any, Dict, List, Optional
@@ -11,8 +12,9 @@ from urllib.parse import quote, unquote, urlparse
from API.HTTP import _download_direct_file
from ProviderCore.base import Provider, SearchResult
from SYS.utils import sanitize_filename
from SYS.utils import sanitize_filename, unique_path
from SYS.logger import log
from SYS.config import get_provider_block
# Helper for download-file: render selectable formats for a details URL.
def maybe_show_formats_table(
@@ -184,6 +186,96 @@ def _pick_provider_config(config: Any) -> Dict[str, Any]:
return {}
def _pick_archive_credentials(config: Any) -> tuple[Optional[str], Optional[str]]:
"""Resolve Archive.org credentials.
Preference order:
1) provider.internetarchive (email/username + password)
2) provider.openlibrary (email + password)
"""
if not isinstance(config, dict):
return None, None
ia_block = get_provider_block(config, "internetarchive")
if isinstance(ia_block, dict):
email = (
ia_block.get("email")
or ia_block.get("username")
or ia_block.get("user")
)
password = ia_block.get("password")
email_text = str(email).strip() if email else ""
password_text = str(password).strip() if password else ""
if email_text and password_text:
return email_text, password_text
ol_block = get_provider_block(config, "openlibrary")
if isinstance(ol_block, dict):
email = ol_block.get("email")
password = ol_block.get("password")
email_text = str(email).strip() if email else ""
password_text = str(password).strip() if password else ""
if email_text and password_text:
return email_text, password_text
return None, None
def _filename_from_response(url: str, response: requests.Response, suggested_filename: Optional[str] = None) -> str:
    """Choose a sanitized local filename for a downloaded response.

    Sources are tried in this order:

    1. *suggested_filename* - used as-is when it already has a suffix,
       otherwise the suffix guessed from *url* (if any) is appended.
    2. The ``Content-Disposition`` response header. The extended
       ``filename*=charset'lang'value`` form is preferred over the plain
       ``filename=`` form, and its ``charset'lang'`` prefix is stripped and
       used for percent-decoding instead of leaking into the name.
    3. The filename extracted from *url* by
       ``_extract_download_filename_from_url``.
    4. The last path component of *url*, or ``"download.bin"``.

    The chosen name is always passed through ``sanitize_filename``.
    """
    suggested = str(suggested_filename or "").strip()
    if suggested:
        # A caller-supplied suffix wins; only consult the URL when one is missing.
        if Path(suggested).suffix:
            return sanitize_filename(suggested)
        guessed_ext = Path(str(_extract_download_filename_from_url(url) or "")).suffix
        merged = f"{suggested}{guessed_ext}" if guessed_ext else suggested
        return sanitize_filename(merged)

    try:
        content_disposition = str(response.headers.get("content-disposition", "") or "")
    except Exception:
        # Header access is best-effort; fall back to URL-derived names.
        content_disposition = ""

    if content_disposition:
        candidate = ""

        # Extended parameter first: filename*=UTF-8''na%C3%AFve.pdf
        extended = re.search(
            r'filename\*\s*=\s*"?([^";]+)"?',
            content_disposition,
            flags=re.IGNORECASE,
        )
        if extended:
            raw_value = extended.group(1).strip()
            pieces = raw_value.split("'", 2)
            if len(pieces) == 3:
                charset, _language, encoded = pieces
                try:
                    candidate = unquote(encoded, encoding=charset or "utf-8", errors="replace")
                except LookupError:
                    # Unknown charset label: decode as UTF-8 instead.
                    candidate = unquote(encoded)
            else:
                # No charset'lang' prefix present; treat as a plain value.
                candidate = unquote(raw_value)
            candidate = candidate.strip()

        if not candidate:
            # Plain parameter; "filename\s*=" cannot match "filename*=".
            plain = re.search(
                r'filename\s*=\s*(?:"([^"]+)"|([^;\s]+))',
                content_disposition,
                flags=re.IGNORECASE,
            )
            if plain:
                plain_value = (plain.group(1) or plain.group(2) or "").strip().strip('"')
                if plain_value:
                    candidate = unquote(plain_value)

        if candidate:
            return sanitize_filename(candidate)

    extracted = _extract_download_filename_from_url(url)
    if extracted:
        return sanitize_filename(extracted)

    fallback = Path(urlparse(url).path).name or "download.bin"
    return sanitize_filename(unquote(fallback))
def _download_with_requests_session(
    *,
    session: requests.Session,
    url: str,
    output_dir: Path,
    suggested_filename: Optional[str] = None,
) -> Path:
    """Stream *url* to a new file in *output_dir* using *session*.

    Args:
        session: Session used to issue the GET request.
        url: Address of the file to download.
        output_dir: Directory the file is written into.
        suggested_filename: Optional preferred filename, forwarded to
            ``_filename_from_response``.

    Returns:
        The path of the written file, as returned by ``unique_path``.

    Raises:
        requests.HTTPError: When the server answers with an error status.
        Exception: Any network or filesystem error raised while streaming;
            the partially written file is removed before re-raising.
    """
    headers = {
        "Referer": "https://archive.org/",
        "Accept": "*/*",
    }
    # The context manager closes the streamed response on every exit path,
    # so the connection is released even when the transfer fails.
    with session.get(url, headers=headers, stream=True, allow_redirects=True, timeout=120) as response:
        response.raise_for_status()

        filename = _filename_from_response(url, response, suggested_filename=suggested_filename)
        out_path = unique_path(Path(output_dir) / filename)

        # Open outside the try so a failed open never triggers the cleanup.
        handle = open(out_path, "wb")
        try:
            with handle:
                for chunk in response.iter_content(chunk_size=1024 * 256):
                    if chunk:
                        handle.write(chunk)
        except BaseException:
            # Do not leave a truncated file behind (also on Ctrl-C).
            try:
                Path(out_path).unlink()
            except OSError:
                pass
            raise

    return out_path
def _looks_fielded_query(q: str) -> bool:
low = (q or "").lower()
return (":" in low) or (" and " in low) or (" or "
@@ -476,6 +568,17 @@ class InternetArchive(Provider):
@classmethod
def config_schema(cls) -> List[Dict[str, Any]]:
return [
{
"key": "email",
"label": "Archive.org Email (restricted downloads)",
"default": ""
},
{
"key": "password",
"label": "Archive.org Password (restricted downloads)",
"default": "",
"secret": True
},
{
"key": "access_key",
"label": "Access Key (for uploads)",
@@ -542,6 +645,73 @@ class InternetArchive(Provider):
except Exception:
return False
def _download_with_archive_auth(
    self,
    *,
    url: str,
    output_dir: Path,
    suggested_filename: Optional[str] = None,
) -> Optional[Path]:
    """Try to download *url* through a logged-in Archive.org session.

    Credentials come from ``_pick_archive_credentials(self.config)``. The
    login, loan, loan-return and logout steps are delegated to private
    helpers on ``Provider.openlibrary.OpenLibrary``.

    Args:
        url: Address of the file to download.
        output_dir: Directory the file is written into.
        suggested_filename: Optional preferred filename.

    Returns:
        The downloaded file's path, or ``None`` when no credentials are
        configured, the OpenLibrary helper cannot be imported, or any step
        of the authenticated download fails (the failure is logged).
    """
    email, password = _pick_archive_credentials(self.config or {})
    if not email or not password:
        return None

    try:
        from Provider.openlibrary import OpenLibrary
    except Exception as exc:
        log(f"[internetarchive] OpenLibrary auth helper unavailable: {exc}", file=sys.stderr)
        return None

    identifier = _extract_identifier_from_any(url)
    session: Optional[requests.Session] = None
    loaned = False
    try:
        session = OpenLibrary._archive_login(email, password)

        if identifier:
            # Best-effort requests to the item's pages before downloading;
            # presumably they prime session cookies - TODO confirm.
            warmup_urls = (
                f"https://archive.org/details/{identifier}",
                f"https://archive.org/download/{identifier}",
            )
            for warmup_url in warmup_urls:
                try:
                    session.get(
                        warmup_url,
                        timeout=30,
                        allow_redirects=True,
                    )
                except Exception:
                    pass

            # A failed loan attempt is not fatal: the download is still
            # tried with the plain logged-in session.
            try:
                session = OpenLibrary._archive_loan(session, identifier, verbose=False)
                loaned = True
            except Exception:
                loaned = False

        return _download_with_requests_session(
            session=session,
            url=url,
            output_dir=output_dir,
            suggested_filename=suggested_filename,
        )
    except Exception as exc:
        log(f"[internetarchive] authenticated download failed: {exc}", file=sys.stderr)
        return None
    finally:
        if session is not None:
            if loaned and identifier:
                try:
                    OpenLibrary._archive_return_loan(session, identifier)
                except Exception as exc:
                    # Report instead of hiding it: the loan may still be
                    # checked out on the account.
                    log(
                        f"[internetarchive] failed to return loan for {identifier}: {exc}",
                        file=sys.stderr,
                    )
            try:
                OpenLibrary._archive_logout(session)
            except Exception:
                # Logout is best-effort; nothing useful to do on failure.
                pass
@staticmethod
def _media_kind_from_mediatype(mediatype: str) -> str:
mt = str(mediatype or "").strip().lower()
@@ -715,6 +885,13 @@ class InternetArchive(Provider):
return None
except Exception as exc:
log(f"[internetarchive] direct file download failed, falling back to IA API: {exc}", file=sys.stderr)
auth_path = self._download_with_archive_auth(
url=raw_path,
output_dir=output_dir,
suggested_filename=suggested_filename,
)
if auth_path is not None:
return auth_path
ia = _ia()
get_item = getattr(ia, "get_item", None)