g
This commit is contained in:
@@ -4,6 +4,7 @@ import importlib
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -11,8 +12,9 @@ from urllib.parse import quote, unquote, urlparse
|
||||
|
||||
from API.HTTP import _download_direct_file
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.utils import sanitize_filename
|
||||
from SYS.utils import sanitize_filename, unique_path
|
||||
from SYS.logger import log
|
||||
from SYS.config import get_provider_block
|
||||
|
||||
# Helper for download-file: render selectable formats for a details URL.
|
||||
def maybe_show_formats_table(
|
||||
@@ -184,6 +186,96 @@ def _pick_provider_config(config: Any) -> Dict[str, Any]:
|
||||
return {}
|
||||
|
||||
|
||||
def _pick_archive_credentials(config: Any) -> tuple[Optional[str], Optional[str]]:
|
||||
"""Resolve Archive.org credentials.
|
||||
|
||||
Preference order:
|
||||
1) provider.internetarchive (email/username + password)
|
||||
2) provider.openlibrary (email + password)
|
||||
"""
|
||||
if not isinstance(config, dict):
|
||||
return None, None
|
||||
|
||||
ia_block = get_provider_block(config, "internetarchive")
|
||||
if isinstance(ia_block, dict):
|
||||
email = (
|
||||
ia_block.get("email")
|
||||
or ia_block.get("username")
|
||||
or ia_block.get("user")
|
||||
)
|
||||
password = ia_block.get("password")
|
||||
email_text = str(email).strip() if email else ""
|
||||
password_text = str(password).strip() if password else ""
|
||||
if email_text and password_text:
|
||||
return email_text, password_text
|
||||
|
||||
ol_block = get_provider_block(config, "openlibrary")
|
||||
if isinstance(ol_block, dict):
|
||||
email = ol_block.get("email")
|
||||
password = ol_block.get("password")
|
||||
email_text = str(email).strip() if email else ""
|
||||
password_text = str(password).strip() if password else ""
|
||||
if email_text and password_text:
|
||||
return email_text, password_text
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
def _filename_from_response(url: str, response: requests.Response, suggested_filename: Optional[str] = None) -> str:
    """Choose a sanitized local filename for a downloaded response.

    Sources are tried in this order:

      1) ``suggested_filename``; if it has no extension, the extension of the
         filename extracted from ``url`` (when there is one) is appended.
      2) The ``Content-Disposition`` response header (``filename*`` is
         preferred over ``filename``).
      3) The filename extracted from ``url`` by
         ``_extract_download_filename_from_url``.
      4) The last path segment of ``url``, or ``"download.bin"`` when the
         path has no final segment.

    Args:
        url: The URL that was requested.
        response: The HTTP response; only its headers are read.
        suggested_filename: Optional caller-preferred name.

    Returns:
        The chosen name after passing through ``sanitize_filename``.
    """
    suggested = str(suggested_filename or "").strip()
    if suggested:
        if Path(suggested).suffix:
            return sanitize_filename(suggested)
        # Only look at the URL when the suggestion lacks an extension.
        guessed_ext = Path(str(_extract_download_filename_from_url(url) or "")).suffix
        merged = f"{suggested}{guessed_ext}" if guessed_ext else suggested
        return sanitize_filename(merged)

    try:
        content_disposition = str(response.headers.get("content-disposition", "") or "")
    except Exception:
        # Best effort: a response object without usable headers just skips this source.
        content_disposition = ""

    header_name = _filename_from_content_disposition(content_disposition)
    if header_name:
        return sanitize_filename(header_name)

    extracted = _extract_download_filename_from_url(url)
    if extracted:
        return sanitize_filename(extracted)

    fallback = Path(urlparse(url).path).name or "download.bin"
    return sanitize_filename(unquote(fallback))


def _filename_from_content_disposition(header: str) -> str:
    """Return the decoded filename from a Content-Disposition value, or "".

    The extended ``filename*`` parameter (``charset'language'percent-encoded``)
    is preferred over plain ``filename``; its ``charset'language'`` prefix is
    removed and the remainder is percent-decoded using the declared charset.
    Parameter names are matched case-insensitively.
    """
    if not header:
        return ""

    extended = re.search(r"filename\*\s*=\s*([^;]+)", header, flags=re.IGNORECASE)
    if extended:
        raw = extended.group(1).strip().strip('"')
        charset, first_quote, remainder = raw.partition("'")
        _language, second_quote, encoded = remainder.partition("'")
        if first_quote and second_quote:
            try:
                decoded = unquote(encoded, encoding=charset or "utf-8", errors="replace")
            except LookupError:
                # Unknown charset label: fall back to the UTF-8 default.
                decoded = unquote(encoded)
        else:
            # Not in charset'lang'value form; treat it as a plain encoded value.
            decoded = unquote(raw)
        decoded = decoded.strip()
        if decoded:
            return decoded

    # `filename\s*=` cannot match the `filename*=` parameter because of the `*`.
    plain = re.search(r'\bfilename\s*=\s*(?:"([^"]+)"|([^;\s]+))', header, flags=re.IGNORECASE)
    if plain:
        candidate = (plain.group(1) or plain.group(2) or "").strip().strip('"')
        if candidate:
            return unquote(candidate)

    return ""
|
||||
|
||||
|
||||
def _download_with_requests_session(
    *,
    session: requests.Session,
    url: str,
    output_dir: Path,
    suggested_filename: Optional[str] = None,
) -> Path:
    """Stream ``url`` to a file in ``output_dir`` using an existing session.

    The request is sent with an archive.org ``Referer`` header, follows
    redirects, and uses a 120 second timeout. The target name comes from
    ``_filename_from_response`` and is passed through ``unique_path``.

    Args:
        session: Session to issue the GET with (carries any auth cookies).
        url: File URL to download.
        output_dir: Directory to write into; created if it does not exist.
        suggested_filename: Optional preferred filename.

    Returns:
        Path of the written file.

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``).
        Exception: Any error raised while requesting or writing; a partially
            written file is removed before the error propagates.
    """
    headers = {
        "Referer": "https://archive.org/",
        "Accept": "*/*",
    }

    # Use the response as a context manager so the streamed connection is
    # released even when the status check or the file write fails.
    with session.get(url, headers=headers, stream=True, allow_redirects=True, timeout=120) as response:
        response.raise_for_status()

        filename = _filename_from_response(url, response, suggested_filename=suggested_filename)
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        out_path = unique_path(target_dir / filename)

        try:
            with open(out_path, "wb") as handle:
                for chunk in response.iter_content(chunk_size=1024 * 256):
                    if chunk:  # skip empty keep-alive chunks
                        handle.write(chunk)
        except BaseException:
            # Do not leave a truncated download behind; re-raise unchanged.
            try:
                Path(out_path).unlink(missing_ok=True)
            except OSError:
                pass
            raise

    return out_path
|
||||
|
||||
|
||||
def _looks_fielded_query(q: str) -> bool:
|
||||
low = (q or "").lower()
|
||||
return (":" in low) or (" and " in low) or (" or "
|
||||
@@ -476,6 +568,17 @@ class InternetArchive(Provider):
|
||||
@classmethod
|
||||
def config_schema(cls) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"key": "email",
|
||||
"label": "Archive.org Email (restricted downloads)",
|
||||
"default": ""
|
||||
},
|
||||
{
|
||||
"key": "password",
|
||||
"label": "Archive.org Password (restricted downloads)",
|
||||
"default": "",
|
||||
"secret": True
|
||||
},
|
||||
{
|
||||
"key": "access_key",
|
||||
"label": "Access Key (for uploads)",
|
||||
@@ -542,6 +645,73 @@ class InternetArchive(Provider):
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _download_with_archive_auth(
    self,
    *,
    url: str,
    output_dir: Path,
    suggested_filename: Optional[str] = None,
) -> Optional[Path]:
    """Attempt a download through a logged-in Archive.org session.

    Credentials come from ``_pick_archive_credentials(self.config)``. The
    session is obtained from the ``OpenLibrary`` provider's login helper; if
    an identifier can be extracted from ``url``, the item's details and
    download pages are requested and a loan is attempted before the file is
    fetched. Any loan taken is returned and the session is logged out
    afterwards, whether or not the download succeeded.

    Args:
        url: File URL to download.
        output_dir: Directory the file is written into.
        suggested_filename: Optional preferred filename.

    Returns:
        The downloaded file's path, or ``None`` when no credentials are
        configured, the OpenLibrary helper cannot be imported, or any step
        of the authenticated attempt raises (the failure is logged).
    """
    email, password = _pick_archive_credentials(self.config or {})
    if not (email and password):
        return None

    try:
        from Provider.openlibrary import OpenLibrary
    except Exception as exc:
        log(f"[internetarchive] OpenLibrary auth helper unavailable: {exc}", file=sys.stderr)
        return None

    identifier = _extract_identifier_from_any(url)
    session: Optional[requests.Session] = None
    loaned = False
    try:
        session = OpenLibrary._archive_login(email, password)

        if identifier:
            # Best-effort page visits before the real download; failures are
            # ignored. NOTE(review): presumably these seed session cookies --
            # confirm against the OpenLibrary helper.
            warmup_urls = (
                f"https://archive.org/details/{identifier}",
                f"https://archive.org/download/{identifier}",
            )
            for warmup_url in warmup_urls:
                try:
                    session.get(warmup_url, timeout=30, allow_redirects=True)
                except Exception:
                    pass

            # A failed loan is not fatal: the download is still attempted.
            try:
                session = OpenLibrary._archive_loan(session, identifier, verbose=False)
            except Exception:
                loaned = False
            else:
                loaned = True

        return _download_with_requests_session(
            session=session,
            url=url,
            output_dir=output_dir,
            suggested_filename=suggested_filename,
        )
    except Exception as exc:
        log(f"[internetarchive] authenticated download failed: {exc}", file=sys.stderr)
        return None
    finally:
        # Cleanup is best effort and must never mask the result above.
        if session is not None:
            if loaned and identifier:
                try:
                    OpenLibrary._archive_return_loan(session, identifier)
                except Exception:
                    pass
            try:
                OpenLibrary._archive_logout(session)
            except Exception:
                pass
|
||||
|
||||
@staticmethod
|
||||
def _media_kind_from_mediatype(mediatype: str) -> str:
|
||||
mt = str(mediatype or "").strip().lower()
|
||||
@@ -715,6 +885,13 @@ class InternetArchive(Provider):
|
||||
return None
|
||||
except Exception as exc:
|
||||
log(f"[internetarchive] direct file download failed, falling back to IA API: {exc}", file=sys.stderr)
|
||||
auth_path = self._download_with_archive_auth(
|
||||
url=raw_path,
|
||||
output_dir=output_dir,
|
||||
suggested_filename=suggested_filename,
|
||||
)
|
||||
if auth_path is not None:
|
||||
return auth_path
|
||||
|
||||
ia = _ia()
|
||||
get_item = getattr(ia, "get_item", None)
|
||||
|
||||
Reference in New Issue
Block a user