This commit is contained in:
nose
2025-12-11 19:04:02 -08:00
parent 6863c6c7ea
commit 16d8a763cd
103 changed files with 4759 additions and 9156 deletions

API/HTTP.py (new file, 579 lines added)

@@ -0,0 +1,579 @@
"""
Unified HTTP client for downlow using httpx.
Provides synchronous and asynchronous HTTP operations with:
- Automatic retries on transient failures
- Configurable timeouts and headers
- Built-in progress tracking for downloads
- Request/response logging support
"""
import httpx
import asyncio
from typing import Optional, Dict, Any, Callable, BinaryIO
from pathlib import Path
import logging
logger = logging.getLogger(__name__)
# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
class HTTPClient:
"""Unified HTTP client with sync support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
retries: int = DEFAULT_RETRIES,
user_agent: str = DEFAULT_USER_AGENT,
verify_ssl: bool = True,
headers: Optional[Dict[str, str]] = None,
):
"""
Initialize HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
user_agent: User-Agent header value
verify_ssl: Whether to verify SSL certificates
headers: Additional headers to include in all requests
"""
self.timeout = timeout
self.retries = retries
self.user_agent = user_agent
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.Client] = None
def __enter__(self):
"""Context manager entry."""
self._client = httpx.Client(
timeout=self.timeout,
verify=self.verify_ssl,
headers=self._get_headers(),
)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
if self._client:
self._client.close()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
headers = {"User-Agent": self.user_agent}
headers.update(self.base_headers)
return headers
def get(
self,
url: str,
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
allow_redirects: bool = True,
) -> httpx.Response:
"""
Make a GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
return self._request(
"GET",
url,
params=params,
headers=headers,
follow_redirects=allow_redirects,
)
def post(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
files: Optional[Dict] = None,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make a POST request.
Args:
url: Request URL
data: Form data
json: JSON data
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"POST",
url,
data=data,
json=json,
files=files,
headers=headers,
)
def put(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
content: Optional[Any] = None,
files: Optional[Dict] = None,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make a PUT request.
Args:
url: Request URL
data: Form data
json: JSON data
content: Raw content
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"PUT",
url,
data=data,
json=json,
content=content,
files=files,
headers=headers,
)
def delete(
self,
url: str,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make a DELETE request.
Args:
url: Request URL
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"DELETE",
url,
headers=headers,
)
def request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
"""
Make a generic HTTP request.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments
Returns:
httpx.Response object
"""
return self._request(method, url, **kwargs)
def download(
self,
url: str,
file_path: str,
chunk_size: int = 8192,
progress_callback: Optional[Callable[[int, int], None]] = None,
headers: Optional[Dict[str, str]] = None,
) -> Path:
"""
Download a file from URL with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
with open(path, "wb") as f:
for chunk in response.iter_bytes(chunk_size):
if chunk:
f.write(chunk)
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
return path
def _request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
"""
Make an HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.Client.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
last_exception = None
for attempt in range(self.retries):
try:
response = self._client.request(method, url, **kwargs)
response.raise_for_status()
return response
except httpx.TimeoutException as e:
last_exception = e
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
if attempt < self.retries - 1:
continue
except httpx.HTTPStatusError as e:
# Don't retry on 4xx errors
if 400 <= e.response.status_code < 500:
try:
response_text = e.response.text[:500]
except Exception:
response_text = "<unable to read response>"
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
raise
last_exception = e
try:
response_text = e.response.text[:200]
except Exception:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
if attempt < self.retries - 1:
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
if attempt < self.retries - 1:
continue
if last_exception:
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
class AsyncHTTPClient:
"""Unified async HTTP client with asyncio support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
retries: int = DEFAULT_RETRIES,
user_agent: str = DEFAULT_USER_AGENT,
verify_ssl: bool = True,
headers: Optional[Dict[str, str]] = None,
):
"""
Initialize async HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
user_agent: User-Agent header value
verify_ssl: Whether to verify SSL certificates
headers: Additional headers to include in all requests
"""
self.timeout = timeout
self.retries = retries
self.user_agent = user_agent
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.AsyncClient] = None
async def __aenter__(self):
"""Async context manager entry."""
self._client = httpx.AsyncClient(
timeout=self.timeout,
verify=self.verify_ssl,
headers=self._get_headers(),
)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit."""
if self._client:
await self._client.aclose()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
headers = {"User-Agent": self.user_agent}
headers.update(self.base_headers)
return headers
async def get(
self,
url: str,
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
allow_redirects: bool = True,
) -> httpx.Response:
"""
Make an async GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
return await self._request(
"GET",
url,
params=params,
headers=headers,
follow_redirects=allow_redirects,
)
async def post(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Response:
"""
Make an async POST request.
Args:
url: Request URL
data: Form data
json: JSON data
headers: Additional headers
Returns:
httpx.Response object
"""
return await self._request(
"POST",
url,
data=data,
json=json,
headers=headers,
)
async def download(
self,
url: str,
file_path: str,
chunk_size: int = 8192,
progress_callback: Optional[Callable[[int, int], None]] = None,
headers: Optional[Dict[str, str]] = None,
) -> Path:
"""
Download a file from URL asynchronously with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
async with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
with open(path, "wb") as f:
async for chunk in response.aiter_bytes(chunk_size):
if chunk:
f.write(chunk)
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
return path
async def _request(
self,
method: str,
url: str,
**kwargs
) -> httpx.Response:
"""
Make an async HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.AsyncClient.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
last_exception = None
for attempt in range(self.retries):
try:
response = await self._client.request(method, url, **kwargs)
response.raise_for_status()
return response
except httpx.TimeoutException as e:
last_exception = e
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
if attempt < self.retries - 1:
await asyncio.sleep(0.5) # Brief delay before retry
continue
except httpx.HTTPStatusError as e:
# Don't retry on 4xx errors
if 400 <= e.response.status_code < 500:
try:
response_text = e.response.text[:500]
except Exception:
response_text = "<unable to read response>"
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
raise
last_exception = e
try:
response_text = e.response.text[:200]
except Exception:
response_text = "<unable to read response>"
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
if attempt < self.retries - 1:
await asyncio.sleep(0.5)
continue
if last_exception:
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
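# Usage sketch for AsyncHTTPClient (illustrative only; the example URLs and local
# paths are placeholders, not part of this commit). The client must be entered
# with "async with" before any request is made.
async def _example_async_usage() -> None:
    async with AsyncHTTPClient(timeout=60.0) as client:
        resp = await client.get("https://example.com/")
        print(resp.status_code)
        await client.download("https://example.com/file.bin", "downloads/file.bin")
# To run the sketch: asyncio.run(_example_async_usage())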
# Convenience function for quick sync requests
def get(url: str, **kwargs) -> httpx.Response:
"""Quick GET request without context manager."""
with HTTPClient() as client:
return client.get(url, **kwargs)
def post(url: str, **kwargs) -> httpx.Response:
"""Quick POST request without context manager."""
with HTTPClient() as client:
return client.post(url, **kwargs)
def download(
url: str,
file_path: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
**kwargs
) -> Path:
"""Quick file download without context manager."""
with HTTPClient() as client:
return client.download(url, file_path, progress_callback=progress_callback, **kwargs)
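# Usage sketch for the synchronous client and helpers above (illustrative only;
# the URLs and local paths are placeholders, not part of this commit).
def _example_sync_usage() -> None:
    def show_progress(done: int, total: int) -> None:
        # total is 0 when the server sends no Content-Length header
        print(f"downloaded {done}/{total or '?'} bytes", end="\r")

    # Reuse one client (one connection pool) for several requests.
    with HTTPClient(timeout=60.0, retries=5) as client:
        resp = client.get("https://example.com/")
        print(resp.status_code, len(resp.content))
        client.download(
            "https://example.com/file.bin",
            "downloads/file.bin",
            progress_callback=show_progress,
        )

    # One-off calls go through the module-level helpers.
    get("https://example.com/api/status")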

API/HydrusNetwork.py (new file, 1570 lines added)
File diff suppressed because it is too large.

API/alldebrid.py (new file, 829 lines added)

@@ -0,0 +1,829 @@
"""AllDebrid API integration for converting free links to direct downloads.
AllDebrid is a debrid service that unlocks free file hosters and provides direct download links.
API docs: https://docs.alldebrid.com/#general-informations
"""
from __future__ import annotations
import json
import sys
from SYS.logger import log, debug
import time
import logging
from pathlib import Path
from typing import Any, Dict, Optional, Set, List, Sequence
from urllib.parse import urlencode, urlparse
from .HTTP import HTTPClient
logger = logging.getLogger(__name__)
class AllDebridError(Exception):
"""Raised when AllDebrid API request fails."""
pass
# Cache for supported hosters (domain -> host info)
_SUPPORTED_HOSTERS_CACHE: Optional[Dict[str, Dict[str, Any]]] = None
_CACHE_TIMESTAMP: float = 0
_CACHE_DURATION: float = 3600 # 1 hour
class AllDebridClient:
"""Client for AllDebrid API."""
# Known API base URLs (v4 is used by default)
BASE_URLS = [
"https://api.alldebrid.com/v4",
"https://api.alldebrid.com/v3",
]
def __init__(self, api_key: str):
"""Initialize AllDebrid client with API key.
Args:
api_key: AllDebrid API key from config
"""
self.api_key = api_key.strip()
if not self.api_key:
raise AllDebridError("AllDebrid API key is empty")
self.base_url = self.BASE_URLS[0]  # Start with v4
def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
"""Make a request to AllDebrid API.
Args:
endpoint: API endpoint (e.g., "user/profile", "link/unlock")
params: Query parameters
Returns:
Parsed JSON response
Raises:
AllDebridError: If request fails or API returns error
"""
if params is None:
params = {}
# Add API key to params
params['apikey'] = self.api_key
url = f"{self.base_url}/{endpoint}"
query_string = urlencode(params)
full_url = f"{url}?{query_string}"
logger.debug(f"[AllDebrid] {endpoint} request to {full_url[:80]}...")
try:
# Pass timeout to HTTPClient init, not to get()
with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client:
try:
response = client.get(full_url)
response.raise_for_status()
except Exception as req_err:
# Log detailed error info
logger.error(f"[AllDebrid] Request error to {full_url[:80]}: {req_err}", exc_info=True)
if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore
try:
error_body = req_err.response.content.decode('utf-8') # type: ignore
logger.error(f"[AllDebrid] Response body: {error_body[:200]}")
except Exception:
pass
raise
data = json.loads(response.content.decode('utf-8'))
logger.debug(f"[AllDebrid] Response status: {response.status_code}")
# Check for API errors
if data.get('status') == 'error':
error_msg = data.get('error', {}).get('message', 'Unknown error')
logger.error(f"[AllDebrid] API error: {error_msg}")
raise AllDebridError(f"AllDebrid API error: {error_msg}")
return data
except AllDebridError:
raise
except Exception as exc:
error_msg = f"AllDebrid request failed: {exc}"
logger.error(f"[AllDebrid] {error_msg}", exc_info=True)
raise AllDebridError(error_msg)
def unlock_link(self, link: str) -> Optional[str]:
"""Unlock a restricted link and get direct download URL.
Args:
link: Restricted link to unlock
Returns:
Direct download URL (or the original link if the API returns no direct link); None if the unlock did not succeed
Raises:
AllDebridError: If unlock fails
"""
if not link.startswith(('http://', 'https://')):
raise AllDebridError(f"Invalid URL: {link}")
try:
response = self._request('link/unlock', {'link': link})
# Check if unlock was successful
if response.get('status') == 'success':
data = response.get('data', {})
# AllDebrid returns the download info in 'link' field
if 'link' in data:
return data['link']
# Alternative: check for 'file' field
if 'file' in data:
return data['file']
# If no direct link, return the input link
return link
return None
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to unlock link: {exc}")
def check_host(self, hostname: str) -> Dict[str, Any]:
"""Check if a host is supported by AllDebrid.
Args:
hostname: Hostname to check (e.g., "uploadhaven.com")
Returns:
Host information dict with support status
Raises:
AllDebridError: If request fails
"""
try:
response = self._request('host', {'name': hostname})
if response.get('status') == 'success':
return response.get('data', {})
return {}
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to check host: {exc}")
def get_user_info(self) -> Dict[str, Any]:
"""Get current user account information.
Returns:
User information dict
Raises:
AllDebridError: If request fails
"""
try:
response = self._request('user/profile')
if response.get('status') == 'success':
return response.get('data', {})
return {}
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get user info: {exc}")
def get_supported_hosters(self) -> Dict[str, Dict[str, Any]]:
"""Get list of all supported hosters from AllDebrid API.
Returns:
Dict mapping domain to host info (status, name, etc)
Raises:
AllDebridError: If request fails
"""
try:
response = self._request('hosts/domains')
if response.get('status') == 'success':
data = response.get('data', {})
# The API returns hosts keyed by domain
return data if isinstance(data, dict) else {}
return {}
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get supported hosters: {exc}")
def magnet_add(self, magnet_uri: str) -> Dict[str, Any]:
"""Submit a magnet link or torrent hash to AllDebrid for processing.
AllDebrid will download the torrent content and store it in the account.
Processing time varies based on torrent size and availability.
Args:
magnet_uri: Magnet URI (magnet:?xt=urn:btih:...) or torrent hash
Returns:
Dict with magnet info:
- id: Magnet ID (int) - needed for status checks
- name: Torrent name
- hash: Torrent hash
- size: Total file size (bytes)
- ready: Boolean - True if already available
Raises:
AllDebridError: If submit fails (requires premium, invalid magnet, etc)
"""
if not magnet_uri:
raise AllDebridError("Magnet URI is empty")
try:
# API endpoint: POST /v4/magnet/upload
# Format: /magnet/upload?apikey=key&magnets[]=magnet:?xt=...
response = self._request('magnet/upload', {'magnets[]': magnet_uri})
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
if magnets and len(magnets) > 0:
magnet_info = magnets[0]
# Check for errors in the magnet response
if 'error' in magnet_info:
error = magnet_info['error']
error_msg = error.get('message', 'Unknown error')
raise AllDebridError(f"Magnet error: {error_msg}")
return magnet_info
raise AllDebridError("No magnet data in response")
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to submit magnet: {exc}")
def magnet_status(self, magnet_id: int, include_files: bool = False) -> Dict[str, Any]:
"""Get status of a magnet currently being processed or stored.
Status codes:
0-3: Processing (in queue, downloading, compressing, uploading)
4: Ready (files available for download)
5-15: Error (upload failed, not downloaded in 20min, too big, etc)
Args:
magnet_id: Magnet ID from magnet_add()
include_files: If True, includes file list in response
Returns:
Dict with status info:
- id: Magnet ID
- filename: Torrent name
- size: Total size (bytes)
- status: Human-readable status
- statusCode: Numeric code (0-15)
- downloaded: Bytes downloaded so far
- uploaded: Bytes uploaded so far
- seeders: Number of seeders
- downloadSpeed: Current speed (bytes/sec)
- uploadSpeed: Current speed (bytes/sec)
- files: (optional) Array of file objects when include_files=True
Each file: {n: name, s: size, l: download_link}
Raises:
AllDebridError: If status check fails
"""
if not isinstance(magnet_id, int) or magnet_id <= 0:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
# Use v4.1 endpoint for better response format
# Temporarily override base_url for this request
old_base = self.base_url
self.base_url = "https://api.alldebrid.com/v4.1"
try:
response = self._request('magnet/status', {'id': str(magnet_id)})
finally:
self.base_url = old_base
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', {})
# Handle both list and dict responses
if isinstance(magnets, list) and len(magnets) > 0:
return magnets[0]
elif isinstance(magnets, dict) and magnets:
return magnets
raise AllDebridError(f"No magnet found with ID {magnet_id}")
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get magnet status: {exc}")
def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]:
"""Get live status of a magnet using delta sync mode.
The live mode endpoint provides real-time progress by only sending
deltas (changed fields) instead of full status on each call. This
reduces bandwidth and server load compared to regular polling.
Note: The "live" designation refers to the delta-sync mode where you
maintain state locally and apply diffs from the API, not a streaming
endpoint. Regular magnet_status() polling is simpler for single magnets.
Docs: https://docs.alldebrid.com/#get-status-live-mode
Args:
magnet_id: Magnet ID from magnet_add()
session: Session ID (use same ID across multiple calls). If None, will query current status
counter: Counter value from previous response (starts at 0)
Returns:
Dict with magnet status. May contain only changed fields if counter > 0.
For single-magnet tracking, use magnet_status() instead.
Raises:
AllDebridError: If request fails
"""
if not isinstance(magnet_id, int) or magnet_id <= 0:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
# For single magnet queries, just use regular endpoint with ID
# The "live mode" with session/counter is for multi-magnet dashboards
# where bandwidth savings from diffs matter
response = self._request('magnet/status', {'id': str(magnet_id)})
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
# Handle list response
if isinstance(magnets, list) and len(magnets) > 0:
return magnets[0]
raise AllDebridError(f"No magnet found with ID {magnet_id}")
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get magnet live status: {exc}")
def magnet_links(self, magnet_ids: list) -> Dict[str, Any]:
"""Get files and download links for one or more magnets.
Use this after magnet_status shows statusCode == 4 (Ready).
Returns the file tree structure with direct download links.
Args:
magnet_ids: List of magnet IDs to get files for
Returns:
Dict mapping magnet_id (as string) -> magnet_info:
- id: Magnet ID
- files: Array of file/folder objects
File: {n: name, s: size, l: direct_download_link}
Folder: {n: name, e: [sub_items]}
Raises:
AllDebridError: If request fails
"""
if not magnet_ids:
raise AllDebridError("No magnet IDs provided")
try:
# Build parameter: id[]=123&id[]=456 style
params = {}
for i, magnet_id in enumerate(magnet_ids):
params[f'id[{i}]'] = str(magnet_id)
response = self._request('magnet/files', params)
if response.get('status') == 'success':
data = response.get('data', {})
magnets = data.get('magnets', [])
# Convert list to dict keyed by ID (as string) for easier access
result = {}
for magnet_info in magnets:
magnet_id = magnet_info.get('id')
if magnet_id:
result[str(magnet_id)] = magnet_info
return result
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to get magnet files: {exc}")
def instant_available(self, magnet_hash: str) -> Optional[List[Dict[str, Any]]]:
"""Check if magnet is available for instant streaming without downloading.
AllDebrid's "instant" feature checks if a magnet can be streamed directly
without downloading all the data. Returns available video/audio files.
Args:
magnet_hash: Torrent hash (with or without magnet: prefix)
Returns:
List of available files for streaming, or None if not available
Each file: {n: name, s: size, e: extension, t: type}
Returns empty list if torrent not found or not available
Raises:
AllDebridError: If API request fails
"""
try:
# Parse magnet hash if needed
if magnet_hash.startswith('magnet:'):
# Extract hash from magnet URI
import re
match = re.search(r'xt=urn:btih:([a-fA-F0-9]+)', magnet_hash)
if not match:
return None
hash_value = match.group(1)
else:
hash_value = magnet_hash.strip()
if not hash_value or len(hash_value) < 32:
return None
response = self._request('magnet/instant', {'magnet': hash_value})
if response.get('status') == 'success':
data = response.get('data', {})
# Returns 'files' array if available, or empty
return data.get('files', [])
# Not available is not an error, just return empty list
return []
except AllDebridError:
raise
except Exception as exc:
logger.debug(f"[AllDebrid] instant_available check failed: {exc}")
return None
def magnet_delete(self, magnet_id: int) -> bool:
"""Delete a magnet from the AllDebrid account.
Args:
magnet_id: Magnet ID to delete
Returns:
True if deletion was successful
Raises:
AllDebridError: If deletion fails
"""
if not isinstance(magnet_id, int) or magnet_id <= 0:
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
try:
response = self._request('magnet/delete', {'id': str(magnet_id)})
if response.get('status') == 'success':
return True
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
except AllDebridError:
raise
except Exception as exc:
raise AllDebridError(f"Failed to delete magnet: {exc}")
def _get_cached_supported_hosters(api_key: str) -> Set[str]:
"""Get cached list of supported hoster domains.
Uses AllDebrid API to fetch the list once per hour,
caching the result to avoid repeated API calls.
Args:
api_key: AllDebrid API key
Returns:
Set of supported domain names (lowercased)
"""
global _SUPPORTED_HOSTERS_CACHE, _CACHE_TIMESTAMP
now = time.time()
# Return cached result if still valid
if _SUPPORTED_HOSTERS_CACHE is not None and (now - _CACHE_TIMESTAMP) < _CACHE_DURATION:
return set(_SUPPORTED_HOSTERS_CACHE.keys())
# Fetch fresh list from API
try:
client = AllDebridClient(api_key)
hosters_dict = client.get_supported_hosters()
if hosters_dict:
# API returns: hosts (list), streams (list), redirectors (list)
# Combine all into a single set
all_domains: Set[str] = set()
# Add hosts
if 'hosts' in hosters_dict and isinstance(hosters_dict['hosts'], list):
all_domains.update(hosters_dict['hosts'])
# Add streams
if 'streams' in hosters_dict and isinstance(hosters_dict['streams'], list):
all_domains.update(hosters_dict['streams'])
# Add redirectors
if 'redirectors' in hosters_dict and isinstance(hosters_dict['redirectors'], list):
all_domains.update(hosters_dict['redirectors'])
# Cache as dict for consistency
_SUPPORTED_HOSTERS_CACHE = {domain: {} for domain in all_domains}
_CACHE_TIMESTAMP = now
if all_domains:
debug(f"✓ Cached {len(all_domains)} supported hosters")
return all_domains
except Exception as exc:
log(f"⚠ Failed to fetch supported hosters: {exc}", file=sys.stderr)
# Return any cached hosters even if expired
if _SUPPORTED_HOSTERS_CACHE:
return set(_SUPPORTED_HOSTERS_CACHE.keys())
# Fallback: empty set if no cache available
return set()
def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
"""Check if a URL is from a hoster that AllDebrid can unlock.
Intelligently queries the AllDebrid API to detect if the URL is
from a supported restricted hoster.
Args:
url: URL to check
api_key: AllDebrid API key
Returns:
True if URL is from a supported restrictable hoster
"""
if not url or not api_key:
return False
try:
# Extract domain from URL
parsed = urlparse(url)
domain = parsed.netloc.lower()
# Remove www. prefix for comparison
if domain.startswith('www.'):
domain = domain[4:]
# Get supported hosters (cached)
supported = _get_cached_supported_hosters(api_key)
if not supported:
# API check failed, fall back to manual detection
# Check for common restricted hosters
common_hosters = {
'uploadhaven.com', 'uploaded.to', 'uploaded.net',
'datafile.com', 'rapidfile.io', 'nitroflare.com',
'1fichier.com', 'mega.nz', 'mediafire.com'
}
return any(host in url.lower() for host in common_hosters)
# Check if domain is in supported list
# Need to check exact match and with/without www
return domain in supported or f"www.{domain}" in supported
except Exception as exc:
log(f"⚠ Hoster detection failed: {exc}", file=sys.stderr)
return False
def convert_link_with_debrid(link: str, api_key: str) -> Optional[str]:
"""Convert a restricted link to a direct download URL using AllDebrid.
Args:
link: Restricted link
api_key: AllDebrid API key
Returns:
Direct download URL, or original link if already unrestricted
"""
if not api_key:
return None
try:
client = AllDebridClient(api_key)
direct_link = client.unlock_link(link)
if direct_link and direct_link != link:
debug(f"✓ Converted link: {link[:60]}... → {direct_link[:60]}...")
return direct_link
return None
except AllDebridError as exc:
log(f"⚠ Failed to convert link: {exc}", file=sys.stderr)
return None
except Exception as exc:
log(f"⚠ Unexpected error: {exc}", file=sys.stderr)
return None
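# Hoster-unlock sketch combining the helpers above (illustrative only; the URL
# and API key are placeholders). convert_link_with_debrid() returns None when
# the link could not be (or did not need to be) converted.
def _example_resolve_hoster_link(url: str, api_key: str) -> str:
    if not is_link_restrictable_hoster(url, api_key):
        return url                                    # not a supported hoster, use as-is
    return convert_link_with_debrid(url, api_key) or url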
def is_magnet_link(uri: str) -> bool:
"""Check if a URI is a magnet link.
Magnet links start with 'magnet:?xt=urn:btih:' or just 'magnet:'
Args:
uri: URI to check
Returns:
True if URI is a magnet link
"""
if not uri:
return False
return uri.lower().startswith('magnet:')
def is_torrent_hash(text: str) -> bool:
"""Check if text looks like a torrent hash (40 or 64 hex characters).
Common formats:
- Info hash v1: 40 hex chars (SHA-1)
- Info hash v2: 64 hex chars (SHA-256)
Args:
text: Text to check
Returns:
True if text matches torrent hash format
"""
if not text or not isinstance(text, str):
return False
text = text.strip()
# Check if it's 40 hex chars (SHA-1) or 64 hex chars (SHA-256)
if len(text) not in (40, 64):
return False
try:
# Try to parse as hex
int(text, 16)
return True
except ValueError:
return False
def is_torrent_file(path: str) -> bool:
"""Check if a file path is a .torrent file.
Args:
path: File path to check
Returns:
True if file has .torrent extension
"""
if not path:
return False
return path.lower().endswith('.torrent')
def parse_magnet_or_hash(uri: str) -> Optional[str]:
"""Parse a magnet URI or hash into a format for AllDebrid API.
AllDebrid's magnet/upload endpoint accepts:
- Full magnet URIs: magnet:?xt=urn:btih:...
- Info hashes: 40 or 64 hex characters
Args:
uri: Magnet URI or hash
Returns:
Normalized input for AllDebrid API, or None if invalid
"""
if not uri:
return None
uri = uri.strip()
# Already a magnet link - just return it
if is_magnet_link(uri):
return uri
# Check if it's a valid hash
if is_torrent_hash(uri):
return uri
# Not a recognized format
return None
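# Input-normalization sketch for the helpers above (illustrative values only).
# parse_magnet_or_hash() accepts a full magnet URI or a bare 40/64-char info
# hash and rejects anything else.
def _example_normalize_inputs() -> None:
    for candidate in ("magnet:?xt=urn:btih:" + "a" * 40, "b" * 40, "not-a-magnet"):
        normalized = parse_magnet_or_hash(candidate)
        debug(f"{candidate[:24]} -> {normalized if normalized else 'rejected'}")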
# ============================================================================
# Cmdlet: unlock_link
# ============================================================================
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Unlock a restricted link using AllDebrid.
Converts free-hoster and restricted links to direct download URLs.
Usage:
unlock-link <link>
unlock-link # Uses URL from pipeline result
Requires:
- AllDebrid API key in config under Debrid.All-debrid
Args:
result: Pipeline result object
args: Command arguments
config: Configuration dictionary
Returns:
0 on success, 1 on failure
"""
try:
from .link_utils import (
extract_link,
get_api_key,
add_direct_link_to_result,
)
except ImportError as e:
log(f"Required modules unavailable: {e}", file=sys.stderr)
return 1
# Get link from args or result
link = extract_link(result, args)
if not link:
log("No valid URL provided", file=sys.stderr)
return 1
# Get AllDebrid API key from config
api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
if not api_key:
log("AllDebrid API key not configured in Debrid.All-debrid", file=sys.stderr)
return 1
# Try to unlock the link
debug(f"Unlocking: {link}")
direct_link = convert_link_with_debrid(link, api_key)
if direct_link:
debug(f"✓ Direct link: {direct_link}")
# Update result with direct link
add_direct_link_to_result(result, direct_link, link)
# Return the updated result via pipeline context
# Note: The cmdlet wrapper will handle emitting to pipeline
return 0
else:
log(f"❌ Failed to unlock link or already unrestricted", file=sys.stderr)
return 1
# ============================================================================
# Cmdlet Registration
# ============================================================================
def _register_unlock_link():
"""Register unlock-link command with cmdlet registry if available."""
try:
from cmdlets import register
@register(["unlock-link"])
def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Wrapper to make unlock_link_cmdlet available as cmdlet."""
import pipeline as ctx
ret_code = unlock_link_cmdlet(result, args, config)
# If successful, emit the result
if ret_code == 0:
ctx.emit(result)
return ret_code
return unlock_link_wrapper
except ImportError:
# If cmdlets module not available, just return None
return None
# Register when module is imported
_unlock_link_registration = _register_unlock_link()

API/archive_client.py (new file, 584 lines added)

@@ -0,0 +1,584 @@
"""Archive.org API client for borrowing and downloading books.
This module provides low-level functions for interacting with Archive.org:
- Authentication (login, credential management)
- Borrowing (loan, return_loan)
- Book metadata extraction (get_book_infos, get_book_metadata)
- Image downloading and deobfuscation
- PDF creation with metadata
Used by unified_book_downloader.py for the borrowing workflow.
"""
from __future__ import annotations
import base64
import hashlib
import logging
import os
import re
import sys
import time
from concurrent import futures
from typing import Any, Dict, List, Optional, Sequence, Tuple
import requests
from SYS.logger import log, debug
try:
from Crypto.Cipher import AES # type: ignore
from Crypto.Util import Counter # type: ignore
except ImportError:
AES = None # type: ignore
Counter = None # type: ignore
try:
from tqdm import tqdm # type: ignore
except ImportError:
tqdm = None # type: ignore
def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
"""Get OpenLibrary/Archive.org email and password from config.
Supports both formats:
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
- Old: {"Archive": {"email": "...", "password": "..."}}
{"archive_org_email": "...", "archive_org_password": "..."}
Returns: (email, password) tuple, each can be None
"""
if not isinstance(config, dict):
return None, None
# Try new format first
provider_config = config.get("provider", {})
if isinstance(provider_config, dict):
openlibrary_config = provider_config.get("openlibrary", {})
if isinstance(openlibrary_config, dict):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return email, password
# Try old nested format
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return email, password
# Fall back to old flat format
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return email, password
class BookNotAvailableError(Exception):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
pass
def display_error(response: requests.Response, message: str) -> None:
"""Display error and exit."""
log(message, file=sys.stderr)
log(response.text, file=sys.stderr)
sys.exit(1)
def login(email: str, password: str) -> requests.Session:
"""Login to archive.org.
Args:
email: Archive.org email
password: Archive.org password
Returns:
Authenticated requests.Session
Raises:
SystemExit on login failure
"""
session = requests.Session()
session.get("https://archive.org/account/login", timeout=30)
data = {"username": email, "password": password}
response = session.post("https://archive.org/account/login", data=data, timeout=30)
if "bad_login" in response.text:
log("Invalid credentials!", file=sys.stderr)
sys.exit(1)
if "Successful login" in response.text:
debug("Successful login")
return session
display_error(response, "[-] Error while login:")
sys.exit(1) # Unreachable but satisfies type checker
def loan(session: requests.Session, book_id: str, verbose: bool = True) -> requests.Session:
"""Borrow a book from archive.org (14-day loan).
Args:
session: Authenticated requests.Session from login()
book_id: Archive.org book identifier (e.g., 'ia_book_id')
verbose: Whether to log messages
Returns:
Session with active loan
Raises:
SystemExit on loan failure
"""
data = {"action": "grant_access", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
debug("Book is not available for borrowing (waitlisted or in use)")
raise BookNotAvailableError("Book is waitlisted or in use")
display_error(response, "Something went wrong when trying to borrow the book.")
except BookNotAvailableError:
raise
except Exception:
display_error(response, "The book cannot be borrowed")
data["action"] = "create_token"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if "token" in response.text:
if verbose:
debug("Successful loan")
return session
display_error(response, "Something went wrong when trying to borrow the book.")
sys.exit(1) # Unreachable but satisfies type checker
def return_loan(session: requests.Session, book_id: str) -> None:
"""Return a borrowed book.
Args:
session: Authenticated requests.Session with active loan
book_id: Archive.org book identifier
"""
data = {"action": "return_loan", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 200 and response.json()["success"]:
debug("Book returned")
else:
display_error(response, "Something went wrong when trying to return the book")
def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract book information and page links from archive.org viewer.
Args:
session: Authenticated requests.Session
url: Book URL (e.g., https://archive.org/borrow/book_id or /details/book_id)
Returns:
Tuple of (title, page_links, metadata)
Raises:
RuntimeError: If page data cannot be extracted
"""
r = session.get(url, timeout=30).text
# Try to extract the infos URL from the response
try:
# Look for the "url" field in the response using regex
# Matches "url":"//archive.org/..."
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
if not match:
raise ValueError("No 'url' field found in response")
url_path = match.group(1)
if url_path.startswith("//"):
infos_url = "https:" + url_path
else:
infos_url = url_path
infos_url = infos_url.replace("\\u0026", "&")
except (IndexError, ValueError, AttributeError) as e:
# If URL extraction fails, raise with better error message
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
response = session.get(infos_url, timeout=30)
data = response.json()["data"]
title = data["brOptions"]["bookTitle"].strip().replace(" ", "_")
title = "".join(c for c in title if c not in '<>:"/\\|?*') # Filter forbidden chars
title = title[:150] # Trim to avoid long file names
metadata = data["metadata"]
links = []
# Safely extract page links from brOptions data
try:
br_data = data.get("brOptions", {}).get("data", [])
for item in br_data:
if isinstance(item, list):
for page in item:
if isinstance(page, dict) and "uri" in page:
links.append(page["uri"])
elif isinstance(item, dict) and "uri" in item:
links.append(item["uri"])
except (KeyError, IndexError, TypeError) as e:
log(f"Warning: Error parsing page links: {e}", file=sys.stderr)
# Continue with whatever links we found
if len(links) > 1:
debug(f"Found {len(links)} pages")
return title, links, metadata
elif len(links) == 1:
debug(f"Found {len(links)} page")
return title, links, metadata
else:
log("Error while getting image links - no pages found", file=sys.stderr)
raise RuntimeError("No pages found in book data")
def image_name(pages: int, page: int, directory: str) -> str:
"""Generate image filename for page.
Args:
pages: Total number of pages
page: Current page number (0-indexed)
directory: Directory to save to
Returns:
Full path to image file
"""
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
def deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
"""Decrypt obfuscated image data using AES-CTR.
This handles Archive.org's image obfuscation for borrowed books.
Based on: https://github.com/justimm
Args:
image_data: Encrypted image bytes
link: Image URL (used to derive AES key)
obf_header: X-Obfuscate header value (format: "1|BASE64_COUNTER")
Returns:
Decrypted image bytes
"""
if not AES or not Counter:
raise RuntimeError("Crypto library not available")
try:
version, counter_b64 = obf_header.split("|")
except Exception as e:
raise ValueError("Invalid X-Obfuscate header format") from e
if version != "1":
raise ValueError("Unsupported obfuscation version: " + version)
# Derive AES key from URL
aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
sha1_digest = hashlib.sha1(aesKey.encode("utf-8")).digest()
key = sha1_digest[:16]
# Decode counter
counter_bytes = base64.b64decode(counter_b64)
if len(counter_bytes) != 16:
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
prefix = counter_bytes[:8]
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
# Create AES-CTR cipher
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
decrypted_part = cipher.decrypt(image_data[:1024])
new_data = decrypted_part + image_data[1024:]
return new_data
def download_one_image(
session: requests.Session,
link: str,
i: int,
directory: str,
book_id: str,
pages: int,
) -> None:
"""Download a single book page image.
Handles obfuscated images and re-borrowing on 403 errors.
Args:
session: Authenticated requests.Session
link: Direct image URL
i: Page index (0-based)
directory: Directory to save to
book_id: Archive.org book ID (for re-borrowing on 403)
pages: Total number of pages
"""
headers = {
"Referer": "https://archive.org/",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
"Sec-Fetch-Site": "same-site",
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Dest": "image",
}
retry = True
response = None
while retry:
try:
response = session.get(link, headers=headers, timeout=30)
if response.status_code == 403:
session = loan(session, book_id, verbose=False)
raise Exception("Borrow again")
if response.status_code == 200:
retry = False
except Exception:
time.sleep(1)
image = image_name(pages, i, directory)
if response is None:
log(f"Failed to download page {i}", file=sys.stderr)
return
obf_header = response.headers.get("X-Obfuscate")
image_content = None
if obf_header:
try:
image_content = deobfuscate_image(response.content, link, obf_header)
except Exception as e:
log(f"Deobfuscation failed: {e}", file=sys.stderr)
return
else:
image_content = response.content
with open(image, "wb") as f:
f.write(image_content)
def download(
session: requests.Session,
n_threads: int,
directory: str,
links: List[str],
scale: int,
book_id: str,
) -> List[str]:
"""Download all book pages as images.
Uses thread pool for parallel downloads.
Args:
session: Authenticated requests.Session
n_threads: Number of download threads
directory: Directory to save images to
links: List of image URLs
scale: Image resolution (0=highest, 10=lowest)
book_id: Archive.org book ID (for re-borrowing)
Returns:
List of downloaded image file paths
"""
debug("Downloading pages...")
links = [f"{link}&rotate=0&scale={scale}" for link in links]
pages = len(links)
tasks = []
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
for i, link in enumerate(links):
tasks.append(
executor.submit(
download_one_image,
session=session,
link=link,
i=i,
directory=directory,
book_id=book_id,
pages=pages,
)
)
if tqdm:
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
pass
else:
for _ in futures.as_completed(tasks):
pass
images = [image_name(pages, i, directory) for i in range(len(links))]
return images
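# End-to-end borrowing sketch tying the helpers above together (illustrative
# only; credentials, book URL, thread count, and scale are placeholders).
def _example_borrow_book(email: str, password: str, book_url: str, out_dir: str) -> List[str]:
    session = login(email, password)
    book_id = book_url.rstrip("/").split("/")[-1]     # .../details/<id> or .../borrow/<id>
    session = loan(session, book_id)                  # take the 14-day loan
    try:
        title, links, metadata = get_book_infos(session, book_url)
        os.makedirs(out_dir, exist_ok=True)
        return download(session, n_threads=4, directory=out_dir,
                        links=links, scale=3, book_id=book_id)
    finally:
        return_loan(session, book_id)                 # always release the loan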
def check_direct_download(book_id: str) -> Tuple[bool, str]:
"""Check if a book can be downloaded directly without borrowing.
Searches Archive.org metadata for downloadable PDF files.
Args:
book_id: Archive.org book identifier
Returns:
Tuple of (can_download: bool, pdf_url: str)
"""
try:
# First, try to get the metadata to find the actual PDF filename
metadata_url = f"https://archive.org/metadata/{book_id}"
response = requests.get(metadata_url, timeout=10)
response.raise_for_status()
metadata = response.json()
# Find PDF file in files list
if "files" in metadata:
for file_info in metadata["files"]:
filename = file_info.get("name", "")
if filename.endswith(".pdf") and file_info.get("source") == "original":
# Found the original PDF
pdf_filename = filename
pdf_url = f"https://archive.org/download/{book_id}/{pdf_filename.replace(' ', '%20')}"
# Verify it's accessible
check_response = requests.head(pdf_url, timeout=5, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
return False, ""
except Exception as e:
log(f"Error checking direct download: {e}", file=sys.stderr)
return False, ""
def get_openlibrary_by_isbn(isbn: str) -> Dict[str, Any]:
"""Fetch book data from OpenLibrary using ISBN.
Args:
isbn: ISBN-10 or ISBN-13 to search for
Returns:
Dictionary with book metadata from OpenLibrary
"""
try:
# Try ISBN API first
api_url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&jscmd=data&format=json"
response = requests.get(api_url, timeout=10)
response.raise_for_status()
data = response.json()
if data:
# Get first result
key = list(data.keys())[0]
return data[key]
return {}
except Exception as e:
log(f"Error fetching OpenLibrary data by ISBN: {e}", file=sys.stderr)
return {}
def extract_isbn_from_metadata(metadata: Dict[str, Any]) -> str:
"""Extract ISBN from archive.org metadata.
Looks for ISBN in various metadata fields.
Args:
metadata: Archive.org metadata dictionary
Returns:
ISBN string (clean, no hyphens) or empty string if not found
"""
# Try various common metadata fields
isbn_fields = [
"isbn", "ISBN", "isbn_13", "isbn_10", "isbns",
"isbn-10", "isbn-13", "identifer_isbn"
]
for field in isbn_fields:
if field in metadata:
isbn_val = metadata[field]
if isinstance(isbn_val, list):
isbn_val = isbn_val[0] if isbn_val else None
if isbn_val and isinstance(isbn_val, str):
# Clean ISBN (remove hyphens, spaces)
isbn_clean = isbn_val.replace("-", "").replace(" ", "")
if len(isbn_clean) in [10, 13]:
return isbn_clean
return ""
def normalize_url(url: str) -> str:
"""Convert openlibrary.org URL to archive.org URL.
Looks up the actual Archive.org ID from OpenLibrary API.
Args:
url: Book URL (archive.org or openlibrary.org format)
Returns:
Normalized archive.org URL
"""
url = url.strip()
# Already archive.org format
if url.startswith("https://archive.org/details/"):
return url
# Convert openlibrary.org format by querying the OpenLibrary API
if "openlibrary.org/books/" in url:
try:
# Extract the book ID (e.g., OL6796852M)
parts = url.split("/books/")
if len(parts) > 1:
book_id = parts[1].split("/")[0]
# Query OpenLibrary API to get the book metadata
api_url = f"https://openlibrary.org/books/{book_id}.json"
response = requests.get(api_url, timeout=10)
response.raise_for_status()
data = response.json()
# Look for identifiers including internet_archive or ocaid
# First try ocaid (Open Content Alliance ID) - this is most common
if "ocaid" in data:
ocaid = data["ocaid"]
return f"https://archive.org/details/{ocaid}"
# Check for identifiers object
if "identifiers" in data:
identifiers = data["identifiers"]
# Look for internet_archive ID
if "internet_archive" in identifiers:
ia_ids = identifiers["internet_archive"]
if isinstance(ia_ids, list) and ia_ids:
ia_id = ia_ids[0]
else:
ia_id = ia_ids
return f"https://archive.org/details/{ia_id}"
# If no IA identifier found, use the book ID as fallback
log(f"No Internet Archive ID found for {book_id}. Attempting with OpenLibrary ID.", file=sys.stderr)
return f"https://archive.org/details/{book_id}"
except requests.RequestException as e:
log(f"Could not fetch OpenLibrary metadata: {e}", file=sys.stderr)
# Fallback to using the book ID directly
parts = url.split("/books/")
if len(parts) > 1:
book_id = parts[1].split("/")[0]
return f"https://archive.org/details/{book_id}"
except (KeyError, IndexError) as e:
log(f"Error parsing OpenLibrary response: {e}", file=sys.stderr)
# Fallback to using the book ID directly
parts = url.split("/books/")
if len(parts) > 1:
book_id = parts[1].split("/")[0]
return f"https://archive.org/details/{book_id}"
# Return original if can't parse
return url
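# Decision sketch (illustrative only; the URL is a placeholder): prefer the free
# direct PDF when Archive.org exposes one, otherwise fall back to the borrow
# workflow shown after download() above.
def _example_plan_fetch(url: str) -> str:
    archive_url = normalize_url(url)                  # openlibrary.org -> archive.org
    book_id = archive_url.rstrip("/").split("/")[-1]
    can_download, pdf_url = check_direct_download(book_id)
    return pdf_url if can_download else archive_url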

API/folder.py (new file, 2315 lines added)
File diff suppressed because it is too large.