@@ -1,91 +0,0 @@
"""Helper modules for the downlow mpv integration."""

from . import hydrus as _hydrus
from . import download as _download
from . import tasks as _tasks
from . import utils as _utils

try:  # Optional dependency on Playwright
    from . import webshot as _webshot
except Exception as exc:  # pragma: no cover - surfaced when Playwright is missing
    _webshot = None  # type: ignore
    ScreenshotError = None  # type: ignore[assignment]
    ScreenshotOptions = None  # type: ignore[assignment]
    ScreenshotResult = None  # type: ignore[assignment]
    capture_screenshot = None  # type: ignore[assignment]
    ScreenshotImportError = exc  # type: ignore[assignment]
else:
    ScreenshotError = _webshot.ScreenshotError
    ScreenshotOptions = _webshot.ScreenshotOptions
    ScreenshotResult = _webshot.ScreenshotResult
    capture_screenshot = _webshot.capture_screenshot
    ScreenshotImportError = None

# CBOR utilities
decode_cbor = _utils.decode_cbor
jsonify = _utils.jsonify

# General utilities
CHUNK_SIZE = _utils.CHUNK_SIZE
ensure_directory = _utils.ensure_directory
unique_path = _utils.unique_path
download_hydrus_file = _hydrus.download_hydrus_file
sanitize_metadata_value = _utils.sanitize_metadata_value
unique_preserve_order = _utils.unique_preserve_order
sha256_file = _utils.sha256_file
create_metadata_sidecar = _utils.create_metadata_sidecar
create_tags_sidecar = _utils.create_tags_sidecar

# Format utilities
format_bytes = _utils.format_bytes
format_duration = _utils.format_duration
format_timestamp = _utils.format_timestamp
format_metadata_value = _utils.format_metadata_value

# Link utilities
extract_link = _utils.extract_link
extract_link_from_args = _utils.extract_link_from_args
extract_link_from_result = _utils.extract_link_from_result
get_api_key = _utils.get_api_key
add_direct_link_to_result = _utils.add_direct_link_to_result

# URL policy utilities
resolve_url_policy = _utils.resolve_url_policy
UrlPolicy = _utils.UrlPolicy

# Download utilities
DownloadOptions = _download.DownloadOptions
DownloadError = _download.DownloadError
DownloadMediaResult = _download.DownloadMediaResult
download_media = _download.download_media  # listed in __all__ below; assumes helper.download exposes it
is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp
probe_url = _download.probe_url

# Hydrus utilities
hydrus_request = _hydrus.hydrus_request
hydrus_export = _hydrus.hydrus_export
HydrusClient = _hydrus.HydrusClient
HydrusRequestError = _hydrus.HydrusRequestError

connect_ipc = _tasks.connect_ipc
ipc_sender = _tasks.ipc_sender

__all__ = [
    'decode_cbor',
    'jsonify',
    'CHUNK_SIZE',
    'ensure_directory',
    'unique_path',
    'download_hydrus_file',
    'sanitize_metadata_value',
    'unique_preserve_order',
    'sha256_file',
    'resolve_url_policy',
    'UrlPolicy',
    'ScreenshotError',
    'ScreenshotOptions',
    'ScreenshotResult',
    'capture_screenshot',
    'ScreenshotImportError',
    'DownloadOptions',
    'DownloadError',
    'DownloadMediaResult',
    'download_media',
    'is_url_supported_by_ytdlp',
    'probe_url',
    'HydrusClient',
    'HydrusRequestError',
    'hydrus_request',
    'hydrus_export',
    'connect_ipc',
    'ipc_sender',
]
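# Illustrative usage sketch (not part of the original file): callers import the re-exports
# above and guard the optional Playwright-backed screenshot helpers. The package name
# "helper" is inferred from imports elsewhere in this commit and may differ.
#
#     from helper import capture_screenshot, ScreenshotImportError
#
#     if ScreenshotImportError is not None:
#         print(f"webshot unavailable: {ScreenshotImportError}")
#     else:
#         result = capture_screenshot(options)  # 'options' is a ScreenshotOptions instance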
@@ -1,130 +0,0 @@
{
  "Occult": ["esoterica", "ritual", "alchemy", "magic", "hermetic", "divination", "grimoires", "symbolism", "ceremony"],
  "Philosophy": ["ethics", "metaphysics", "epistemology", "logic", "existentialism", "stoicism", "phenomenology", "dialectic", "aesthetics"],
  "Mystery": ["investigation", "crime", "detective", "noir", "thriller", "suspense", "conspiracy", "whodunit", "clues"],
  "Religion": ["scripture", "theology", "worship", "ritual", "doctrine", "faith", "tradition", "liturgy", "sacred"],
  "Mythology": ["gods", "creation", "heroes", "legends", "folklore", "pantheon", "epic", "mythic", "archetype"],
  "Science": ["research", "experiment", "theory", "biology", "physics", "chemistry", "data", "method", "innovation"],
  "Art": ["visual", "painting", "sculpture", "modernism", "technique", "studio", "curation", "expression", "composition"],
  "Literature": ["fiction", "poetry", "novel", "criticism", "narrative", "prose", "drama", "canonical", "translation"],
  "History": ["archaeology", "chronicle", "period", "empire", "revolution", "archive", "heritage", "historiography", "timeline"],
  "Psychology": ["cognition", "behavior", "therapy", "development", "neuroscience", "personality", "perception", "emotion", "motivation"],
  "gnostic": ["religion", "scripture", "gnostic", "gospel", "wisdom", "spirituality", "ancient", "philosophy", "esoteric", "mysticism", "mythology", "theology", "sacred", "divine", "apocrypha", "gnosticism"]
}
@@ -1,829 +0,0 @@
"""AllDebrid API integration for converting free links to direct downloads.
|
||||
|
||||
AllDebrid is a debrid service that unlocks free file hosters and provides direct download links.
|
||||
API docs: https://docs.alldebrid.com/#general-informations
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
from helper.logger import log, debug
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Set, List, Sequence
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from .http_client import HTTPClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AllDebridError(Exception):
|
||||
"""Raised when AllDebrid API request fails."""
|
||||
pass
|
||||
|
||||
|
||||
# Cache for supported hosters (domain -> host info)
|
||||
_SUPPORTED_HOSTERS_CACHE: Optional[Dict[str, Dict[str, Any]]] = None
|
||||
_CACHE_TIMESTAMP: float = 0
|
||||
_CACHE_DURATION: float = 3600 # 1 hour
|
||||
|
||||
|
||||
class AllDebridClient:
|
||||
"""Client for AllDebrid API."""
|
||||
|
||||
# Try both v4 and v3 APIs
BASE_URLS = [
"https://api.alldebrid.com/v4",
"https://api.alldebrid.com/v3",
]
|
||||
|
||||
def __init__(self, api_key: str):
|
||||
"""Initialize AllDebrid client with API key.
|
||||
|
||||
Args:
|
||||
api_key: AllDebrid API key from config
|
||||
"""
|
||||
self.api_key = api_key.strip()
|
||||
if not self.api_key:
|
||||
raise AllDebridError("AllDebrid API key is empty")
|
||||
self.base_url = self.BASE_URLS[0]  # Start with v4
|
||||
|
||||
def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
||||
"""Make a request to AllDebrid API.
|
||||
|
||||
Args:
|
||||
endpoint: API endpoint (e.g., "user/profile", "link/unlock")
|
||||
params: Query parameters
|
||||
|
||||
Returns:
|
||||
Parsed JSON response
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails or API returns error
|
||||
"""
|
||||
if params is None:
|
||||
params = {}
|
||||
|
||||
# Add API key to params
|
||||
params['apikey'] = self.api_key
|
||||
|
||||
url = f"{self.base_url}/{endpoint}"
|
||||
query_string = urlencode(params)
|
||||
full_url = f"{url}?{query_string}"
|
||||
|
||||
logger.debug(f"[AllDebrid] {endpoint} request to {full_url[:80]}...")
|
||||
|
||||
try:
|
||||
# Pass timeout to HTTPClient init, not to get()
|
||||
with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client:
|
||||
try:
|
||||
response = client.get(full_url)
|
||||
response.raise_for_status()
|
||||
except Exception as req_err:
|
||||
# Log detailed error info
|
||||
logger.error(f"[AllDebrid] Request error to {full_url[:80]}: {req_err}", exc_info=True)
|
||||
if hasattr(req_err, 'response') and req_err.response is not None: # type: ignore
|
||||
try:
|
||||
error_body = req_err.response.content.decode('utf-8') # type: ignore
|
||||
logger.error(f"[AllDebrid] Response body: {error_body[:200]}")
|
||||
except Exception:
pass
|
||||
raise
|
||||
|
||||
data = json.loads(response.content.decode('utf-8'))
|
||||
logger.debug(f"[AllDebrid] Response status: {response.status_code}")
|
||||
|
||||
# Check for API errors
|
||||
if data.get('status') == 'error':
|
||||
error_msg = data.get('error', {}).get('message', 'Unknown error')
|
||||
logger.error(f"[AllDebrid] API error: {error_msg}")
|
||||
raise AllDebridError(f"AllDebrid API error: {error_msg}")
|
||||
|
||||
return data
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
error_msg = f"AllDebrid request failed: {exc}"
|
||||
logger.error(f"[AllDebrid] {error_msg}", exc_info=True)
|
||||
raise AllDebridError(error_msg)
|
||||
|
||||
def unlock_link(self, link: str) -> Optional[str]:
|
||||
"""Unlock a restricted link and get direct download URL.
|
||||
|
||||
Args:
|
||||
link: Restricted link to unlock
|
||||
|
||||
Returns:
|
||||
Direct download URL, or None if already unrestricted
|
||||
|
||||
Raises:
|
||||
AllDebridError: If unlock fails
|
||||
"""
|
||||
if not link.startswith(('http://', 'https://')):
|
||||
raise AllDebridError(f"Invalid URL: {link}")
|
||||
|
||||
try:
|
||||
response = self._request('link/unlock', {'link': link})
|
||||
|
||||
# Check if unlock was successful
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
|
||||
# AllDebrid returns the download info in 'link' field
|
||||
if 'link' in data:
|
||||
return data['link']
|
||||
|
||||
# Alternative: check for 'file' field
|
||||
if 'file' in data:
|
||||
return data['file']
|
||||
|
||||
# If no direct link, return the input link
|
||||
return link
|
||||
|
||||
return None
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to unlock link: {exc}")
|
||||
|
||||
def check_host(self, hostname: str) -> Dict[str, Any]:
|
||||
"""Check if a host is supported by AllDebrid.
|
||||
|
||||
Args:
|
||||
hostname: Hostname to check (e.g., "uploadhaven.com")
|
||||
|
||||
Returns:
|
||||
Host information dict with support status
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
try:
|
||||
response = self._request('host', {'name': hostname})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
return response.get('data', {})
|
||||
|
||||
return {}
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to check host: {exc}")
|
||||
|
||||
def get_user_info(self) -> Dict[str, Any]:
|
||||
"""Get current user account information.
|
||||
|
||||
Returns:
|
||||
User information dict
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
try:
|
||||
response = self._request('user/profile')
|
||||
|
||||
if response.get('status') == 'success':
|
||||
return response.get('data', {})
|
||||
|
||||
return {}
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get user info: {exc}")
|
||||
|
||||
def get_supported_hosters(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Get list of all supported hosters from AllDebrid API.
|
||||
|
||||
Returns:
|
||||
Dict mapping domain to host info (status, name, etc)
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
try:
|
||||
response = self._request('hosts/domains')
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
# The API returns hosts keyed by domain
|
||||
return data if isinstance(data, dict) else {}
|
||||
|
||||
return {}
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get supported hosters: {exc}")
|
||||
|
||||
def magnet_add(self, magnet_uri: str) -> Dict[str, Any]:
|
||||
"""Submit a magnet link or torrent hash to AllDebrid for processing.
|
||||
|
||||
AllDebrid will download the torrent content and store it in the account.
|
||||
Processing time varies based on torrent size and availability.
|
||||
|
||||
Args:
|
||||
magnet_uri: Magnet URI (magnet:?xt=urn:btih:...) or torrent hash
|
||||
|
||||
Returns:
|
||||
Dict with magnet info:
|
||||
- id: Magnet ID (int) - needed for status checks
|
||||
- name: Torrent name
|
||||
- hash: Torrent hash
|
||||
- size: Total file size (bytes)
|
||||
- ready: Boolean - True if already available
|
||||
|
||||
Raises:
|
||||
AllDebridError: If submit fails (requires premium, invalid magnet, etc)
|
||||
"""
|
||||
if not magnet_uri:
|
||||
raise AllDebridError("Magnet URI is empty")
|
||||
|
||||
try:
|
||||
# API endpoint: POST /v4/magnet/upload
|
||||
# Format: /magnet/upload?apikey=key&magnets[]=magnet:?xt=...
|
||||
response = self._request('magnet/upload', {'magnets[]': magnet_uri})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', [])
|
||||
|
||||
if magnets and len(magnets) > 0:
|
||||
magnet_info = magnets[0]
|
||||
|
||||
# Check for errors in the magnet response
|
||||
if 'error' in magnet_info:
|
||||
error = magnet_info['error']
|
||||
error_msg = error.get('message', 'Unknown error')
|
||||
raise AllDebridError(f"Magnet error: {error_msg}")
|
||||
|
||||
return magnet_info
|
||||
|
||||
raise AllDebridError("No magnet data in response")
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to submit magnet: {exc}")
|
||||
|
||||
def magnet_status(self, magnet_id: int, include_files: bool = False) -> Dict[str, Any]:
|
||||
"""Get status of a magnet currently being processed or stored.
|
||||
|
||||
Status codes:
|
||||
0-3: Processing (in queue, downloading, compressing, uploading)
|
||||
4: Ready (files available for download)
|
||||
5-15: Error (upload failed, not downloaded in 20min, too big, etc)
|
||||
|
||||
Args:
|
||||
magnet_id: Magnet ID from magnet_add()
|
||||
include_files: If True, includes file list in response
|
||||
|
||||
Returns:
|
||||
Dict with status info:
|
||||
- id: Magnet ID
|
||||
- filename: Torrent name
|
||||
- size: Total size (bytes)
|
||||
- status: Human-readable status
|
||||
- statusCode: Numeric code (0-15)
|
||||
- downloaded: Bytes downloaded so far
|
||||
- uploaded: Bytes uploaded so far
|
||||
- seeders: Number of seeders
|
||||
- downloadSpeed: Current speed (bytes/sec)
|
||||
- uploadSpeed: Current speed (bytes/sec)
|
||||
- files: (optional) Array of file objects when include_files=True
|
||||
Each file: {n: name, s: size, l: download_link}
|
||||
|
||||
Raises:
|
||||
AllDebridError: If status check fails
|
||||
"""
|
||||
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||
|
||||
try:
|
||||
# Use v4.1 endpoint for better response format
|
||||
# Temporarily override base_url for this request
|
||||
old_base = self.base_url
|
||||
self.base_url = "https://api.alldebrid.com/v4.1"
|
||||
|
||||
try:
|
||||
response = self._request('magnet/status', {'id': str(magnet_id)})
|
||||
finally:
|
||||
self.base_url = old_base
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', {})
|
||||
|
||||
# Handle both list and dict responses
|
||||
if isinstance(magnets, list) and len(magnets) > 0:
|
||||
return magnets[0]
|
||||
elif isinstance(magnets, dict) and magnets:
|
||||
return magnets
|
||||
|
||||
raise AllDebridError(f"No magnet found with ID {magnet_id}")
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get magnet status: {exc}")
|
||||
|
||||
def magnet_status_live(self, magnet_id: int, session: Optional[int] = None, counter: int = 0) -> Dict[str, Any]:
|
||||
"""Get live status of a magnet using delta sync mode.
|
||||
|
||||
The live mode endpoint provides real-time progress by only sending
|
||||
deltas (changed fields) instead of full status on each call. This
|
||||
reduces bandwidth and server load compared to regular polling.
|
||||
|
||||
Note: The "live" designation refers to the delta-sync mode where you
|
||||
maintain state locally and apply diffs from the API, not a streaming
|
||||
endpoint. Regular magnet_status() polling is simpler for single magnets.
|
||||
|
||||
Docs: https://docs.alldebrid.com/#get-status-live-mode
|
||||
|
||||
Args:
|
||||
magnet_id: Magnet ID from magnet_add()
|
||||
session: Session ID (use same ID across multiple calls). If None, will query current status
|
||||
counter: Counter value from previous response (starts at 0)
|
||||
|
||||
Returns:
|
||||
Dict with magnet status. May contain only changed fields if counter > 0.
|
||||
For single-magnet tracking, use magnet_status() instead.
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||
|
||||
try:
|
||||
# For single magnet queries, just use regular endpoint with ID
|
||||
# The "live mode" with session/counter is for multi-magnet dashboards
|
||||
# where bandwidth savings from diffs matter
|
||||
response = self._request('magnet/status', {'id': str(magnet_id)})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', [])
|
||||
|
||||
# Handle list response
|
||||
if isinstance(magnets, list) and len(magnets) > 0:
|
||||
return magnets[0]
|
||||
|
||||
raise AllDebridError(f"No magnet found with ID {magnet_id}")
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get magnet live status: {exc}")
|
||||
|
||||
def magnet_links(self, magnet_ids: list) -> Dict[str, Any]:
|
||||
"""Get files and download links for one or more magnets.
|
||||
|
||||
Use this after magnet_status shows statusCode == 4 (Ready).
|
||||
Returns the file tree structure with direct download links.
|
||||
|
||||
Args:
|
||||
magnet_ids: List of magnet IDs to get files for
|
||||
|
||||
Returns:
|
||||
Dict mapping magnet_id (as string) -> magnet_info:
|
||||
- id: Magnet ID
|
||||
- files: Array of file/folder objects
|
||||
File: {n: name, s: size, l: direct_download_link}
|
||||
Folder: {n: name, e: [sub_items]}
|
||||
|
||||
Raises:
|
||||
AllDebridError: If request fails
|
||||
"""
|
||||
if not magnet_ids:
|
||||
raise AllDebridError("No magnet IDs provided")
|
||||
|
||||
try:
|
||||
# Build parameter: id[]=123&id[]=456 style
|
||||
params = {}
|
||||
for i, magnet_id in enumerate(magnet_ids):
|
||||
params[f'id[{i}]'] = str(magnet_id)
|
||||
|
||||
response = self._request('magnet/files', params)
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
magnets = data.get('magnets', [])
|
||||
|
||||
# Convert list to dict keyed by ID (as string) for easier access
|
||||
result = {}
|
||||
for magnet_info in magnets:
|
||||
magnet_id = magnet_info.get('id')
|
||||
if magnet_id:
|
||||
result[str(magnet_id)] = magnet_info
|
||||
|
||||
return result
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to get magnet files: {exc}")
|
||||
|
||||
def instant_available(self, magnet_hash: str) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Check if magnet is available for instant streaming without downloading.
|
||||
|
||||
AllDebrid's "instant" feature checks if a magnet can be streamed directly
|
||||
without downloading all the data. Returns available video/audio files.
|
||||
|
||||
Args:
|
||||
magnet_hash: Torrent hash (with or without magnet: prefix)
|
||||
|
||||
Returns:
|
||||
List of available files for streaming, or None if not available
|
||||
Each file: {n: name, s: size, e: extension, t: type}
|
||||
Returns empty list if torrent not found or not available
|
||||
|
||||
Raises:
|
||||
AllDebridError: If API request fails
|
||||
"""
|
||||
try:
|
||||
# Parse magnet hash if needed
|
||||
if magnet_hash.startswith('magnet:'):
|
||||
# Extract hash from magnet URI
|
||||
import re
|
||||
match = re.search(r'xt=urn:btih:([a-fA-F0-9]+)', magnet_hash)
|
||||
if not match:
|
||||
return None
|
||||
hash_value = match.group(1)
|
||||
else:
|
||||
hash_value = magnet_hash.strip()
|
||||
|
||||
if not hash_value or len(hash_value) < 32:
|
||||
return None
|
||||
|
||||
response = self._request('magnet/instant', {'magnet': hash_value})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
data = response.get('data', {})
|
||||
# Returns 'files' array if available, or empty
|
||||
return data.get('files', [])
|
||||
|
||||
# Not available is not an error, just return empty list
|
||||
return []
|
||||
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.debug(f"[AllDebrid] instant_available check failed: {exc}")
|
||||
return None
|
||||
|
||||
def magnet_delete(self, magnet_id: int) -> bool:
|
||||
"""Delete a magnet from the AllDebrid account.
|
||||
|
||||
Args:
|
||||
magnet_id: Magnet ID to delete
|
||||
|
||||
Returns:
|
||||
True if deletion was successful
|
||||
|
||||
Raises:
|
||||
AllDebridError: If deletion fails
|
||||
"""
|
||||
if not isinstance(magnet_id, int) or magnet_id <= 0:
|
||||
raise AllDebridError(f"Invalid magnet ID: {magnet_id}")
|
||||
|
||||
try:
|
||||
response = self._request('magnet/delete', {'id': str(magnet_id)})
|
||||
|
||||
if response.get('status') == 'success':
|
||||
return True
|
||||
|
||||
raise AllDebridError(f"API error: {response.get('error', 'Unknown')}")
|
||||
except AllDebridError:
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise AllDebridError(f"Failed to delete magnet: {exc}")
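# Illustrative workflow sketch (an addition, not part of the original module) tying the magnet
# methods above together: submit a magnet, poll until statusCode 4 (Ready), then fetch links.
# The magnet hash below is a placeholder.
#
#     client = AllDebridClient(api_key)
#     added = client.magnet_add("magnet:?xt=urn:btih:<hash>")
#     status = client.magnet_status(added["id"])
#     while status.get("statusCode", 0) in (0, 1, 2, 3):   # 0-3 = still processing
#         time.sleep(5)
#         status = client.magnet_status(added["id"])
#     if status.get("statusCode") == 4:                      # 4 = Ready
#         files = client.magnet_links([added["id"]])
#         # files[str(added["id"])]["files"] -> [{"n": name, "s": size, "l": download_link}, ...]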
|
||||
|
||||
|
||||
def _get_cached_supported_hosters(api_key: str) -> Set[str]:
|
||||
"""Get cached list of supported hoster domains.
|
||||
|
||||
Uses AllDebrid API to fetch the list once per hour,
|
||||
caching the result to avoid repeated API calls.
|
||||
|
||||
Args:
|
||||
api_key: AllDebrid API key
|
||||
|
||||
Returns:
|
||||
Set of supported domain names (lowercased)
|
||||
"""
|
||||
global _SUPPORTED_HOSTERS_CACHE, _CACHE_TIMESTAMP
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Return cached result if still valid
|
||||
if _SUPPORTED_HOSTERS_CACHE is not None and (now - _CACHE_TIMESTAMP) < _CACHE_DURATION:
|
||||
return set(_SUPPORTED_HOSTERS_CACHE.keys())
|
||||
|
||||
# Fetch fresh list from API
|
||||
try:
|
||||
client = AllDebridClient(api_key)
|
||||
hosters_dict = client.get_supported_hosters()
|
||||
|
||||
if hosters_dict:
|
||||
# API returns: hosts (list), streams (list), redirectors (list)
|
||||
# Combine all into a single set
|
||||
all_domains: Set[str] = set()
|
||||
|
||||
# Add hosts
|
||||
if 'hosts' in hosters_dict and isinstance(hosters_dict['hosts'], list):
|
||||
all_domains.update(hosters_dict['hosts'])
|
||||
|
||||
# Add streams
|
||||
if 'streams' in hosters_dict and isinstance(hosters_dict['streams'], list):
|
||||
all_domains.update(hosters_dict['streams'])
|
||||
|
||||
# Add redirectors
|
||||
if 'redirectors' in hosters_dict and isinstance(hosters_dict['redirectors'], list):
|
||||
all_domains.update(hosters_dict['redirectors'])
|
||||
|
||||
# Cache as dict for consistency
|
||||
_SUPPORTED_HOSTERS_CACHE = {domain: {} for domain in all_domains}
|
||||
_CACHE_TIMESTAMP = now
|
||||
|
||||
if all_domains:
|
||||
debug(f"✓ Cached {len(all_domains)} supported hosters")
|
||||
|
||||
return all_domains
|
||||
except Exception as exc:
|
||||
log(f"⚠ Failed to fetch supported hosters: {exc}", file=sys.stderr)
|
||||
# Return any cached hosters even if expired
|
||||
if _SUPPORTED_HOSTERS_CACHE:
|
||||
return set(_SUPPORTED_HOSTERS_CACHE.keys())
|
||||
|
||||
# Fallback: empty set if no cache available
|
||||
return set()
|
||||
|
||||
|
||||
def is_link_restrictable_hoster(url: str, api_key: str) -> bool:
|
||||
"""Check if a URL is from a hoster that AllDebrid can unlock.
|
||||
|
||||
Intelligently queries the AllDebrid API to detect if the URL is
|
||||
from a supported restricted hoster.
|
||||
|
||||
Args:
|
||||
url: URL to check
|
||||
api_key: AllDebrid API key
|
||||
|
||||
Returns:
|
||||
True if URL is from a supported restrictable hoster
|
||||
"""
|
||||
if not url or not api_key:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Extract domain from URL
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower()
|
||||
|
||||
# Remove www. prefix for comparison
|
||||
if domain.startswith('www.'):
|
||||
domain = domain[4:]
|
||||
|
||||
# Get supported hosters (cached)
|
||||
supported = _get_cached_supported_hosters(api_key)
|
||||
|
||||
if not supported:
|
||||
# API check failed, fall back to manual detection
|
||||
# Check for common restricted hosters
|
||||
common_hosters = {
|
||||
'uploadhaven.com', 'uploaded.to', 'uploaded.net',
|
||||
'datafile.com', 'rapidfile.io', 'nitroflare.com',
|
||||
'1fichier.com', 'mega.nz', 'mediafire.com'
|
||||
}
|
||||
return any(host in url.lower() for host in common_hosters)
|
||||
|
||||
# Check if domain is in supported list
|
||||
# Need to check exact match and with/without www
|
||||
return domain in supported or f"www.{domain}" in supported
|
||||
except Exception as exc:
|
||||
log(f"⚠ Hoster detection failed: {exc}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
|
||||
def convert_link_with_debrid(link: str, api_key: str) -> Optional[str]:
|
||||
"""Convert a restricted link to a direct download URL using AllDebrid.
|
||||
|
||||
Args:
|
||||
link: Restricted link
|
||||
api_key: AllDebrid API key
|
||||
|
||||
Returns:
|
||||
Direct download URL, or original link if already unrestricted
|
||||
"""
|
||||
if not api_key:
|
||||
return None
|
||||
|
||||
try:
|
||||
client = AllDebridClient(api_key)
|
||||
direct_link = client.unlock_link(link)
|
||||
|
||||
if direct_link and direct_link != link:
|
||||
debug(f"✓ Converted link: {link[:60]}... → {direct_link[:60]}...")
|
||||
return direct_link
|
||||
|
||||
return None
|
||||
except AllDebridError as exc:
|
||||
log(f"⚠ Failed to convert link: {exc}", file=sys.stderr)
|
||||
return None
|
||||
except Exception as exc:
|
||||
log(f"⚠ Unexpected error: {exc}", file=sys.stderr)
|
||||
return None
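# Illustrative sketch (an addition, not in the original module): combine the helpers above to
# unlock a hoster URL only when AllDebrid reports the host as supported.
#
#     if is_link_restrictable_hoster(url, api_key):
#         direct = convert_link_with_debrid(url, api_key)
#         if direct:
#             pass  # hand the direct URL to the downloader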
|
||||
|
||||
|
||||
def is_magnet_link(uri: str) -> bool:
|
||||
"""Check if a URI is a magnet link.
|
||||
|
||||
Magnet links start with 'magnet:?xt=urn:btih:' or just 'magnet:'
|
||||
|
||||
Args:
|
||||
uri: URI to check
|
||||
|
||||
Returns:
|
||||
True if URI is a magnet link
|
||||
"""
|
||||
if not uri:
|
||||
return False
|
||||
return uri.lower().startswith('magnet:')
|
||||
|
||||
|
||||
def is_torrent_hash(text: str) -> bool:
|
||||
"""Check if text looks like a torrent hash (40 or 64 hex characters).
|
||||
|
||||
Common formats:
|
||||
- Info hash v1: 40 hex chars (SHA-1)
|
||||
- Info hash v2: 64 hex chars (SHA-256)
|
||||
|
||||
Args:
|
||||
text: Text to check
|
||||
|
||||
Returns:
|
||||
True if text matches torrent hash format
|
||||
"""
|
||||
if not text or not isinstance(text, str):
|
||||
return False
|
||||
|
||||
text = text.strip()
|
||||
|
||||
# Check if it's 40 hex chars (SHA-1) or 64 hex chars (SHA-256)
|
||||
if len(text) not in (40, 64):
|
||||
return False
|
||||
|
||||
try:
|
||||
# Try to parse as hex
|
||||
int(text, 16)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def is_torrent_file(path: str) -> bool:
|
||||
"""Check if a file path is a .torrent file.
|
||||
|
||||
Args:
|
||||
path: File path to check
|
||||
|
||||
Returns:
|
||||
True if file has .torrent extension
|
||||
"""
|
||||
if not path:
|
||||
return False
|
||||
return path.lower().endswith('.torrent')
|
||||
|
||||
|
||||
def parse_magnet_or_hash(uri: str) -> Optional[str]:
|
||||
"""Parse a magnet URI or hash into a format for AllDebrid API.
|
||||
|
||||
AllDebrid's magnet/upload endpoint accepts:
|
||||
- Full magnet URIs: magnet:?xt=urn:btih:...
|
||||
- Info hashes: 40 or 64 hex characters
|
||||
|
||||
Args:
|
||||
uri: Magnet URI or hash
|
||||
|
||||
Returns:
|
||||
Normalized input for AllDebrid API, or None if invalid
|
||||
"""
|
||||
if not uri:
|
||||
return None
|
||||
|
||||
uri = uri.strip()
|
||||
|
||||
# Already a magnet link - just return it
|
||||
if is_magnet_link(uri):
|
||||
return uri
|
||||
|
||||
# Check if it's a valid hash
|
||||
if is_torrent_hash(uri):
|
||||
return uri
|
||||
|
||||
# Not a recognized format
|
||||
return None
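# Illustrative checks (an addition, not in the original module): both a bare v1 info hash and a
# full magnet URI normalize to a value magnet_add() accepts; anything else returns None.
#
#     parse_magnet_or_hash("magnet:?xt=urn:btih:abcd1234")  # -> returned unchanged
#     parse_magnet_or_hash("a" * 40)                        # -> accepted (40 hex chars)
#     parse_magnet_or_hash("not-a-hash")                    # -> None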
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Cmdlet: unlock_link
|
||||
# ============================================================================
|
||||
|
||||
def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Unlock a restricted link using AllDebrid.
|
||||
|
||||
Converts free hosters and restricted links to direct download URLs.
|
||||
|
||||
Usage:
|
||||
unlock-link <link>
|
||||
unlock-link # Uses URL from pipeline result
|
||||
|
||||
Requires:
|
||||
- AllDebrid API key in config under Debrid.All-debrid
|
||||
|
||||
Args:
|
||||
result: Pipeline result object
|
||||
args: Command arguments
|
||||
config: Configuration dictionary
|
||||
|
||||
Returns:
|
||||
0 on success, 1 on failure
|
||||
"""
|
||||
try:
|
||||
from .link_utils import (
|
||||
extract_link,
|
||||
get_api_key,
|
||||
add_direct_link_to_result,
|
||||
)
|
||||
except ImportError as e:
|
||||
log(f"Required modules unavailable: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get link from args or result
|
||||
link = extract_link(result, args)
|
||||
|
||||
if not link:
|
||||
log("No valid URL provided", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get AllDebrid API key from config
|
||||
api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
|
||||
|
||||
if not api_key:
|
||||
log("AllDebrid API key not configured in Debrid.All-debrid", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Try to unlock the link
|
||||
debug(f"Unlocking: {link}")
|
||||
direct_link = convert_link_with_debrid(link, api_key)
|
||||
|
||||
if direct_link:
|
||||
debug(f"✓ Direct link: {direct_link}")
|
||||
|
||||
# Update result with direct link
|
||||
add_direct_link_to_result(result, direct_link, link)
|
||||
|
||||
# Return the updated result via pipeline context
|
||||
# Note: The cmdlet wrapper will handle emitting to pipeline
|
||||
return 0
|
||||
else:
|
||||
log("❌ Failed to unlock link or already unrestricted", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Cmdlet Registration
|
||||
# ============================================================================
|
||||
|
||||
def _register_unlock_link():
|
||||
"""Register unlock-link command with cmdlet registry if available."""
|
||||
try:
|
||||
from cmdlets import register
|
||||
|
||||
@register(["unlock-link"])
|
||||
def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Wrapper to make unlock_link_cmdlet available as cmdlet."""
|
||||
import pipeline as ctx
|
||||
|
||||
ret_code = unlock_link_cmdlet(result, args, config)
|
||||
|
||||
# If successful, emit the result
|
||||
if ret_code == 0:
|
||||
ctx.emit(result)
|
||||
|
||||
return ret_code
|
||||
|
||||
return unlock_link_wrapper
|
||||
except ImportError:
|
||||
# If cmdlets module not available, just return None
|
||||
return None
|
||||
|
||||
|
||||
# Register when module is imported
|
||||
_unlock_link_registration = _register_unlock_link()
@@ -1,584 +0,0 @@
"""Archive.org API client for borrowing and downloading books.
|
||||
|
||||
This module provides low-level functions for interacting with Archive.org:
|
||||
- Authentication (login, credential management)
|
||||
- Borrowing (loan, return_loan)
|
||||
- Book metadata extraction (get_book_infos, get_book_metadata)
|
||||
- Image downloading and deobfuscation
|
||||
- PDF creation with metadata
|
||||
|
||||
Used by unified_book_downloader.py for the borrowing workflow.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from concurrent import futures
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
try:
|
||||
from Crypto.Cipher import AES # type: ignore
|
||||
from Crypto.Util import Counter # type: ignore
|
||||
except ImportError:
|
||||
AES = None # type: ignore
|
||||
Counter = None # type: ignore
|
||||
|
||||
try:
|
||||
from tqdm import tqdm # type: ignore
|
||||
except ImportError:
|
||||
tqdm = None # type: ignore
|
||||
|
||||
|
||||
def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""Get OpenLibrary/Archive.org email and password from config.
|
||||
|
||||
Supports both formats:
|
||||
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
|
||||
- Old: {"Archive": {"email": "...", "password": "..."}}
|
||||
{"archive_org_email": "...", "archive_org_password": "..."}
|
||||
|
||||
Returns: (email, password) tuple, each can be None
|
||||
"""
|
||||
if not isinstance(config, dict):
|
||||
return None, None
|
||||
|
||||
# Try new format first
|
||||
provider_config = config.get("provider", {})
|
||||
if isinstance(provider_config, dict):
|
||||
openlibrary_config = provider_config.get("openlibrary", {})
|
||||
if isinstance(openlibrary_config, dict):
|
||||
email = openlibrary_config.get("email")
|
||||
password = openlibrary_config.get("password")
|
||||
if email or password:
|
||||
return email, password
|
||||
|
||||
# Try old nested format
|
||||
archive_config = config.get("Archive")
|
||||
if isinstance(archive_config, dict):
|
||||
email = archive_config.get("email")
|
||||
password = archive_config.get("password")
|
||||
if email or password:
|
||||
return email, password
|
||||
|
||||
# Fall back to old flat format
|
||||
email = config.get("archive_org_email")
|
||||
password = config.get("archive_org_password")
|
||||
return email, password
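# Illustrative config fragment (an addition; the surrounding file layout is project-specific)
# matching the "new" provider format read above:
#
#     {"provider": {"openlibrary": {"email": "user@example.com", "password": "..."}}}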
|
||||
|
||||
|
||||
class BookNotAvailableError(Exception):
|
||||
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
|
||||
pass
|
||||
|
||||
|
||||
def display_error(response: requests.Response, message: str) -> None:
|
||||
"""Display error and exit."""
|
||||
log(message, file=sys.stderr)
|
||||
log(response.text, file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def login(email: str, password: str) -> requests.Session:
|
||||
"""Login to archive.org.
|
||||
|
||||
Args:
|
||||
email: Archive.org email
|
||||
password: Archive.org password
|
||||
|
||||
Returns:
|
||||
Authenticated requests.Session
|
||||
|
||||
Raises:
|
||||
SystemExit on login failure
|
||||
"""
|
||||
session = requests.Session()
|
||||
session.get("https://archive.org/account/login", timeout=30)
|
||||
|
||||
data = {"username": email, "password": password}
|
||||
response = session.post("https://archive.org/account/login", data=data, timeout=30)
|
||||
|
||||
if "bad_login" in response.text:
|
||||
log("Invalid credentials!", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
if "Successful login" in response.text:
|
||||
debug("Successful login")
|
||||
return session
|
||||
display_error(response, "[-] Error while login:")
|
||||
sys.exit(1) # Unreachable but satisfies type checker
|
||||
|
||||
|
||||
def loan(session: requests.Session, book_id: str, verbose: bool = True) -> requests.Session:
|
||||
"""Borrow a book from archive.org (14-day loan).
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session from login()
|
||||
book_id: Archive.org book identifier (e.g., 'ia_book_id')
|
||||
verbose: Whether to log messages
|
||||
|
||||
Returns:
|
||||
Session with active loan
|
||||
|
||||
Raises:
|
||||
SystemExit on loan failure
|
||||
"""
|
||||
data = {"action": "grant_access", "identifier": book_id}
|
||||
response = session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
|
||||
data["action"] = "browse_book"
|
||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||
|
||||
if response.status_code == 400:
|
||||
try:
|
||||
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
|
||||
debug("Book is not available for borrowing (waitlisted or in use)")
|
||||
raise BookNotAvailableError("Book is waitlisted or in use")
|
||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||
except BookNotAvailableError:
|
||||
raise
|
||||
except Exception:
|
||||
display_error(response, "The book cannot be borrowed")
|
||||
|
||||
data["action"] = "create_token"
|
||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||
|
||||
if "token" in response.text:
|
||||
if verbose:
|
||||
debug("Successful loan")
|
||||
return session
|
||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||
sys.exit(1) # Unreachable but satisfies type checker
|
||||
|
||||
|
||||
def return_loan(session: requests.Session, book_id: str) -> None:
|
||||
"""Return a borrowed book.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session with active loan
|
||||
book_id: Archive.org book identifier
|
||||
"""
|
||||
data = {"action": "return_loan", "identifier": book_id}
|
||||
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
|
||||
if response.status_code == 200 and response.json()["success"]:
|
||||
debug("Book returned")
|
||||
else:
|
||||
display_error(response, "Something went wrong when trying to return the book")
|
||||
|
||||
|
||||
def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
|
||||
"""Extract book information and page links from archive.org viewer.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session
|
||||
url: Book URL (e.g., https://archive.org/borrow/book_id or /details/book_id)
|
||||
|
||||
Returns:
|
||||
Tuple of (title, page_links, metadata)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If page data cannot be extracted
|
||||
"""
|
||||
r = session.get(url, timeout=30).text
|
||||
|
||||
# Try to extract the infos URL from the response
|
||||
try:
|
||||
# Look for the "url" field in the response using regex
|
||||
# Matches "url":"//archive.org/..."
|
||||
import re
|
||||
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
|
||||
if not match:
|
||||
raise ValueError("No 'url' field found in response")
|
||||
|
||||
url_path = match.group(1)
|
||||
if url_path.startswith("//"):
|
||||
infos_url = "https:" + url_path
|
||||
else:
|
||||
infos_url = url_path
|
||||
|
||||
infos_url = infos_url.replace("\\u0026", "&")
|
||||
except (IndexError, ValueError, AttributeError) as e:
|
||||
# If URL extraction fails, raise with better error message
|
||||
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
|
||||
|
||||
response = session.get(infos_url, timeout=30)
|
||||
data = response.json()["data"]
|
||||
title = data["brOptions"]["bookTitle"].strip().replace(" ", "_")
|
||||
title = "".join(c for c in title if c not in '<>:"/\\|?*') # Filter forbidden chars
|
||||
title = title[:150] # Trim to avoid long file names
|
||||
metadata = data["metadata"]
|
||||
links = []
|
||||
|
||||
# Safely extract page links from brOptions data
|
||||
try:
|
||||
br_data = data.get("brOptions", {}).get("data", [])
|
||||
for item in br_data:
|
||||
if isinstance(item, list):
|
||||
for page in item:
|
||||
if isinstance(page, dict) and "uri" in page:
|
||||
links.append(page["uri"])
|
||||
elif isinstance(item, dict) and "uri" in item:
|
||||
links.append(item["uri"])
|
||||
except (KeyError, IndexError, TypeError) as e:
|
||||
log(f"Warning: Error parsing page links: {e}", file=sys.stderr)
|
||||
# Continue with whatever links we found
|
||||
|
||||
if len(links) > 1:
|
||||
debug(f"Found {len(links)} pages")
|
||||
return title, links, metadata
|
||||
elif len(links) == 1:
|
||||
debug(f"Found {len(links)} page")
|
||||
return title, links, metadata
|
||||
else:
|
||||
log("Error while getting image links - no pages found", file=sys.stderr)
|
||||
raise RuntimeError("No pages found in book data")
|
||||
|
||||
|
||||
def image_name(pages: int, page: int, directory: str) -> str:
|
||||
"""Generate image filename for page.
|
||||
|
||||
Args:
|
||||
pages: Total number of pages
|
||||
page: Current page number (0-indexed)
|
||||
directory: Directory to save to
|
||||
|
||||
Returns:
|
||||
Full path to image file
|
||||
"""
|
||||
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
|
||||
|
||||
|
||||
def deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
|
||||
"""Decrypt obfuscated image data using AES-CTR.
|
||||
|
||||
This handles Archive.org's image obfuscation for borrowed books.
|
||||
Based on: https://github.com/justimm
|
||||
|
||||
Args:
|
||||
image_data: Encrypted image bytes
|
||||
link: Image URL (used to derive AES key)
|
||||
obf_header: X-Obfuscate header value (format: "1|BASE64_COUNTER")
|
||||
|
||||
Returns:
|
||||
Decrypted image bytes
|
||||
"""
|
||||
if not AES or not Counter:
|
||||
raise RuntimeError("Crypto library not available")
|
||||
|
||||
try:
|
||||
version, counter_b64 = obf_header.split("|")
|
||||
except Exception as e:
|
||||
raise ValueError("Invalid X-Obfuscate header format") from e
|
||||
|
||||
if version != "1":
|
||||
raise ValueError("Unsupported obfuscation version: " + version)
|
||||
|
||||
# Derive AES key from URL
|
||||
aesKey = re.sub(r"^https?:\/\/.*?\/", "/", link)
|
||||
sha1_digest = hashlib.sha1(aesKey.encode("utf-8")).digest()
|
||||
key = sha1_digest[:16]
|
||||
|
||||
# Decode counter
|
||||
counter_bytes = base64.b64decode(counter_b64)
|
||||
if len(counter_bytes) != 16:
|
||||
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
|
||||
|
||||
prefix = counter_bytes[:8]
|
||||
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
|
||||
|
||||
# Create AES-CTR cipher
|
||||
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
|
||||
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
|
||||
|
||||
decrypted_part = cipher.decrypt(image_data[:1024])
|
||||
new_data = decrypted_part + image_data[1024:]
|
||||
return new_data
|
||||
|
||||
|
||||
def download_one_image(
|
||||
session: requests.Session,
|
||||
link: str,
|
||||
i: int,
|
||||
directory: str,
|
||||
book_id: str,
|
||||
pages: int,
|
||||
) -> None:
|
||||
"""Download a single book page image.
|
||||
|
||||
Handles obfuscated images and re-borrowing on 403 errors.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session
|
||||
link: Direct image URL
|
||||
i: Page index (0-based)
|
||||
directory: Directory to save to
|
||||
book_id: Archive.org book ID (for re-borrowing on 403)
|
||||
pages: Total number of pages
|
||||
"""
|
||||
headers = {
|
||||
"Referer": "https://archive.org/",
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
"Sec-Fetch-Site": "same-site",
|
||||
"Sec-Fetch-Mode": "no-cors",
|
||||
"Sec-Fetch-Dest": "image",
|
||||
}
|
||||
retry = True
|
||||
response = None
|
||||
while retry:
|
||||
try:
|
||||
response = session.get(link, headers=headers, timeout=30)
|
||||
if response.status_code == 403:
|
||||
session = loan(session, book_id, verbose=False)
|
||||
raise Exception("Borrow again")
|
||||
if response.status_code == 200:
|
||||
retry = False
|
||||
except Exception:
|
||||
time.sleep(1)
|
||||
|
||||
image = image_name(pages, i, directory)
|
||||
|
||||
if response is None:
|
||||
log(f"Failed to download page {i}", file=sys.stderr)
|
||||
return
|
||||
|
||||
obf_header = response.headers.get("X-Obfuscate")
|
||||
image_content = None
|
||||
if obf_header:
|
||||
try:
|
||||
image_content = deobfuscate_image(response.content, link, obf_header)
|
||||
except Exception as e:
|
||||
log(f"Deobfuscation failed: {e}", file=sys.stderr)
|
||||
return
|
||||
else:
|
||||
image_content = response.content
|
||||
|
||||
with open(image, "wb") as f:
|
||||
f.write(image_content)
|
||||
|
||||
|
||||
def download(
|
||||
session: requests.Session,
|
||||
n_threads: int,
|
||||
directory: str,
|
||||
links: List[str],
|
||||
scale: int,
|
||||
book_id: str,
|
||||
) -> List[str]:
|
||||
"""Download all book pages as images.
|
||||
|
||||
Uses thread pool for parallel downloads.
|
||||
|
||||
Args:
|
||||
session: Authenticated requests.Session
|
||||
n_threads: Number of download threads
|
||||
directory: Directory to save images to
|
||||
links: List of image URLs
|
||||
scale: Image resolution (0=highest, 10=lowest)
|
||||
book_id: Archive.org book ID (for re-borrowing)
|
||||
|
||||
Returns:
|
||||
List of downloaded image file paths
|
||||
"""
|
||||
debug("Downloading pages...")
|
||||
links = [f"{link}&rotate=0&scale={scale}" for link in links]
|
||||
pages = len(links)
|
||||
|
||||
tasks = []
|
||||
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
|
||||
for i, link in enumerate(links):
|
||||
tasks.append(
|
||||
executor.submit(
|
||||
download_one_image,
|
||||
session=session,
|
||||
link=link,
|
||||
i=i,
|
||||
directory=directory,
|
||||
book_id=book_id,
|
||||
pages=pages,
|
||||
)
|
||||
)
|
||||
if tqdm:
|
||||
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
|
||||
pass
|
||||
else:
|
||||
for _ in futures.as_completed(tasks):
|
||||
pass
|
||||
|
||||
images = [image_name(pages, i, directory) for i in range(len(links))]
|
||||
return images
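# Illustrative borrowing workflow sketch (an addition, not part of this module), combining the
# functions above the way the module docstring describes unified_book_downloader.py using them.
# "tmp_dir" and the thread/scale values are placeholders.
#
#     session = login(email, password)
#     session = loan(session, book_id)                      # 14-day loan
#     title, links, metadata = get_book_infos(session, f"https://archive.org/details/{book_id}")
#     images = download(session, n_threads=8, directory=tmp_dir, links=links, scale=3, book_id=book_id)
#     return_loan(session, book_id)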
|
||||
|
||||
|
||||
def check_direct_download(book_id: str) -> Tuple[bool, str]:
|
||||
"""Check if a book can be downloaded directly without borrowing.
|
||||
|
||||
Searches Archive.org metadata for downloadable PDF files.
|
||||
|
||||
Args:
|
||||
book_id: Archive.org book identifier
|
||||
|
||||
Returns:
|
||||
Tuple of (can_download: bool, pdf_url: str)
|
||||
"""
|
||||
try:
|
||||
# First, try to get the metadata to find the actual PDF filename
|
||||
metadata_url = f"https://archive.org/metadata/{book_id}"
|
||||
response = requests.get(metadata_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
metadata = response.json()
|
||||
|
||||
# Find PDF file in files list
|
||||
if "files" in metadata:
|
||||
for file_info in metadata["files"]:
|
||||
filename = file_info.get("name", "")
|
||||
if filename.endswith(".pdf") and file_info.get("source") == "original":
|
||||
# Found the original PDF
|
||||
pdf_filename = filename
|
||||
pdf_url = f"https://archive.org/download/{book_id}/{pdf_filename.replace(' ', '%20')}"
|
||||
|
||||
# Verify it's accessible
|
||||
check_response = requests.head(pdf_url, timeout=5, allow_redirects=True)
|
||||
if check_response.status_code == 200:
|
||||
return True, pdf_url
|
||||
|
||||
return False, ""
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error checking direct download: {e}", file=sys.stderr)
|
||||
return False, ""
|
||||
|
||||
|
||||
def get_openlibrary_by_isbn(isbn: str) -> Dict[str, Any]:
|
||||
"""Fetch book data from OpenLibrary using ISBN.
|
||||
|
||||
Args:
|
||||
isbn: ISBN-10 or ISBN-13 to search for
|
||||
|
||||
Returns:
|
||||
Dictionary with book metadata from OpenLibrary
|
||||
"""
|
||||
try:
|
||||
# Try ISBN API first
|
||||
api_url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn}&jscmd=data&format=json"
|
||||
response = requests.get(api_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
if data:
|
||||
# Get first result
|
||||
key = list(data.keys())[0]
|
||||
return data[key]
|
||||
return {}
|
||||
except Exception as e:
|
||||
log(f"Error fetching OpenLibrary data by ISBN: {e}", file=sys.stderr)
|
||||
return {}
|
||||
|
||||
|
||||
def extract_isbn_from_metadata(metadata: Dict[str, Any]) -> str:
|
||||
"""Extract ISBN from archive.org metadata.
|
||||
|
||||
Looks for ISBN in various metadata fields.
|
||||
|
||||
Args:
|
||||
metadata: Archive.org metadata dictionary
|
||||
|
||||
Returns:
|
||||
ISBN string (clean, no hyphens) or empty string if not found
|
||||
"""
|
||||
# Try various common metadata fields
|
||||
isbn_fields = [
|
||||
"isbn", "ISBN", "isbn_13", "isbn_10", "isbns",
|
||||
"isbn-10", "isbn-13", "identifer_isbn"
|
||||
]
|
||||
|
||||
for field in isbn_fields:
|
||||
if field in metadata:
|
||||
isbn_val = metadata[field]
|
||||
if isinstance(isbn_val, list):
|
||||
isbn_val = isbn_val[0] if isbn_val else None
|
||||
if isbn_val and isinstance(isbn_val, str):
|
||||
# Clean ISBN (remove hyphens, spaces)
|
||||
isbn_clean = isbn_val.replace("-", "").replace(" ", "")
|
||||
if len(isbn_clean) in [10, 13]:
|
||||
return isbn_clean
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def normalize_url(url: str) -> str:
|
||||
"""Convert openlibrary.org URL to archive.org URL.
|
||||
|
||||
Looks up the actual Archive.org ID from OpenLibrary API.
|
||||
|
||||
Args:
|
||||
url: Book URL (archive.org or openlibrary.org format)
|
||||
|
||||
Returns:
|
||||
Normalized archive.org URL
|
||||
"""
|
||||
url = url.strip()
|
||||
|
||||
# Already archive.org format
|
||||
if url.startswith("https://archive.org/details/"):
|
||||
return url
|
||||
|
||||
# Convert openlibrary.org format by querying the OpenLibrary API
|
||||
if "openlibrary.org/books/" in url:
|
||||
try:
|
||||
# Extract the book ID (e.g., OL6796852M)
|
||||
parts = url.split("/books/")
|
||||
if len(parts) > 1:
|
||||
book_id = parts[1].split("/")[0]
|
||||
|
||||
# Query OpenLibrary API to get the book metadata
|
||||
api_url = f"https://openlibrary.org/books/{book_id}.json"
|
||||
response = requests.get(api_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# Look for identifiers including internet_archive or ocaid
|
||||
# First try ocaid (Open Content Alliance ID) - this is most common
|
||||
if "ocaid" in data:
|
||||
ocaid = data["ocaid"]
|
||||
return f"https://archive.org/details/{ocaid}"
|
||||
|
||||
# Check for identifiers object
|
||||
if "identifiers" in data:
|
||||
identifiers = data["identifiers"]
|
||||
|
||||
# Look for internet_archive ID
|
||||
if "internet_archive" in identifiers:
|
||||
ia_ids = identifiers["internet_archive"]
|
||||
if isinstance(ia_ids, list) and ia_ids:
|
||||
ia_id = ia_ids[0]
|
||||
else:
|
||||
ia_id = ia_ids
|
||||
return f"https://archive.org/details/{ia_id}"
|
||||
|
||||
# If no IA identifier found, use the book ID as fallback
|
||||
log(f"No Internet Archive ID found for {book_id}. Attempting with OpenLibrary ID.", file=sys.stderr)
|
||||
return f"https://archive.org/details/{book_id}"
|
||||
|
||||
except requests.RequestException as e:
|
||||
log(f"Could not fetch OpenLibrary metadata: {e}", file=sys.stderr)
|
||||
# Fallback to using the book ID directly
|
||||
parts = url.split("/books/")
|
||||
if len(parts) > 1:
|
||||
book_id = parts[1].split("/")[0]
|
||||
return f"https://archive.org/details/{book_id}"
|
||||
except (KeyError, IndexError) as e:
|
||||
log(f"Error parsing OpenLibrary response: {e}", file=sys.stderr)
|
||||
# Fallback to using the book ID directly
|
||||
parts = url.split("/books/")
|
||||
if len(parts) > 1:
|
||||
book_id = parts[1].split("/")[0]
|
||||
return f"https://archive.org/details/{book_id}"
|
||||
|
||||
# Return original if can't parse
|
||||
return url
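# Illustrative examples (an addition; IDs are placeholders from the comments above):
#
#     normalize_url("https://archive.org/details/somebook")              # already normalized, returned as-is
#     normalize_url("https://openlibrary.org/books/OL6796852M/Title")    # -> https://archive.org/details/<ocaid or OL id>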
@@ -1,195 +0,0 @@
"""Lightweight console notifier for background WorkerManager tasks.
|
||||
|
||||
Registers a refresh callback on WorkerManager and prints concise updates when
|
||||
workers start, progress, or finish. Intended for CLI background workflows.
|
||||
|
||||
Filters to show only workers related to the current pipeline session to avoid
|
||||
cluttering the terminal with workers from previous sessions.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, Dict, Optional, Set
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
|
||||
class BackgroundNotifier:
|
||||
"""Simple notifier that prints worker status changes for a session."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
manager: Any,
|
||||
output: Callable[[str], None] = log,
|
||||
session_worker_ids: Optional[Set[str]] = None,
|
||||
only_terminal_updates: bool = False,
|
||||
overlay_mode: bool = False,
|
||||
) -> None:
|
||||
self.manager = manager
|
||||
self.output = output
|
||||
self.session_worker_ids = session_worker_ids if session_worker_ids is not None else set()
|
||||
self.only_terminal_updates = only_terminal_updates
|
||||
self.overlay_mode = overlay_mode
|
||||
self._filter_enabled = session_worker_ids is not None
|
||||
self._last_state: Dict[str, str] = {}
|
||||
|
||||
try:
|
||||
self.manager.add_refresh_callback(self._on_refresh)
|
||||
self.manager.start_auto_refresh()
|
||||
except Exception as exc: # pragma: no cover - best effort
|
||||
debug(f"[notifier] Could not attach refresh callback: {exc}")
|
||||
|
||||
def _render_line(self, worker: Dict[str, Any]) -> Optional[str]:
|
||||
# Use worker_id (the actual worker ID we set) for filtering and display
|
||||
worker_id = str(worker.get("worker_id") or "").strip()
|
||||
if not worker_id:
|
||||
# Fallback to database id if worker_id is not set
|
||||
worker_id = str(worker.get("id") or "").strip()
|
||||
if not worker_id:
|
||||
return None
|
||||
|
||||
status = str(worker.get("status") or "running")
|
||||
progress_val = worker.get("progress") or worker.get("progress_percent")
|
||||
progress = ""
|
||||
if isinstance(progress_val, (int, float)):
|
||||
progress = f" {progress_val:.1f}%"
|
||||
elif progress_val:
|
||||
progress = f" {progress_val}"
|
||||
|
||||
step = str(worker.get("current_step") or worker.get("description") or "").strip()
|
||||
parts = [f"[worker:{worker_id}] {status}{progress}"]
|
||||
if step:
|
||||
parts.append(step)
|
||||
return " - ".join(parts)
|
||||
|
||||
def _on_refresh(self, workers: list[Dict[str, Any]]) -> None:
|
||||
overlay_active_workers = 0
|
||||
|
||||
for worker in workers:
|
||||
# Use worker_id (the actual worker ID we set) for filtering
|
||||
worker_id = str(worker.get("worker_id") or "").strip()
|
||||
if not worker_id:
|
||||
# Fallback to database id if worker_id is not set
|
||||
worker_id = str(worker.get("id") or "").strip()
|
||||
if not worker_id:
|
||||
continue
|
||||
|
||||
# If filtering is enabled, skip workers not in this session
|
||||
if self._filter_enabled and worker_id not in self.session_worker_ids:
|
||||
continue
|
||||
|
||||
status = str(worker.get("status") or "running")
|
||||
|
||||
# Overlay mode: only emit on completion; suppress start/progress spam
|
||||
if self.overlay_mode:
|
||||
if status in ("completed", "finished", "error"):
|
||||
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
|
||||
step = str(worker.get("current_step") or worker.get("description") or "").strip()
|
||||
signature = f"{status}|{progress_val}|{step}"
|
||||
|
||||
if self._last_state.get(worker_id) == signature:
|
||||
continue
|
||||
|
||||
self._last_state[worker_id] = signature
|
||||
line = self._render_line(worker)
|
||||
if line:
|
||||
try:
|
||||
self.output(line)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._last_state.pop(worker_id, None)
|
||||
self.session_worker_ids.discard(worker_id)
|
||||
continue
|
||||
|
||||
# For terminal-only mode, emit once when the worker finishes and skip intermediate updates
|
||||
if self.only_terminal_updates:
|
||||
if status in ("completed", "finished", "error"):
|
||||
if self._last_state.get(worker_id) == status:
|
||||
continue
|
||||
self._last_state[worker_id] = status
|
||||
line = self._render_line(worker)
|
||||
if line:
|
||||
try:
|
||||
self.output(line)
|
||||
except Exception:
|
||||
pass
|
||||
# Stop tracking this worker after terminal notification
|
||||
self.session_worker_ids.discard(worker_id)
|
||||
continue
|
||||
|
||||
# Skip finished workers after showing them once (standard verbose mode)
|
||||
if status in ("completed", "finished", "error"):
|
||||
if worker_id in self._last_state:
|
||||
# Already shown, remove from tracking
|
||||
self._last_state.pop(worker_id, None)
|
||||
self.session_worker_ids.discard(worker_id)
|
||||
continue
|
||||
|
||||
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
|
||||
step = str(worker.get("current_step") or worker.get("description") or "").strip()
|
||||
signature = f"{status}|{progress_val}|{step}"
|
||||
|
||||
if self._last_state.get(worker_id) == signature:
|
||||
continue
|
||||
|
||||
self._last_state[worker_id] = signature
|
||||
line = self._render_line(worker)
|
||||
if line:
|
||||
try:
|
||||
self.output(line)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if self.overlay_mode:
|
||||
try:
|
||||
# If nothing active for this session, clear the overlay text
|
||||
if overlay_active_workers == 0:
|
||||
self.output("")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def ensure_background_notifier(
|
||||
manager: Any,
|
||||
output: Callable[[str], None] = log,
|
||||
session_worker_ids: Optional[Set[str]] = None,
|
||||
only_terminal_updates: bool = False,
|
||||
overlay_mode: bool = False,
|
||||
) -> Optional[BackgroundNotifier]:
|
||||
"""Attach a BackgroundNotifier to a WorkerManager if not already present.
|
||||
|
||||
Args:
|
||||
manager: WorkerManager instance
|
||||
output: Function to call for printing updates
|
||||
session_worker_ids: Set of worker IDs belonging to this pipeline session.
|
||||
If None, show all workers. If a set (even empty), only show workers in that set.
|
||||
"""
|
||||
if manager is None:
|
||||
return None
|
||||
|
||||
existing = getattr(manager, "_background_notifier", None)
|
||||
if isinstance(existing, BackgroundNotifier):
|
||||
# Update session IDs if provided
|
||||
if session_worker_ids is not None:
|
||||
existing._filter_enabled = True
|
||||
existing.session_worker_ids.update(session_worker_ids)
|
||||
# Respect the most restrictive setting for terminal-only updates
|
||||
if only_terminal_updates:
|
||||
existing.only_terminal_updates = True
|
||||
# Enable overlay mode if requested later
|
||||
if overlay_mode:
|
||||
existing.overlay_mode = True
|
||||
return existing
|
||||
|
||||
notifier = BackgroundNotifier(
|
||||
manager,
|
||||
output,
|
||||
session_worker_ids=session_worker_ids,
|
||||
only_terminal_updates=only_terminal_updates,
|
||||
overlay_mode=overlay_mode,
|
||||
)
|
||||
try:
|
||||
manager._background_notifier = notifier # type: ignore[attr-defined]
|
||||
except Exception:
|
||||
pass
|
||||
return notifier
|
||||
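A minimal usage sketch for the notifier above (not part of the original module): it assumes a WorkerManager that exposes add_refresh_callback/start_auto_refresh as referenced in __init__; the import path in the comment and the helper name are illustrative only.

# Assumed import path, for illustration only:
# from helper.background_notifier import ensure_background_notifier

def attach_session_notifier(manager, session_worker_ids):
    """Report only this session's workers, and only their terminal states."""
    return ensure_background_notifier(
        manager,
        session_worker_ids=set(session_worker_ids),
        only_terminal_updates=True,
    )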
@@ -1,223 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from importlib import import_module
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
from cmdlets import REGISTRY
|
||||
except Exception:
|
||||
REGISTRY = {} # type: ignore
|
||||
|
||||
try:
|
||||
from cmdnats import register_native_commands as _register_native_commands
|
||||
except Exception:
|
||||
_register_native_commands = None
|
||||
|
||||
|
||||
def ensure_registry_loaded() -> None:
|
||||
"""Ensure native commands are registered into REGISTRY (idempotent)."""
|
||||
if _register_native_commands and REGISTRY is not None:
|
||||
try:
|
||||
_register_native_commands(REGISTRY)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _normalize_mod_name(mod_name: str) -> str:
|
||||
"""Normalize a command/module name for import resolution."""
|
||||
normalized = (mod_name or "").strip()
|
||||
if normalized.startswith('.'):
|
||||
normalized = normalized.lstrip('.')
|
||||
normalized = normalized.replace('-', '_')
|
||||
return normalized
|
||||
|
||||
|
||||
def import_cmd_module(mod_name: str):
|
||||
"""Import a cmdlet/native module from cmdnats or cmdlets packages."""
|
||||
normalized = _normalize_mod_name(mod_name)
|
||||
if not normalized:
|
||||
return None
|
||||
for package in ("cmdnats", "cmdlets", None):
|
||||
try:
|
||||
qualified = f"{package}.{normalized}" if package else normalized
|
||||
return import_module(qualified)
|
||||
except ModuleNotFoundError:
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_arg(arg: Any) -> Dict[str, Any]:
|
||||
"""Convert a CmdletArg/dict into a plain metadata dict."""
|
||||
if isinstance(arg, dict):
|
||||
name = arg.get("name", "")
|
||||
return {
|
||||
"name": str(name).lstrip("-"),
|
||||
"type": arg.get("type", "string"),
|
||||
"required": bool(arg.get("required", False)),
|
||||
"description": arg.get("description", ""),
|
||||
"choices": arg.get("choices", []) or [],
|
||||
"alias": arg.get("alias", ""),
|
||||
"variadic": arg.get("variadic", False),
|
||||
}
|
||||
|
||||
name = getattr(arg, "name", "") or ""
|
||||
return {
|
||||
"name": str(name).lstrip("-"),
|
||||
"type": getattr(arg, "type", "string"),
|
||||
"required": bool(getattr(arg, "required", False)),
|
||||
"description": getattr(arg, "description", ""),
|
||||
"choices": getattr(arg, "choices", []) or [],
|
||||
"alias": getattr(arg, "alias", ""),
|
||||
"variadic": getattr(arg, "variadic", False),
|
||||
}
|
||||
|
||||
|
||||
def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return normalized metadata for a cmdlet, if available (aliases supported)."""
|
||||
ensure_registry_loaded()
|
||||
normalized = cmd_name.replace("-", "_")
|
||||
mod = import_cmd_module(normalized)
|
||||
data = getattr(mod, "CMDLET", None) if mod else None
|
||||
|
||||
# Fallback: resolve via registered function's module (covers aliases)
|
||||
if data is None:
|
||||
try:
|
||||
reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower())
|
||||
if reg_fn:
|
||||
owner_mod = getattr(reg_fn, "__module__", "")
|
||||
if owner_mod:
|
||||
owner = import_module(owner_mod)
|
||||
data = getattr(owner, "CMDLET", None)
|
||||
except Exception:
|
||||
data = None
|
||||
|
||||
if not data:
|
||||
return None
|
||||
|
||||
if hasattr(data, "to_dict"):
|
||||
base = data.to_dict()
|
||||
elif isinstance(data, dict):
|
||||
base = data
|
||||
else:
|
||||
base = {}
|
||||
|
||||
name = getattr(data, "name", base.get("name", cmd_name)) or cmd_name
|
||||
aliases = getattr(data, "aliases", base.get("aliases", [])) or []
|
||||
usage = getattr(data, "usage", base.get("usage", ""))
|
||||
summary = getattr(data, "summary", base.get("summary", ""))
|
||||
details = getattr(data, "details", base.get("details", [])) or []
|
||||
args_list = getattr(data, "args", base.get("args", [])) or []
|
||||
args = [_normalize_arg(arg) for arg in args_list]
|
||||
|
||||
return {
|
||||
"name": str(name).replace("_", "-").lower(),
|
||||
"aliases": [str(a).replace("_", "-").lower() for a in aliases if a],
|
||||
"usage": usage,
|
||||
"summary": summary,
|
||||
"details": details,
|
||||
"args": args,
|
||||
"raw": data,
|
||||
}
|
||||
|
||||
|
||||
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
|
||||
"""Collect metadata for all registered cmdlets keyed by canonical name."""
|
||||
ensure_registry_loaded()
|
||||
entries: Dict[str, Dict[str, Any]] = {}
|
||||
for reg_name in (REGISTRY or {}).keys():
|
||||
meta = get_cmdlet_metadata(reg_name)
|
||||
canonical = str(reg_name).replace("_", "-").lower()
|
||||
|
||||
if meta:
|
||||
canonical = meta.get("name", canonical)
|
||||
aliases = meta.get("aliases", [])
|
||||
base = entries.get(
|
||||
canonical,
|
||||
{
|
||||
"name": canonical,
|
||||
"aliases": [],
|
||||
"usage": "",
|
||||
"summary": "",
|
||||
"details": [],
|
||||
"args": [],
|
||||
"raw": meta.get("raw"),
|
||||
},
|
||||
)
|
||||
merged_aliases = set(base.get("aliases", [])) | set(aliases)
|
||||
if canonical != reg_name:
|
||||
merged_aliases.add(reg_name)
|
||||
base["aliases"] = sorted(a for a in merged_aliases if a and a != canonical)
|
||||
if not base.get("usage") and meta.get("usage"):
|
||||
base["usage"] = meta["usage"]
|
||||
if not base.get("summary") and meta.get("summary"):
|
||||
base["summary"] = meta["summary"]
|
||||
if not base.get("details") and meta.get("details"):
|
||||
base["details"] = meta["details"]
|
||||
if not base.get("args") and meta.get("args"):
|
||||
base["args"] = meta["args"]
|
||||
if not base.get("raw"):
|
||||
base["raw"] = meta.get("raw")
|
||||
entries[canonical] = base
|
||||
else:
|
||||
entries.setdefault(
|
||||
canonical,
|
||||
{"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None},
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def list_cmdlet_names(include_aliases: bool = True) -> List[str]:
|
||||
"""Return sorted cmdlet names (optionally including aliases)."""
|
||||
ensure_registry_loaded()
|
||||
entries = list_cmdlet_metadata()
|
||||
names = set()
|
||||
for meta in entries.values():
|
||||
names.add(meta.get("name", ""))
|
||||
if include_aliases:
|
||||
for alias in meta.get("aliases", []):
|
||||
names.add(alias)
|
||||
return sorted(n for n in names if n)
|
||||
|
||||
|
||||
def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
|
||||
"""Return flag variants for cmdlet arguments (e.g., -name/--name)."""
|
||||
meta = get_cmdlet_metadata(cmd_name)
|
||||
if not meta:
|
||||
return []
|
||||
|
||||
raw = meta.get("raw")
|
||||
if raw and hasattr(raw, "build_flag_registry"):
|
||||
try:
|
||||
registry = raw.build_flag_registry()
|
||||
flags: List[str] = []
|
||||
for flag_set in registry.values():
|
||||
flags.extend(flag_set)
|
||||
return sorted(set(flags))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
flags: List[str] = []
|
||||
for arg in meta.get("args", []):
|
||||
name = arg.get("name")
|
||||
if not name:
|
||||
continue
|
||||
flags.append(f"-{name}")
|
||||
flags.append(f"--{name}")
|
||||
alias = arg.get("alias")
|
||||
if alias:
|
||||
flags.append(f"-{alias}")
|
||||
return flags
|
||||
|
||||
|
||||
def get_cmdlet_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
|
||||
"""Return declared choices for a cmdlet argument."""
|
||||
meta = get_cmdlet_metadata(cmd_name)
|
||||
if not meta:
|
||||
return []
|
||||
target = arg_name.lstrip("-")
|
||||
for arg in meta.get("args", []):
|
||||
if arg.get("name") == target:
|
||||
return list(arg.get("choices", []) or [])
|
||||
return []
|
||||
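A sketch (not in the original module) of how the flag and choice helpers above could feed shell-style completion; the function name complete_token and the completion policy are assumptions.

def complete_token(cmd_name: str, prefix: str) -> List[str]:
    """Return completion candidates for a partially typed flag or choice value."""
    flags = get_cmdlet_arg_flags(cmd_name)
    if prefix.startswith("-"):
        pool = flags
    else:
        # Offer declared choices for every argument of the cmdlet.
        pool = [choice for flag in flags for choice in get_cmdlet_arg_choices(cmd_name, flag)]
    return sorted({item for item in pool if item.startswith(prefix)})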
@@ -1,767 +0,0 @@
|
||||
"""Download media files using yt-dlp with support for direct file downloads.
|
||||
|
||||
Lean, focused downloader without event infrastructure overhead.
|
||||
- yt-dlp integration for streaming sites
|
||||
- Direct file download fallback for PDFs, images, documents
|
||||
- Tag extraction via metadata.extract_ytdlp_tags()
|
||||
- Logging via helper.logger.log()
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import glob # noqa: F401
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
from helper.logger import log, debug
|
||||
from .utils import ensure_directory, sha256_file
|
||||
from .http_client import HTTPClient
|
||||
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
|
||||
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
from yt_dlp.extractor import gen_extractors # type: ignore
|
||||
except Exception as exc:
|
||||
yt_dlp = None # type: ignore
|
||||
YTDLP_IMPORT_ERROR = exc
|
||||
else:
|
||||
YTDLP_IMPORT_ERROR = None
|
||||
|
||||
try:
|
||||
from metadata import extract_ytdlp_tags
|
||||
except ImportError:
|
||||
extract_ytdlp_tags = None
|
||||
|
||||
_EXTRACTOR_CACHE: List[Any] | None = None
|
||||
|
||||
|
||||
def _ensure_yt_dlp_ready() -> None:
|
||||
"""Verify yt-dlp is available, raise if not."""
|
||||
if yt_dlp is not None:
|
||||
return
|
||||
detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
|
||||
raise DownloadError(f"yt-dlp module not available: {detail}")
|
||||
|
||||
|
||||
def _progress_callback(status: Dict[str, Any]) -> None:
|
||||
"""Simple progress callback using logger."""
|
||||
event = status.get("status")
|
||||
if event == "downloading":
|
||||
percent = status.get("_percent_str", "?")
|
||||
speed = status.get("_speed_str", "?")
|
||||
eta = status.get("_eta_str", "?")
|
||||
sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta} ")
|
||||
sys.stdout.flush()
|
||||
elif event == "finished":
|
||||
sys.stdout.write("\r" + " " * 70 + "\r")
|
||||
sys.stdout.flush()
|
||||
debug(f"✓ Download finished: {status.get('filename')}")
|
||||
elif event in ("postprocessing", "processing"):
|
||||
debug(f"Post-processing: {status.get('postprocessor')}")
|
||||
|
||||
|
||||
def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||
"""Check if URL is supported by yt-dlp."""
|
||||
if yt_dlp is None:
|
||||
return False
|
||||
global _EXTRACTOR_CACHE
|
||||
if _EXTRACTOR_CACHE is None:
|
||||
try:
|
||||
_EXTRACTOR_CACHE = [ie for ie in gen_extractors()] # type: ignore[arg-type]
|
||||
except Exception:
|
||||
_EXTRACTOR_CACHE = []
|
||||
for extractor in _EXTRACTOR_CACHE:
|
||||
try:
|
||||
if not extractor.suitable(url):
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
name = getattr(extractor, "IE_NAME", "")
|
||||
if name.lower() == "generic":
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Get list of available formats for a URL using yt-dlp."""
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
try:
|
||||
ydl_opts = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"socket_timeout": 30,
|
||||
}
|
||||
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
if playlist_items:
|
||||
ydl_opts["playlist_items"] = playlist_items
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
debug(f"Fetching format list for: {url}")
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
formats = info.get("formats", [])
|
||||
if not formats:
|
||||
log("No formats available", file=sys.stderr)
|
||||
return None
|
||||
|
||||
result_formats = []
|
||||
for fmt in formats:
|
||||
result_formats.append({
|
||||
"format_id": fmt.get("format_id", ""),
|
||||
"format": fmt.get("format", ""),
|
||||
"ext": fmt.get("ext", ""),
|
||||
"resolution": fmt.get("resolution", ""),
|
||||
"width": fmt.get("width"),
|
||||
"height": fmt.get("height"),
|
||||
"fps": fmt.get("fps"),
|
||||
"vcodec": fmt.get("vcodec", "none"),
|
||||
"acodec": fmt.get("acodec", "none"),
|
||||
"filesize": fmt.get("filesize"),
|
||||
"tbr": fmt.get("tbr"),
|
||||
})
|
||||
|
||||
debug(f"Found {len(result_formats)} available formats")
|
||||
return result_formats
|
||||
|
||||
except Exception as e:
|
||||
log(f"✗ Error fetching formats: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
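# Illustrative sketch only (not part of the original module): one way list_formats()
# could be used to pick a format id before downloading. The mp4/height preference
# here is an assumption, not project policy.
def _pick_tallest_mp4(url: str) -> Optional[str]:
    """Return the format_id of the tallest mp4 video format, if any."""
    formats = list_formats(url, no_playlist=True) or []
    mp4s = [f for f in formats if f.get("ext") == "mp4" and f.get("vcodec") not in (None, "none")]
    if not mp4s:
        return None
    best = max(mp4s, key=lambda f: f.get("height") or 0)
    return best["format_id"]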
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
"""Download each section separately so merge-file can combine them.
|
||||
|
||||
yt-dlp with multiple --download-sections args merges them into one file.
|
||||
We need separate files for merge-file, so download each section individually.
|
||||
|
||||
Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
|
||||
thinking sections are already downloaded. The title is extracted and stored in tags.
|
||||
|
||||
Returns:
|
||||
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
|
||||
"""
|
||||
|
||||
sections_list = ytdl_options.get("download_sections", [])
|
||||
if not sections_list:
|
||||
return "", {}
|
||||
|
||||
# Generate a unique hash-based ID for this download session
|
||||
# This ensures different videos/downloads don't have filename collisions
|
||||
session_id = hashlib.md5(
|
||||
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
|
||||
).hexdigest()[:12]
|
||||
|
||||
first_section_info = None
|
||||
title_from_first = None
|
||||
|
||||
# Download each section separately with unique output template using session ID
|
||||
for section_idx, section in enumerate(sections_list, 1):
|
||||
# Build unique output template for this section using session-based filename
|
||||
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
|
||||
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
|
||||
output_dir_path = Path(base_outtmpl).parent
|
||||
|
||||
# Use session_id + section index for temp filename
|
||||
# e.g., "/path/{session_id}_1.%(ext)s"
|
||||
filename_tmpl = f"{session_id}_{section_idx}"
|
||||
if base_outtmpl.endswith(".%(ext)s"):
|
||||
filename_tmpl += ".%(ext)s"
|
||||
|
||||
# Use Path to handle separators correctly for the OS
|
||||
section_outtmpl = str(output_dir_path / filename_tmpl)
|
||||
|
||||
# For the first section, extract metadata first (separate call)
|
||||
if section_idx == 1:
|
||||
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
|
||||
if ytdl_options.get("cookiefile"):
|
||||
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
|
||||
metadata_cmd.extend(["--cookies", cookies_path])
|
||||
if ytdl_options.get("noplaylist"):
|
||||
metadata_cmd.append("--no-playlist")
|
||||
metadata_cmd.append(url)
|
||||
|
||||
try:
|
||||
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
|
||||
if meta_result.returncode == 0 and meta_result.stdout:
|
||||
try:
|
||||
info_dict = json.loads(meta_result.stdout.strip())
|
||||
first_section_info = info_dict
|
||||
title_from_first = info_dict.get('title')
|
||||
if not quiet:
|
||||
debug(f"Extracted title from metadata: {title_from_first}")
|
||||
except json.JSONDecodeError:
|
||||
if not quiet:
|
||||
debug("Could not parse JSON metadata")
|
||||
except Exception as e:
|
||||
if not quiet:
|
||||
debug(f"Error extracting metadata: {e}")
|
||||
|
||||
# Build yt-dlp command for downloading this section
|
||||
cmd = ["yt-dlp"]
|
||||
|
||||
# Add format
|
||||
if ytdl_options.get("format"):
|
||||
cmd.extend(["-f", ytdl_options["format"]])
|
||||
|
||||
# Add ONLY this section (not all sections)
|
||||
cmd.extend(["--download-sections", section])
|
||||
|
||||
# Add force-keyframes-at-cuts if specified
|
||||
if ytdl_options.get("force_keyframes_at_cuts"):
|
||||
cmd.append("--force-keyframes-at-cuts")
|
||||
|
||||
# Add output template for this section
|
||||
cmd.extend(["-o", section_outtmpl])
|
||||
|
||||
# Add cookies file if present
|
||||
if ytdl_options.get("cookiefile"):
|
||||
# Convert backslashes to forward slashes for better compatibility
|
||||
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
|
||||
cmd.extend(["--cookies", cookies_path])
|
||||
|
||||
# Add no-playlist if specified
|
||||
if ytdl_options.get("noplaylist"):
|
||||
cmd.append("--no-playlist")
|
||||
|
||||
# Add the URL
|
||||
cmd.append(url)
|
||||
|
||||
if not quiet:
|
||||
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
|
||||
debug(f"Command: {' '.join(cmd)}")
|
||||
|
||||
# Run the subprocess - don't capture output so progress is shown
|
||||
try:
|
||||
result = subprocess.run(cmd)
|
||||
|
||||
if result.returncode != 0:
|
||||
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
|
||||
except Exception as exc:
|
||||
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
|
||||
|
||||
return session_id, first_section_info or {}
|
||||
|
||||
|
||||
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
"""Build yt-dlp download options."""
|
||||
ensure_directory(opts.output_dir)
|
||||
|
||||
# Build output template
|
||||
    # When downloading sections, _download_with_sections_via_cli replaces this template
    # with session-based names of the form "{session_id}_{index}.%(ext)s" per section
|
||||
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
|
||||
|
||||
base_options: Dict[str, Any] = {
|
||||
"outtmpl": outtmpl,
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"noprogress": True,
|
||||
"socket_timeout": 30,
|
||||
"retries": 10,
|
||||
"fragment_retries": 10,
|
||||
"http_chunk_size": 10_485_760,
|
||||
"restrictfilenames": True,
|
||||
"progress_hooks": [] if opts.quiet else [_progress_callback],
|
||||
}
|
||||
|
||||
if opts.cookies_path and opts.cookies_path.is_file():
|
||||
base_options["cookiefile"] = str(opts.cookies_path)
|
||||
else:
|
||||
# Check global cookies file lazily to avoid import cycles
|
||||
from hydrus_health_check import get_cookies_file_path # local import
|
||||
|
||||
global_cookies = get_cookies_file_path()
|
||||
if global_cookies:
|
||||
base_options["cookiefile"] = global_cookies
|
||||
else:
|
||||
# Fallback to browser cookies
|
||||
base_options["cookiesfrombrowser"] = ("chrome",)
|
||||
|
||||
# Add no-playlist option if specified (for single video from playlist url)
|
||||
if opts.no_playlist:
|
||||
base_options["noplaylist"] = True
|
||||
|
||||
# Configure based on mode
|
||||
if opts.mode == "audio":
|
||||
base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
|
||||
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
|
||||
else: # video
|
||||
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
|
||||
base_options["format_sort"] = [
|
||||
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
|
||||
]
|
||||
|
||||
# Add clip sections if provided (yt-dlp will download only these sections)
|
||||
if opts.clip_sections:
|
||||
# Parse section ranges like "48-65,120-152,196-205" (seconds)
|
||||
# and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
|
||||
sections = []
|
||||
for section_range in opts.clip_sections.split(','):
|
||||
try:
|
||||
start_str, end_str = section_range.strip().split('-')
|
||||
start_sec = float(start_str)
|
||||
end_sec = float(end_str)
|
||||
|
||||
# Convert seconds to HH:MM:SS format
|
||||
def sec_to_hhmmss(seconds):
|
||||
hours = int(seconds // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
secs = int(seconds % 60)
|
||||
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
|
||||
|
||||
start_time = sec_to_hhmmss(start_sec)
|
||||
end_time = sec_to_hhmmss(end_sec)
|
||||
sections.append(f"*{start_time}-{end_time}")
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
|
||||
if sections:
|
||||
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
|
||||
base_options["download_sections"] = sections
|
||||
debug(f"Download sections configured: {', '.join(sections)}")
|
||||
# Note: Not using --force-keyframes-at-cuts to avoid re-encoding
|
||||
# This may result in less precise cuts but faster downloads
|
||||
|
||||
# Add playlist items selection if provided
|
||||
if opts.playlist_items:
|
||||
base_options["playlist_items"] = opts.playlist_items
|
||||
|
||||
if not opts.quiet:
|
||||
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
|
||||
return base_options
|
||||
|
||||
|
||||
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
|
||||
"""Iterate through download entries, handling playlists."""
|
||||
queue: List[Dict[str, Any]] = [info]
|
||||
seen: set[int] = set()
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
obj_id = id(current)
|
||||
if obj_id in seen:
|
||||
continue
|
||||
seen.add(obj_id)
|
||||
entries = current.get("entries")
|
||||
if isinstance(entries, list):
|
||||
for entry in entries:
|
||||
if isinstance(entry, dict):
|
||||
queue.append(entry)
|
||||
if current.get("requested_downloads") or not entries:
|
||||
yield current
|
||||
|
||||
|
||||
def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
|
||||
"""Get candidate file paths for downloaded media."""
|
||||
requested = entry.get("requested_downloads")
|
||||
if isinstance(requested, list):
|
||||
for item in requested:
|
||||
if isinstance(item, dict):
|
||||
for key in ("filepath", "_filename", "filename"):
|
||||
value = item.get(key)
|
||||
if value:
|
||||
yield Path(value)
|
||||
for key in ("filepath", "_filename", "filename"):
|
||||
value = entry.get(key)
|
||||
if value:
|
||||
yield Path(value)
|
||||
if entry.get("filename"):
|
||||
yield output_dir / entry["filename"]
|
||||
|
||||
|
||||
def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
|
||||
"""Find downloaded file in yt-dlp metadata."""
|
||||
for entry in _iter_download_entries(info):
|
||||
for candidate in _candidate_paths(entry, output_dir):
|
||||
if candidate.is_file():
|
||||
return entry, candidate
|
||||
if not candidate.is_absolute():
|
||||
resolved = output_dir / candidate
|
||||
if resolved.is_file():
|
||||
return entry, resolved
|
||||
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
|
||||
|
||||
|
||||
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
|
||||
"""Extract SHA256 hash from yt-dlp metadata."""
|
||||
    for payload in [info] + list(info.get("entries") or []):
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
hashes = payload.get("hashes")
|
||||
if isinstance(hashes, dict):
|
||||
for key in ("sha256", "sha-256", "sha_256"):
|
||||
value = hashes.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip().lower()
|
||||
for key in ("sha256", "sha-256", "sha_256"):
|
||||
value = payload.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip().lower()
|
||||
return None
|
||||
|
||||
|
||||
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
"""Extract the actual download link from LibGen redirect URL.
|
||||
|
||||
    LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to an
    actual mirror URL. This helper follows the redirect chain to get the real file.
|
||||
|
||||
Args:
|
||||
libgen_url: LibGen file.php URL
|
||||
|
||||
Returns:
|
||||
Actual download URL or None if extraction fails
|
||||
"""
|
||||
try:
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Check if this is a LibGen URL
|
||||
parsed = urlparse(libgen_url)
|
||||
if 'libgen' not in parsed.netloc.lower():
|
||||
return None
|
||||
|
||||
if '/file.php' not in parsed.path.lower():
|
||||
return None
|
||||
|
||||
        # LibGen redirects to actual mirrors; follow the redirect chain to get the final URL
|
||||
session = requests.Session()
|
||||
session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||||
})
|
||||
|
||||
debug(f"Following LibGen redirect chain for: {libgen_url}")
|
||||
|
||||
# First, get the page and look for direct download link
|
||||
try:
|
||||
response = session.get(libgen_url, timeout=10, allow_redirects=True)
|
||||
final_url = response.url
|
||||
|
||||
# Try to find actual download link in the page
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
|
||||
# Look for download links - LibGen typically has forms with download buttons
|
||||
# Look for all links and forms that might lead to download
|
||||
for link in soup.find_all('a'):
|
||||
href = link.get('href')
|
||||
if href and isinstance(href, str):
|
||||
# Look for direct file links or get.php redirects
|
||||
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
|
||||
download_url = href if href.startswith('http') else urljoin(final_url, href)
|
||||
debug(f"Found download link: {download_url}")
|
||||
return download_url
|
||||
except ImportError:
|
||||
pass # BeautifulSoup not available
|
||||
|
||||
# If we followed redirects successfully, return the final URL
|
||||
# This handles cases where libgen redirects to a direct download mirror
|
||||
if final_url != libgen_url:
|
||||
debug(f"LibGen resolved to mirror: {final_url}")
|
||||
return final_url
|
||||
|
||||
except requests.RequestException as e:
|
||||
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
|
||||
# Try head request as fallback
|
||||
try:
|
||||
response = session.head(libgen_url, allow_redirects=True, timeout=10)
|
||||
if response.url != libgen_url:
|
||||
debug(f"LibGen HEAD resolved to: {response.url}")
|
||||
return response.url
|
||||
            except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
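# Illustrative sketch only: resolving a LibGen link to its mirror before handing it to
# the direct downloader defined just below. Both helpers are private to this module;
# wiring them together like this is an assumption, not established project usage.
def _download_libgen(url: str, output_dir: Path) -> DownloadMediaResult:
    resolved = _get_libgen_download_url(url) or url
    return _download_direct_file(resolved, output_dir)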
def _download_direct_file(
|
||||
url: str,
|
||||
output_dir: Path,
|
||||
debug_logger: Optional[DebugLogger] = None,
|
||||
quiet: bool = False,
|
||||
) -> DownloadMediaResult:
|
||||
"""Download a direct file (PDF, image, document, etc.) without yt-dlp."""
|
||||
ensure_directory(output_dir)
|
||||
|
||||
from urllib.parse import unquote, urlparse, parse_qs
|
||||
import re
|
||||
|
||||
# Extract filename from URL
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
|
||||
# Try to get filename from query parameters first (for LibGen and similar services)
|
||||
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
|
||||
filename = None
|
||||
if parsed_url.query:
|
||||
query_params = parse_qs(parsed_url.query)
|
||||
for param_name in ('filename', 'download', 'file', 'name'):
|
||||
if param_name in query_params and query_params[param_name]:
|
||||
filename = query_params[param_name][0]
|
||||
filename = unquote(filename)
|
||||
break
|
||||
|
||||
# If not found in query params, extract from URL path
|
||||
if not filename or not filename.strip():
|
||||
filename = url_path.split("/")[-1] if url_path else ""
|
||||
filename = unquote(filename)
|
||||
|
||||
# Remove query strings from filename if any
|
||||
if "?" in filename:
|
||||
filename = filename.split("?")[0]
|
||||
|
||||
# Try to get real filename from Content-Disposition header (HEAD request)
|
||||
try:
|
||||
with HTTPClient(timeout=10.0) as client:
|
||||
response = client._request("HEAD", url, follow_redirects=True)
|
||||
content_disposition = response.headers.get("content-disposition", "")
|
||||
if content_disposition:
|
||||
# Extract filename from Content-Disposition header
|
||||
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
|
||||
match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
|
||||
if match:
|
||||
extracted_name = match.group(1) or match.group(2)
|
||||
if extracted_name:
|
||||
filename = unquote(extracted_name)
|
||||
if not quiet:
|
||||
debug(f"Filename from Content-Disposition: {filename}")
|
||||
except Exception as e:
|
||||
if not quiet:
|
||||
log(f"Could not get filename from headers: {e}", file=sys.stderr)
|
||||
|
||||
# Fallback if we still don't have a good filename
|
||||
if not filename or "." not in filename:
|
||||
filename = "downloaded_file.bin"
|
||||
|
||||
file_path = output_dir / filename
|
||||
progress_bar = ProgressBar()
|
||||
|
||||
if not quiet:
|
||||
debug(f"Direct download: {filename}")
|
||||
|
||||
try:
|
||||
start_time = time.time()
|
||||
downloaded_bytes = [0]
|
||||
total_bytes = [0]
|
||||
last_progress_time = [start_time]
|
||||
|
||||
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
|
||||
downloaded_bytes[0] = bytes_downloaded
|
||||
total_bytes[0] = content_length
|
||||
|
||||
now = time.time()
|
||||
if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
|
||||
elapsed = now - start_time
|
||||
percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
|
||||
speed = bytes_downloaded / elapsed if elapsed > 0 else 0
|
||||
eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0
|
||||
|
||||
speed_str = progress_bar.format_bytes(speed) + "/s"
|
||||
minutes, seconds = divmod(int(eta_seconds), 60)
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
|
||||
|
||||
progress_line = progress_bar.format_progress(
|
||||
percent_str=f"{percent:.1f}%",
|
||||
downloaded=bytes_downloaded,
|
||||
total=content_length,
|
||||
speed_str=speed_str,
|
||||
eta_str=eta_str,
|
||||
)
|
||||
if not quiet:
|
||||
debug(progress_line)
|
||||
last_progress_time[0] = now
|
||||
|
||||
with HTTPClient(timeout=30.0) as client:
|
||||
client.download(url, str(file_path), progress_callback=progress_callback)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
|
||||
if not quiet:
|
||||
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
|
||||
|
||||
# For direct file downloads, create minimal info dict without filename as title
|
||||
# This prevents creating duplicate title: tags when filename gets auto-generated
|
||||
# We'll add title back later only if we couldn't extract meaningful tags
|
||||
info = {
|
||||
"id": filename.rsplit(".", 1)[0],
|
||||
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
|
||||
"webpage_url": url,
|
||||
}
|
||||
|
||||
hash_value = None
|
||||
try:
|
||||
hash_value = sha256_file(file_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags = extract_ytdlp_tags(info)
|
||||
except Exception as e:
|
||||
log(f"Error extracting tags: {e}", file=sys.stderr)
|
||||
|
||||
# Only use filename as a title tag if we couldn't extract any meaningful tags
|
||||
# This prevents duplicate title: tags when the filename could be mistaken for metadata
|
||||
if not any(t.startswith('title:') for t in tags):
|
||||
# Re-extract tags with filename as title only if needed
|
||||
info['title'] = filename
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags = extract_ytdlp_tags(info)
|
||||
except Exception as e:
|
||||
log(f"Error extracting tags with filename: {e}", file=sys.stderr)
|
||||
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"direct-file-downloaded",
|
||||
{"url": url, "path": str(file_path), "hash": hash_value},
|
||||
)
|
||||
|
||||
return DownloadMediaResult(
|
||||
path=file_path,
|
||||
info=info,
|
||||
tags=tags,
|
||||
source_url=url,
|
||||
hash_value=hash_value,
|
||||
)
|
||||
|
||||
except (httpx.HTTPError, httpx.RequestError) as exc:
|
||||
log(f"Download error: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{"phase": "direct-file", "url": url, "error": str(exc)},
|
||||
)
|
||||
raise DownloadError(f"Failed to download {url}: {exc}") from exc
|
||||
except Exception as exc:
|
||||
log(f"Error downloading file: {exc}", file=sys.stderr)
|
||||
if debug_logger is not None:
|
||||
debug_logger.write_record(
|
||||
"exception",
|
||||
{
|
||||
"phase": "direct-file",
|
||||
"url": url,
|
||||
"error": str(exc),
|
||||
"traceback": traceback.format_exc(),
|
||||
},
|
||||
)
|
||||
raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||
|
||||
|
||||
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
|
||||
"""Probe URL to extract metadata WITHOUT downloading.
|
||||
|
||||
Args:
|
||||
url: URL to probe
|
||||
no_playlist: If True, ignore playlists and probe only the single video
|
||||
timeout_seconds: Max seconds to wait for probe (default 15s)
|
||||
|
||||
Returns:
|
||||
Dict with keys: extractor, title, entries (if playlist), duration, etc.
|
||||
Returns None if not supported by yt-dlp or on timeout.
|
||||
"""
|
||||
if not is_url_supported_by_ytdlp(url):
|
||||
return None
|
||||
|
||||
# Wrap probe in timeout to prevent hanging on large playlists
|
||||
import threading
|
||||
from typing import cast
|
||||
|
||||
result_container: List[Optional[Any]] = [None, None] # [result, error]
|
||||
|
||||
def _do_probe() -> None:
|
||||
try:
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
assert yt_dlp is not None
|
||||
# Extract info without downloading
|
||||
# Use extract_flat='in_playlist' to get full metadata for playlist items
|
||||
ydl_opts = {
|
||||
"quiet": True, # Suppress all output
|
||||
"no_warnings": True,
|
||||
"socket_timeout": 10,
|
||||
"retries": 2, # Reduce retries for faster timeout
|
||||
"skip_download": True, # Don't actually download
|
||||
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
|
||||
"noprogress": True, # No progress bars
|
||||
}
|
||||
|
||||
# Add cookies if available (lazy import to avoid circular dependency)
|
||||
from hydrus_health_check import get_cookies_file_path # local import
|
||||
|
||||
global_cookies = get_cookies_file_path()
|
||||
if global_cookies:
|
||||
ydl_opts["cookiefile"] = global_cookies
|
||||
|
||||
# Add no_playlist option if specified
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
if not isinstance(info, dict):
|
||||
result_container[0] = None
|
||||
return
|
||||
|
||||
# Extract relevant fields
|
||||
result_container[0] = {
|
||||
"extractor": info.get("extractor", ""),
|
||||
"title": info.get("title", ""),
|
||||
"entries": info.get("entries", []), # Will be populated if playlist
|
||||
"duration": info.get("duration"),
|
||||
"uploader": info.get("uploader"),
|
||||
"description": info.get("description"),
|
||||
"url": url,
|
||||
}
|
||||
except Exception as exc:
|
||||
log(f"Probe error for {url}: {exc}")
|
||||
result_container[1] = exc
|
||||
|
||||
    # Run the probe on a daemon thread so a hung extractor cannot block interpreter shutdown.
    thread = threading.Thread(target=_do_probe, daemon=True)
|
||||
thread.start()
|
||||
thread.join(timeout=timeout_seconds)
|
||||
|
||||
if thread.is_alive():
|
||||
# Probe timed out - return None to fall back to direct download
|
||||
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
|
||||
return None
|
||||
|
||||
if result_container[1] is not None:
|
||||
# Probe error - return None to proceed anyway
|
||||
return None
|
||||
|
||||
return cast(Optional[Dict[str, Any]], result_container[0])
|
||||
|
||||
|
||||
__all__ = [
|
||||
"is_url_supported_by_ytdlp",
|
||||
"list_formats",
|
||||
"probe_url",
|
||||
"DownloadError",
|
||||
"DownloadOptions",
|
||||
"DownloadMediaResult",
|
||||
]
|
||||
|
||||
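A sketch (not part of the original module) showing how is_url_supported_by_ytdlp() and probe_url() above might be combined to choose a download strategy; the strategy labels are illustrative.

def choose_strategy(url: str) -> str:
    """Return "direct", "ytdlp-playlist", or "ytdlp" for a given URL."""
    if not is_url_supported_by_ytdlp(url):
        return "direct"
    info = probe_url(url)
    # probe_url returns None on timeout or error; fall back to a plain yt-dlp download.
    if info and info.get("entries"):
        return "ytdlp-playlist"
    return "ytdlp"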
@@ -1,180 +0,0 @@
|
||||
"""Simple HTTP file server for serving files in web mode."""
|
||||
|
||||
import threading
|
||||
import socket
|
||||
import logging
|
||||
from http.server import HTTPServer, SimpleHTTPRequestHandler
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import mimetypes
|
||||
import urllib.parse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Global server instance
|
||||
_file_server: Optional[HTTPServer] = None
|
||||
_server_thread: Optional[threading.Thread] = None
|
||||
_server_port: int = 8001
|
||||
|
||||
|
||||
class FileServerHandler(SimpleHTTPRequestHandler):
|
||||
"""HTTP request handler for file serving."""
|
||||
|
||||
def do_GET(self):
|
||||
"""Handle GET requests."""
|
||||
# Parse the path
|
||||
parsed_path = urllib.parse.urlparse(self.path)
|
||||
file_path = urllib.parse.unquote(parsed_path.path)
|
||||
|
||||
# Remove leading slash
|
||||
if file_path.startswith('/'):
|
||||
file_path = file_path[1:]
|
||||
|
||||
# Decode the file path (it's URL encoded)
|
||||
try:
|
||||
full_path = Path(file_path).resolve()
|
||||
|
||||
# Security check: ensure the path is within allowed directories
|
||||
# For now, allow all paths (can be restricted later)
|
||||
|
||||
if full_path.is_file() and full_path.exists():
|
||||
# Serve the file
|
||||
logger.debug(f"Serving file: {full_path}")
|
||||
|
||||
# Determine content type
|
||||
content_type, _ = mimetypes.guess_type(str(full_path))
|
||||
if content_type is None:
|
||||
content_type = 'application/octet-stream'
|
||||
|
||||
try:
|
||||
with open(full_path, 'rb') as f:
|
||||
file_content = f.read()
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', content_type)
|
||||
self.send_header('Content-Length', str(len(file_content)))
|
||||
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
|
||||
self.end_headers()
|
||||
self.wfile.write(file_content)
|
||||
logger.info(f"Successfully served file: {full_path.name}")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"Error serving file: {e}")
|
||||
self.send_error(500, "Internal server error")
|
||||
return
|
||||
else:
|
||||
logger.warning(f"File not found: {full_path}")
|
||||
self.send_error(404, "File not found")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling request: {e}")
|
||||
self.send_error(400, "Bad request")
|
||||
|
||||
def log_message(self, format, *args):
|
||||
"""Override to use our logger instead of stderr."""
|
||||
logger.debug(format % args)
|
||||
|
||||
|
||||
def get_local_ip() -> Optional[str]:
|
||||
"""Get the local IP address that's accessible from other devices."""
|
||||
try:
|
||||
# Connect to a remote server to determine local IP
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect(("8.8.8.8", 80))
|
||||
ip = s.getsockname()[0]
|
||||
s.close()
|
||||
return ip
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to determine local IP: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def start_file_server(port: int = 8001) -> Optional[str]:
|
||||
"""Start the HTTP file server.
|
||||
|
||||
Args:
|
||||
port: Port to serve on
|
||||
|
||||
Returns:
|
||||
Server URL if successful, None otherwise
|
||||
"""
|
||||
global _file_server, _server_thread, _server_port
|
||||
|
||||
if _file_server is not None:
|
||||
logger.debug(f"File server already running on port {_server_port}")
|
||||
local_ip = get_local_ip()
|
||||
if local_ip:
|
||||
return f"http://{local_ip}:{_server_port}"
|
||||
return None
|
||||
|
||||
try:
|
||||
_server_port = port
|
||||
|
||||
# Create server
|
||||
server_address = ('', port)
|
||||
_file_server = HTTPServer(server_address, FileServerHandler)
|
||||
|
||||
# Start in daemon thread
|
||||
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
|
||||
_server_thread.start()
|
||||
|
||||
logger.info(f"File server started on port {port}")
|
||||
|
||||
# Get local IP
|
||||
local_ip = get_local_ip()
|
||||
if local_ip:
|
||||
server_url = f"http://{local_ip}:{port}"
|
||||
logger.info(f"File server accessible at: {server_url}")
|
||||
return server_url
|
||||
else:
|
||||
logger.warning("Could not determine local IP")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start file server: {e}")
|
||||
_file_server = None
|
||||
_server_thread = None
|
||||
return None
|
||||
|
||||
|
||||
def stop_file_server():
|
||||
"""Stop the HTTP file server."""
|
||||
global _file_server, _server_thread
|
||||
|
||||
if _file_server is not None:
|
||||
try:
|
||||
_file_server.shutdown()
|
||||
_file_server.server_close()
|
||||
logger.info("File server stopped")
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping file server: {e}")
|
||||
finally:
|
||||
_file_server = None
|
||||
_server_thread = None
|
||||
|
||||
|
||||
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
|
||||
"""Get the HTTP URL for a file.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
server_url: Base server URL (gets determined if None)
|
||||
|
||||
Returns:
|
||||
HTTP URL to the file, or None if server not running
|
||||
"""
|
||||
if not file_path.exists():
|
||||
logger.warning(f"File does not exist: {file_path}")
|
||||
return None
|
||||
|
||||
if server_url is None:
|
||||
local_ip = get_local_ip()
|
||||
if not local_ip:
|
||||
logger.error("Cannot determine local IP for file URL")
|
||||
return None
|
||||
server_url = f"http://{local_ip}:{_server_port}"
|
||||
|
||||
# URL encode the file path
|
||||
encoded_path = urllib.parse.quote(str(file_path.resolve()))
|
||||
return f"{server_url}/{encoded_path}"
|
||||
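A usage sketch for the file server above (not in the original module); the port matches the module default, while the file path and printed message are assumptions.

from pathlib import Path

server_url = start_file_server(port=8001)
if server_url:
    file_url = get_file_url(Path("downloads/report.pdf"), server_url)
    if file_url:
        print(f"Share on the local network: {file_url}")
    stop_file_server()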
File diff suppressed because it is too large
@@ -1,579 +0,0 @@
|
||||
"""
|
||||
Unified HTTP client for downlow using httpx.
|
||||
|
||||
Provides synchronous and asynchronous HTTP operations with:
|
||||
- Automatic retries on transient failures
|
||||
- Configurable timeouts and headers
|
||||
- Built-in progress tracking for downloads
|
||||
- Request/response logging support
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import asyncio
|
||||
from typing import Optional, Dict, Any, Callable, BinaryIO
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default configuration
|
||||
DEFAULT_TIMEOUT = 30.0
|
||||
DEFAULT_RETRIES = 3
|
||||
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
|
||||
|
||||
class HTTPClient:
|
||||
"""Unified HTTP client with sync support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: float = DEFAULT_TIMEOUT,
|
||||
retries: int = DEFAULT_RETRIES,
|
||||
user_agent: str = DEFAULT_USER_AGENT,
|
||||
verify_ssl: bool = True,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize HTTP client.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds
|
||||
retries: Number of retries on transient failures
|
||||
user_agent: User-Agent header value
|
||||
verify_ssl: Whether to verify SSL certificates
|
||||
headers: Additional headers to include in all requests
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
self.user_agent = user_agent
|
||||
self.verify_ssl = verify_ssl
|
||||
self.base_headers = headers or {}
|
||||
self._client: Optional[httpx.Client] = None
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
self._client = httpx.Client(
|
||||
timeout=self.timeout,
|
||||
verify=self.verify_ssl,
|
||||
headers=self._get_headers(),
|
||||
)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit."""
|
||||
if self._client:
|
||||
self._client.close()
|
||||
self._client = None
|
||||
|
||||
def _get_headers(self) -> Dict[str, str]:
|
||||
"""Get request headers with user-agent."""
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
headers.update(self.base_headers)
|
||||
return headers
|
||||
|
||||
def get(
|
||||
self,
|
||||
url: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a GET request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: Query parameters
|
||||
headers: Additional headers
|
||||
allow_redirects: Follow redirects
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"GET",
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
follow_redirects=allow_redirects,
|
||||
)
|
||||
|
||||
def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
files: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a POST request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
files: Files to upload
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"POST",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
files=files,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def put(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
content: Optional[Any] = None,
|
||||
files: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a PUT request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
content: Raw content
|
||||
files: Files to upload
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"PUT",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
content=content,
|
||||
files=files,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def delete(
|
||||
self,
|
||||
url: str,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a DELETE request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"DELETE",
|
||||
url,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a generic HTTP request.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(method, url, **kwargs)
|
||||
|
||||
def download(
|
||||
self,
|
||||
url: str,
|
||||
file_path: str,
|
||||
chunk_size: int = 8192,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download a file from URL with optional progress tracking.
|
||||
|
||||
Args:
|
||||
url: File URL
|
||||
file_path: Local file path to save to
|
||||
chunk_size: Download chunk size
|
||||
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
Path object of downloaded file
|
||||
"""
|
||||
path = Path(file_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
|
||||
response.raise_for_status()
|
||||
total_bytes = int(response.headers.get("content-length", 0))
|
||||
bytes_downloaded = 0
|
||||
|
||||
with open(path, "wb") as f:
|
||||
for chunk in response.iter_bytes(chunk_size):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
bytes_downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(bytes_downloaded, total_bytes)
|
||||
|
||||
return path
|
||||
|
||||
def _request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an HTTP request with automatic retries.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments for httpx.Client.request()
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
if not self._client:
|
||||
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
response = self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.TimeoutException as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Don't retry on 4xx errors
|
||||
if 400 <= e.response.status_code < 500:
|
||||
try:
|
||||
response_text = e.response.text[:500]
|
||||
                    except Exception:
|
||||
response_text = "<unable to read response>"
|
||||
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
|
||||
raise
|
||||
last_exception = e
|
||||
try:
|
||||
response_text = e.response.text[:200]
|
||||
                except Exception:
|
||||
response_text = "<unable to read response>"
|
||||
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
except (httpx.RequestError, httpx.ConnectError) as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
|
||||
if last_exception:
|
||||
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
|
||||
raise last_exception
|
||||
|
||||
raise RuntimeError("Request failed after retries")
|
||||
|
||||
def _request_stream(self, method: str, url: str, **kwargs):
|
||||
"""Make a streaming request."""
|
||||
if not self._client:
|
||||
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
return self._client.stream(method, url, **kwargs)
|
||||
|
||||
|
||||
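# Illustrative sketch only (not part of the original module): downloading a file with
# the synchronous client above and a simple progress callback. The URL, destination,
# and 1 MiB reporting step are assumptions.
def _example_fetch_with_progress(url: str, dest: str) -> Path:
    last_reported = [0]

    def on_progress(done: int, total: int) -> None:
        # Log roughly once per mebibyte to avoid flooding the logger.
        if done - last_reported[0] >= 1_048_576 or (total and done == total):
            last_reported[0] = done
            pct = f"{done * 100 / total:.1f}%" if total else "?"
            logger.debug(f"downloaded {done} bytes ({pct})")

    with HTTPClient(timeout=60.0) as client:
        return client.download(url, dest, progress_callback=on_progress)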
class AsyncHTTPClient:
|
||||
"""Unified async HTTP client with asyncio support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: float = DEFAULT_TIMEOUT,
|
||||
retries: int = DEFAULT_RETRIES,
|
||||
user_agent: str = DEFAULT_USER_AGENT,
|
||||
verify_ssl: bool = True,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize async HTTP client.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds
|
||||
retries: Number of retries on transient failures
|
||||
user_agent: User-Agent header value
|
||||
verify_ssl: Whether to verify SSL certificates
|
||||
headers: Additional headers to include in all requests
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
self.user_agent = user_agent
|
||||
self.verify_ssl = verify_ssl
|
||||
self.base_headers = headers or {}
|
||||
self._client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
async def __aenter__(self):
|
||||
"""Async context manager entry."""
|
||||
self._client = httpx.AsyncClient(
|
||||
timeout=self.timeout,
|
||||
verify=self.verify_ssl,
|
||||
headers=self._get_headers(),
|
||||
)
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Async context manager exit."""
|
||||
if self._client:
|
||||
await self._client.aclose()
|
||||
self._client = None
|
||||
|
||||
def _get_headers(self) -> Dict[str, str]:
|
||||
"""Get request headers with user-agent."""
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
headers.update(self.base_headers)
|
||||
return headers
|
||||
|
||||
async def get(
|
||||
self,
|
||||
url: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async GET request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: Query parameters
|
||||
headers: Additional headers
|
||||
allow_redirects: Follow redirects
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return await self._request(
|
||||
"GET",
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
follow_redirects=allow_redirects,
|
||||
)
|
||||
|
||||
async def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async POST request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return await self._request(
|
||||
"POST",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
async def download(
|
||||
self,
|
||||
url: str,
|
||||
file_path: str,
|
||||
chunk_size: int = 8192,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download a file from URL asynchronously with optional progress tracking.
|
||||
|
||||
Args:
|
||||
url: File URL
|
||||
file_path: Local file path to save to
|
||||
chunk_size: Download chunk size
|
||||
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
Path object of downloaded file
|
||||
"""
|
||||
path = Path(file_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async with self._request_stream("GET", url, headers=headers) as response:
|
||||
response.raise_for_status()
|
||||
total_bytes = int(response.headers.get("content-length", 0))
|
||||
bytes_downloaded = 0
|
||||
|
||||
with open(path, "wb") as f:
|
||||
async for chunk in response.aiter_bytes(chunk_size):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
bytes_downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(bytes_downloaded, total_bytes)
|
||||
|
||||
return path
|
||||
|
||||
async def _request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async HTTP request with automatic retries.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments for httpx.AsyncClient.request()
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
if not self._client:
|
||||
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
response = await self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.TimeoutException as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5) # Brief delay before retry
|
||||
continue
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Don't retry on 4xx errors
|
||||
if 400 <= e.response.status_code < 500:
|
||||
try:
|
||||
response_text = e.response.text[:500]
|
||||
except Exception:
|
||||
response_text = "<unable to read response>"
|
||||
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
|
||||
raise
|
||||
last_exception = e
|
||||
try:
|
||||
response_text = e.response.text[:200]
|
||||
except Exception:
|
||||
response_text = "<unable to read response>"
|
||||
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
except httpx.RequestError as e:  # ConnectError is already a subclass of RequestError
|
||||
last_exception = e
|
||||
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
|
||||
if last_exception:
|
||||
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
|
||||
raise last_exception
|
||||
|
||||
raise RuntimeError("Request failed after retries")
|
||||
|
||||
def _request_stream(self, method: str, url: str, **kwargs):
|
||||
"""Make a streaming request."""
|
||||
if not self._client:
|
||||
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
return self._client.stream(method, url, **kwargs)
|
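# Illustrative usage sketch (not part of the original module): running the
# async client from asyncio. The URL is a placeholder; get() is the coroutine
# defined above, and the retry/backoff loop lives in _request().
async def _example_async_fetch() -> None:
    async with AsyncHTTPClient(timeout=10.0, retries=3) as client:
        resp = await client.get("https://example.com/api/items", params={"page": 1})
        print(resp.status_code, len(resp.content))

# To run standalone: asyncio.run(_example_async_fetch())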
||||
|
||||
|
||||
# Convenience function for quick sync requests
|
||||
def get(url: str, **kwargs) -> httpx.Response:
|
||||
"""Quick GET request without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.get(url, **kwargs)
|
||||
|
||||
|
||||
def post(url: str, **kwargs) -> httpx.Response:
|
||||
"""Quick POST request without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.post(url, **kwargs)
|
||||
|
||||
|
||||
def download(
|
||||
url: str,
|
||||
file_path: str,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
**kwargs
|
||||
) -> Path:
|
||||
"""Quick file download without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.download(url, file_path, progress_callback=progress_callback, **kwargs)
|
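# Illustrative usage sketch (not part of the original module): the module-level
# download() helper with a simple progress printer. URL and target path are
# placeholders.
def _example_download_with_progress() -> Path:
    def _print_progress(done: int, total: int) -> None:
        if total:
            print(f"\rdownloaded {done * 100 // total}%", end="", flush=True)

    return download(
        "https://example.com/files/sample.bin",
        "downloads/sample.bin",
        progress_callback=_print_progress,
    )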
||||
1570	helper/hydrus.py
File diff suppressed because it is too large
@@ -1,523 +0,0 @@
|
||||
"""Shared Library Genesis search and download helpers.
|
||||
|
||||
Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
|
||||
Targets libgen.is/rs/st mirrors and parses the results table directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote, urljoin, urlparse, unquote
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
BeautifulSoup = None
|
||||
|
||||
LogFn = Optional[Callable[[str], None]]
|
||||
ErrorFn = Optional[Callable[[str], None]]
|
||||
|
||||
DEFAULT_TIMEOUT = 20.0
|
||||
DEFAULT_LIMIT = 50
|
||||
|
||||
# Mirrors to try in order
|
||||
MIRRORS = [
|
||||
"https://libgen.is",
|
||||
"https://libgen.rs",
|
||||
"https://libgen.st",
|
||||
"http://libgen.is",
|
||||
"http://libgen.rs",
|
||||
"http://libgen.st",
|
||||
"https://libgen.li", # Different structure, fallback
|
||||
"http://libgen.li",
|
||||
"https://libgen.gl", # Different structure, fallback
|
||||
"http://libgen.gl",
|
||||
]
|
||||
|
||||
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def _call(logger: LogFn, message: str) -> None:
|
||||
if logger:
|
||||
logger(message)
|
||||
|
||||
|
||||
class LibgenSearch:
|
||||
"""Robust LibGen searcher."""
|
||||
|
||||
def __init__(self, session: Optional[requests.Session] = None):
|
||||
self.session = session or requests.Session()
|
||||
self.session.headers.update({
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
||||
})
|
||||
|
||||
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
|
||||
"""Search LibGen mirrors."""
|
||||
if not BeautifulSoup:
|
||||
logger.error("BeautifulSoup not installed. Cannot search LibGen.")
|
||||
return []
|
||||
|
||||
for mirror in MIRRORS:
|
||||
try:
|
||||
if "libgen.li" in mirror or "libgen.gl" in mirror:
|
||||
results = self._search_libgen_li(mirror, query, limit)
|
||||
else:
|
||||
results = self._search_libgen_rs(mirror, query, limit)
|
||||
|
||||
if results:
|
||||
return results
|
||||
except Exception as e:
|
||||
logging.debug(f"Mirror {mirror} failed: {e}")
|
||||
continue
|
||||
|
||||
return []
|
||||
|
||||
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
|
||||
"""Search libgen.rs/is/st style mirrors."""
|
||||
# Search URL: /search.php?req=QUERY&res=100&column=def
|
||||
url = f"{mirror}/search.php"
|
||||
params = {
|
||||
"req": query,
|
||||
"res": 100, # Request more to filter later
|
||||
"column": "def",
|
||||
"open": 0,
|
||||
"view": "simple",
|
||||
"phrase": 1,
|
||||
}
|
||||
|
||||
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
|
||||
# Find the table with results. usually class 'c'
|
||||
table = soup.find("table", {"class": "c"})
|
||||
if not table:
|
||||
# Try finding by structure (table with many rows)
|
||||
tables = soup.find_all("table")
|
||||
for t in tables:
|
||||
if len(t.find_all("tr")) > 5:
|
||||
table = t
|
||||
break
|
||||
|
||||
if not table:
|
||||
return []
|
||||
|
||||
results = []
|
||||
# Skip header row
|
||||
rows = table.find_all("tr")[1:]
|
||||
|
||||
for row in rows:
|
||||
cols = row.find_all("td")
|
||||
if len(cols) < 9:
|
||||
continue
|
||||
|
||||
# Columns:
|
||||
# 0: ID
|
||||
# 1: Author(s)
|
||||
# 2: Title
|
||||
# 3: Publisher
|
||||
# 4: Year
|
||||
# 5: Pages
|
||||
# 6: Language
|
||||
# 7: Size
|
||||
# 8: Extension
|
||||
# 9+: Mirrors
|
||||
|
||||
try:
|
||||
libgen_id = cols[0].get_text(strip=True)
|
||||
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
|
||||
if not authors:
|
||||
authors = [cols[1].get_text(strip=True)]
|
||||
|
||||
title_tag = cols[2].find("a")
|
||||
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
|
||||
|
||||
# Extract MD5 from title link if possible (often in href)
|
||||
# href='book/index.php?md5=...'
|
||||
md5 = ""
|
||||
if title_tag and title_tag.has_attr("href"):
|
||||
href = title_tag["href"]
|
||||
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
|
||||
if match:
|
||||
md5 = match.group(1)
|
||||
|
||||
publisher = cols[3].get_text(strip=True)
|
||||
year = cols[4].get_text(strip=True)
|
||||
pages = cols[5].get_text(strip=True)
|
||||
language = cols[6].get_text(strip=True)
|
||||
size = cols[7].get_text(strip=True)
|
||||
extension = cols[8].get_text(strip=True)
|
||||
|
||||
# Mirrors
|
||||
# Usually col 9 is http://library.lol/main/MD5
|
||||
mirror_links = []
|
||||
for i in range(9, len(cols)):
|
||||
a = cols[i].find("a")
|
||||
if a and a.has_attr("href"):
|
||||
mirror_links.append(a["href"])
|
||||
|
||||
# Construct direct download page link (library.lol)
|
||||
# If we have MD5, we can guess it: http://library.lol/main/{md5}
|
||||
if md5:
|
||||
download_link = f"http://library.lol/main/{md5}"
|
||||
elif mirror_links:
|
||||
download_link = mirror_links[0]
|
||||
else:
|
||||
download_link = ""
|
||||
|
||||
results.append({
|
||||
"id": libgen_id,
|
||||
"title": title,
|
||||
"author": ", ".join(authors),
|
||||
"publisher": publisher,
|
||||
"year": year,
|
||||
"pages": pages,
|
||||
"language": language,
|
||||
"filesize_str": size,
|
||||
"extension": extension,
|
||||
"md5": md5,
|
||||
"mirror_url": download_link,
|
||||
"cover": "", # Could extract from hover if needed
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logging.debug(f"Error parsing row: {e}")
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
|
||||
"""Search libgen.li/gl style mirrors."""
|
||||
# Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a...
|
||||
url = f"{mirror}/index.php"
|
||||
params = {
|
||||
"req": query,
|
||||
"res": 100,
|
||||
"covers": "on",
|
||||
"filesuns": "all",
|
||||
}
|
||||
|
||||
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
table = soup.find("table", {"id": "tablelibgen"})
|
||||
if not table:
|
||||
table = soup.find("table", {"class": "table table-striped"})
|
||||
|
||||
if not table:
|
||||
return []
|
||||
|
||||
results = []
|
||||
rows = table.find_all("tr")[1:]
|
||||
|
||||
for row in rows:
|
||||
cols = row.find_all("td")
|
||||
if len(cols) < 9:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Structure is different
|
||||
# 0: Cover
|
||||
# 1: Title (with link to file.php?id=...)
|
||||
# 2: Author
|
||||
# 3: Publisher
|
||||
# 4: Year
|
||||
# 5: Language
|
||||
# 6: Pages
|
||||
# 7: Size
|
||||
# 8: Extension
|
||||
# 9: Mirrors
|
||||
|
||||
title_col = cols[1]
|
||||
title_link = title_col.find("a")
|
||||
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
|
||||
|
||||
# Extract ID from link
|
||||
libgen_id = ""
|
||||
if title_link and title_link.has_attr("href"):
|
||||
href = title_link["href"]
|
||||
# href is usually "file.php?id=..." or "edition.php?id=..."
|
||||
match = re.search(r"id=(\d+)", href)
|
||||
if match:
|
||||
libgen_id = match.group(1)
|
||||
|
||||
authors = cols[2].get_text(strip=True)
|
||||
publisher = cols[3].get_text(strip=True)
|
||||
year = cols[4].get_text(strip=True)
|
||||
language = cols[5].get_text(strip=True)
|
||||
pages = cols[6].get_text(strip=True)
|
||||
size = cols[7].get_text(strip=True)
|
||||
extension = cols[8].get_text(strip=True)
|
||||
|
||||
# Mirror link
|
||||
# Usually in col 9 or title link
|
||||
mirror_url = ""
|
||||
if title_link:
|
||||
href = title_link["href"]
|
||||
if href.startswith("/"):
|
||||
mirror_url = mirror + href
|
||||
else:
|
||||
mirror_url = urljoin(mirror, href)
|
||||
|
||||
results.append({
|
||||
"id": libgen_id,
|
||||
"title": title,
|
||||
"author": authors,
|
||||
"publisher": publisher,
|
||||
"year": year,
|
||||
"pages": pages,
|
||||
"language": language,
|
||||
"filesize_str": size,
|
||||
"extension": extension,
|
||||
"md5": "", # .li doesn't show MD5 easily in table
|
||||
"mirror_url": mirror_url,
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def search_libgen(
|
||||
query: str,
|
||||
limit: int = DEFAULT_LIMIT,
|
||||
*,
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Search Libgen using the robust scraper."""
|
||||
searcher = LibgenSearch(session=session)
|
||||
try:
|
||||
results = searcher.search(query, limit=limit)
|
||||
_call(log_info, f"[libgen] Found {len(results)} results")
|
||||
return results
|
||||
except Exception as e:
|
||||
_call(log_error, f"[libgen] Search failed: {e}")
|
||||
return []
|
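# Illustrative usage sketch (not part of the original module): querying the
# scraper and printing a compact summary. The query string is a placeholder.
def _example_search() -> None:
    for row in search_libgen("introduction to algorithms", limit=5, log_info=print):
        print(f"{row['extension']:>5}  {row['filesize_str']:>9}  {row['title']} ({row['author']})")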
||||
|
||||
|
||||
def _resolve_download_url(
|
||||
session: requests.Session,
|
||||
url: str,
|
||||
log_info: LogFn = None
|
||||
) -> Optional[str]:
|
||||
"""Resolve the final download URL by following the LibGen chain."""
|
||||
current_url = url
|
||||
visited = set()
|
||||
|
||||
# Max hops to prevent infinite loops
|
||||
for _ in range(6):
|
||||
if current_url in visited:
|
||||
break
|
||||
visited.add(current_url)
|
||||
|
||||
_call(log_info, f"[resolve] Checking: {current_url}")
|
||||
|
||||
# Simple heuristic: if it looks like a file, return it
|
||||
if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
|
||||
return current_url
|
||||
|
||||
try:
|
||||
# Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405
|
||||
# So we'll just GET with stream=True to peek headers/content without downloading everything
|
||||
with session.get(current_url, stream=True, timeout=30) as resp:
|
||||
resp.raise_for_status()
|
||||
ct = resp.headers.get("Content-Type", "").lower()
|
||||
|
||||
if "text/html" not in ct:
|
||||
# It's a binary file
|
||||
return current_url
|
||||
|
||||
# It's HTML, read content
|
||||
content = resp.text
|
||||
except Exception as e:
|
||||
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
|
||||
return None
|
||||
|
||||
if BeautifulSoup is None:
    _call(log_info, "[resolve] BeautifulSoup is not installed; cannot parse mirror page")
    return None
soup = BeautifulSoup(content, "html.parser")
|
||||
|
||||
# 1. Check for "GET" link (library.lol / ads.php style)
|
||||
# Usually <h2>GET</h2> inside <a> or just text "GET"
|
||||
get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
|
||||
if not get_link:
|
||||
# Try finding <a> containing <h2>GET</h2>
|
||||
h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
|
||||
if h2_get and h2_get.parent.name == "a":
|
||||
get_link = h2_get.parent
|
||||
|
||||
if get_link and get_link.has_attr("href"):
|
||||
return urljoin(current_url, get_link["href"])
|
||||
|
||||
# 2. Check for "series.php" -> "edition.php"
|
||||
if "series.php" in current_url:
|
||||
# Find first edition link
|
||||
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
|
||||
if edition_link:
|
||||
current_url = urljoin(current_url, edition_link["href"])
|
||||
continue
|
||||
|
||||
# 3. Check for "edition.php" -> "file.php"
|
||||
if "edition.php" in current_url:
|
||||
file_link = soup.find("a", href=re.compile(r"file\.php"))
|
||||
if file_link:
|
||||
current_url = urljoin(current_url, file_link["href"])
|
||||
continue
|
||||
|
||||
# 4. Check for "file.php" -> "ads.php" (Libgen badge)
|
||||
if "file.php" in current_url:
|
||||
# Look for link with title="libgen" or text "Libgen"
|
||||
libgen_link = soup.find("a", title="libgen")
|
||||
if not libgen_link:
|
||||
libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
|
||||
|
||||
if libgen_link and libgen_link.has_attr("href"):
|
||||
current_url = urljoin(current_url, libgen_link["href"])
|
||||
continue
|
||||
|
||||
# 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed)
|
||||
if "ads.php" in current_url:
|
||||
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
|
||||
if get_php_link:
|
||||
return urljoin(current_url, get_php_link["href"])
|
||||
|
||||
# 6. Library.lol / generic fallback
|
||||
for text in ["Cloudflare", "IPFS.io", "Infura"]:
|
||||
link = soup.find("a", string=re.compile(text, re.IGNORECASE))
|
||||
if link and link.has_attr("href"):
|
||||
return urljoin(current_url, link["href"])
|
||||
|
||||
# If we found nothing new, stop
|
||||
break
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
|
||||
"""Guess the file extension from headers or the download URL."""
|
||||
content_disposition = headers.get("content-disposition", "")
|
||||
if content_disposition:
|
||||
match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
|
||||
if match:
|
||||
filename = unquote(match.group(1).strip('"'))
|
||||
suffix = Path(filename).suffix
|
||||
if suffix:
|
||||
return suffix.lstrip('.')
|
||||
|
||||
parsed = urlparse(download_url)
|
||||
suffix = Path(parsed.path).suffix
|
||||
if suffix:
|
||||
return suffix.lstrip('.')
|
||||
|
||||
content_type = headers.get('content-type', '').lower()
|
||||
mime_map = {
|
||||
'application/pdf': 'pdf',
|
||||
'application/epub+zip': 'epub',
|
||||
'application/x-mobipocket-ebook': 'mobi',
|
||||
'application/x-cbr': 'cbr',
|
||||
'application/x-cbz': 'cbz',
|
||||
'application/zip': 'zip',
|
||||
}
|
||||
|
||||
for mime, ext in mime_map.items():
|
||||
if mime in content_type:
|
||||
return ext
|
||||
|
||||
return None
|
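# Illustrative sketch of how the hints above are resolved: a Content-Disposition
# filename wins, then the URL path suffix, then the MIME type. The URLs are
# placeholders.
def _example_guess_extension() -> None:
    assert _guess_filename_extension(
        "http://example.org/get?id=1",
        {"content-disposition": 'attachment; filename="book.epub"'},
    ) == "epub"
    assert _guess_filename_extension("http://example.org/files/book.pdf", {}) == "pdf"
    assert _guess_filename_extension(
        "http://example.org/get?id=1", {"content-type": "application/pdf"}
    ) == "pdf"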
||||
|
||||
|
||||
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
|
||||
"""Rename the path to match the detected extension, if needed."""
|
||||
if not extension:
|
||||
return path
|
||||
|
||||
suffix = extension if extension.startswith('.') else f'.{extension}'
|
||||
if path.suffix.lower() == suffix.lower():
|
||||
return path
|
||||
|
||||
candidate = path.with_suffix(suffix)
|
||||
base_stem = path.stem
|
||||
counter = 1
|
||||
while candidate.exists() and counter < 100:
|
||||
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
|
||||
counter += 1
|
||||
|
||||
try:
|
||||
path.replace(candidate)
|
||||
return candidate
|
||||
except Exception:
|
||||
return path
|
||||
|
||||
def download_from_mirror(
|
||||
mirror_url: str,
|
||||
output_path: Path,
|
||||
*,
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
) -> Tuple[bool, Optional[Path]]:
|
||||
"""Download file from a LibGen mirror URL with optional progress tracking."""
|
||||
session = session or requests.Session()
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
_call(log_info, f"[download] Resolving download link from: {mirror_url}")
|
||||
|
||||
download_url = _resolve_download_url(session, mirror_url, log_info)
|
||||
|
||||
if not download_url:
|
||||
_call(log_error, "[download] Could not find direct download link")
|
||||
return False, None
|
||||
|
||||
_call(log_info, f"[download] Downloading from: {download_url}")
|
||||
|
||||
downloaded = 0
|
||||
total_size = 0
|
||||
headers: Dict[str, str] = {}
|
||||
|
||||
with session.get(download_url, stream=True, timeout=60) as r:
|
||||
r.raise_for_status()
|
||||
# Lower-case header names so the content-type / content-disposition lookups below match reliably
headers = {key.lower(): value for key, value in r.headers.items()}
|
||||
|
||||
# Verify it's not HTML (error page)
|
||||
ct = headers.get("content-type", "").lower()
|
||||
if "text/html" in ct:
|
||||
_call(log_error, "[download] Final URL returned HTML, not a file.")
|
||||
return False, None
|
||||
|
||||
total_size = int(headers.get("content-length", 0) or 0)
|
||||
|
||||
with open(output_path, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
final_extension = _guess_filename_extension(download_url, headers)
|
||||
final_path = _apply_extension(output_path, final_extension)
|
||||
|
||||
if progress_callback and total_size > 0:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
_call(log_info, f"[download] Saved to {final_path}")
|
||||
return True, final_path
|
||||
|
||||
except Exception as e:
|
||||
_call(log_error, f"[download] Download failed: {e}")
|
||||
return False, None
|
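# Illustrative usage sketch (not part of the original module): end-to-end
# search-and-fetch with the helpers above. Query and output directory are
# placeholders.
def _example_fetch_first_result() -> Optional[Path]:
    results = search_libgen("dune frank herbert", limit=1, log_info=print)
    if not results:
        return None
    ok, saved = download_from_mirror(
        results[0]["mirror_url"],
        Path("downloads") / "dune",
        log_info=print,
        log_error=print,
    )
    return saved if ok else None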
||||
104	helper/logger.py
@@ -1,104 +0,0 @@
|
||||
"""Unified logging utility for automatic file and function name tracking."""
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
_DEBUG_ENABLED = False
|
||||
_thread_local = threading.local()
|
||||
|
||||
def set_thread_stream(stream):
|
||||
"""Set a custom output stream for the current thread."""
|
||||
_thread_local.stream = stream
|
||||
|
||||
def get_thread_stream():
|
||||
"""Get the custom output stream for the current thread, if any."""
|
||||
return getattr(_thread_local, 'stream', None)
|
||||
|
||||
def set_debug(enabled: bool) -> None:
|
||||
"""Enable or disable debug logging."""
|
||||
global _DEBUG_ENABLED
|
||||
_DEBUG_ENABLED = enabled
|
||||
|
||||
def is_debug_enabled() -> bool:
|
||||
"""Check if debug logging is enabled."""
|
||||
return _DEBUG_ENABLED
|
||||
|
||||
def debug(*args, **kwargs) -> None:
|
||||
"""Print debug message if debug logging is enabled.
|
||||
|
||||
Automatically prepends [filename.function_name] to all output.
|
||||
"""
|
||||
if not _DEBUG_ENABLED:
|
||||
return
|
||||
|
||||
# Check if stderr has been redirected to /dev/null (quiet mode)
|
||||
# If so, skip output to avoid queuing in background worker's capture
|
||||
try:
|
||||
stderr_name = getattr(sys.stderr, 'name', '')
|
||||
if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name):
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check for thread-local stream first
|
||||
stream = get_thread_stream()
|
||||
if stream:
|
||||
kwargs['file'] = stream
|
||||
# Set default to stderr for debug messages
|
||||
elif 'file' not in kwargs:
|
||||
kwargs['file'] = sys.stderr
|
||||
|
||||
# Prepend DEBUG label
|
||||
args = ("DEBUG:", *args)
|
||||
|
||||
# Delegate to log(); note that log() inspects only its immediate caller, so the auto-prefix shows this wrapper ([logger.debug]) rather than the original call site.
|
||||
log(*args, **kwargs)
|
||||
|
||||
def log(*args, **kwargs) -> None:
|
||||
"""Print with automatic file.function prefix.
|
||||
|
||||
Automatically prepends [filename.function_name] to all output.
|
||||
Defaults to stdout if not specified.
|
||||
|
||||
Example:
|
||||
log("Upload started") # Output: [add_file.run] Upload started
|
||||
"""
|
||||
# When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
|
||||
add_prefix = _DEBUG_ENABLED
|
||||
|
||||
# Get the calling frame
|
||||
frame = inspect.currentframe()
|
||||
if frame is None:
|
||||
print(*args, **kwargs)
|
||||
return
|
||||
|
||||
caller_frame = frame.f_back
|
||||
if caller_frame is None:
|
||||
print(*args, **kwargs)
|
||||
return
|
||||
|
||||
try:
|
||||
# Get file name without extension
|
||||
file_name = Path(caller_frame.f_code.co_filename).stem
|
||||
|
||||
# Get function name
|
||||
func_name = caller_frame.f_code.co_name
|
||||
|
||||
# Check for thread-local stream first
|
||||
stream = get_thread_stream()
|
||||
if stream:
|
||||
kwargs['file'] = stream
|
||||
# Set default to stdout if not specified
|
||||
elif 'file' not in kwargs:
|
||||
kwargs['file'] = sys.stdout
|
||||
|
||||
if add_prefix:
|
||||
prefix = f"[{file_name}.{func_name}]"
|
||||
print(prefix, *args, **kwargs)
|
||||
else:
|
||||
print(*args, **kwargs)
|
||||
finally:
|
||||
del frame
|
||||
del caller_frame
|
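# Illustrative usage sketch (not part of the original module): typical calls.
# With debug enabled, log() adds the automatic [file.function] prefix and
# debug() writes to stderr; with it disabled, log() prints plain text and
# debug() is a no-op.
def _example_logging() -> None:
    set_debug(True)
    log("starting export")        # -> "[logger._example_logging] starting export"
    debug("raw payload:", {"id": 1})
    set_debug(False)
    log("done")                   # -> "done"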
||||
@@ -1,389 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
import requests
|
||||
import sys
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
try: # Optional dependency
|
||||
import musicbrainzngs # type: ignore
|
||||
except ImportError: # pragma: no cover - optional
|
||||
musicbrainzngs = None
|
||||
|
||||
|
||||
class MetadataProvider(ABC):
|
||||
"""Base class for metadata providers (music, movies, books, etc.)."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
|
||||
self.config = config or {}
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return self.__class__.__name__.replace("Provider", "").lower()
|
||||
|
||||
@abstractmethod
|
||||
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
|
||||
"""Return a list of candidate metadata records."""
|
||||
|
||||
def to_tags(self, item: Dict[str, Any]) -> List[str]:
|
||||
"""Convert a result item into a list of tags."""
|
||||
tags: List[str] = []
|
||||
title = item.get("title")
|
||||
artist = item.get("artist")
|
||||
album = item.get("album")
|
||||
year = item.get("year")
|
||||
|
||||
if title:
|
||||
tags.append(f"title:{title}")
|
||||
if artist:
|
||||
tags.append(f"artist:{artist}")
|
||||
if album:
|
||||
tags.append(f"album:{album}")
|
||||
if year:
|
||||
tags.append(f"year:{year}")
|
||||
|
||||
tags.append(f"source:{self.name}")
|
||||
return tags
|
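# Illustrative sketch (not part of the original module): the minimal shape of a
# concrete provider is just a search() override; to_tags() is inherited. The
# real, HTTP-backed providers follow below.
class _EchoProvider(MetadataProvider):
    """Toy provider that echoes the query back as a single result."""

    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        text = (query or "").strip()
        if not text:
            return []
        return [{"title": text, "artist": "", "album": "", "year": "", "provider": self.name}]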
||||
|
||||
|
||||
class ITunesProvider(MetadataProvider):
|
||||
"""Metadata provider using the iTunes Search API."""
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
|
||||
params = {"term": query, "media": "music", "entity": "song", "limit": limit}
|
||||
try:
|
||||
resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
|
||||
resp.raise_for_status()
|
||||
results = resp.json().get("results", [])
|
||||
except Exception as exc:
|
||||
log(f"iTunes search failed: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
items: List[Dict[str, Any]] = []
|
||||
for r in results:
|
||||
item = {
|
||||
"title": r.get("trackName"),
|
||||
"artist": r.get("artistName"),
|
||||
"album": r.get("collectionName"),
|
||||
"year": str(r.get("releaseDate", ""))[:4],
|
||||
"provider": self.name,
|
||||
"raw": r,
|
||||
}
|
||||
items.append(item)
|
||||
debug(f"iTunes returned {len(items)} items for '{query}'")
|
||||
return items
|
||||
|
||||
|
||||
class OpenLibraryMetadataProvider(MetadataProvider):
|
||||
"""Metadata provider for OpenLibrary book metadata."""
|
||||
|
||||
@property
|
||||
def name(self) -> str: # type: ignore[override]
|
||||
return "openlibrary"
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
|
||||
query_clean = (query or "").strip()
|
||||
if not query_clean:
|
||||
return []
|
||||
|
||||
try:
|
||||
# Prefer ISBN-specific search when the query looks like one
|
||||
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
|
||||
q = f"isbn:{query_clean.replace('-', '')}"
|
||||
else:
|
||||
q = query_clean
|
||||
|
||||
resp = requests.get(
|
||||
"https://openlibrary.org/search.json",
|
||||
params={"q": q, "limit": limit},
|
||||
timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
except Exception as exc:
|
||||
log(f"OpenLibrary search failed: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
items: List[Dict[str, Any]] = []
|
||||
for doc in data.get("docs", [])[:limit]:
|
||||
authors = doc.get("author_name") or []
|
||||
publisher = ""
|
||||
publishers = doc.get("publisher") or []
|
||||
if isinstance(publishers, list) and publishers:
|
||||
publisher = publishers[0]
|
||||
|
||||
# Prefer 13-digit ISBN when available, otherwise 10-digit
|
||||
isbn_list = doc.get("isbn") or []
|
||||
isbn_13 = next((i for i in isbn_list if len(str(i)) == 13), None)
|
||||
isbn_10 = next((i for i in isbn_list if len(str(i)) == 10), None)
|
||||
|
||||
# Derive OLID from key
|
||||
olid = ""
|
||||
key = doc.get("key", "")
|
||||
if isinstance(key, str) and key:
|
||||
olid = key.split("/")[-1]
|
||||
|
||||
items.append({
|
||||
"title": doc.get("title") or "",
|
||||
"artist": ", ".join(authors) if authors else "",
|
||||
"album": publisher,
|
||||
"year": str(doc.get("first_publish_year") or ""),
|
||||
"provider": self.name,
|
||||
"authors": authors,
|
||||
"publisher": publisher,
|
||||
"identifiers": {
|
||||
"isbn_13": isbn_13,
|
||||
"isbn_10": isbn_10,
|
||||
"openlibrary": olid,
|
||||
"oclc": (doc.get("oclc_numbers") or [None])[0],
|
||||
"lccn": (doc.get("lccn") or [None])[0],
|
||||
},
|
||||
"description": None,
|
||||
})
|
||||
|
||||
return items
|
||||
|
||||
def to_tags(self, item: Dict[str, Any]) -> List[str]:
|
||||
tags: List[str] = []
|
||||
title = item.get("title")
|
||||
authors = item.get("authors") or []
|
||||
publisher = item.get("publisher")
|
||||
year = item.get("year")
|
||||
description = item.get("description") or ""
|
||||
|
||||
if title:
|
||||
tags.append(f"title:{title}")
|
||||
for author in authors:
|
||||
if author:
|
||||
tags.append(f"author:{author}")
|
||||
if publisher:
|
||||
tags.append(f"publisher:{publisher}")
|
||||
if year:
|
||||
tags.append(f"year:{year}")
|
||||
if description:
|
||||
tags.append(f"description:{description[:200]}")
|
||||
|
||||
identifiers = item.get("identifiers") or {}
|
||||
for key, value in identifiers.items():
|
||||
if value:
|
||||
tags.append(f"{key}:{value}")
|
||||
|
||||
tags.append(f"source:{self.name}")
|
||||
return tags
|
||||
|
||||
|
||||
class GoogleBooksMetadataProvider(MetadataProvider):
|
||||
"""Metadata provider for Google Books volumes API."""
|
||||
|
||||
@property
|
||||
def name(self) -> str: # type: ignore[override]
|
||||
return "googlebooks"
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
|
||||
query_clean = (query or "").strip()
|
||||
if not query_clean:
|
||||
return []
|
||||
|
||||
# Prefer ISBN queries when possible
|
||||
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
|
||||
q = f"isbn:{query_clean.replace('-', '')}"
|
||||
else:
|
||||
q = query_clean
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
"https://www.googleapis.com/books/v1/volumes",
|
||||
params={"q": q, "maxResults": limit},
|
||||
timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
except Exception as exc:
|
||||
log(f"Google Books search failed: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
items: List[Dict[str, Any]] = []
|
||||
for volume in payload.get("items", [])[:limit]:
|
||||
info = volume.get("volumeInfo") or {}
|
||||
authors = info.get("authors") or []
|
||||
publisher = info.get("publisher", "")
|
||||
published_date = info.get("publishedDate", "")
|
||||
year = str(published_date)[:4] if published_date else ""
|
||||
|
||||
identifiers_raw = info.get("industryIdentifiers") or []
|
||||
identifiers: Dict[str, Optional[str]] = {"googlebooks": volume.get("id")}
|
||||
for ident in identifiers_raw:
|
||||
if not isinstance(ident, dict):
|
||||
continue
|
||||
ident_type = ident.get("type", "").lower()
|
||||
ident_value = ident.get("identifier")
|
||||
if not ident_value:
|
||||
continue
|
||||
if ident_type == "isbn_13":
|
||||
identifiers.setdefault("isbn_13", ident_value)
|
||||
elif ident_type == "isbn_10":
|
||||
identifiers.setdefault("isbn_10", ident_value)
|
||||
else:
|
||||
identifiers.setdefault(ident_type, ident_value)
|
||||
|
||||
items.append({
|
||||
"title": info.get("title") or "",
|
||||
"artist": ", ".join(authors) if authors else "",
|
||||
"album": publisher,
|
||||
"year": year,
|
||||
"provider": self.name,
|
||||
"authors": authors,
|
||||
"publisher": publisher,
|
||||
"identifiers": identifiers,
|
||||
"description": info.get("description", ""),
|
||||
})
|
||||
|
||||
return items
|
||||
|
||||
def to_tags(self, item: Dict[str, Any]) -> List[str]:
|
||||
tags: List[str] = []
|
||||
title = item.get("title")
|
||||
authors = item.get("authors") or []
|
||||
publisher = item.get("publisher")
|
||||
year = item.get("year")
|
||||
description = item.get("description") or ""
|
||||
|
||||
if title:
|
||||
tags.append(f"title:{title}")
|
||||
for author in authors:
|
||||
if author:
|
||||
tags.append(f"author:{author}")
|
||||
if publisher:
|
||||
tags.append(f"publisher:{publisher}")
|
||||
if year:
|
||||
tags.append(f"year:{year}")
|
||||
if description:
|
||||
tags.append(f"description:{description[:200]}")
|
||||
|
||||
identifiers = item.get("identifiers") or {}
|
||||
for key, value in identifiers.items():
|
||||
if value:
|
||||
tags.append(f"{key}:{value}")
|
||||
|
||||
tags.append(f"source:{self.name}")
|
||||
return tags
|
||||
|
||||
|
||||
class MusicBrainzMetadataProvider(MetadataProvider):
|
||||
"""Metadata provider for MusicBrainz recordings."""
|
||||
|
||||
@property
|
||||
def name(self) -> str: # type: ignore[override]
|
||||
return "musicbrainz"
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
|
||||
if not musicbrainzngs:
|
||||
log("musicbrainzngs is not installed; skipping MusicBrainz scrape", file=sys.stderr)
|
||||
return []
|
||||
|
||||
q = (query or "").strip()
|
||||
if not q:
|
||||
return []
|
||||
|
||||
try:
|
||||
# Ensure user agent is set (required by MusicBrainz)
|
||||
musicbrainzngs.set_useragent("Medeia-Macina", "0.1")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
resp = musicbrainzngs.search_recordings(query=q, limit=limit)
|
||||
recordings = resp.get("recording-list") or resp.get("recordings") or []
|
||||
except Exception as exc:
|
||||
log(f"MusicBrainz search failed: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
items: List[Dict[str, Any]] = []
|
||||
for rec in recordings[:limit]:
|
||||
if not isinstance(rec, dict):
|
||||
continue
|
||||
title = rec.get("title") or ""
|
||||
|
||||
artist = ""
|
||||
artist_credit = rec.get("artist-credit") or rec.get("artist_credit")
|
||||
if isinstance(artist_credit, list) and artist_credit:
|
||||
first = artist_credit[0]
|
||||
if isinstance(first, dict):
|
||||
artist = first.get("name") or first.get("artist", {}).get("name", "")
|
||||
elif isinstance(first, str):
|
||||
artist = first
|
||||
|
||||
album = ""
|
||||
release_list = rec.get("release-list") or rec.get("releases") or rec.get("release")
|
||||
if isinstance(release_list, list) and release_list:
|
||||
first_rel = release_list[0]
|
||||
if isinstance(first_rel, dict):
|
||||
album = first_rel.get("title", "") or ""
|
||||
release_date = first_rel.get("date") or ""
|
||||
else:
|
||||
album = str(first_rel)
|
||||
release_date = ""
|
||||
else:
|
||||
release_date = rec.get("first-release-date") or ""
|
||||
|
||||
year = str(release_date)[:4] if release_date else ""
|
||||
mbid = rec.get("id") or ""
|
||||
|
||||
items.append({
|
||||
"title": title,
|
||||
"artist": artist,
|
||||
"album": album,
|
||||
"year": year,
|
||||
"provider": self.name,
|
||||
"mbid": mbid,
|
||||
"raw": rec,
|
||||
})
|
||||
|
||||
return items
|
||||
|
||||
def to_tags(self, item: Dict[str, Any]) -> List[str]:
|
||||
tags = super().to_tags(item)
|
||||
mbid = item.get("mbid")
|
||||
if mbid:
|
||||
tags.append(f"musicbrainz:{mbid}")
|
||||
return tags
|
||||
|
||||
|
||||
# Registry ---------------------------------------------------------------
|
||||
|
||||
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
|
||||
"itunes": ITunesProvider,
|
||||
"openlibrary": OpenLibraryMetadataProvider,
|
||||
"googlebooks": GoogleBooksMetadataProvider,
|
||||
"google": GoogleBooksMetadataProvider,
|
||||
"musicbrainz": MusicBrainzMetadataProvider,
|
||||
}
|
||||
|
||||
|
||||
def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
|
||||
_METADATA_PROVIDERS[name.lower()] = provider_cls
|
||||
|
||||
|
||||
def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
|
||||
availability: Dict[str, bool] = {}
|
||||
for name, cls in _METADATA_PROVIDERS.items():
|
||||
try:
|
||||
# Instantiation doubles as a lightweight availability check.
cls(config)
availability[name] = True
|
||||
except Exception:
|
||||
availability[name] = False
|
||||
return availability
|
||||
|
||||
|
||||
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
|
||||
cls = _METADATA_PROVIDERS.get(name.lower())
|
||||
if not cls:
|
||||
return None
|
||||
try:
|
||||
return cls(config)
|
||||
except Exception as exc:
|
||||
log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
|
||||
return None
|
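# Illustrative usage sketch (not part of the original module): resolving a
# provider by name and turning the first hit into sidecar-style tags. The
# query is a placeholder.
def _example_lookup(query: str = "the hobbit") -> List[str]:
    provider = get_metadata_provider("openlibrary")
    if provider is None:
        return []
    results = provider.search(query, limit=1)
    return provider.to_tags(results[0]) if results else []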
||||
@@ -1,951 +0,0 @@
|
||||
"""MPV file metadata aggregation helpers."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
||||
from urllib.parse import parse_qs, urlparse, unquote
|
||||
|
||||
from config import get_hydrus_url
|
||||
from helper.utils import sha256_file, unique_preserve_order
|
||||
from helper.hydrus import HydrusClient, HydrusRequestError
|
||||
|
||||
import metadata
|
||||
|
||||
|
||||
class MPVFileError(RuntimeError):
|
||||
"""Raised when we cannot construct an MPV file snapshot."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DebridMagnet:
|
||||
"""Represents a magnet result from AllDebrid search.
|
||||
|
||||
This class matches the structure expected by the TUI (like Hydrus results)
|
||||
with title, target, media_kind attributes for compatibility.
|
||||
"""
|
||||
magnet_id: str
|
||||
title: str
|
||||
size: int
|
||||
status_code: int
|
||||
status_text: str
|
||||
progress: float
|
||||
downloaded: int
|
||||
seeders: int
|
||||
dl_speed: int
|
||||
tag_summary: Optional[str] = None
|
||||
metadata: Optional[Dict[str, Any]] = None # Complete magnet file metadata from AllDebrid API
|
||||
|
||||
@property
|
||||
def target(self) -> str:
|
||||
"""Return the target URI for this magnet (used by TUI for access operations)."""
|
||||
return f"alldebrid://{self.magnet_id}"
|
||||
|
||||
@property
|
||||
def media_kind(self) -> str:
|
||||
"""Return media kind for display."""
|
||||
return "magnet"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for metadata display."""
|
||||
return {
|
||||
"magnet_id": self.magnet_id,
|
||||
"title": self.title,
|
||||
"size": self.size,
|
||||
"status_code": self.status_code,
|
||||
"status_text": self.status_text,
|
||||
"progress": f"{self.progress:.1f}%",
|
||||
"downloaded": self.downloaded,
|
||||
"seeders": self.seeders,
|
||||
"dl_speed": self.dl_speed,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class HydrusSettings:
|
||||
base_url: Optional[str]
|
||||
access_key: Optional[str]
|
||||
timeout: float
|
||||
prefer_service_name: Optional[str]
|
||||
include_relationships: bool
|
||||
|
||||
def as_metadata_options(self) -> Dict[str, Any]:
|
||||
options: Dict[str, Any] = {
|
||||
"timeout": self.timeout,
|
||||
"include_relationships": self.include_relationships,
|
||||
}
|
||||
if self.prefer_service_name:
|
||||
options["prefer_service_name"] = self.prefer_service_name
|
||||
return options
|
||||
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MPVfile:
|
||||
path: Optional[str] = None
|
||||
filename: Optional[str] = None
|
||||
type: str = "unknown"
|
||||
hash: Optional[str] = None
|
||||
local_path: Optional[str] = None
|
||||
mpv_metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
remote_metadata: Optional[Dict[str, Any]] = None
|
||||
relationships: Optional[Dict[str, Any]] = None
|
||||
relationship_metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
original_tags: Dict[str, str] = field(default_factory=dict)
|
||||
url: List[str] = field(default_factory=list)
|
||||
title: Optional[str] = None
|
||||
source_url: Optional[str] = None
|
||||
clip_time: Optional[str] = None
|
||||
duration: Optional[float] = None
|
||||
filesize_mb: Optional[float] = None
|
||||
is_video: bool = False
|
||||
is_audio: bool = False
|
||||
is_deleted: Optional[bool] = None
|
||||
is_local: Optional[bool] = None
|
||||
has_current_file_service: Optional[bool] = None
|
||||
tag_service_key: Optional[str] = None
|
||||
swap_recommended: bool = False
|
||||
warnings: List[str] = field(default_factory=list)
|
||||
# New relationship fields for menu
|
||||
king: Optional[str] = None
|
||||
alts: List[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {
|
||||
"path": self.path,
|
||||
"filename": self.filename,
|
||||
"type": self.type,
|
||||
"hash": self.hash,
|
||||
"local_path": self.local_path,
|
||||
"mpv_metadata": self.mpv_metadata,
|
||||
"metadata": self.metadata,
|
||||
"remote_metadata": self.remote_metadata,
|
||||
"relationships": self.relationships,
|
||||
"relationship_metadata": self.relationship_metadata,
|
||||
"tags": self.tags,
|
||||
"original_tags": self.original_tags,
|
||||
"url": self.url,
|
||||
"title": self.title,
|
||||
"source_url": self.source_url,
|
||||
"clip_time": self.clip_time,
|
||||
"duration": self.duration,
|
||||
"filesize_mb": self.filesize_mb,
|
||||
"is_video": self.is_video,
|
||||
"is_audio": self.is_audio,
|
||||
"is_deleted": self.is_deleted,
|
||||
"is_local": self.is_local,
|
||||
"has_current_file_service": self.has_current_file_service,
|
||||
"tag_service_key": self.tag_service_key,
|
||||
"swap_recommended": self.swap_recommended,
|
||||
"warnings": self.warnings,
|
||||
# relationship summary fields for easier Lua consumption
|
||||
"king": self.king,
|
||||
"alts": self.alts,
|
||||
}
|
||||
# Remove empty optional values for terser payloads.
|
||||
for key in list(payload.keys()):
|
||||
value = payload[key]
|
||||
if value in (None, [], {}, ""):
|
||||
del payload[key]
|
||||
return payload
|
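# Illustrative sketch (not part of the original module): to_dict() strips
# None/""/[]/{} values, so a minimally populated snapshot serialises compactly.
# The path value is a placeholder.
def _example_compact_payload() -> Dict[str, Any]:
    snapshot = MPVfile(path="C:/media/clip.mp4", type="local", tags=["title:clip"])
    # Only path, type, tags and the boolean flags survive the compaction.
    return snapshot.to_dict()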
||||
|
||||
|
||||
def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
|
||||
if not values:
|
||||
return []
|
||||
seen: set[str] = set()
|
||||
result: List[str] = []
|
||||
for value in values:
|
||||
if value is None:
|
||||
continue
|
||||
text = str(value).strip()
|
||||
if not text or text in seen:
|
||||
continue
|
||||
seen.add(text)
|
||||
result.append(text)
|
||||
return result
|
||||
|
||||
|
||||
def _looks_like_hash(value: Optional[str]) -> bool:
|
||||
if not value:
|
||||
return False
|
||||
candidate = value.strip().lower()
|
||||
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
|
||||
|
||||
|
||||
class MPVFileBuilder:
|
||||
def __init__(self, payload: Dict[str, Any], config: Dict[str, Any]):
|
||||
self.payload = payload or {}
|
||||
self.config = config or {}
|
||||
self.state = MPVfile()
|
||||
self.hydrus_settings = self._resolve_hydrus_settings()
|
||||
self.remote_options = self._resolve_remote_options()
|
||||
self.include_relationships = bool(self.payload.get("include_relationships", True))
|
||||
self.last_url = self._normalise_url(self.payload.get("last_url"))
|
||||
self._initialise_identity()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def build(self) -> Dict[str, Any]:
|
||||
if self.state.type == "hydrus":
|
||||
self._populate_hydrus_by_hash()
|
||||
elif self.state.type == "local":
|
||||
self._populate_local()
|
||||
elif self.state.type == "remote":
|
||||
self._populate_remote()
|
||||
else:
|
||||
# Attempt best effort resolution even for unknown types.
|
||||
self._populate_local(best_effort=True)
|
||||
self._finalise()
|
||||
result = self.state.to_dict()
|
||||
# Append King and Alts info to mpv_metadata for info menu
|
||||
king = self.state.king
|
||||
alts = self.state.alts
|
||||
if king:
|
||||
result.setdefault("mpv_metadata", {})["King"] = king
|
||||
if alts:
|
||||
result.setdefault("mpv_metadata", {})["Alts"] = ", ".join(alts)
|
||||
return result
|
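# Illustrative note: a payload such as
#   {"path": "C:/media/clip.mp4", "type": "local", "tags": ["title:clip"]}
# run through MPVFileBuilder(payload, config).build() yields the compact
# dictionary produced by MPVfile.to_dict(); when the config carries Hydrus
# credentials, tags, hash and relationship data are filled in as well. Path
# and config values here are placeholders.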
||||
|
||||
# ------------------------------------------------------------------
|
||||
# configuration helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _resolve_hydrus_settings(self) -> HydrusSettings:
|
||||
overrides = self.payload.get("hydrus")
|
||||
overrides = overrides if isinstance(overrides, dict) else {}
|
||||
base_url = overrides.get("url") or overrides.get("base_url")
|
||||
access_key = overrides.get("access_key")
|
||||
timeout_raw = overrides.get("timeout") or overrides.get("hydrus_timeout")
|
||||
prefer_service = overrides.get("prefer_service_name")
|
||||
include_relationships = overrides.get("include_relationships")
|
||||
if base_url is None:
|
||||
base_url = get_hydrus_url(self.config)
|
||||
if access_key is None:
|
||||
raw_key = self.config.get("HydrusNetwork_Access_Key")
|
||||
access_key = str(raw_key) if raw_key is not None else None
|
||||
if timeout_raw is None:
|
||||
timeout_raw = self.config.get("HydrusNetwork_Request_Timeout")
|
||||
try:
|
||||
timeout = float(timeout_raw) if timeout_raw is not None else 60.0
|
||||
except (TypeError, ValueError):
|
||||
timeout = 60.0
|
||||
if prefer_service is None:
|
||||
prefer_service = self.config.get("Hydrus_Tag_Service")
|
||||
if isinstance(prefer_service, str):
|
||||
prefer_service = prefer_service.strip() or None
|
||||
if include_relationships is None:
|
||||
include_relationships = self.payload.get("include_relationships")
|
||||
include_relationships = True if include_relationships is None else bool(include_relationships)
|
||||
base_url = base_url.strip() if isinstance(base_url, str) else None
|
||||
access_key = access_key.strip() if isinstance(access_key, str) else None
|
||||
return HydrusSettings(
|
||||
base_url=base_url or None,
|
||||
access_key=access_key or None,
|
||||
timeout=timeout,
|
||||
prefer_service_name=prefer_service,
|
||||
include_relationships=include_relationships,
|
||||
)
|
||||
|
||||
def _resolve_remote_options(self) -> Dict[str, Any]:
|
||||
remote_payload = self.payload.get("remote")
|
||||
remote_payload = remote_payload if isinstance(remote_payload, dict) else {}
|
||||
options = remote_payload.get("options")
|
||||
options = options if isinstance(options, dict) else {}
|
||||
ytdlp_args = options.get("ytdlp_args")
|
||||
if not ytdlp_args:
|
||||
options["ytdlp_args"] = ["--no-playlist", "--skip-download", "--no-warnings"]
|
||||
existing_timeout = options.get("timeout")
|
||||
if existing_timeout is None:
|
||||
options["timeout"] = min(90.0, max(10.0, float(self.payload.get("remote_timeout") or 45.0)))
|
||||
return options
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# initialisation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _initialise_identity(self) -> None:
|
||||
s = self.state
|
||||
p = self.payload
|
||||
|
||||
def _str_or_none(v):
|
||||
return str(v) if v is not None and v != "" else None
|
||||
|
||||
def _copy_dict_if_dict(v):
|
||||
return dict(v) if isinstance(v, dict) else {}
|
||||
|
||||
# path and filename
|
||||
s.path = _str_or_none(p.get("path"))
|
||||
s.filename = _str_or_none(p.get("filename"))
|
||||
|
||||
# mpv metadata
|
||||
s.mpv_metadata = _copy_dict_if_dict(p.get("mpv_metadata"))
|
||||
|
||||
# tags (support both "tags" and legacy "existing_tags")
|
||||
existing_tags = p.get("tags") or p.get("existing_tags")
|
||||
s.tags = _normalise_string_list(existing_tags)
|
||||
if s.tags:
|
||||
s.original_tags = {tag: tag for tag in s.tags}
|
||||
|
||||
# known url + last_url
|
||||
s.url = _normalise_string_list(p.get("url"))
|
||||
if self.last_url and self.last_url not in s.url:
|
||||
s.url.append(self.last_url)
|
||||
|
||||
# source URL (explicit or fallback to last_url)
|
||||
explicit_source = p.get("source_url")
|
||||
s.source_url = self._normalise_url(explicit_source) or self.last_url
|
||||
|
||||
# hash (validate looks-like-hash)
|
||||
hash_candidate = p.get("hash")
|
||||
if isinstance(hash_candidate, str):
|
||||
candidate = hash_candidate.strip().lower()
|
||||
if _looks_like_hash(candidate):
|
||||
s.hash = candidate
|
||||
|
||||
# local_path (non-empty string)
|
||||
local_path_override = p.get("local_path")
|
||||
if isinstance(local_path_override, str):
|
||||
lp = local_path_override.strip()
|
||||
if lp:
|
||||
s.local_path = lp
|
||||
|
||||
# derive remaining fields from path/filename/type
|
||||
self._derive_filename_from_path()
|
||||
self._determine_type()
|
||||
|
||||
|
||||
def _derive_filename_from_path(self) -> None:
|
||||
if self.state.filename or not self.state.path:
|
||||
return
|
||||
parsed = urlparse(self.state.path)
|
||||
if parsed.scheme in ("http", "https", "ytdl") and parsed.path:
|
||||
candidate = Path(parsed.path).name
|
||||
if candidate:
|
||||
self.state.filename = candidate
|
||||
elif parsed.scheme == "file":
|
||||
decoded = self._decode_file_url(self.state.path)
|
||||
if decoded:
|
||||
self.state.filename = Path(decoded).name
|
||||
else:
|
||||
try:
|
||||
self.state.filename = Path(self.state.path).name
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _determine_type(self) -> None:
|
||||
s = self.state
|
||||
p = self.payload
|
||||
|
||||
def _set_local_from_path(pth: str | None):
|
||||
if not pth:
|
||||
return
|
||||
# Prefer resolved local path when available
|
||||
resolved = self._resolve_local_path(pth)
|
||||
s.local_path = resolved if resolved else pth
|
||||
s.type = "local"
|
||||
|
||||
# 1) Respect explicit type when valid
|
||||
explicit = p.get("type")
|
||||
if isinstance(explicit, str):
|
||||
lowered = explicit.strip().lower()
|
||||
if lowered in {"local", "hydrus", "remote"}:
|
||||
s.type = lowered
|
||||
if lowered == "local":
|
||||
s.local_path = self._resolve_local_path(s.path)
|
||||
return
|
||||
|
||||
# 2) Work from path
|
||||
path = s.path or ""
|
||||
if not path:
|
||||
s.type = "unknown"
|
||||
return
|
||||
|
||||
# 3) Hydrus-specific quick checks
|
||||
if self._looks_like_hydrus_url(path):
|
||||
s.type = "hydrus"
|
||||
return
|
||||
|
||||
parsed = urlparse(path)
|
||||
scheme = (parsed.scheme or "").lower()
|
||||
|
||||
# 4) scheme-based handling
|
||||
if scheme == "hydrus":
|
||||
s.type = "hydrus"
|
||||
return
|
||||
|
||||
if scheme in {"http", "https", "rtmp", "rtsp", "magnet", "ytdl"}:
|
||||
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
|
||||
return
|
||||
|
||||
if scheme == "file":
|
||||
decoded = self._decode_file_url(path)
|
||||
if decoded:
|
||||
s.local_path = decoded
|
||||
s.type = "local"
|
||||
return
|
||||
|
||||
# 5) Windows/UNC absolute paths
|
||||
if re.match(r"^[A-Za-z]:[\\/]", path) or path.startswith(("\\\\", "//")):
|
||||
s.type = "local"
|
||||
s.local_path = path
|
||||
return
|
||||
|
||||
# 6) Fallback: if it looks like a URL with a scheme separator treat as remote/hydrus
|
||||
if "://" in path:
|
||||
s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote"
|
||||
return
|
||||
|
||||
# 7) Otherwise treat as a local path
|
||||
_set_local_from_path(path)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# population helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _populate_local(self, best_effort: bool = False) -> None:
|
||||
local_path = self.state.local_path or self._resolve_local_path(self.state.path)
|
||||
if local_path:
|
||||
self.state.local_path = local_path
|
||||
self._load_sidecar_tags(local_path)
|
||||
if not self.state.hash:
|
||||
self._compute_local_hash(local_path)
|
||||
# If Hydrus is configured and we have a hash, enrich from Hydrus; otherwise keep local tags only
|
||||
if self.state.hash and self.hydrus_settings.base_url and self.hydrus_settings.access_key:
|
||||
self._populate_hydrus_by_hash()
|
||||
elif best_effort and self.hydrus_settings.base_url and self.state.source_url and self.hydrus_settings.access_key:
|
||||
self._populate_hydrus_by_url(self.state.source_url)
|
||||
|
||||
# (helpers for resolving local path and loading sidecars already exist below)
|
||||
|
||||
def _populate_remote(self) -> None:
|
||||
source_url = self.state.source_url or self.last_url or self.state.path
|
||||
source_url = self._normalise_url(source_url)
|
||||
if source_url:
|
||||
self.state.source_url = source_url
|
||||
remote_payload = {
|
||||
"source_url": self.state.source_url,
|
||||
"existing_tags": self.state.tags,
|
||||
"metadata": self.payload.get("remote_metadata"),
|
||||
"mpv_metadata": self.state.mpv_metadata,
|
||||
"options": self.remote_options,
|
||||
}
|
||||
try:
|
||||
remote_result = metadata.resolve_remote_metadata(remote_payload)
|
||||
except Exception as exc: # pragma: no cover - surfaced to the caller
|
||||
self.state.warnings.append(str(exc))
|
||||
remote_result = None
|
||||
if remote_result:
|
||||
tags = remote_result.get("tags") or []
|
||||
self._merge_tags(tags)
|
||||
self.state.remote_metadata = remote_result.get("metadata")
|
||||
self.state.title = remote_result.get("title") or self.state.title
|
||||
self.state.duration = remote_result.get("duration") or self.state.duration
|
||||
self.state.source_url = remote_result.get("source_url") or self.state.source_url
|
||||
warnings = remote_result.get("warnings") or []
|
||||
if warnings:
|
||||
self.state.warnings.extend(warnings)
|
||||
if self.hydrus_settings.base_url and self.state.source_url:
|
||||
self._populate_hydrus_by_url(self.state.source_url)
|
||||
|
||||
def _populate_hydrus_by_hash(self) -> None:
|
||||
hash_hex = self.state.hash or self._extract_hash_from_path(self.state.path)
|
||||
if hash_hex and not _looks_like_hash(hash_hex):
|
||||
hash_hex = None
|
||||
if not hash_hex:
|
||||
return
|
||||
self.state.hash = hash_hex
|
||||
if not self.hydrus_settings.base_url:
|
||||
return
|
||||
payload: Dict[str, Any] = {
|
||||
"api_url": self.hydrus_settings.base_url,
|
||||
"access_key": self.hydrus_settings.access_key or "",
|
||||
"options": self.hydrus_settings.as_metadata_options(),
|
||||
"hash": hash_hex,
|
||||
}
|
||||
try:
|
||||
result = metadata.fetch_hydrus_metadata(payload)
|
||||
except Exception as exc: # pragma: no cover - surfaced to caller
|
||||
self.state.warnings.append(str(exc))
|
||||
return
|
||||
self._apply_hydrus_result(result)
|
||||
# Enrich relationships using the dedicated Hydrus endpoint (robust GET)
|
||||
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||
self._enrich_relationships_from_api(self.state.hash)
|
||||
|
||||
def _populate_hydrus_by_url(self, url: str) -> None:
|
||||
if not self.hydrus_settings.base_url:
|
||||
return
|
||||
payload: Dict[str, Any] = {
|
||||
"api_url": self.hydrus_settings.base_url,
|
||||
"access_key": self.hydrus_settings.access_key or "",
|
||||
"options": self.hydrus_settings.as_metadata_options(),
|
||||
"url": url,
|
||||
}
|
||||
try:
|
||||
result = metadata.fetch_hydrus_metadata_by_url(payload)
|
||||
except Exception as exc: # pragma: no cover - surfaced to caller
|
||||
self.state.warnings.append(str(exc))
|
||||
return
|
||||
if result.get("error") == "not_found":
|
||||
self.state.warnings.extend(result.get("warnings") or [])
|
||||
return
|
||||
self._apply_hydrus_result(result)
|
||||
self.state.type = "hydrus"
|
||||
matched_url = result.get("matched_url") or result.get("url")
|
||||
if matched_url and matched_url not in self.state.url:
|
||||
self.state.url.append(matched_url)
|
||||
# Enrich relationships once we know the hash
|
||||
if self.include_relationships and self.state.hash and self.hydrus_settings.base_url:
|
||||
self._enrich_relationships_from_api(self.state.hash)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# state modification helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def _apply_hydrus_result(self, result: Dict[str, Any]) -> None:
|
||||
metadata_payload = result.get("metadata")
|
||||
if isinstance(metadata_payload, dict):
|
||||
# Process mime into type for Lua
|
||||
mime = metadata_payload.get("mime")
|
||||
if isinstance(mime, str):
|
||||
if mime.startswith("video/"):
|
||||
metadata_payload["type"] = "video"
|
||||
elif mime.startswith("audio/"):
|
||||
metadata_payload["type"] = "audio"
|
||||
elif mime.startswith("image/"):
|
||||
metadata_payload["type"] = "image"
|
||||
else:
|
||||
metadata_payload["type"] = "other"
|
||||
self.state.metadata = metadata_payload
|
||||
# Do NOT overwrite MPVfile.type with metadata.type
|
||||
self._merge_url(metadata_payload.get("url") or metadata_payload.get("url_set"))
|
||||
source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url")
|
||||
if source_url and not self.state.source_url:
|
||||
self.state.source_url = self._normalise_url(source_url)
|
||||
# If file_relationships are embedded in metadata, capture as relationships when missing
|
||||
if self.state.relationships is None:
|
||||
embedded = metadata_payload.get("file_relationships")
|
||||
if isinstance(embedded, dict) and embedded:
|
||||
self.state.relationships = embedded
|
||||
tags = result.get("tags") or []
|
||||
self._merge_tags(tags)
|
||||
hash_value = result.get("hash") or result.get("matched_hash")
|
||||
if isinstance(hash_value, str) and _looks_like_hash(hash_value):
|
||||
self.state.hash = hash_value.lower()
|
||||
self.state.tag_service_key = result.get("tag_service_key") or self.state.tag_service_key
|
||||
self.state.duration = result.get("duration") or self.state.duration
|
||||
self.state.filesize_mb = result.get("filesize_mb") or self.state.filesize_mb
|
||||
self.state.is_video = bool(result.get("is_video") or self.state.is_video)
|
||||
self.state.is_audio = bool(result.get("is_audio") or self.state.is_audio)
|
||||
if result.get("is_deleted") is not None:
|
||||
self.state.is_deleted = bool(result.get("is_deleted"))
|
||||
if result.get("is_local") is not None:
|
||||
self.state.is_local = bool(result.get("is_local"))
|
||||
if result.get("has_current_file_service") is not None:
|
||||
self.state.has_current_file_service = bool(result.get("has_current_file_service"))
|
||||
# Consolidate relationships from explicit result or embedded metadata
|
||||
relationships_obj: Optional[Dict[str, Any]] = None
|
||||
if isinstance(result.get("relationships"), dict):
|
||||
relationships_obj = result["relationships"]
|
||||
self.state.relationships = relationships_obj
|
||||
elif isinstance(self.state.relationships, dict):
|
||||
relationships_obj = self.state.relationships
|
||||
|
||||
# Helper to flatten any hashes from the relationships object
|
||||
def _collect_hashes(obj: Any, acc: set[str]) -> None:
|
||||
if obj is None:
|
||||
return
|
||||
if isinstance(obj, dict):
|
||||
for v in obj.values():
|
||||
_collect_hashes(v, acc)
|
||||
elif isinstance(obj, (list, tuple, set)):
|
||||
for v in obj:
|
||||
_collect_hashes(v, acc)
|
||||
elif isinstance(obj, str) and _looks_like_hash(obj):
|
||||
acc.add(obj.lower())
|
||||
|
||||
# Derive king and alts robustly from available data
|
||||
king: Optional[str] = None
|
||||
alts: list[str] = []
|
||||
|
||||
# 1) Try direct king fields on relationships object
|
||||
rels = relationships_obj or {}
|
||||
if isinstance(rels, dict):
|
||||
# Common variants
|
||||
for key in ("king", "king_hash", "duplicate_king", "best", "best_hash"):
|
||||
val = rels.get(key)
|
||||
if isinstance(val, str) and _looks_like_hash(val):
|
||||
king = val.lower()
|
||||
break
|
||||
if isinstance(val, list):
|
||||
for h in val:
|
||||
if isinstance(h, str) and _looks_like_hash(h):
|
||||
king = h.lower()
|
||||
break
|
||||
if king:
|
||||
break
|
||||
# 2) Extract alternates from known fields: numeric "3" (clips), or textual synonyms
|
||||
for alt_key in ("3", "alternates", "alts", "clips"):
|
||||
val = rels.get(alt_key)
|
||||
if isinstance(val, list):
|
||||
for h in val:
|
||||
if isinstance(h, str) and _looks_like_hash(h):
|
||||
h_low = h.lower()
|
||||
if not king or h_low != king:
|
||||
alts.append(h_low)
|
||||
# some APIs might nest
|
||||
elif isinstance(val, dict):
|
||||
tmp: set[str] = set()
|
||||
_collect_hashes(val, tmp)
|
||||
for h in sorted(tmp):
|
||||
if not king or h != king:
|
||||
alts.append(h)
|
||||
|
||||
# 3) Use relationship_metadata keys as additional alternates and king hint
|
||||
rel_meta = result.get("relationship_metadata")
|
||||
if isinstance(rel_meta, dict):
|
||||
# prefer king candidate with no clip_time if not set
|
||||
if not king:
|
||||
for h, meta in rel_meta.items():
|
||||
if isinstance(h, str) and _looks_like_hash(h) and isinstance(meta, dict):
|
||||
if not meta.get("clip_time"):
|
||||
king = h.lower()
|
||||
break
|
||||
for h in rel_meta.keys():
|
||||
if isinstance(h, str) and _looks_like_hash(h):
|
||||
h_low = h.lower()
|
||||
if not king or h_low != king:
|
||||
alts.append(h_low)
|
||||
|
||||
# 4) As a last resort, flatten all relationship hashes
|
||||
if not alts and relationships_obj:
|
||||
tmp: set[str] = set()
|
||||
_collect_hashes(relationships_obj, tmp)
|
||||
for h in sorted(tmp):
|
||||
if not king or h != king:
|
||||
alts.append(h)
|
||||
|
||||
# 5) Include current file when appropriate
|
||||
if self.state.hash and (not king or self.state.hash != king) and self.state.hash not in alts:
|
||||
alts.append(self.state.hash)
|
||||
|
||||
# 6) Sort alternates by clip start time when available
|
||||
rel_meta_all = result.get("relationship_metadata") if isinstance(result.get("relationship_metadata"), dict) else {}
|
||||
def _clip_start_for(h: str) -> float:
|
||||
meta = rel_meta_all.get(h) if isinstance(rel_meta_all, dict) else None
|
||||
clip = meta.get("clip_time") if isinstance(meta, dict) else None
|
||||
if isinstance(clip, str):
|
||||
m = re.match(r"^(\d+)-(\d+)$", clip)
|
||||
if m:
|
||||
try:
|
||||
return float(m.group(1))
|
||||
except Exception:
|
||||
return float("inf")
|
||||
return float("inf")
|
||||
|
||||
if alts:
|
||||
# de-duplicate while preserving earliest clip time ordering
|
||||
seen: set[str] = set()
|
||||
alts = [h for h in sorted(alts, key=_clip_start_for) if (h not in seen and not seen.add(h))]
|
||||
|
||||
self.state.king = king
|
||||
self.state.alts = alts
|
||||
if isinstance(result.get("relationship_metadata"), dict):
|
||||
self.state.relationship_metadata = result["relationship_metadata"]
|
||||
self.state.title = result.get("title") or self.state.title
|
||||
self.state.clip_time = result.get("clip_time") or self.state.clip_time
|
||||
if result.get("swap_recommended"):
|
||||
self.state.swap_recommended = True
|
||||
warnings = result.get("warnings") or []
|
||||
if warnings:
|
||||
self.state.warnings.extend(warnings)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# relationships enrichment (Hydrus endpoint + alt metadata)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _enrich_relationships_from_api(self, file_hash: str) -> None:
|
||||
"""Fetch relationships for the given hash and enrich state's king/alts and alt metadata.
|
||||
|
||||
- Uses GET /manage_file_relationships/get_file_relationships?hash=...
|
||||
- If alts exist, batch-fetch their metadata via GET /get_files/file_metadata?hashes=[...]
|
||||
- Extracts title, duration, size, tags (cleaned: title: kept with namespace, others stripped)
|
||||
"""
|
||||
base_url = self.hydrus_settings.base_url or ""
|
||||
access_key = self.hydrus_settings.access_key or ""
|
||||
if not base_url:
|
||||
return
|
||||
try:
|
||||
client = HydrusClient(base_url, access_key, timeout=self.hydrus_settings.timeout)
|
||||
except Exception as exc: # pragma: no cover - construction should rarely fail
|
||||
self.state.warnings.append(f"Hydrus client init failed: {exc}")
|
||||
return
|
||||
try:
|
||||
rel_resp = client.get_file_relationships(file_hash)
|
||||
except HydrusRequestError as hre: # pragma: no cover - surfaced but non-fatal
|
||||
self.state.warnings.append(f"relationships api: {hre}")
|
||||
return
|
||||
except Exception as exc: # pragma: no cover
|
||||
self.state.warnings.append(f"relationships api: {exc}")
|
||||
return
|
||||
|
||||
rel_map = rel_resp.get("file_relationships") or {}
|
||||
rel_obj = None
|
||||
if isinstance(rel_map, dict):
|
||||
rel_obj = rel_map.get(file_hash) or next((v for v in rel_map.values() if isinstance(v, dict)), None)
|
||||
if isinstance(rel_obj, dict):
|
||||
# Preserve the full relationships object
|
||||
self.state.relationships = rel_obj
|
||||
# Update king and alts from canonical fields
|
||||
king = rel_obj.get("king")
|
||||
alts = rel_obj.get("3") or []
|
||||
if isinstance(king, str) and _looks_like_hash(king):
|
||||
self.state.king = king.lower()
|
||||
if isinstance(alts, list):
|
||||
self.state.alts = [h.lower() for h in alts if isinstance(h, str) and _looks_like_hash(h)]
|
||||
|
||||
# Fetch alt metadata if we have alts
|
||||
if not self.state.alts:
|
||||
return
|
||||
try:
|
||||
meta_resp = client.fetch_file_metadata(
|
||||
hashes=self.state.alts,
|
||||
include_service_keys_to_tags=True,
|
||||
include_duration=True,
|
||||
include_size=True,
|
||||
include_file_url=False,
|
||||
include_mime=False,
|
||||
)
|
||||
except HydrusRequestError as hre: # pragma: no cover
|
||||
self.state.warnings.append(f"metadata api: {hre}")
|
||||
return
|
||||
except Exception as exc: # pragma: no cover
|
||||
self.state.warnings.append(f"metadata api: {exc}")
|
||||
return
|
||||
|
||||
if not isinstance(meta_resp, dict):
|
||||
return
|
||||
entries = meta_resp.get("metadata") or []
|
||||
if not isinstance(entries, list):
|
||||
return
|
||||
|
||||
def _extract_tags(meta: Dict[str, Any]) -> list[str]:
|
||||
tags: list[str] = []
|
||||
tag_root = meta.get("tags") or meta.get("service_keys_to_statuses_to_tags") or {}
|
||||
if isinstance(tag_root, dict):
|
||||
for service_dict in tag_root.values():
|
||||
if not isinstance(service_dict, dict):
|
||||
continue
|
||||
# Prefer storage_tags but fall back to any list values under known keys
|
||||
storage = service_dict.get("storage_tags")
|
||||
if isinstance(storage, dict):
|
||||
for vals in storage.values():
|
||||
if isinstance(vals, list):
|
||||
tags.extend([str(t) for t in vals if isinstance(t, str)])
|
||||
else:
|
||||
# fall back: inspect lists directly under service_dict
|
||||
for vals in service_dict.values():
|
||||
if isinstance(vals, list):
|
||||
tags.extend([str(t) for t in vals if isinstance(t, str)])
|
||||
return tags
|
||||
|
||||
def _clean_tags_and_title(all_tags: list[str]) -> tuple[Optional[str], list[str]]:
|
||||
title_val: Optional[str] = None
|
||||
cleaned: list[str] = []
|
||||
for tag in all_tags:
|
||||
if not isinstance(tag, str):
|
||||
continue
|
||||
if tag.startswith("title:"):
|
||||
if title_val is None:
|
||||
title_val = tag.split(":", 1)[1]
|
||||
cleaned.append(tag) # keep namespaced title
|
||||
else:
|
||||
if ":" in tag:
|
||||
cleaned.append(tag.split(":", 1)[1])
|
||||
else:
|
||||
cleaned.append(tag)
|
||||
return title_val, cleaned
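        # Illustrative (assumed) input: ["title:My Song", "creator:someone", "live"]
        # would yield title_val "My Song" and cleaned tags
        # ["title:My Song", "someone", "live"] - the title: namespace is kept,
        # other namespaces are stripped down to their values.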
|
||||
|
||||
for meta in entries:
|
||||
if not isinstance(meta, dict):
|
||||
continue
|
||||
h = meta.get("hash")
|
||||
if not (isinstance(h, str) and _looks_like_hash(h)):
|
||||
continue
|
||||
tags_all = _extract_tags(meta)
|
||||
title_val, tags_clean = _clean_tags_and_title(tags_all)
|
||||
alt_info = {
|
||||
"title": title_val,
|
||||
"duration": meta.get("duration"),
|
||||
"size": meta.get("size"),
|
||||
"tags": tags_clean,
|
||||
}
|
||||
self.state.relationship_metadata[h.lower()] = alt_info
|
||||
|
||||
def _merge_tags(self, tags: Sequence[Any]) -> None:
|
||||
incoming = _normalise_string_list(tags)
|
||||
if not incoming:
|
||||
return
|
||||
combined = list(self.state.tags or []) + incoming
|
||||
self.state.tags = unique_preserve_order(combined)
|
||||
for tag in incoming:
|
||||
if tag not in self.state.original_tags:
|
||||
self.state.original_tags[tag] = tag
|
||||
|
||||
def _merge_url(self, url: Optional[Iterable[Any]]) -> None:
|
||||
if not url:
|
||||
return
|
||||
combined = list(self.state.url or []) + _normalise_string_list(url)
|
||||
self.state.url = unique_preserve_order(combined)
|
||||
|
||||
def _load_sidecar_tags(self, local_path: str) -> None:
|
||||
try:
|
||||
media_path = Path(local_path)
|
||||
except Exception:
|
||||
return
|
||||
if not media_path.exists():
|
||||
return
|
||||
candidates = [media_path.with_suffix(".tags"), media_path.with_suffix(".tags.txt")]
|
||||
for candidate in candidates:
|
||||
if candidate.exists():
|
||||
hash_value, tags, known = self._read_sidecar(candidate)
|
||||
if hash_value and not self.state.hash and _looks_like_hash(hash_value):
|
||||
self.state.hash = hash_value.lower()
|
||||
self._merge_tags(tags)
|
||||
self._merge_url(known)
|
||||
break
|
||||
|
||||
    def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
        try:
            raw = sidecar_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            return None, [], []
        hash_value: Optional[str] = None
        tags: List[str] = []
        url: List[str] = []
        for line in raw.splitlines():
            trimmed = line.strip()
            if not trimmed:
                continue
            lowered = trimmed.lower()
            if lowered.startswith("hash:"):
                candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
                if candidate:
                    hash_value = candidate
            elif lowered.startswith("url:"):
                candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else ""
                if candidate:
                    url.append(candidate)
            else:
                tags.append(trimmed)
        return hash_value, tags, url
|
||||
|
||||
def _compute_local_hash(self, local_path: str) -> None:
|
||||
try:
|
||||
digest = sha256_file(Path(local_path))
|
||||
except OSError as exc:
|
||||
self.state.warnings.append(f"sha256 failed: {exc}")
|
||||
return
|
||||
self.state.hash = digest.lower()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# finalisation helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _finalise(self) -> None:
|
||||
if self.state.tags:
|
||||
self.state.tags = unique_preserve_order(self.state.tags)
|
||||
if self.state.url:
|
||||
self.state.url = unique_preserve_order(self.state.url)
|
||||
# Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type
|
||||
if not self.state.title:
|
||||
if self.state.metadata.get("title"):
|
||||
self.state.title = str(self.state.metadata["title"]).strip()
|
||||
elif self.state.filename:
|
||||
self.state.title = self.state.filename
|
||||
if self.state.hash and not _looks_like_hash(self.state.hash):
|
||||
self.state.hash = None
|
||||
if self.state.relationship_metadata is None:
|
||||
self.state.relationship_metadata = {}
|
||||
if self.state.relationships is not None and not isinstance(self.state.relationships, dict):
|
||||
self.state.relationships = None
|
||||
if self.state.original_tags is None:
|
||||
self.state.original_tags = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# util helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _normalise_url(value: Any) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
return None
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def _resolve_local_path(path: Optional[str]) -> Optional[str]:
|
||||
if not path:
|
||||
return None
|
||||
parsed = urlparse(path)
|
||||
if parsed.scheme == "file":
|
||||
decoded = MPVFileBuilder._decode_file_url(path)
|
||||
return decoded
|
||||
return path
|
||||
|
||||
@staticmethod
|
||||
def _decode_file_url(value: str) -> Optional[str]:
|
||||
parsed = urlparse(value)
|
||||
if parsed.scheme != "file":
|
||||
return None
|
||||
netloc = parsed.netloc or ""
|
||||
path = unquote(parsed.path or "")
|
||||
if netloc:
|
||||
path = f"//{netloc}{path}"
|
||||
if os.name == "nt" and path.startswith("/") and re.match(r"/[A-Za-z]:", path):
|
||||
path = path[1:]
|
||||
path = path.replace("/", os.sep)
|
||||
return path
|
||||
|
||||
def _looks_like_hydrus_url(self, url: str) -> bool:
|
||||
if not url:
|
||||
return False
|
||||
if url.startswith("hydrus://"):
|
||||
return True
|
||||
if "Hydrus-Client-API-Access-Key=" in url:
|
||||
return True
|
||||
base = self.hydrus_settings.base_url
|
||||
if base and url.startswith(base) and "/get_files/" in url:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _extract_hash_from_path(path: Optional[str]) -> Optional[str]:
|
||||
if not path:
|
||||
return None
|
||||
parsed = urlparse(path)
|
||||
query = parse_qs(parsed.query)
|
||||
if "hash" in query and query["hash"]:
|
||||
candidate = query["hash"][0].strip()
|
||||
if candidate:
|
||||
return candidate.lower()
|
||||
match = re.search(r"hash=([0-9a-fA-F]{64})", path)
|
||||
if match:
|
||||
return match.group(1).lower()
|
||||
return None
|
||||
|
||||
|
||||
def build_mpv_file_state(payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
builder = MPVFileBuilder(payload or {}, config or {})
|
||||
return builder.build()
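# A minimal usage sketch. The exact payload keys accepted by MPVFileBuilder are
# defined earlier in this module; "path" and "tags" here are assumptions for
# illustration, and an empty config simply skips the Hydrus enrichment.
if __name__ == "__main__":
    example_state = build_mpv_file_state(
        {"path": "C:/media/example-clip.mp4", "tags": ["demo"]},
        {},
    )
    print(example_state.get("type"), example_state.get("title"))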
|
||||
@@ -1,404 +0,0 @@
|
||||
"""MPV IPC client for cross-platform communication.
|
||||
|
||||
This module provides a cross-platform interface to communicate with mpv
|
||||
using either named pipes (Windows) or Unix domain sockets (Linux/macOS).
|
||||
|
||||
This is the central hub for all Python-mpv IPC communication. The Lua script
|
||||
should use the Python CLI, which uses this module to manage mpv connections.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import socket
|
||||
import time as _time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, List
|
||||
|
||||
from helper.logger import debug
|
||||
|
||||
|
||||
# Fixed pipe name for persistent MPV connection across all Python sessions
|
||||
FIXED_IPC_PIPE_NAME = "mpv-medeia-macina"
|
||||
MPV_LUA_SCRIPT_PATH = str(Path(__file__).resolve().parent.parent / "LUA" / "main.lua")
|
||||
|
||||
|
||||
class MPVIPCError(Exception):
|
||||
"""Raised when MPV IPC communication fails."""
|
||||
pass
|
||||
|
||||
|
||||
def get_ipc_pipe_path() -> str:
|
||||
"""Get the fixed IPC pipe/socket path for persistent MPV connection.
|
||||
|
||||
Uses a fixed name so all playback sessions connect to the same MPV
|
||||
window/process instead of creating new instances.
|
||||
|
||||
Returns:
|
||||
Path to IPC pipe (Windows) or socket (Linux/macOS)
|
||||
"""
|
||||
system = platform.system()
|
||||
|
||||
if system == "Windows":
|
||||
return f"\\\\.\\pipe\\{FIXED_IPC_PIPE_NAME}"
|
||||
elif system == "Darwin": # macOS
|
||||
return f"/tmp/{FIXED_IPC_PIPE_NAME}.sock"
|
||||
else: # Linux and others
|
||||
return f"/tmp/{FIXED_IPC_PIPE_NAME}.sock"
|
||||
|
||||
|
||||
def _unwrap_memory_target(text: Optional[str]) -> Optional[str]:
|
||||
"""Return the real target from a memory:// M3U payload if present."""
|
||||
if not isinstance(text, str) or not text.startswith("memory://"):
|
||||
return text
|
||||
for line in text.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#') or line.startswith('memory://'):
|
||||
continue
|
||||
return line
|
||||
return text
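# Example: a memory:// M3U wrapper as produced by send_to_mpv below,
#   "memory://#EXTM3U\n#EXTINF:-1,My Title\nC:/media/clip.mp4"
# unwraps to "C:/media/clip.mp4"; plain, non-memory:// strings pass through unchanged.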
|
||||
|
||||
|
||||
def _normalize_target(text: Optional[str]) -> Optional[str]:
    """Normalize playlist targets for deduping across raw/memory:// wrappers."""
    if not text:
        return None
    real = _unwrap_memory_target(text)
    if not real:
        return None
    real = real.strip()
    if not real:
        return None

    lower = real.lower()
    # Hydrus bare hash
    if len(lower) == 64 and all(ch in "0123456789abcdef" for ch in lower):
        return lower

    # Hydrus file URL with hash query
    try:
        from urllib.parse import parse_qs, urlparse
        parsed = urlparse(real)
        qs = parse_qs(parsed.query)
        hash_qs = qs.get("hash", [None])[0]
        if hash_qs and len(hash_qs) == 64 and all(ch in "0123456789abcdef" for ch in hash_qs.lower()):
            return hash_qs.lower()
    except Exception:
        pass

    # Normalize path separators so Windows and POSIX style targets compare equal
    return lower.replace('\\', '/')
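# Examples (illustrative values, not taken from a real library):
#   _normalize_target("<64-hex-hash>")                                     -> "<64-hex-hash>" lowercased
#   _normalize_target("http://host:45869/get_files/file?hash=<64-hex>")    -> "<64-hex>"
#   _normalize_target("memory://#EXTM3U\n#EXTINF:-1,T\nC:\\media\\a.mp4")  -> "c:/media/a.mp4"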
|
||||
|
||||
|
||||
class MPVIPCClient:
|
||||
"""Client for communicating with mpv via IPC socket/pipe.
|
||||
|
||||
This is the unified interface for all Python code to communicate with mpv.
|
||||
It handles platform-specific differences (Windows named pipes vs Unix sockets).
|
||||
"""
|
||||
|
||||
def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0):
|
||||
"""Initialize MPV IPC client.
|
||||
|
||||
Args:
|
||||
socket_path: Path to IPC socket/pipe. If None, uses the fixed persistent path.
|
||||
timeout: Socket timeout in seconds.
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.socket_path = socket_path or get_ipc_pipe_path()
|
||||
self.sock = None
|
||||
self.is_windows = platform.system() == "Windows"
|
||||
|
||||
def connect(self) -> bool:
|
||||
"""Connect to mpv IPC socket.
|
||||
|
||||
Returns:
|
||||
True if connection successful, False otherwise.
|
||||
"""
|
||||
try:
|
||||
if self.is_windows:
|
||||
# Windows named pipes
|
||||
try:
|
||||
# Try to open the named pipe
|
||||
self.sock = open(self.socket_path, 'r+b', buffering=0)
|
||||
return True
|
||||
except (OSError, IOError) as exc:
|
||||
debug(f"Failed to connect to MPV named pipe: {exc}")
|
||||
return False
|
||||
else:
|
||||
# Unix domain socket (Linux, macOS)
|
||||
if not os.path.exists(self.socket_path):
|
||||
debug(f"IPC socket not found: {self.socket_path}")
|
||||
return False
|
||||
|
||||
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
self.sock.settimeout(self.timeout)
|
||||
self.sock.connect(self.socket_path)
|
||||
return True
|
||||
except Exception as exc:
|
||||
debug(f"Failed to connect to MPV IPC: {exc}")
|
||||
self.sock = None
|
||||
return False
|
||||
|
||||
def send_command(self, command_data: Dict[str, Any] | List[Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Send a command to mpv and get response.
|
||||
|
||||
Args:
|
||||
command_data: Command dict (e.g. {"command": [...]}) or list (e.g. ["loadfile", ...])
|
||||
|
||||
Returns:
|
||||
Response dict with 'error' key (value 'success' on success), or None on error.
|
||||
"""
|
||||
if not self.sock:
|
||||
if not self.connect():
|
||||
return None
|
||||
|
||||
try:
|
||||
# Format command as JSON (mpv IPC protocol)
|
||||
if isinstance(command_data, list):
|
||||
request = {"command": command_data}
|
||||
else:
|
||||
request = command_data
|
||||
|
||||
# Add request_id if not present to match response
|
||||
if "request_id" not in request:
|
||||
request["request_id"] = int(_time.time() * 1000) % 100000
|
||||
|
||||
payload = json.dumps(request) + "\n"
|
||||
|
||||
# Debug: log the command being sent
|
||||
from helper.logger import debug as _debug
|
||||
_debug(f"[IPC] Sending: {payload.strip()}")
|
||||
|
||||
# Send command
|
||||
if self.is_windows:
|
||||
self.sock.write(payload.encode('utf-8'))
|
||||
self.sock.flush()
|
||||
else:
|
||||
self.sock.sendall(payload.encode('utf-8'))
|
||||
|
||||
# Receive response
|
||||
# We need to read lines until we find the one with matching request_id
|
||||
# or until timeout/error. MPV might send events in between.
|
||||
start_time = _time.time()
|
||||
while _time.time() - start_time < self.timeout:
|
||||
response_data = b""
|
||||
if self.is_windows:
|
||||
try:
|
||||
response_data = self.sock.readline()
|
||||
except (OSError, IOError):
|
||||
return None
|
||||
else:
|
||||
try:
|
||||
# This is simplistic for Unix socket (might not get full line)
|
||||
# But for now assuming MPV sends line-buffered JSON
|
||||
chunk = self.sock.recv(4096)
|
||||
if not chunk:
|
||||
break
|
||||
response_data = chunk
|
||||
# TODO: Handle partial lines if needed
|
||||
except socket.timeout:
|
||||
return None
|
||||
|
||||
if not response_data:
|
||||
break
|
||||
|
||||
try:
|
||||
lines = response_data.decode('utf-8').strip().split('\n')
|
||||
for line in lines:
|
||||
if not line: continue
|
||||
resp = json.loads(line)
|
||||
|
||||
# Debug: log responses
|
||||
from helper.logger import debug as _debug
|
||||
_debug(f"[IPC] Received: {line}")
|
||||
|
||||
# Check if this is the response to our request
|
||||
if resp.get("request_id") == request.get("request_id"):
|
||||
return resp
|
||||
|
||||
# Handle async log messages/events for visibility
|
||||
event_type = resp.get("event")
|
||||
if event_type == "log-message":
|
||||
level = resp.get("level", "info")
|
||||
prefix = resp.get("prefix", "")
|
||||
text = resp.get("text", "").strip()
|
||||
debug(f"[MPV {level}] {prefix} {text}".strip())
|
||||
elif event_type:
|
||||
debug(f"[MPV event] {event_type}: {resp}")
|
||||
elif "error" in resp and "request_id" not in resp:
|
||||
debug(f"[MPV error] {resp}")
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
return None
|
||||
except Exception as exc:
|
||||
debug(f"Error sending command to MPV: {exc}")
|
||||
self.disconnect()
|
||||
return None
|
||||
|
||||
def disconnect(self) -> None:
|
||||
"""Disconnect from mpv IPC socket."""
|
||||
if self.sock:
|
||||
try:
|
||||
self.sock.close()
|
||||
except Exception:
|
||||
pass
|
||||
self.sock = None
|
||||
|
||||
def __del__(self) -> None:
|
||||
"""Cleanup on object destruction."""
|
||||
self.disconnect()
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
self.connect()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit."""
|
||||
self.disconnect()
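# A minimal sketch of talking to a running mpv through this client (assumes an mpv
# instance is already listening on the fixed pipe/socket above; the helper name is
# illustrative, not part of the module's API):
def _example_query_position() -> None:
    with MPVIPCClient(timeout=2.0) as client:
        # send_command accepts either a bare command list or a full request dict
        resp = client.send_command(["get_property", "playlist-pos"])
        if resp and resp.get("error") == "success":
            debug(f"[IPC example] playlist-pos = {resp.get('data')}")
        else:
            debug(f"[IPC example] get_property failed: {resp}")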
|
||||
|
||||
|
||||
def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = None,
|
||||
append: bool = True) -> bool:
|
||||
"""Send a file to be played in the existing MPV instance via IPC.
|
||||
|
||||
This attempts to send to an existing MPV instance. If it fails, the calling
|
||||
code should start a new MPV instance with the IPC pipe.
|
||||
|
||||
Args:
|
||||
file_url: URL or path to file to play
|
||||
title: Display title for the file
|
||||
headers: Optional HTTP headers (dict)
|
||||
append: If True, append to playlist; if False, replace
|
||||
|
||||
Returns:
|
||||
True if successfully sent to existing MPV, False if pipe unavailable.
|
||||
"""
|
||||
# Try to connect using the robust client
|
||||
client = get_mpv_client()
|
||||
if not client:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Command 0: Subscribe to log messages so MPV console errors surface in REPL
|
||||
_subscribe_log_messages(client)
|
||||
|
||||
# Command 1: Ensure our Lua helper is loaded for in-window controls
|
||||
_ensure_lua_script_loaded(client)
|
||||
|
||||
# Command 2: Set headers if provided
|
||||
if headers:
|
||||
header_str = ",".join([f"{k}: {v}" for k, v in headers.items()])
|
||||
cmd_headers = {
|
||||
"command": ["set_property", "http-header-fields", header_str],
|
||||
"request_id": 0
|
||||
}
|
||||
client.send_command(cmd_headers)
|
||||
|
||||
# Deduplicate: if target already exists in playlist, just play it
|
||||
normalized_new = _normalize_target(file_url)
|
||||
existing_index = None
|
||||
existing_title = None
|
||||
if normalized_new:
|
||||
playlist_resp = client.send_command({"command": ["get_property", "playlist"], "request_id": 98})
|
||||
if playlist_resp and playlist_resp.get("error") == "success":
|
||||
for idx, item in enumerate(playlist_resp.get("data", []) or []):
|
||||
for key in ("playlist-path", "filename"):
|
||||
norm_existing = _normalize_target(item.get(key)) if isinstance(item, dict) else None
|
||||
if norm_existing and norm_existing == normalized_new:
|
||||
existing_index = idx
|
||||
existing_title = item.get("title") if isinstance(item, dict) else None
|
||||
break
|
||||
if existing_index is not None:
|
||||
break
|
||||
|
||||
if existing_index is not None and append:
|
||||
play_cmd = {"command": ["playlist-play-index", existing_index], "request_id": 99}
|
||||
play_resp = client.send_command(play_cmd)
|
||||
if play_resp and play_resp.get("error") == "success":
|
||||
client.send_command({"command": ["set_property", "pause", False], "request_id": 100})
|
||||
safe_title = (title or existing_title or "").replace("\n", " ").replace("\r", " ").strip()
|
||||
if safe_title:
|
||||
client.send_command({"command": ["set_property", "force-media-title", safe_title], "request_id": 101})
|
||||
debug(f"Already in playlist, playing existing entry: {safe_title or file_url}")
|
||||
return True
|
||||
|
||||
# Command 2: Load file and inject title via memory:// wrapper so playlist shows friendly names immediately
|
||||
target = file_url
|
||||
load_mode = "append-play" if append else "replace"
|
||||
safe_title = (title or "").replace("\n", " ").replace("\r", " ").strip()
|
||||
target_to_send = target
|
||||
if safe_title and not str(target).startswith("memory://"):
|
||||
m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{target}"
|
||||
target_to_send = f"memory://{m3u_content}"
|
||||
|
||||
cmd_load = {
|
||||
"command": ["loadfile", target_to_send, load_mode],
|
||||
"request_id": 1
|
||||
}
|
||||
|
||||
resp = client.send_command(cmd_load)
|
||||
if not resp or resp.get('error') != 'success':
|
||||
debug(f"MPV loadfile failed: {resp}")
|
||||
return False
|
||||
|
||||
# Command 3: Set title (metadata for display) - still useful for window title
|
||||
if safe_title:
|
||||
cmd_title = {
|
||||
"command": ["set_property", "force-media-title", safe_title],
|
||||
"request_id": 2
|
||||
}
|
||||
client.send_command(cmd_title)
|
||||
|
||||
debug(f"Sent to existing MPV: {safe_title or title}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
debug(f"Error in send_to_mpv: {e}")
|
||||
return False
|
||||
finally:
|
||||
client.disconnect()
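# If send_to_mpv returns False (no reachable instance), the docstring above says the
# caller should start a fresh mpv bound to the same pipe. A hedged sketch of that
# fallback (assumes an "mpv" binary on PATH; the flags are standard mpv options):
def _example_spawn_mpv(file_url: str) -> None:
    import subprocess
    subprocess.Popen([
        "mpv",
        f"--input-ipc-server={get_ipc_pipe_path()}",
        "--force-window=yes",
        "--idle=once",
        file_url,
    ])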
|
||||
|
||||
|
||||
|
||||
def get_mpv_client(socket_path: Optional[str] = None) -> Optional[MPVIPCClient]:
|
||||
"""Get an MPV IPC client, attempting to connect.
|
||||
|
||||
Args:
|
||||
socket_path: Custom socket path (uses default if None)
|
||||
|
||||
Returns:
|
||||
Connected MPVIPCClient or None if connection fails.
|
||||
"""
|
||||
client = MPVIPCClient(socket_path=socket_path)
|
||||
if client.connect():
|
||||
return client
|
||||
return None
|
||||
|
||||
|
||||
def _subscribe_log_messages(client: MPVIPCClient) -> None:
|
||||
"""Ask MPV to emit log messages over IPC so we can surface console errors."""
|
||||
try:
|
||||
client.send_command({"command": ["request_log_messages", "warn"], "request_id": 11})
|
||||
except Exception as exc:
|
||||
debug(f"Failed to subscribe to MPV logs: {exc}")
|
||||
|
||||
|
||||
def _ensure_lua_script_loaded(client: MPVIPCClient) -> None:
|
||||
"""Load the bundled MPV Lua script to enable in-window controls.
|
||||
|
||||
Safe to call repeatedly; mpv will simply reload the script if already present.
|
||||
"""
|
||||
try:
|
||||
script_path = MPV_LUA_SCRIPT_PATH
|
||||
if not script_path or not os.path.exists(script_path):
|
||||
return
|
||||
resp = client.send_command({"command": ["load-script", script_path], "request_id": 12})
|
||||
if resp and resp.get("error") == "success":
|
||||
debug(f"Loaded MPV Lua script: {script_path}")
|
||||
else:
|
||||
debug(f"MPV Lua load response: {resp}")
|
||||
except Exception as exc:
|
||||
debug(f"Failed to load MPV Lua script: {exc}")
|
||||
|
||||
@@ -1,143 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Text-based progress bar utilities for consistent display across all downloads."""
|
||||
|
||||
import sys
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
|
||||
def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
|
||||
"""Create a text-based progress bar.
|
||||
|
||||
Args:
|
||||
current: Current progress (bytes/items)
|
||||
total: Total to complete (bytes/items)
|
||||
width: Width of the bar in characters (default 40)
|
||||
label: Optional label prefix
|
||||
|
||||
Returns:
|
||||
Formatted progress bar string
|
||||
|
||||
Examples:
|
||||
format_progress_bar(50, 100)
|
||||
# Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"
|
||||
|
||||
format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
|
||||
# Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
|
||||
"""
|
||||
if total <= 0:
|
||||
percentage = 0
|
||||
filled = 0
|
||||
else:
|
||||
percentage = (current / total) * 100
|
||||
filled = int((current / total) * width)
|
||||
|
||||
# Create bar: filled blocks + empty blocks
|
||||
bar = "█" * filled + "░" * (width - filled)
|
||||
|
||||
# Format percentage
|
||||
pct_str = f"{percentage:.1f}%"
|
||||
|
||||
# Build result
|
||||
if label:
|
||||
result = f"{label}: [{bar}] {pct_str}"
|
||||
else:
|
||||
result = f"[{bar}] {pct_str}"
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def format_size(bytes_val: float) -> str:
|
||||
"""Format bytes to human-readable size.
|
||||
|
||||
Examples:
|
||||
format_size(1024) -> "1.00 KB"
|
||||
format_size(1024*1024) -> "1.00 MB"
|
||||
format_size(1024*1024*1024) -> "1.00 GB"
|
||||
"""
|
||||
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
|
||||
if bytes_val < 1024:
|
||||
return f"{bytes_val:.2f} {unit}"
|
||||
bytes_val /= 1024
|
||||
return f"{bytes_val:.2f} PB"
|
||||
|
||||
|
||||
def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
|
||||
"""Format download status with progress bar and details.
|
||||
|
||||
Args:
|
||||
filename: Name of file being downloaded
|
||||
current: Current bytes downloaded
|
||||
total: Total file size
|
||||
speed: Download speed in bytes/sec
|
||||
|
||||
Returns:
|
||||
Formatted status line
|
||||
|
||||
Examples:
|
||||
format_download_status("movie.mkv", 512*1024*1024, 2*1024*1024*1024, 10*1024*1024)
|
||||
# Returns: "movie.mkv: [████████████░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0% (512.00 MB / 2.00 GB @ 10.00 MB/s)"
|
||||
"""
|
||||
bar = format_progress_bar(current, total, width=30)
|
||||
size_current = format_size(current)
|
||||
size_total = format_size(total)
|
||||
|
||||
if speed > 0:
|
||||
speed_str = f" @ {format_size(speed)}/s"
|
||||
else:
|
||||
speed_str = ""
|
||||
|
||||
return f"{bar} ({size_current} / {size_total}{speed_str})"
|
||||
|
||||
|
||||
def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
|
||||
"""Print download progress to stderr (doesn't interfere with piped output).
|
||||
|
||||
Args:
|
||||
filename: File being downloaded
|
||||
current: Current bytes
|
||||
total: Total bytes
|
||||
speed: Speed in bytes/sec
|
||||
end: Line ending (default "\r" for overwriting, use "\n" for final)
|
||||
"""
|
||||
status = format_download_status(filename, current, total, speed)
|
||||
debug(status, end=end, flush=True)
|
||||
|
||||
|
||||
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
|
||||
"""Print final progress line (100%) with time elapsed.
|
||||
|
||||
Args:
|
||||
filename: File that was downloaded
|
||||
total: Total size
|
||||
elapsed: Time elapsed in seconds
|
||||
"""
|
||||
bar = format_progress_bar(total, total, width=30)
|
||||
size_str = format_size(total)
|
||||
|
||||
# Format elapsed time
|
||||
if elapsed < 60:
|
||||
time_str = f"{elapsed:.1f}s"
|
||||
elif elapsed < 3600:
|
||||
minutes = elapsed / 60
|
||||
time_str = f"{minutes:.1f}m"
|
||||
else:
|
||||
hours = elapsed / 3600
|
||||
time_str = f"{hours:.2f}h"
|
||||
|
||||
debug(f"{bar} ({size_str}) - {time_str}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Demo
|
||||
import time
|
||||
|
||||
log("Progress Bar Demo:", file=sys.stderr)
|
||||
|
||||
# Demo 1: Simple progress
|
||||
for i in range(101):
|
||||
print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
|
||||
time.sleep(0.02)
|
||||
|
||||
print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
|
||||
log()
|
||||
@@ -1,818 +0,0 @@
|
||||
"""Provider interfaces for search and file upload functionality.
|
||||
|
||||
This module defines two distinct provider types:
|
||||
1. SearchProvider: For searching content (books, music, videos, games)
|
||||
2. FileProvider: For uploading files to hosting services
|
||||
|
||||
No legacy code or backwards compatibility - clean, single source of truth.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import asyncio
|
||||
import subprocess
|
||||
import shutil
|
||||
import mimetypes
|
||||
import traceback
|
||||
import requests
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
PLAYWRIGHT_AVAILABLE = True
|
||||
except ImportError:
|
||||
PLAYWRIGHT_AVAILABLE = False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SEARCH PROVIDERS
|
||||
# ============================================================================
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
"""Unified search result format across all search providers."""
|
||||
|
||||
origin: str # Provider name: "libgen", "soulseek", "debrid", "bandcamp", etc.
|
||||
title: str # Display title/filename
|
||||
path: str # Download target (URL, path, magnet, identifier)
|
||||
|
||||
detail: str = "" # Additional description
|
||||
annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"]
|
||||
media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet"
|
||||
size_bytes: Optional[int] = None
|
||||
tags: set[str] = field(default_factory=set) # Searchable tags
|
||||
columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns
|
||||
full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for pipeline processing."""
|
||||
return {
|
||||
"origin": self.origin,
|
||||
"title": self.title,
|
||||
"path": self.path,
|
||||
"detail": self.detail,
|
||||
"annotations": self.annotations,
|
||||
"media_kind": self.media_kind,
|
||||
"size_bytes": self.size_bytes,
|
||||
"tags": list(self.tags),
|
||||
"columns": list(self.columns),
|
||||
"full_metadata": self.full_metadata,
|
||||
}
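    # Illustrative construction (values are made up):
    #   SearchResult(origin="libgen", title="Some Book", path="http://example.org/get",
    #                media_kind="book", annotations=["5 MB"]).to_dict()
    # feeds the pipeline the plain-dict form shown above.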
|
||||
|
||||
|
||||
class SearchProvider(ABC):
|
||||
"""Base class for search providers."""
|
||||
|
||||
def __init__(self, config: Dict[str, Any] = None):
|
||||
self.config = config or {}
|
||||
self.name = self.__class__.__name__.lower()
|
||||
|
||||
@abstractmethod
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> List[SearchResult]:
|
||||
"""Search for items matching the query.
|
||||
|
||||
Args:
|
||||
query: Search query string
|
||||
limit: Maximum results to return
|
||||
filters: Optional filtering criteria
|
||||
**kwargs: Provider-specific arguments
|
||||
|
||||
Returns:
|
||||
List of SearchResult objects
|
||||
"""
|
||||
pass
|
||||
|
||||
def validate(self) -> bool:
|
||||
"""Check if provider is available and properly configured."""
|
||||
return True
|
||||
|
||||
|
||||
class Libgen(SearchProvider):
|
||||
"""Search provider for Library Genesis books."""
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> List[SearchResult]:
|
||||
filters = filters or {}
|
||||
|
||||
try:
|
||||
from helper.unified_book_downloader import UnifiedBookDownloader
|
||||
from helper.query_parser import parse_query, get_field, get_free_text
|
||||
|
||||
parsed = parse_query(query)
|
||||
isbn = get_field(parsed, 'isbn')
|
||||
author = get_field(parsed, 'author')
|
||||
title = get_field(parsed, 'title')
|
||||
free_text = get_free_text(parsed)
|
||||
|
||||
search_query = isbn or title or author or free_text or query
|
||||
|
||||
downloader = UnifiedBookDownloader(config=self.config)
|
||||
books = downloader.search_libgen(search_query, limit=limit)
|
||||
|
||||
results = []
|
||||
for idx, book in enumerate(books, 1):
|
||||
title = book.get("title", "Unknown")
|
||||
author = book.get("author", "Unknown")
|
||||
year = book.get("year", "Unknown")
|
||||
pages = book.get("pages") or book.get("pages_str") or ""
|
||||
extension = book.get("extension", "") or book.get("ext", "")
|
||||
filesize = book.get("filesize_str", "Unknown")
|
||||
isbn = book.get("isbn", "")
|
||||
mirror_url = book.get("mirror_url", "")
|
||||
|
||||
columns = [
|
||||
("Title", title),
|
||||
("Author", author),
|
||||
("Pages", str(pages)),
|
||||
("Ext", str(extension)),
|
||||
]
|
||||
|
||||
detail = f"By: {author}"
|
||||
if year and year != "Unknown":
|
||||
detail += f" ({year})"
|
||||
|
||||
annotations = [f"{filesize}"]
|
||||
if isbn:
|
||||
annotations.append(f"ISBN: {isbn}")
|
||||
|
||||
results.append(SearchResult(
|
||||
origin="libgen",
|
||||
title=title,
|
||||
path=mirror_url or f"libgen:{book.get('id', '')}",
|
||||
detail=detail,
|
||||
annotations=annotations,
|
||||
media_kind="book",
|
||||
columns=columns,
|
||||
full_metadata={
|
||||
"number": idx,
|
||||
"author": author,
|
||||
"year": year,
|
||||
"isbn": isbn,
|
||||
"filesize": filesize,
|
||||
"pages": pages,
|
||||
"extension": extension,
|
||||
"book_id": book.get("book_id", ""),
|
||||
"md5": book.get("md5", ""),
|
||||
},
|
||||
))
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
log(f"[libgen] Search error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def validate(self) -> bool:
|
||||
try:
|
||||
from helper.unified_book_downloader import UnifiedBookDownloader
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
class Soulseek(SearchProvider):
|
||||
"""Search provider for Soulseek P2P network."""
|
||||
|
||||
MUSIC_EXTENSIONS = {
|
||||
'.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus',
|
||||
'.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf',
|
||||
'.dff', '.wv', '.tta', '.tak', '.ac3', '.dts'
|
||||
}
|
||||
|
||||
USERNAME = "asjhkjljhkjfdsd334"
|
||||
PASSWORD = "khhhg"
|
||||
DOWNLOAD_DIR = "./downloads"
|
||||
MAX_WAIT_TRANSFER = 1200
|
||||
|
||||
async def perform_search(
|
||||
self,
|
||||
query: str,
|
||||
timeout: float = 9.0,
|
||||
limit: int = 50
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Perform async Soulseek search."""
|
||||
import os
|
||||
from aioslsk.client import SoulSeekClient
|
||||
from aioslsk.settings import Settings, CredentialsSettings
|
||||
|
||||
os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
|
||||
|
||||
settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD))
|
||||
client = SoulSeekClient(settings)
|
||||
|
||||
try:
|
||||
await client.start()
|
||||
await client.login()
|
||||
except Exception as e:
|
||||
log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
try:
|
||||
search_request = await client.searches.search(query)
|
||||
await self._collect_results(client, search_request, timeout=timeout)
|
||||
return self._flatten_results(search_request)[:limit]
|
||||
except Exception as e:
|
||||
log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr)
|
||||
return []
|
||||
finally:
|
||||
try:
|
||||
await client.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _flatten_results(self, search_request) -> List[dict]:
|
||||
flat = []
|
||||
for result in search_request.results:
|
||||
username = getattr(result, "username", "?")
|
||||
|
||||
for file_data in getattr(result, "shared_items", []):
|
||||
flat.append({
|
||||
"file": file_data,
|
||||
"username": username,
|
||||
"filename": getattr(file_data, "filename", "?"),
|
||||
"size": getattr(file_data, "filesize", 0),
|
||||
})
|
||||
|
||||
for file_data in getattr(result, "locked_results", []):
|
||||
flat.append({
|
||||
"file": file_data,
|
||||
"username": username,
|
||||
"filename": getattr(file_data, "filename", "?"),
|
||||
"size": getattr(file_data, "filesize", 0),
|
||||
})
|
||||
|
||||
return flat
|
||||
|
||||
async def _collect_results(self, client, search_request, timeout: float = 75.0) -> None:
|
||||
end = time.time() + timeout
|
||||
last_count = 0
|
||||
while time.time() < end:
|
||||
current_count = len(search_request.results)
|
||||
if current_count > last_count:
|
||||
debug(f"[soulseek] Got {current_count} result(s)...")
|
||||
last_count = current_count
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> List[SearchResult]:
|
||||
filters = filters or {}
|
||||
|
||||
try:
|
||||
flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit))
|
||||
|
||||
if not flat_results:
|
||||
return []
|
||||
|
||||
# Filter to music files only
|
||||
music_results = []
|
||||
for item in flat_results:
|
||||
filename = item['filename']
|
||||
ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
|
||||
if ext in self.MUSIC_EXTENSIONS:
|
||||
music_results.append(item)
|
||||
|
||||
if not music_results:
|
||||
return []
|
||||
|
||||
# Extract metadata
|
||||
enriched_results = []
|
||||
for item in music_results:
|
||||
filename = item['filename']
|
||||
ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
|
||||
|
||||
# Get display filename
|
||||
display_name = filename.split('\\')[-1] if '\\' in filename else filename.split('/')[-1] if '/' in filename else filename
|
||||
|
||||
# Extract path hierarchy
|
||||
path_parts = filename.replace('\\', '/').split('/')
|
||||
artist = path_parts[-3] if len(path_parts) >= 3 else ''
|
||||
album = path_parts[-2] if len(path_parts) >= 2 else ''
|
||||
|
||||
# Extract track number and title
|
||||
base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name
|
||||
track_num = ''
|
||||
title = base_name
|
||||
filename_artist = ''
|
||||
|
||||
match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name)
|
||||
if match:
|
||||
track_num = match.group(1)
|
||||
rest = match.group(2)
|
||||
if ' - ' in rest:
|
||||
filename_artist, title = rest.split(' - ', 1)
|
||||
else:
|
||||
title = rest
|
||||
|
||||
if filename_artist:
|
||||
artist = filename_artist
|
||||
|
||||
enriched_results.append({
|
||||
**item,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'title': title,
|
||||
'track_num': track_num,
|
||||
'ext': ext
|
||||
})
|
||||
|
||||
# Apply filters
|
||||
if filters:
|
||||
artist_filter = filters.get('artist', '').lower() if filters.get('artist') else ''
|
||||
album_filter = filters.get('album', '').lower() if filters.get('album') else ''
|
||||
track_filter = filters.get('track', '').lower() if filters.get('track') else ''
|
||||
|
||||
if artist_filter or album_filter or track_filter:
|
||||
filtered = []
|
||||
for item in enriched_results:
|
||||
if artist_filter and artist_filter not in item['artist'].lower():
|
||||
continue
|
||||
if album_filter and album_filter not in item['album'].lower():
|
||||
continue
|
||||
if track_filter and track_filter not in item['title'].lower():
|
||||
continue
|
||||
filtered.append(item)
|
||||
enriched_results = filtered
|
||||
|
||||
# Sort: .flac first, then by size
|
||||
enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size']))
|
||||
|
||||
# Convert to SearchResult
|
||||
results = []
|
||||
for idx, item in enumerate(enriched_results, 1):
|
||||
artist_display = item['artist'] if item['artist'] else "(no artist)"
|
||||
album_display = item['album'] if item['album'] else "(no album)"
|
||||
size_mb = int(item['size'] / 1024 / 1024)
|
||||
|
||||
columns = [
|
||||
("Track", item['track_num'] or "?"),
|
||||
("Title", item['title'][:40]),
|
||||
("Artist", artist_display[:32]),
|
||||
("Album", album_display[:32]),
|
||||
("Size", f"{size_mb} MB"),
|
||||
]
|
||||
|
||||
results.append(SearchResult(
|
||||
origin="soulseek",
|
||||
title=item['title'],
|
||||
path=item['filename'],
|
||||
detail=f"{artist_display} - {album_display}",
|
||||
annotations=[f"{size_mb} MB", item['ext'].lstrip('.').upper()],
|
||||
media_kind="audio",
|
||||
size_bytes=item['size'],
|
||||
columns=columns,
|
||||
full_metadata={
|
||||
"username": item['username'],
|
||||
"filename": item['filename'],
|
||||
"artist": item['artist'],
|
||||
"album": item['album'],
|
||||
"track_num": item['track_num'],
|
||||
"ext": item['ext'],
|
||||
},
|
||||
))
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
log(f"[soulseek] Search error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def validate(self) -> bool:
|
||||
try:
|
||||
from aioslsk.client import SoulSeekClient
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
|
||||
class Bandcamp(SearchProvider):
|
||||
"""Search provider for Bandcamp."""
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> List[SearchResult]:
|
||||
if not PLAYWRIGHT_AVAILABLE:
|
||||
log("[bandcamp] Playwright not available. Install with: pip install playwright", file=sys.stderr)
|
||||
return []
|
||||
|
||||
results = []
|
||||
try:
|
||||
with sync_playwright() as p:
|
||||
browser = p.chromium.launch(headless=True)
|
||||
page = browser.new_page()
|
||||
|
||||
# Parse query for artist: prefix
|
||||
if query.strip().lower().startswith("artist:"):
|
||||
artist_name = query[7:].strip().strip('"')
|
||||
search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b"
|
||||
else:
|
||||
search_url = f"https://bandcamp.com/search?q={query}&item_type=a"
|
||||
|
||||
results = self._scrape_url(page, search_url, limit)
|
||||
|
||||
browser.close()
|
||||
except Exception as e:
|
||||
log(f"[bandcamp] Search error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
return results
|
||||
|
||||
def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]:
|
||||
debug(f"[bandcamp] Scraping: {url}")
|
||||
|
||||
page.goto(url)
|
||||
page.wait_for_load_state("domcontentloaded")
|
||||
|
||||
results = []
|
||||
|
||||
# Check for search results
|
||||
search_results = page.query_selector_all(".searchresult")
|
||||
if search_results:
|
||||
for item in search_results[:limit]:
|
||||
try:
|
||||
heading = item.query_selector(".heading")
|
||||
if not heading:
|
||||
continue
|
||||
|
||||
link = heading.query_selector("a")
|
||||
if not link:
|
||||
continue
|
||||
|
||||
title = link.inner_text().strip()
|
||||
target_url = link.get_attribute("href")
|
||||
|
||||
subhead = item.query_selector(".subhead")
|
||||
artist = subhead.inner_text().strip() if subhead else "Unknown"
|
||||
|
||||
itemtype = item.query_selector(".itemtype")
|
||||
media_type = itemtype.inner_text().strip() if itemtype else "album"
|
||||
|
||||
results.append(SearchResult(
|
||||
origin="bandcamp",
|
||||
title=title,
|
||||
path=target_url,
|
||||
detail=f"By: {artist}",
|
||||
annotations=[media_type],
|
||||
media_kind="audio",
|
||||
columns=[
|
||||
("Name", title),
|
||||
("Artist", artist),
|
||||
("Type", media_type),
|
||||
],
|
||||
full_metadata={
|
||||
"artist": artist,
|
||||
"type": media_type,
|
||||
},
|
||||
))
|
||||
except Exception as e:
|
||||
debug(f"[bandcamp] Error parsing result: {e}")
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
def validate(self) -> bool:
|
||||
return PLAYWRIGHT_AVAILABLE
|
||||
|
||||
|
||||
class YouTube(SearchProvider):
|
||||
"""Search provider for YouTube using yt-dlp."""
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 10,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs
|
||||
) -> List[SearchResult]:
|
||||
ytdlp_path = shutil.which("yt-dlp")
|
||||
if not ytdlp_path:
|
||||
log("[youtube] yt-dlp not found in PATH", file=sys.stderr)
|
||||
return []
|
||||
|
||||
search_query = f"ytsearch{limit}:{query}"
|
||||
|
||||
cmd = [
|
||||
ytdlp_path,
|
||||
"--dump-json",
|
||||
"--flat-playlist",
|
||||
"--no-warnings",
|
||||
search_query
|
||||
]
|
||||
|
||||
try:
|
||||
process = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
errors="replace"
|
||||
)
|
||||
|
||||
if process.returncode != 0:
|
||||
log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
results = []
|
||||
for line in process.stdout.splitlines():
|
||||
if not line.strip():
|
||||
continue
|
||||
try:
|
||||
video_data = json.loads(line)
|
||||
title = video_data.get("title", "Unknown")
|
||||
video_id = video_data.get("id", "")
|
||||
url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}"
|
||||
uploader = video_data.get("uploader", "Unknown")
|
||||
duration = video_data.get("duration", 0)
|
||||
view_count = video_data.get("view_count", 0)
|
||||
|
||||
duration_str = f"{int(duration//60)}:{int(duration%60):02d}" if duration else ""
|
||||
views_str = f"{view_count:,}" if view_count else ""
|
||||
|
||||
results.append(SearchResult(
|
||||
origin="youtube",
|
||||
title=title,
|
||||
path=url,
|
||||
detail=f"By: {uploader}",
|
||||
annotations=[duration_str, f"{views_str} views"],
|
||||
media_kind="video",
|
||||
columns=[
|
||||
("Title", title),
|
||||
("Uploader", uploader),
|
||||
("Duration", duration_str),
|
||||
("Views", views_str),
|
||||
],
|
||||
full_metadata={
|
||||
"video_id": video_id,
|
||||
"uploader": uploader,
|
||||
"duration": duration,
|
||||
"view_count": view_count,
|
||||
},
|
||||
))
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
log(f"[youtube] Error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def validate(self) -> bool:
|
||||
return shutil.which("yt-dlp") is not None
|
||||
|
||||
def pipe(self, path: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]:
|
||||
"""Return the playable URL for MPV (just the path for YouTube)."""
|
||||
return path
|
||||
|
||||
|
||||
# Search provider registry
|
||||
_SEARCH_PROVIDERS = {
|
||||
"libgen": Libgen,
|
||||
"soulseek": Soulseek,
|
||||
"bandcamp": Bandcamp,
|
||||
"youtube": YouTube,
|
||||
}
|
||||
|
||||
|
||||
def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
|
||||
"""Get a search provider by name."""
|
||||
provider_class = _SEARCH_PROVIDERS.get(name.lower())
|
||||
|
||||
if provider_class is None:
|
||||
log(f"[provider] Unknown search provider: {name}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
provider = provider_class(config)
|
||||
if not provider.validate():
|
||||
log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
|
||||
return None
|
||||
return provider
|
||||
except Exception as e:
|
||||
log(f"[provider] Error initializing '{name}': {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
|
||||
"""List all search providers and their availability."""
|
||||
availability = {}
|
||||
for name, provider_class in _SEARCH_PROVIDERS.items():
|
||||
try:
|
||||
provider = provider_class(config)
|
||||
availability[name] = provider.validate()
|
||||
except Exception:
|
||||
availability[name] = False
|
||||
return availability
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FILE PROVIDERS
|
||||
# ============================================================================
|
||||
|
||||
class FileProvider(ABC):
|
||||
"""Base class for file upload providers."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
self.config = config or {}
|
||||
self.name = self.__class__.__name__.lower()
|
||||
|
||||
@abstractmethod
|
||||
def upload(self, file_path: str, **kwargs: Any) -> str:
|
||||
"""Upload a file and return the URL."""
|
||||
pass
|
||||
|
||||
def validate(self) -> bool:
|
||||
"""Check if provider is available/configured."""
|
||||
return True
|
||||
|
||||
|
||||
class ZeroXZero(FileProvider):
|
||||
"""File provider for 0x0.st."""
|
||||
|
||||
def upload(self, file_path: str, **kwargs: Any) -> str:
|
||||
from helper.http_client import HTTPClient
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
try:
|
||||
headers = {"User-Agent": "Medeia-Macina/1.0"}
|
||||
with HTTPClient(headers=headers) as client:
|
||||
with open(file_path, 'rb') as f:
|
||||
response = client.post(
|
||||
"https://0x0.st",
|
||||
files={"file": f}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.text.strip()
|
||||
else:
|
||||
raise Exception(f"Upload failed: {response.status_code} - {response.text}")
|
||||
|
||||
except Exception as e:
|
||||
log(f"[0x0] Upload error: {e}", file=sys.stderr)
|
||||
raise
|
||||
|
||||
def validate(self) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
class Matrix(FileProvider):
|
||||
"""File provider for Matrix (Element) chat rooms."""
|
||||
|
||||
def validate(self) -> bool:
|
||||
if not self.config:
|
||||
return False
|
||||
matrix_conf = self.config.get('storage', {}).get('matrix', {})
|
||||
return bool(
|
||||
matrix_conf.get('homeserver') and
|
||||
matrix_conf.get('room_id') and
|
||||
(matrix_conf.get('access_token') or matrix_conf.get('password'))
|
||||
)
|
||||
|
||||
def upload(self, file_path: str, **kwargs: Any) -> str:
|
||||
from pathlib import Path
|
||||
|
||||
path = Path(file_path)
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
matrix_conf = self.config.get('storage', {}).get('matrix', {})
|
||||
homeserver = matrix_conf.get('homeserver')
|
||||
access_token = matrix_conf.get('access_token')
|
||||
room_id = matrix_conf.get('room_id')
|
||||
|
||||
if not homeserver.startswith('http'):
|
||||
homeserver = f"https://{homeserver}"
|
||||
|
||||
# Upload media
|
||||
upload_url = f"{homeserver}/_matrix/media/v3/upload"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"Content-Type": "application/octet-stream"
|
||||
}
|
||||
|
||||
mime_type, _ = mimetypes.guess_type(path)
|
||||
if mime_type:
|
||||
headers["Content-Type"] = mime_type
|
||||
|
||||
filename = path.name
|
||||
|
||||
with open(path, 'rb') as f:
|
||||
resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})
|
||||
|
||||
if resp.status_code != 200:
|
||||
raise Exception(f"Matrix upload failed: {resp.text}")
|
||||
|
||||
content_uri = resp.json().get('content_uri')
|
||||
if not content_uri:
|
||||
raise Exception("No content_uri returned")
|
||||
|
||||
# Send message
|
||||
send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
|
||||
|
||||
# Determine message type
|
||||
msgtype = "m.file"
|
||||
ext = path.suffix.lower()
|
||||
|
||||
AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'}
|
||||
VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}
|
||||
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}
|
||||
|
||||
if ext in AUDIO_EXTS:
|
||||
msgtype = "m.audio"
|
||||
elif ext in VIDEO_EXTS:
|
||||
msgtype = "m.video"
|
||||
elif ext in IMAGE_EXTS:
|
||||
msgtype = "m.image"
|
||||
|
||||
info = {
|
||||
"mimetype": mime_type,
|
||||
"size": path.stat().st_size
|
||||
}
|
||||
|
||||
payload = {
|
||||
"msgtype": msgtype,
|
||||
"body": filename,
|
||||
"url": content_uri,
|
||||
"info": info
|
||||
}
|
||||
|
||||
resp = requests.post(send_url, headers=headers, json=payload)
|
||||
if resp.status_code != 200:
|
||||
raise Exception(f"Matrix send message failed: {resp.text}")
|
||||
|
||||
event_id = resp.json().get('event_id')
|
||||
return f"https://matrix.to/#/{room_id}/{event_id}"
|
||||
|
||||
|
||||
# File provider registry
|
||||
_FILE_PROVIDERS = {
|
||||
"0x0": ZeroXZero,
|
||||
"matrix": Matrix,
|
||||
}
|
||||
|
||||
|
||||
def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
|
||||
"""Get a file provider by name."""
|
||||
provider_class = _FILE_PROVIDERS.get(name.lower())
|
||||
|
||||
if provider_class is None:
|
||||
log(f"[provider] Unknown file provider: {name}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
provider = provider_class(config)
|
||||
if not provider.validate():
|
||||
log(f"[provider] File provider '{name}' is not available", file=sys.stderr)
|
||||
return None
|
||||
return provider
|
||||
except Exception as e:
|
||||
log(f"[provider] Error initializing file provider '{name}': {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
|
||||
"""List all file providers and their availability."""
|
||||
availability = {}
|
||||
for name, provider_class in _FILE_PROVIDERS.items():
|
||||
try:
|
||||
provider = provider_class(config)
|
||||
availability[name] = provider.validate()
|
||||
except Exception:
|
||||
availability[name] = False
|
||||
return availability
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
"""Dynamic query parser for filtering and field extraction.
|
||||
|
||||
Supports query syntax like:
|
||||
- isbn:0557677203
|
||||
- author:"Albert Pike"
|
||||
- title:"Morals and Dogma"
|
||||
- year:2010
|
||||
- isbn:0557677203 author:"Albert Pike"
|
||||
- Mixed with free text: "Morals" isbn:0557677203
|
||||
|
||||
This allows flexible query strings that can be parsed by any search provider
|
||||
to extract specific fields for filtering and searching.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Tuple, Optional, Any
|
||||
import re
|
||||
|
||||
|
||||
def parse_query(query: str) -> Dict[str, Any]:
|
||||
"""Parse a query string into field:value pairs and free text.
|
||||
|
||||
Args:
|
||||
query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'
|
||||
|
||||
Returns:
|
||||
Dictionary with:
|
||||
- 'fields': Dict[field_name, field_value] for structured fields
|
||||
- 'text': str with remaining free text
|
||||
- 'raw': str original query
|
||||
"""
|
||||
result = {
|
||||
'fields': {},
|
||||
'text': '',
|
||||
'raw': query,
|
||||
}
|
||||
|
||||
if not query or not query.strip():
|
||||
return result
|
||||
|
||||
query = query.strip()
|
||||
remaining_parts = []
|
||||
|
||||
# Pattern to match: field:value or field:"quoted value"
|
||||
# Matches: word: followed by either quoted string or unquoted word
|
||||
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
|
||||
|
||||
pos = 0
|
||||
for match in re.finditer(pattern, query):
|
||||
# Add any text before this match
|
||||
if match.start() > pos:
|
||||
before_text = query[pos:match.start()].strip()
|
||||
if before_text:
|
||||
remaining_parts.append(before_text)
|
||||
|
||||
field_name = match.group(1).lower()
|
||||
field_value = match.group(2) if match.group(2) is not None else match.group(3)
|
||||
|
||||
result['fields'][field_name] = field_value
|
||||
pos = match.end()
|
||||
|
||||
# Add any remaining text after last match
|
||||
if pos < len(query):
|
||||
remaining_text = query[pos:].strip()
|
||||
if remaining_text:
|
||||
remaining_parts.append(remaining_text)
|
||||
|
||||
result['text'] = ' '.join(remaining_parts)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
|
||||
"""Get a field value from parsed query, with optional default.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
field_name: Field name to look up (case-insensitive)
|
||||
default: Default value if field not found
|
||||
|
||||
Returns:
|
||||
Field value or default
|
||||
"""
|
||||
return parsed_query.get('fields', {}).get(field_name.lower(), default)
|
||||
|
||||
|
||||
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
|
||||
"""Check if a field exists in parsed query.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
field_name: Field name to check (case-insensitive)
|
||||
|
||||
Returns:
|
||||
True if field exists
|
||||
"""
|
||||
return field_name.lower() in parsed_query.get('fields', {})
|
||||
|
||||
|
||||
def get_free_text(parsed_query: Dict[str, Any]) -> str:
|
||||
"""Get the free text portion of a parsed query.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
|
||||
Returns:
|
||||
Free text or empty string
|
||||
"""
|
||||
return parsed_query.get('text', '')
|
||||
|
||||
|
||||
def build_query_for_provider(
|
||||
parsed_query: Dict[str, Any],
|
||||
provider: str,
|
||||
extraction_map: Optional[Dict[str, str]] = None
|
||||
) -> Tuple[str, Dict[str, str]]:
|
||||
"""Build a search query and filters dict for a specific provider.
|
||||
|
||||
Different providers have different search syntax. This function
|
||||
extracts the appropriate fields for each provider.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
provider: Provider name ('libgen', 'openlibrary', 'soulseek')
|
||||
extraction_map: Optional mapping of field names to provider-specific names
|
||||
e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}
|
||||
|
||||
Returns:
|
||||
Tuple of (search_query: str, extracted_fields: Dict[field, value])
|
||||
"""
|
||||
extraction_map = extraction_map or {}
|
||||
extracted = {}
|
||||
free_text = get_free_text(parsed_query)
|
||||
|
||||
# Extract fields based on map
|
||||
for field_name, provider_key in extraction_map.items():
|
||||
if has_field(parsed_query, field_name):
|
||||
extracted[provider_key] = get_field(parsed_query, field_name)
|
||||
|
||||
# If provider-specific extraction needed, providers can implement it
|
||||
# For now, return the free text as query
|
||||
return free_text, extracted
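# Hypothetical example (not in the original source): with a Libgen-style map,
#   parsed = parse_query('Morals isbn:0557677203')
#   build_query_for_provider(parsed, 'libgen', {'isbn': 'isbn'})
# would return ('Morals', {'isbn': '0557677203'}).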
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Test cases
|
||||
test_queries = [
|
||||
'isbn:0557677203',
|
||||
'isbn:0557677203 author:"Albert Pike"',
|
||||
'Morals and Dogma isbn:0557677203',
|
||||
'title:"Morals and Dogma" author:"Albert Pike" year:2010',
|
||||
'search term without fields',
|
||||
'author:"John Smith" title:"A Book"',
|
||||
]
|
||||
|
||||
for query in test_queries:
|
||||
print(f"\nQuery: {query}")
|
||||
parsed = parse_query(query)
|
||||
print(f" Fields: {parsed['fields']}")
|
||||
print(f" Text: {parsed['text']}")
|
||||
@@ -1,523 +0,0 @@
|
||||
"""Remote Storage Server - REST API for file management on mobile devices.
|
||||
|
||||
This server runs on a mobile device (Android with Termux, iOS with iSH, etc.)
|
||||
and exposes the local library database as a REST API. Your PC connects to this
|
||||
server and uses it as a remote storage backend through the RemoteStorageBackend.
|
||||
|
||||
## INSTALLATION
|
||||
|
||||
### On Android (Termux):
|
||||
1. Install Termux from Play Store: https://play.google.com/store/apps/details?id=com.termux
|
||||
2. In Termux:
|
||||
$ apt update && apt install python
|
||||
$ pip install flask flask-cors
|
||||
3. Copy this file to your device
|
||||
4. Run it (with optional API key):
|
||||
$ python remote_storage_server.py --storage-path /path/to/storage --port 5000
|
||||
$ python remote_storage_server.py --storage-path /path/to/storage --api-key mysecretkey
|
||||
5. Server prints connection info automatically (IP, port, API key)
|
||||
|
||||
### On PC:
|
||||
1. Install requests: pip install requests
|
||||
2. Add to config.json:
|
||||
{
|
||||
"remote_storages": [
|
||||
{
|
||||
"name": "phone",
|
||||
"url": "http://192.168.1.100:5000",
|
||||
"api_key": "mysecretkey",
|
||||
"timeout": 30
|
||||
}
|
||||
]
|
||||
}
|
||||
Note: API key is optional. Works on WiFi or cellular data.
|
||||
|
||||
## USAGE
|
||||
|
||||
After setup, all cmdlets work with the phone:
|
||||
$ search-file zohar -store phone
|
||||
$ @1-3 | add-relationship -king @4 -store phone
|
||||
$ @1 | get-relationship -store phone
|
||||
|
||||
The server exposes REST endpoints that RemoteStorageBackend uses internally.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
from functools import wraps
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from helper.logger import log
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION
|
||||
# ============================================================================
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='[%(asctime)s] %(levelname)s: %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
STORAGE_PATH: Optional[Path] = None
|
||||
API_KEY: Optional[str] = None # API key for authentication (None = no auth required)
|
||||
|
||||
# Try importing Flask - will be used in main() only
|
||||
try:
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
HAS_FLASK = True
|
||||
except ImportError:
|
||||
HAS_FLASK = False
|
||||
|
||||
# ============================================================================
|
||||
# UTILITY FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
def get_local_ip() -> Optional[str]:
|
||||
"""Get the local IP address that would be used for external connections."""
|
||||
import socket
|
||||
try:
|
||||
# Create a socket to determine which interface would be used
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect(("8.8.8.8", 80)) # Google DNS
|
||||
ip = s.getsockname()[0]
|
||||
s.close()
|
||||
return ip
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# ============================================================================
|
||||
# FLASK APP FACTORY
|
||||
# ============================================================================
|
||||
|
||||
def create_app():
|
||||
"""Create and configure Flask app with all routes."""
|
||||
if not HAS_FLASK:
|
||||
raise ImportError("Flask not installed. Install with: pip install flask flask-cors")
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
# ========================================================================
|
||||
# HELPER DECORATORS
|
||||
# ========================================================================
|
||||
|
||||
def require_auth():
|
||||
"""Decorator to check API key authentication if configured."""
|
||||
def decorator(f):
|
||||
@wraps(f)
|
||||
def decorated_function(*args, **kwargs):
|
||||
if API_KEY:
|
||||
# Get API key from header or query parameter
|
||||
provided_key = request.headers.get('X-API-Key') or request.args.get('api_key')
|
||||
if not provided_key or provided_key != API_KEY:
|
||||
return jsonify({"error": "Unauthorized. Invalid or missing API key."}), 401
|
||||
return f(*args, **kwargs)
|
||||
return decorated_function
|
||||
return decorator
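# For reference (sketch, not original code): when API_KEY is set, clients may
# authenticate either way accepted by the decorator above, e.g.
#   curl -H "X-API-Key: mysecretkey" http://<host>:5000/health
#   curl "http://<host>:5000/health?api_key=mysecretkey"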
|
||||
|
||||
def require_storage():
|
||||
"""Decorator to ensure storage path is configured."""
|
||||
def decorator(f):
|
||||
@wraps(f)
|
||||
def decorated_function(*args, **kwargs):
|
||||
if not STORAGE_PATH:
|
||||
return jsonify({"error": "Storage path not configured"}), 500
|
||||
return f(*args, **kwargs)
|
||||
return decorated_function
|
||||
return decorator
|
||||
|
||||
# ========================================================================
|
||||
# HEALTH CHECK
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/health', methods=['GET'])
|
||||
@require_auth()
|
||||
def health():
|
||||
"""Check server health and storage availability."""
|
||||
status = {
|
||||
"status": "ok",
|
||||
"storage_configured": STORAGE_PATH is not None,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
if STORAGE_PATH:
|
||||
status["storage_path"] = str(STORAGE_PATH)
|
||||
status["storage_exists"] = STORAGE_PATH.exists()
|
||||
try:
|
||||
from helper.folder_store import FolderDB
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
status["database_accessible"] = True
|
||||
except Exception as e:
|
||||
status["database_accessible"] = False
|
||||
status["database_error"] = str(e)
|
||||
|
||||
return jsonify(status), 200
|
||||
|
||||
# ========================================================================
|
||||
# FILE OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/files/search', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def search_files():
|
||||
"""Search for files by name or tag."""
|
||||
from helper.folder_store import LocalLibrarySearchOptimizer
|
||||
|
||||
query = request.args.get('q', '')
|
||||
limit = request.args.get('limit', 100, type=int)
|
||||
|
||||
if not query:
|
||||
return jsonify({"error": "Search query required"}), 400
|
||||
|
||||
try:
|
||||
with LocalLibrarySearchOptimizer(STORAGE_PATH) as db:
|
||||
results = db.search_by_name(query, limit)
|
||||
tag_results = db.search_by_tag(query, limit)
|
||||
all_results = {r['hash']: r for r in (results + tag_results)}
|
||||
|
||||
return jsonify({
|
||||
"query": query,
|
||||
"count": len(all_results),
|
||||
"files": list(all_results.values())
|
||||
}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Search error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Search failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/files/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_file_metadata(file_hash: str):
|
||||
"""Get metadata for a specific file by hash."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
|
||||
if not file_path or not file_path.exists():
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
metadata = db.get_metadata(file_path)
|
||||
tags = db.get_tags(file_path)
|
||||
|
||||
return jsonify({
|
||||
"hash": file_hash,
|
||||
"path": str(file_path),
|
||||
"size": file_path.stat().st_size,
|
||||
"metadata": metadata,
|
||||
"tags": tags
|
||||
}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get metadata error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed to get metadata: {str(e)}"}), 500
|
||||
|
||||
@app.route('/files/index', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def index_file():
|
||||
"""Index a new file in the storage."""
|
||||
from helper.folder_store import FolderDB
|
||||
from helper.utils import sha256_file
|
||||
|
||||
data = request.get_json() or {}
|
||||
file_path_str = data.get('path')
|
||||
tags = data.get('tags', [])
|
||||
url = data.get('url', [])
|
||||
|
||||
if not file_path_str:
|
||||
return jsonify({"error": "File path required"}), 400
|
||||
|
||||
try:
|
||||
file_path = Path(file_path_str)
|
||||
|
||||
if not file_path.exists():
|
||||
return jsonify({"error": "File does not exist"}), 404
|
||||
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
db.get_or_create_file_entry(file_path)
|
||||
|
||||
if tags:
|
||||
db.add_tags(file_path, tags)
|
||||
|
||||
if url:
|
||||
db.add_url(file_path, url)
|
||||
|
||||
file_hash = sha256_file(file_path)
|
||||
|
||||
return jsonify({
|
||||
"hash": file_hash,
|
||||
"path": str(file_path),
|
||||
"tags_added": len(tags),
|
||||
"url_added": len(url)
|
||||
}), 201
|
||||
except Exception as e:
|
||||
logger.error(f"Index error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Indexing failed: {str(e)}"}), 500
|
||||
|
||||
# ========================================================================
|
||||
# TAG OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/tags/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_tags(file_hash: str):
|
||||
"""Get tags for a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
tags = db.get_tags(file_path)
|
||||
return jsonify({"hash": file_hash, "tags": tags}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get tags error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/tags/<file_hash>', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def add_tags(file_hash: str):
|
||||
"""Add tags to a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
tags = data.get('tags', [])
|
||||
mode = data.get('mode', 'add')
|
||||
|
||||
if not tags:
|
||||
return jsonify({"error": "Tags required"}), 400
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
if mode == 'replace':
|
||||
db.remove_tags(file_path, db.get_tags(file_path))
|
||||
|
||||
db.add_tags(file_path, tags)
|
||||
return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Add tags error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/tags/<file_hash>', methods=['DELETE'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def remove_tags(file_hash: str):
|
||||
"""Remove tags from a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
tags_str = request.args.get('tags', '')
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
if tags_str:
|
||||
tags_to_remove = [t.strip() for t in tags_str.split(',')]
|
||||
else:
|
||||
tags_to_remove = db.get_tags(file_path)
|
||||
|
||||
db.remove_tags(file_path, tags_to_remove)
|
||||
return jsonify({"hash": file_hash, "tags_removed": len(tags_to_remove)}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Remove tags error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
# ========================================================================
|
||||
# RELATIONSHIP OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/relationships/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_relationships(file_hash: str):
|
||||
"""Get relationships for a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
metadata = db.get_metadata(file_path)
|
||||
relationships = metadata.get('relationships', {}) if metadata else {}
|
||||
return jsonify({"hash": file_hash, "relationships": relationships}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get relationships error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/relationships', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def set_relationship():
|
||||
"""Set a relationship between two files."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
from_hash = data.get('from_hash')
|
||||
to_hash = data.get('to_hash')
|
||||
rel_type = data.get('type', 'alt')
|
||||
|
||||
if not from_hash or not to_hash:
|
||||
return jsonify({"error": "from_hash and to_hash required"}), 400
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
from_path = db.search_hash(from_hash)
|
||||
to_path = db.search_hash(to_hash)
|
||||
|
||||
if not from_path or not to_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
db.set_relationship(from_path, to_path, rel_type)
|
||||
return jsonify({"from_hash": from_hash, "to_hash": to_hash, "type": rel_type}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Set relationship error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
# ========================================================================
|
||||
# URL OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/url/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_url(file_hash: str):
|
||||
"""Get known url for a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
metadata = db.get_metadata(file_path)
|
||||
url = metadata.get('url', []) if metadata else []
|
||||
return jsonify({"hash": file_hash, "url": url}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get url error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/url/<file_hash>', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def add_url(file_hash: str):
|
||||
"""Add url to a file."""
|
||||
from helper.folder_store import FolderDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
url = data.get('url', [])
|
||||
|
||||
if not url:
|
||||
return jsonify({"error": "url required"}), 400
|
||||
|
||||
try:
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
db.add_url(file_path, url)
|
||||
return jsonify({"hash": file_hash, "url_added": len(url)}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Add url error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
return app
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
def main():
|
||||
if not HAS_FLASK:
|
||||
print("ERROR: Flask and flask-cors required")
|
||||
print("Install with: pip install flask flask-cors")
|
||||
sys.exit(1)
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Remote Storage Server for Medios-Macina',
|
||||
epilog='Example: python remote_storage_server.py --storage-path /storage/media --port 5000 --api-key mysecretkey'
|
||||
)
|
||||
parser.add_argument('--storage-path', type=str, required=True, help='Path to storage directory')
|
||||
parser.add_argument('--host', type=str, default='0.0.0.0', help='Server host (default: 0.0.0.0)')
|
||||
parser.add_argument('--port', type=int, default=5000, help='Server port (default: 5000)')
|
||||
parser.add_argument('--api-key', type=str, default=None, help='API key for authentication (optional)')
|
||||
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
global STORAGE_PATH, API_KEY
|
||||
STORAGE_PATH = Path(args.storage_path).resolve()
|
||||
API_KEY = args.api_key
|
||||
|
||||
if not STORAGE_PATH.exists():
|
||||
print(f"ERROR: Storage path does not exist: {STORAGE_PATH}")
|
||||
sys.exit(1)
|
||||
|
||||
# Get local IP address
|
||||
local_ip = get_local_ip()
|
||||
if not local_ip:
|
||||
local_ip = "127.0.0.1"
|
||||
|
||||
print(f"\n{'='*70}")
|
||||
print(f"Remote Storage Server - Medios-Macina")
|
||||
print(f"{'='*70}")
|
||||
print(f"Storage Path: {STORAGE_PATH}")
|
||||
print(f"Local IP: {local_ip}")
|
||||
print(f"Server URL: http://{local_ip}:{args.port}")
|
||||
print(f"Health URL: http://{local_ip}:{args.port}/health")
|
||||
print(f"API Key: {'Enabled - ' + ('***' + args.api_key[-4:]) if args.api_key else 'Disabled (no auth)'}")
|
||||
print(f"Debug Mode: {args.debug}")
|
||||
print(f"\n📋 Config for config.json:")
|
||||
config_entry = {
|
||||
"name": "phone",
|
||||
"url": f"http://{local_ip}:{args.port}",
|
||||
"timeout": 30
|
||||
}
|
||||
if args.api_key:
|
||||
config_entry["api_key"] = args.api_key
|
||||
print(json.dumps(config_entry, indent=2))
|
||||
print(f"\n{'='*70}\n")
|
||||
|
||||
try:
|
||||
from helper.folder_store import FolderDB
|
||||
with FolderDB(STORAGE_PATH) as db:
|
||||
logger.info("Database initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize database: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
app = create_app()
|
||||
app.run(host=args.host, port=args.port, debug=args.debug, use_reloader=False)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
helper/store.py (2268 lines): file diff suppressed because it is too large
helper/tasks.py (155 lines)
@@ -1,155 +0,0 @@
|
||||
"""Background task handling and IPC helpers for mpv integration."""
|
||||
from __future__ import annotations
|
||||
import errno
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
import threading
|
||||
import time
|
||||
from typing import IO, Iterable
|
||||
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
|
||||
"""Connect to the mpv IPC server located at *path*."""
|
||||
deadline = time.time() + timeout
|
||||
if not path:
|
||||
return None
|
||||
if os.name == 'nt':
|
||||
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
|
||||
while True:
|
||||
try:
|
||||
return open(path, 'r+b', buffering=0)
|
||||
except FileNotFoundError:
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
except OSError as exc: # Pipe busy
|
||||
if exc.errno not in (errno.ENOENT, errno.EPIPE, errno.EBUSY):
|
||||
raise
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
else:
|
||||
sock = socket.socket(socket.AF_UNIX)
|
||||
while True:
|
||||
try:
|
||||
sock.connect(path)
|
||||
return sock.makefile('r+b', buffering=0)
|
||||
except FileNotFoundError:
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
except OSError as exc:
|
||||
if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
|
||||
raise
|
||||
if time.time() > deadline:
|
||||
return None
|
||||
time.sleep(0.05)
|
||||
def ipc_sender(ipc: IO[bytes] | None):
|
||||
"""Create a helper function for sending script messages via IPC."""
|
||||
if ipc is None:
|
||||
def _noop(_event: str, _payload: dict) -> None:
|
||||
return None
|
||||
return _noop
|
||||
lock = threading.Lock()
|
||||
def _send(event: str, payload: dict) -> None:
|
||||
message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
|
||||
encoded = message.encode('utf-8') + b'\n'
|
||||
with lock:
|
||||
try:
|
||||
ipc.write(encoded)
|
||||
ipc.flush()
|
||||
except OSError:
|
||||
pass
|
||||
return _send
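# Illustrative wiring of the two helpers above (an assumption, not original code);
# 'downlow-task-event' matches the event name emitted by _run_task below, and the
# pipe path is a placeholder.
#   notify = ipc_sender(connect_ipc(r'\\.\pipe\mpv-downlow', timeout=2.0))
#   notify('downlow-task-event', {'id': 'demo', 'event': 'start'})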
|
||||
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
|
||||
for raw in stream:
|
||||
yield raw.rstrip('\r\n')
|
||||
def _run_task(args, parser) -> int:
|
||||
if not args.command:
|
||||
parser.error('run-task requires a command to execute (use "--" before the command).')
|
||||
env = os.environ.copy()
|
||||
for entry in args.env:
|
||||
key, sep, value = entry.partition('=')
|
||||
if not sep:
|
||||
parser.error(f'Invalid environment variable definition: {entry!r}')
|
||||
env[key] = value
|
||||
command = list(args.command)
|
||||
if command and command[0] == '--':
|
||||
command.pop(0)
|
||||
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
|
||||
if not command:
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'error',
|
||||
'message': 'No command provided after separator',
|
||||
})
|
||||
log('[downlow.py] No command provided for run-task', file=sys.stderr)
|
||||
return 1
|
||||
if command and isinstance(command[0], str) and sys.executable:
|
||||
first = command[0].lower()
|
||||
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
|
||||
command[0] = sys.executable
|
||||
if os.environ.get('DOWNLOW_DEBUG'):
|
||||
log(f"Launching command: {command}", file=sys.stderr)
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'start',
|
||||
'command': command,
|
||||
'cwd': args.cwd or os.getcwd(),
|
||||
})
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
cwd=args.cwd or None,
|
||||
env=env,
|
||||
text=True,
|
||||
bufsize=1,
|
||||
universal_newlines=True,
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'error',
|
||||
'message': f'Executable not found: {exc.filename}',
|
||||
})
|
||||
log(f"{exc}", file=sys.stderr)
|
||||
return 1
|
||||
stdout_lines: list[str] = []
|
||||
stderr_lines: list[str] = []
|
||||
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
|
||||
for line in iter_stream(stream):
|
||||
sink.append(line)
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': label,
|
||||
'line': line,
|
||||
})
|
||||
threads = []
|
||||
if process.stdout:
|
||||
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
|
||||
t_out.start()
|
||||
threads.append(t_out)
|
||||
if process.stderr:
|
||||
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
|
||||
t_err.start()
|
||||
threads.append(t_err)
|
||||
return_code = process.wait()
|
||||
for t in threads:
|
||||
t.join(timeout=0.1)
|
||||
notifier('downlow-task-event', {
|
||||
'id': args.task_id,
|
||||
'event': 'exit',
|
||||
'returncode': return_code,
|
||||
'success': return_code == 0,
|
||||
})
|
||||
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
|
||||
if stdout_lines:
|
||||
log('\n'.join(stdout_lines))
|
||||
if stderr_lines:
|
||||
log('\n'.join(stderr_lines), file=sys.stderr)
|
||||
return return_code
|
||||
@@ -1,707 +0,0 @@
|
||||
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
|
||||
|
||||
This module provides a single interface for downloading books from multiple sources:
|
||||
1. Try Archive.org direct download (if available)
|
||||
2. Try Archive.org borrowing (if user has credentials)
|
||||
3. Fallback to Libgen search by ISBN
|
||||
4. Attempt Libgen download
|
||||
|
||||
All sources integrated with proper metadata scraping and error handling.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
import requests
|
||||
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
|
||||
from pathlib import Path
|
||||
|
||||
from helper.logger import debug
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UnifiedBookDownloader:
|
||||
"""Unified interface for downloading books from multiple sources."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
|
||||
"""Initialize the unified book downloader.
|
||||
|
||||
Args:
|
||||
config: Configuration dict with credentials
|
||||
output_dir: Default output directory
|
||||
"""
|
||||
self.config = config or {}
|
||||
self.output_dir = output_dir
|
||||
self.session = requests.Session()
|
||||
|
||||
# Import download functions from their modules
|
||||
self._init_downloaders()
|
||||
|
||||
def _init_downloaders(self) -> None:
|
||||
"""Initialize downloader functions from their modules."""
|
||||
try:
|
||||
from helper.archive_client import (
|
||||
check_direct_download,
|
||||
get_openlibrary_by_isbn,
|
||||
loan
|
||||
)
|
||||
self.check_direct_download = check_direct_download
|
||||
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
|
||||
self.loan_func = loan
|
||||
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
|
||||
self.check_direct_download = None
|
||||
self.get_openlibrary_by_isbn = None
|
||||
self.loan_func = None
|
||||
|
||||
try:
|
||||
from helper.libgen_service import (
|
||||
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
|
||||
download_from_mirror as _libgen_download,
|
||||
search_libgen as _libgen_search,
|
||||
)
|
||||
|
||||
def _log_info(message: str) -> None:
|
||||
debug(f"[UnifiedBookDownloader] {message}")
|
||||
|
||||
def _log_error(message: str) -> None:
|
||||
logger.error(f"[UnifiedBookDownloader] {message}")
|
||||
|
||||
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
|
||||
query,
|
||||
limit=limit,
|
||||
log_info=_log_info,
|
||||
log_error=_log_error,
|
||||
)
|
||||
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
|
||||
mirror_url,
|
||||
output_path,
|
||||
log_info=_log_info,
|
||||
log_error=_log_error,
|
||||
)
|
||||
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
|
||||
self.search_libgen = None
|
||||
self.download_from_mirror = None
|
||||
|
||||
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get all available download options for a book.
|
||||
|
||||
Checks in priority order:
|
||||
1. Archive.org direct download (public domain)
|
||||
2. Archive.org borrowing (if credentials available and book is borrowable)
|
||||
3. Libgen fallback (by ISBN)
|
||||
|
||||
Args:
|
||||
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
|
||||
|
||||
Returns:
|
||||
Dict with available download methods and metadata
|
||||
"""
|
||||
options = {
|
||||
'book_title': book_data.get('title', 'Unknown'),
|
||||
'book_author': book_data.get('author', 'Unknown'),
|
||||
'isbn': book_data.get('isbn', ''),
|
||||
'openlibrary_id': book_data.get('openlibrary_id', ''),
|
||||
'methods': [], # Will be sorted by priority
|
||||
'metadata': {}
|
||||
}
|
||||
|
||||
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
|
||||
ol_id = book_data.get('openlibrary_id', '')
|
||||
book_id = None
|
||||
|
||||
if ol_id.startswith('OL') and len(ol_id) > 2:
|
||||
# Remove 'OL' prefix (keep everything after it including the suffix letter)
|
||||
# The book_id is all digits after 'OL'
|
||||
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
|
||||
|
||||
# PRIORITY 1: Check direct download (fastest, no auth needed)
|
||||
if self.check_direct_download:
|
||||
try:
|
||||
can_download, pdf_url = self.check_direct_download(book_id)
|
||||
if can_download:
|
||||
options['methods'].append({
|
||||
'type': 'archive.org_direct',
|
||||
'label': 'Archive.org Direct Download',
|
||||
'requires_auth': False,
|
||||
'pdf_url': pdf_url,
|
||||
'book_id': book_id,
|
||||
'priority': 1 # Highest priority
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
|
||||
|
||||
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
|
||||
# First verify the book is actually lendable via OpenLibrary API
|
||||
if self._has_archive_credentials():
|
||||
is_lendable, status = self._check_book_lendable_status(ol_id)
|
||||
|
||||
if is_lendable:
|
||||
options['methods'].append({
|
||||
'type': 'archive.org_borrow',
|
||||
'label': 'Archive.org Borrow',
|
||||
'requires_auth': True,
|
||||
'book_id': book_id,
|
||||
'priority': 2 # Second priority
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
|
||||
else:
|
||||
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
|
||||
|
||||
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
|
||||
isbn = book_data.get('isbn', '')
|
||||
title = book_data.get('title', '')
|
||||
author = book_data.get('author', '')
|
||||
|
||||
if self.search_libgen:
|
||||
# Can use Libgen if we have ISBN OR title (or both)
|
||||
if isbn or title:
|
||||
options['methods'].append({
|
||||
'type': 'libgen',
|
||||
'label': 'Libgen Search & Download',
|
||||
'requires_auth': False,
|
||||
'isbn': isbn,
|
||||
'title': title,
|
||||
'author': author,
|
||||
'priority': 3 # Third priority (fallback)
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
|
||||
|
||||
# Sort by priority (higher priority first)
|
||||
options['methods'].sort(key=lambda x: x.get('priority', 999))
|
||||
|
||||
return options
|
||||
|
||||
def _has_archive_credentials(self) -> bool:
|
||||
"""Check if Archive.org credentials are available."""
|
||||
try:
|
||||
from helper.archive_client import credential_openlibrary
|
||||
email, password = credential_openlibrary(self.config)
|
||||
return bool(email and password)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
|
||||
"""Check if a book is lendable via OpenLibrary API.
|
||||
|
||||
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
|
||||
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
|
||||
|
||||
Args:
|
||||
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
|
||||
|
||||
Returns:
|
||||
Tuple of (is_lendable: bool, status_reason: Optional[str])
|
||||
"""
|
||||
try:
|
||||
if not ol_id.startswith('OL'):
|
||||
return False, "Invalid OpenLibrary ID format"
|
||||
|
||||
# If this is a Work ID (ends with W), we can't query Volumes API
|
||||
# Work IDs are abstract umbrella records, not specific editions
|
||||
if ol_id.endswith('W'):
|
||||
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
|
||||
return False, "Work ID not supported by Volumes API (not a specific edition)"
|
||||
|
||||
# If it ends with M, it's an Edition ID - proceed with query
|
||||
if not ol_id.endswith('M'):
|
||||
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
|
||||
return False, "Invalid OpenLibrary ID type"
|
||||
|
||||
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
|
||||
response = self.session.get(url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# Empty response means no records found
|
||||
if not data:
|
||||
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
|
||||
return False, "No availability data found"
|
||||
|
||||
# The response is wrapped in OLID key
|
||||
olid_key = f"OLID:{ol_id}"
|
||||
if olid_key not in data:
|
||||
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
|
||||
return False, "No availability data found"
|
||||
|
||||
olid_data = data[olid_key]
|
||||
|
||||
# Check items array for lendable status
|
||||
if olid_data.get('items'):
|
||||
items = olid_data['items']
|
||||
|
||||
# Check the first item for lending status
|
||||
first_item = items[0]
|
||||
|
||||
# Handle both dict and string representations (PowerShell converts to string)
|
||||
if isinstance(first_item, dict):
|
||||
status = first_item.get('status', '')
|
||||
else:
|
||||
# String representation - check if 'lendable' is in it
|
||||
status = str(first_item).lower()
|
||||
|
||||
is_lendable = 'lendable' in str(status).lower()
|
||||
|
||||
if is_lendable:
|
||||
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
|
||||
return True, "LENDABLE"
|
||||
else:
|
||||
status_str = status.get('status', 'NOT_LENDABLE') if isinstance(status, dict) else 'NOT_LENDABLE'
|
||||
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
|
||||
return False, status_str
|
||||
else:
|
||||
# No items array or empty
|
||||
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
|
||||
return False, "Not available for lending"
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
|
||||
return False, "API timeout"
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
|
||||
return False, f"API error"
|
||||
|
||||
|
||||
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
|
||||
"""Download a book using the specified method.
|
||||
|
||||
Args:
|
||||
method: Download method dict from get_download_options()
|
||||
output_dir: Directory to save the book
|
||||
|
||||
Returns:
|
||||
Tuple of (success: bool, message: str)
|
||||
"""
|
||||
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
|
||||
method_type = method.get('type', '')
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
|
||||
|
||||
try:
|
||||
if method_type == 'archive.org_direct':
|
||||
return await self._download_archive_direct(method, output_dir)
|
||||
|
||||
elif method_type == 'archive.org_borrow':
|
||||
return await self._download_archive_borrow(method, output_dir)
|
||||
|
||||
elif method_type == 'libgen':
|
||||
return await self._download_libgen(method, output_dir)
|
||||
|
||||
else:
|
||||
return False, f"Unknown download method: {method_type}"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
|
||||
return False, f"Download failed: {str(e)}"
|
||||
|
||||
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download directly from Archive.org."""
|
||||
try:
|
||||
pdf_url = method.get('pdf_url', '')
|
||||
book_id = method.get('book_id', '')
|
||||
|
||||
if not pdf_url:
|
||||
return False, "No PDF URL available"
|
||||
|
||||
# Determine output filename
|
||||
filename = f"{book_id}.pdf"
|
||||
output_path = Path(output_dir) / filename
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
|
||||
|
||||
# Download in a thread to avoid blocking
|
||||
loop = asyncio.get_event_loop()
|
||||
success = await loop.run_in_executor(
|
||||
None,
|
||||
self._download_file,
|
||||
pdf_url,
|
||||
str(output_path)
|
||||
)
|
||||
|
||||
if success:
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
|
||||
return True, f"Downloaded to: {output_path}"
|
||||
else:
|
||||
return False, "Failed to download PDF"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
|
||||
return False, f"Archive download failed: {str(e)}"
|
||||
|
||||
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download via Archive.org borrowing (requires credentials).
|
||||
|
||||
Process (follows archive_client.py pattern):
|
||||
1. Login to Archive.org with credentials
|
||||
2. Call loan endpoint to borrow the book (14-day loan)
|
||||
3. Get book info (page links, metadata)
|
||||
4. Download all pages as images
|
||||
5. Merge images into PDF
|
||||
|
||||
The loan function from archive_client.py handles:
|
||||
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
|
||||
- Creating borrow token for access
|
||||
- Handling borrow failures
|
||||
|
||||
get_book_infos() extracts page links from the borrowed book viewer
|
||||
download() downloads all pages using thread pool
|
||||
img2pdf merges pages into searchable PDF
|
||||
"""
|
||||
try:
|
||||
from helper.archive_client import credential_openlibrary
|
||||
|
||||
book_id = method.get('book_id', '')
|
||||
|
||||
# Get credentials
|
||||
email, password = credential_openlibrary(self.config)
|
||||
if not email or not password:
|
||||
return False, "Archive.org credentials not configured"
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
|
||||
|
||||
# Login and borrow (in thread, following download_book.py pattern)
|
||||
loop = asyncio.get_event_loop()
|
||||
borrow_result = await loop.run_in_executor(
|
||||
None,
|
||||
self._archive_borrow_and_download,
|
||||
email,
|
||||
password,
|
||||
book_id,
|
||||
output_dir
|
||||
)
|
||||
|
||||
if borrow_result and isinstance(borrow_result, tuple):
|
||||
success, filepath = borrow_result
|
||||
if success:
|
||||
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
|
||||
return True, filepath
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
|
||||
return False, filepath
|
||||
else:
|
||||
return False, "Failed to borrow book from Archive.org"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||
return False, f"Archive borrow failed: {str(e)}"
|
||||
|
||||
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download via Libgen search and download with mirror fallback."""
|
||||
try:
|
||||
isbn = method.get('isbn', '')
|
||||
title = method.get('title', '')
|
||||
|
||||
if not isbn and not title:
|
||||
return False, "Need ISBN or title for Libgen search"
|
||||
|
||||
if not self.search_libgen:
|
||||
return False, "Libgen searcher not available"
|
||||
|
||||
# Define wrapper functions to safely call the methods
|
||||
search_func = self.search_libgen
|
||||
if search_func is None:
|
||||
return False, "Search function not available"
|
||||
|
||||
preloaded_results = method.get('results')
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
if preloaded_results:
|
||||
results = list(preloaded_results)
|
||||
if not results:
|
||||
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||
else:
|
||||
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||
|
||||
if not results:
|
||||
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
|
||||
return False, f"No Libgen results found for: {isbn or title}"
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
|
||||
|
||||
# Determine output filename (use first result for naming)
|
||||
first_result = results[0]
|
||||
filename = f"{first_result.get('title', 'book')}"
|
||||
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||||
|
||||
# Try each result's mirror until one succeeds
|
||||
for idx, result in enumerate(results, 1):
|
||||
mirror_url = result.get('mirror_url', '')
|
||||
|
||||
if not mirror_url:
|
||||
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
|
||||
continue
|
||||
|
||||
# Use extension from this result if available
|
||||
extension = result.get('extension', 'pdf')
|
||||
if extension and not extension.startswith('.'):
|
||||
extension = f".{extension}"
|
||||
elif not extension:
|
||||
extension = '.pdf'
|
||||
|
||||
output_path = Path(output_dir) / (filename + extension)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
|
||||
|
||||
download_func = self.download_from_mirror
|
||||
if download_func is None:
|
||||
return False, "Download function not available"
|
||||
|
||||
download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
|
||||
|
||||
def download_wrapper():
|
||||
return download_callable(mirror_url, str(output_path))
|
||||
|
||||
# Download (in thread)
|
||||
try:
|
||||
success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
|
||||
|
||||
if success:
|
||||
dest_path = Path(downloaded_path) if downloaded_path else output_path
|
||||
# Validate downloaded file is not HTML (common Libgen issue)
|
||||
if dest_path.exists():
|
||||
try:
|
||||
with open(dest_path, 'rb') as f:
|
||||
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
|
||||
if '<!doctype' in file_start or '<html' in file_start:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
|
||||
dest_path.unlink() # Delete the HTML file
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
|
||||
return True, str(dest_path)
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
|
||||
continue
|
||||
|
||||
return False, f"All {len(results)} mirrors failed"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
|
||||
return False, f"Libgen download failed: {str(e)}"
|
||||
|
||||
async def download_libgen_selection(
|
||||
self,
|
||||
selected: Dict[str, Any],
|
||||
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
) -> Tuple[bool, str]:
|
||||
"""Download a specific Libgen result with optional fallbacks."""
|
||||
|
||||
if not isinstance(selected, dict):
|
||||
return False, "Selected result must be a dictionary"
|
||||
|
||||
ordered_results: List[Dict[str, Any]] = [selected]
|
||||
if remaining:
|
||||
for item in remaining:
|
||||
if isinstance(item, dict) and item is not selected:
|
||||
ordered_results.append(item)
|
||||
|
||||
method: Dict[str, Any] = {
|
||||
'type': 'libgen',
|
||||
'isbn': selected.get('isbn', '') or '',
|
||||
'title': selected.get('title', '') or '',
|
||||
'author': selected.get('author', '') or '',
|
||||
'results': ordered_results,
|
||||
}
|
||||
|
||||
return await self.download_book(method, output_dir)
|
||||
|
||||
def download_libgen_selection_sync(
|
||||
self,
|
||||
selected: Dict[str, Any],
|
||||
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
) -> Tuple[bool, str]:
|
||||
"""Synchronous helper for downloading a Libgen selection."""
|
||||
|
||||
async def _run() -> Tuple[bool, str]:
|
||||
return await self.download_libgen_selection(selected, remaining, output_dir)
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
asyncio.set_event_loop(loop)
|
||||
return loop.run_until_complete(_run())
|
||||
finally:
|
||||
loop.close()
|
||||
asyncio.set_event_loop(None)
|
||||
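# Illustrative usage sketch (not part of the original class; kept as comments
# because the UnifiedBookDownloader constructor is not shown here, so the
# instantiation below is an assumption). The dict shape mirrors the Libgen
# result keys used above (title / isbn / mirror_url / extension):
#
#     downloader = UnifiedBookDownloader(...)  # construct as the real module requires
#     selected = {"title": "Example Book", "isbn": "9780000000000",
#                 "mirror_url": "http://libgen.example/get?id=1", "extension": "pdf"}
#     fallbacks = [{"title": "Example Book", "mirror_url": "http://libgen.example/get?id=2"}]
#     ok, message = downloader.download_libgen_selection_sync(selected, fallbacks, output_dir="/tmp/books")
#     # ok is True on success; message is then the downloaded path, otherwise an error description.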
|
||||
def _download_file(self, url: str, output_path: str) -> bool:
|
||||
"""Download a file from URL."""
|
||||
try:
|
||||
response = requests.get(url, stream=True, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(output_path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
|
||||
return False
|
||||
|
||||
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
|
||||
"""Borrow a book from Archive.org and download pages as PDF.
|
||||
|
||||
This follows the exact process from archive_client.py:
|
||||
1. Login with credentials
|
||||
2. Call loan() to create 14-day borrow
|
||||
3. Get book info (extract page URLs)
|
||||
4. Download all pages as images
|
||||
5. Merge images into searchable PDF
|
||||
|
||||
Returns tuple of (success: bool, filepath/message: str)
|
||||
"""
|
||||
try:
|
||||
from helper.archive_client import login, loan, get_book_infos, download
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
|
||||
session = login(email, password)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
|
||||
# Call loan to create the 14-day borrow
|
||||
session = loan(session, book_id, verbose=True)
|
||||
|
||||
# If we get here, borrowing succeeded
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
|
||||
|
||||
# Now get the book info (page URLs and metadata)
|
||||
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
|
||||
# Try both URL formats: with /borrow and without
|
||||
book_urls = [
|
||||
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
|
||||
f"https://archive.org/details/{book_id}" # Fallback to details page
|
||||
]
|
||||
|
||||
title = None
|
||||
links = None
|
||||
metadata = None
|
||||
last_error = None
|
||||
|
||||
for book_url in book_urls:
|
||||
try:
|
||||
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
|
||||
response = session.get(book_url, timeout=10)
|
||||
|
||||
# Log response status
|
||||
if response.status_code != 200:
|
||||
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
|
||||
# Continue to try next URL
|
||||
continue
|
||||
|
||||
# Try to parse the response
|
||||
title, links, metadata = get_book_infos(session, book_url)
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
|
||||
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
|
||||
last_error = e
|
||||
continue
|
||||
|
||||
if links is None:
|
||||
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}")
|
||||
# Borrow extraction failed - return False
|
||||
return False, "Could not extract borrowed book pages"
|
||||
|
||||
# Create temporary directory for images
|
||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
|
||||
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
|
||||
|
||||
try:
|
||||
# Download all pages (uses thread pool)
|
||||
images = download(
|
||||
session=session,
|
||||
n_threads=10,
|
||||
directory=temp_dir,
|
||||
links=links,
|
||||
scale=3, # Default resolution
|
||||
book_id=book_id
|
||||
)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
|
||||
|
||||
# Try to merge pages into PDF
|
||||
try:
|
||||
import img2pdf
|
||||
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
|
||||
|
||||
# Prepare PDF metadata
|
||||
pdfmeta = {}
|
||||
if metadata:
|
||||
if "title" in metadata:
|
||||
pdfmeta["title"] = metadata["title"]
|
||||
if "creator" in metadata:
|
||||
pdfmeta["author"] = metadata["creator"]
|
||||
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
|
||||
pdfmeta["creationdate"] = None # Avoid timezone issues
|
||||
|
||||
# Convert images to PDF
|
||||
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
|
||||
if not pdf_content:
|
||||
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
|
||||
return False, "Failed to convert pages to PDF"
|
||||
|
||||
# Save the PDF
|
||||
pdf_filename = f"{title}.pdf" if title else "book.pdf"
|
||||
pdf_path = Path(output_dir) / pdf_filename
|
||||
|
||||
# Handle duplicate filenames
|
||||
i = 1
|
||||
while pdf_path.exists():
|
||||
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
|
||||
i += 1
|
||||
|
||||
with open(pdf_path, 'wb') as f:
|
||||
f.write(pdf_content)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
|
||||
|
||||
return True, str(pdf_path)
|
||||
|
||||
except ImportError:
|
||||
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
|
||||
|
||||
# Create JPG collection directory
|
||||
if not title:
|
||||
title = f"book_{book_id}"
|
||||
jpg_dir = Path(output_dir) / title
|
||||
i = 1
|
||||
while jpg_dir.exists():
|
||||
jpg_dir = Path(output_dir) / f"{title}({i})"
|
||||
i += 1
|
||||
|
||||
# Move temporary directory to final location
|
||||
shutil.move(temp_dir, str(jpg_dir))
|
||||
temp_dir = None # Mark as already moved
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
|
||||
return True, str(jpg_dir)
|
||||
|
||||
finally:
|
||||
# Clean up temporary directory if it still exists
|
||||
if temp_dir and Path(temp_dir).exists():
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
except SystemExit:
|
||||
# loan() function calls sys.exit on failure - catch it
|
||||
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
|
||||
return False, "Book could not be borrowed (may not be available for borrowing)"
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||
return False, f"Borrow failed: {str(e)}"
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the session."""
|
||||
self.session.close()
|
||||
helper/utils.py
@@ -1,492 +0,0 @@
|
||||
"""General-purpose helpers used across the downlow CLI."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import hashlib
|
||||
import ffmpeg
|
||||
import base64
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass, field
|
||||
from fnmatch import fnmatch
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import helper.utils_constant
|
||||
|
||||
try:
|
||||
import cbor2
|
||||
except ImportError:
|
||||
cbor2 = None # type: ignore
|
||||
|
||||
CHUNK_SIZE = 1024 * 1024 # 1 MiB
|
||||
_format_logger = logging.getLogger(__name__)
|
||||
def ensure_directory(path: Path) -> None:
|
||||
"""Ensure *path* exists as a directory."""
|
||||
try:
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as exc: # pragma: no cover - surfaced to caller
|
||||
raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
|
||||
def unique_path(path: Path) -> Path:
|
||||
"""Return a unique path by appending " (n)" if needed."""
|
||||
if not path.exists():
|
||||
return path
|
||||
stem = path.stem
|
||||
suffix = path.suffix
|
||||
parent = path.parent
|
||||
counter = 1
|
||||
while True:
|
||||
candidate = parent / f"{stem} ({counter}){suffix}"
|
||||
if not candidate.exists():
|
||||
return candidate
|
||||
counter += 1
|
||||
|
||||
def sanitize_metadata_value(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if not isinstance(value, str):
|
||||
value = str(value)
|
||||
value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
|
||||
if not value:
|
||||
return None
|
||||
return value
|
||||
def unique_preserve_order(values: Iterable[str]) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
ordered: list[str] = []
|
||||
for value in values:
|
||||
if value not in seen:
|
||||
seen.add(value)
|
||||
ordered.append(value)
|
||||
return ordered
|
||||
def sha256_file(file_path: Path) -> str:
|
||||
"""Return the SHA-256 hex digest of *path*."""
|
||||
hasher = hashlib.sha256()
|
||||
with file_path.open('rb') as handle:
|
||||
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
|
||||
hasher.update(chunk)
|
||||
return hasher.hexdigest()
|
||||
|
||||
|
||||
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
|
||||
"""Create a .metadata sidecar file with JSON metadata.
|
||||
|
||||
The metadata dict should contain title. If not present, it will be derived from
|
||||
the filename. This ensures the .metadata file can be matched during batch import.
|
||||
|
||||
Args:
|
||||
file_path: Path to the exported file
|
||||
metadata: Dictionary of metadata to save
|
||||
"""
|
||||
if not metadata:
|
||||
return
|
||||
file_name = file_path.stem
|
||||
file_ext = file_path.suffix.lower()
|
||||
# Ensure metadata has a title field that matches the filename (without extension)
|
||||
# This allows the sidecar to be matched and imported properly during batch import
|
||||
if 'title' not in metadata or not metadata.get('title'):
|
||||
metadata['title'] = file_name
|
||||
metadata['hash'] = sha256_file(file_path)
|
||||
metadata['size'] = Path(file_path).stat().st_size
|
||||
format_found = False
|
||||
for mime_type, ext_map in helper.utils_constant.mime_maps.items():
|
||||
for key, info in ext_map.items():
|
||||
if info.get("ext") == file_ext:
|
||||
metadata['type'] = mime_type
|
||||
format_found = True
|
||||
break
|
||||
if format_found:
|
||||
break
|
||||
else:
|
||||
metadata['type'] = 'unknown'
|
||||
metadata.update(ffprobe(str(file_path)))
|
||||
|
||||
|
||||
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
|
||||
try:
|
||||
with open(metadata_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(metadata, f, ensure_ascii=False, indent=2)
|
||||
except OSError as exc:
|
||||
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
|
||||
|
||||
def create_tags_sidecar(file_path: Path, tags: set) -> None:
|
||||
"""Create a .tags sidecar file with tags (one per line).
|
||||
|
||||
Args:
|
||||
file_path: Path to the exported file
|
||||
tags: Set of tag strings
|
||||
"""
|
||||
if not tags:
|
||||
return
|
||||
|
||||
tags_path = file_path.with_suffix(file_path.suffix + '.tags')
|
||||
try:
|
||||
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||
for tag in sorted(tags):
|
||||
f.write(f"{tag}\n")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
|
||||
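# Illustrative sketch (not part of the original module): how unique_path(),
# sha256_file() and the sidecar helpers above fit together for one exported file.
# `exported` must point at a real media file, because create_metadata_sidecar()
# runs ffprobe() on it; the tag values are made-up placeholders. Outside this
# module the helpers would be imported from helper.utils.
def _demo_sidecars(exported: Path) -> None:
    # Pick a collision-free destination next to the original, e.g. "copy clip (1).mp4".
    destination = unique_path(exported.with_name(f"copy {exported.name}"))
    destination.write_bytes(exported.read_bytes())

    # Title, hash, size, type and ffprobe fields are filled in by the helper itself.
    create_metadata_sidecar(destination, {"source": "demo"})
    create_tags_sidecar(destination, {"creator:unknown", "demo:example"})

    # Sidecars land next to the file as "<name>.<ext>.metadata" and "<name>.<ext>.tags".
    print(destination, sha256_file(destination))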
|
||||
|
||||
def ffprobe(file_path: str) -> dict:
|
||||
probe = ffmpeg.probe(file_path)
|
||||
metadata = {}
|
||||
|
||||
# Format-level info
|
||||
fmt = probe.get("format", {})
|
||||
metadata["duration"] = float(fmt.get("duration", 0)) if "duration" in fmt else None
|
||||
metadata["size"] = int(fmt.get("size", 0)) if "size" in fmt else None
|
||||
metadata["format_name"] = fmt.get("format_name", None)
|
||||
|
||||
# Stream-level info
|
||||
for stream in probe.get("streams", []):
|
||||
codec_type = stream.get("codec_type")
|
||||
if codec_type == "audio":
|
||||
metadata["audio_codec"] = stream.get("codec_name")
|
||||
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
|
||||
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
|
||||
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
|
||||
elif codec_type == "video":
|
||||
metadata["video_codec"] = stream.get("codec_name")
|
||||
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||
elif codec_type == "image":
|
||||
metadata["image_codec"] = stream.get("codec_name")
|
||||
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||
|
||||
return metadata
|
||||
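# Illustrative sketch (not part of the original module): typical keys returned by
# ffprobe() above. The path argument is a placeholder; any file ffmpeg can read works.
def _demo_ffprobe(media_path: str) -> None:
    info = ffprobe(media_path)
    # Format-level keys: duration, size, format_name.
    # Stream-level keys when present: video_codec/width/height, audio_codec/bitrate/samplerate/channels.
    print(info.get("format_name"), info.get("duration"), info.get("video_codec"), info.get("width"))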
|
||||
|
||||
# ============================================================================
|
||||
# CBOR Utilities - Consolidated from cbor.py
|
||||
# ============================================================================
|
||||
"""CBOR utilities backed by the `cbor2` library."""
|
||||
|
||||
|
||||
def decode_cbor(data: bytes) -> Any:
|
||||
"""Decode *data* from CBOR into native Python objects."""
|
||||
if not data:
|
||||
return None
|
||||
if cbor2 is None:
|
||||
raise ImportError("cbor2 library is required for CBOR decoding")
|
||||
return cbor2.loads(data)
|
||||
|
||||
|
||||
def jsonify(value: Any) -> Any:
|
||||
"""Convert *value* into a JSON-friendly structure."""
|
||||
if isinstance(value, dict):
|
||||
return {str(key): jsonify(val) for key, val in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [jsonify(item) for item in value]
|
||||
if isinstance(value, bytes):
|
||||
return {"__bytes__": base64.b64encode(value).decode("ascii")}
|
||||
return value
|
||||
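# Illustrative sketch (not part of the original module): round-trip a payload
# through cbor2 and make the result JSON-safe with jsonify(). Requires the
# optional cbor2 dependency; the payload is a made-up example.
def _demo_cbor_roundtrip() -> None:
    if cbor2 is None:
        return
    payload = {"hash": b"\x00\x01", "tags": ["demo"]}
    decoded = decode_cbor(cbor2.dumps(payload))
    # bytes values become {"__bytes__": "<base64>"} so the structure survives json.dumps().
    print(json.dumps(jsonify(decoded)))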
|
||||
|
||||
# ============================================================================
|
||||
# Format Utilities - Consolidated from format_utils.py
|
||||
# ============================================================================
|
||||
"""Formatting utilities for displaying metadata consistently across the application."""
|
||||
|
||||
|
||||
def format_bytes(bytes_value) -> str:
|
||||
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
|
||||
|
||||
Args:
|
||||
bytes_value: Size in bytes (int or float)
|
||||
|
||||
Returns:
|
||||
Formatted string like '1.5 MB' or '756 MB'
|
||||
"""
|
||||
if bytes_value is None or bytes_value <= 0:
|
||||
return "0 B"
|
||||
|
||||
if isinstance(bytes_value, (int, float)):
|
||||
for unit in ("B", "KB", "MB", "GB", "TB"):
|
||||
if bytes_value < 1024:
|
||||
if unit == "B":
|
||||
return f"{int(bytes_value)} {unit}"
|
||||
return f"{bytes_value:.1f} {unit}"
|
||||
bytes_value /= 1024
|
||||
return f"{bytes_value:.1f} PB"
|
||||
return str(bytes_value)
|
||||
|
||||
|
||||
def format_duration(seconds) -> str:
|
||||
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
|
||||
|
||||
Args:
|
||||
seconds: Duration in seconds (int or float)
|
||||
|
||||
Returns:
|
||||
Formatted string like '1:23:45' or '5:30'
|
||||
"""
|
||||
if seconds is None or seconds == '':
|
||||
return "N/A"
|
||||
|
||||
if isinstance(seconds, str):
|
||||
try:
|
||||
seconds = float(seconds)
|
||||
except ValueError:
|
||||
return str(seconds)
|
||||
|
||||
if not isinstance(seconds, (int, float)):
|
||||
return str(seconds)
|
||||
|
||||
total_seconds = int(seconds)
|
||||
if total_seconds < 0:
|
||||
return "N/A"
|
||||
|
||||
hours = total_seconds // 3600
|
||||
minutes = (total_seconds % 3600) // 60
|
||||
secs = total_seconds % 60
|
||||
|
||||
if hours > 0:
|
||||
return f"{hours}:{minutes:02d}:{secs:02d}"
|
||||
elif minutes > 0:
|
||||
return f"{minutes}:{secs:02d}"
|
||||
else:
|
||||
return f"{secs}s"
|
||||
|
||||
|
||||
def format_timestamp(timestamp_str) -> str:
|
||||
"""Format ISO timestamp to readable format.
|
||||
|
||||
Args:
|
||||
timestamp_str: ISO format timestamp string or None
|
||||
|
||||
Returns:
|
||||
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
|
||||
"""
|
||||
if not timestamp_str:
|
||||
return "N/A"
|
||||
|
||||
try:
|
||||
# Handle ISO format timestamps
|
||||
if isinstance(timestamp_str, str):
|
||||
# Try parsing ISO format
|
||||
if 'T' in timestamp_str:
|
||||
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
|
||||
else:
|
||||
# Try other common formats
|
||||
dt = datetime.fromisoformat(timestamp_str)
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
except Exception as e:
|
||||
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
|
||||
|
||||
return str(timestamp_str)
|
||||
|
||||
|
||||
def format_metadata_value(key: str, value) -> str:
|
||||
"""Format a metadata value based on its key for display.
|
||||
|
||||
This is the central formatting rule for all metadata display.
|
||||
|
||||
Args:
|
||||
key: Metadata field name
|
||||
value: Value to format
|
||||
|
||||
Returns:
|
||||
Formatted string for display
|
||||
"""
|
||||
if value is None or value == '':
|
||||
return "N/A"
|
||||
|
||||
# Apply field-specific formatting
|
||||
if key in ('size', 'file_size'):
|
||||
return format_bytes(value)
|
||||
elif key in ('duration', 'length'):
|
||||
return format_duration(value)
|
||||
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
|
||||
return format_timestamp(value)
|
||||
else:
|
||||
return str(value)
|
||||
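# Illustrative sketch (not part of the original module): the key-aware dispatch
# performed by format_metadata_value(). The row below is made-up sample data.
def _demo_format_metadata() -> None:
    row = {
        "size": 1536000,                          # -> "1.5 MB" via format_bytes()
        "duration": 330,                          # -> "5:30" via format_duration()
        "time_imported": "2025-10-28T19:36:01Z",  # -> "2025-10-28 19:36:01" via format_timestamp()
        "title": "Example",                       # -> plain str()
    }
    for key, value in row.items():
        print(f"{key}: {format_metadata_value(key, value)}")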
|
||||
|
||||
# ============================================================================
|
||||
# Link Utilities - Consolidated from link_utils.py
|
||||
# ============================================================================
|
||||
"""Link utilities - Extract and process url from various sources."""
|
||||
|
||||
|
||||
def extract_link_from_args(args: Iterable[str]) -> Any | None:
|
||||
"""Extract HTTP/HTTPS URL from command arguments.
|
||||
|
||||
Args:
|
||||
args: Command arguments
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
args_list = list(args) if not isinstance(args, (list, tuple)) else args
|
||||
if not args_list or len(args_list) == 0:
|
||||
return None
|
||||
|
||||
potential_link = str(args_list[0])
|
||||
if potential_link.startswith(('http://', 'https://')):
|
||||
return potential_link
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def extract_link_from_result(result: Any) -> Any | None:
|
||||
"""Extract URL from a result object (dict or object with attributes).
|
||||
|
||||
Args:
|
||||
result: Result object from pipeline (dict or object)
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
if isinstance(result, dict):
|
||||
return result.get('url') or result.get('link') or result.get('href')
|
||||
|
||||
return (
|
||||
getattr(result, 'url', None) or
|
||||
getattr(result, 'link', None) or
|
||||
getattr(result, 'href', None)
|
||||
)
|
||||
|
||||
|
||||
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
|
||||
"""Extract link from args or result (args take priority).
|
||||
|
||||
Args:
|
||||
result: Pipeline result object
|
||||
args: Command arguments
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
# Try args first
|
||||
link = extract_link_from_args(args)
|
||||
if link:
|
||||
return link
|
||||
|
||||
# Fall back to result
|
||||
return extract_link_from_result(result)
|
||||
|
||||
|
||||
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
|
||||
"""Get API key from config with fallback support.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary
|
||||
service: Service name for logging
|
||||
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
|
||||
|
||||
Returns:
|
||||
API key if found and not empty, None otherwise
|
||||
"""
|
||||
try:
|
||||
parts = key_path.split('.')
|
||||
value = config
|
||||
for part in parts:
|
||||
if isinstance(value, dict):
|
||||
value = value.get(part)
|
||||
else:
|
||||
return None
|
||||
|
||||
if isinstance(value, str):
|
||||
return value.strip() or None
|
||||
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
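# Illustrative sketch (not part of the original module): the dot-notation lookup
# performed by get_api_key(). The config shape is an assumption modelled on the
# "Debrid.All-debrid" example from the docstring; missing or blank keys yield None.
def _demo_get_api_key() -> None:
    config = {"Debrid": {"All-debrid": "  abc123  "}}
    print(get_api_key(config, "alldebrid", "Debrid.All-debrid"))    # -> "abc123" (whitespace stripped)
    print(get_api_key(config, "realdebrid", "Debrid.Real-debrid"))  # -> None (key absent)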
|
||||
|
||||
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
|
||||
"""Add direct link information to result object.
|
||||
|
||||
Args:
|
||||
result: Result object to modify (dict or object)
|
||||
direct_link: The unlocked/direct URL
|
||||
original_link: The original restricted URL
|
||||
"""
|
||||
if isinstance(result, dict):
|
||||
result['direct_link'] = direct_link
|
||||
result['original_link'] = original_link
|
||||
else:
|
||||
setattr(result, 'direct_link', direct_link)
|
||||
setattr(result, 'original_link', original_link)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# URL Policy Resolution - Consolidated from url_parser.py
|
||||
# ============================================================================
|
||||
"""URL policy resolution for downlow workflows."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UrlPolicy:
|
||||
"""Describe how a URL should be handled by download and screenshot flows."""
|
||||
|
||||
skip_download: bool = False
|
||||
skip_metadata: bool = False
|
||||
force_screenshot: bool = False
|
||||
extra_tags: list[str] = field(default_factory=list)
|
||||
|
||||
def apply_tags(self, sources: Iterable[str]) -> list[str]:
|
||||
tags = [tag.strip() for tag in self.extra_tags if tag and tag.strip()]
|
||||
for value in sources:
|
||||
text = str(value).strip()
|
||||
if text:
|
||||
tags.append(text)
|
||||
return tags
|
||||
|
||||
|
||||
def _normalise_rule(rule: dict[str, Any]) -> dict[str, Any] | None:
|
||||
pattern = str(rule.get("pattern") or rule.get("host") or "").strip()
|
||||
if not pattern:
|
||||
return None
|
||||
skip_download = bool(rule.get("skip_download"))
|
||||
skip_metadata = bool(rule.get("skip_metadata"))
|
||||
force_screenshot = bool(rule.get("force_screenshot"))
|
||||
extra_tags_raw = rule.get("extra_tags")
|
||||
if isinstance(extra_tags_raw, str):
|
||||
extra_tags = [part.strip() for part in extra_tags_raw.split(",") if part.strip()]
|
||||
elif isinstance(extra_tags_raw, (list, tuple, set)):
|
||||
extra_tags = [str(item).strip() for item in extra_tags_raw if str(item).strip()]
|
||||
else:
|
||||
extra_tags = []
|
||||
return {
|
||||
"pattern": pattern,
|
||||
"skip_download": skip_download,
|
||||
"skip_metadata": skip_metadata,
|
||||
"force_screenshot": force_screenshot,
|
||||
"extra_tags": extra_tags,
|
||||
}
|
||||
|
||||
|
||||
def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
|
||||
policies_raw = config.get("url_policies")
|
||||
if not policies_raw:
|
||||
return UrlPolicy()
|
||||
if not isinstance(policies_raw, list):
|
||||
return UrlPolicy()
|
||||
parsed = urlparse(url)
|
||||
subject = f"{parsed.netloc}{parsed.path}"
|
||||
host = parsed.netloc
|
||||
resolved = UrlPolicy()
|
||||
for rule_raw in policies_raw:
|
||||
if not isinstance(rule_raw, dict):
|
||||
continue
|
||||
rule = _normalise_rule(rule_raw)
|
||||
if rule is None:
|
||||
continue
|
||||
pattern = rule["pattern"]
|
||||
if not (fnmatch(host, pattern) or fnmatch(subject, pattern)):
|
||||
continue
|
||||
if rule["skip_download"]:
|
||||
resolved.skip_download = True
|
||||
if rule["skip_metadata"]:
|
||||
resolved.skip_metadata = True
|
||||
if rule["force_screenshot"]:
|
||||
resolved.force_screenshot = True
|
||||
if rule["extra_tags"]:
|
||||
for tag in rule["extra_tags"]:
|
||||
if tag not in resolved.extra_tags:
|
||||
resolved.extra_tags.append(tag)
|
||||
return resolved
|
||||
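# Illustrative sketch (not part of the original module): how resolve_url_policy()
# merges matching url_policies rules. The config is a made-up example; patterns
# are fnmatch-style globs tested against the host and against "host + path".
def _demo_url_policy() -> None:
    config = {
        "url_policies": [
            {"pattern": "*.example.com", "skip_download": True, "extra_tags": "site:example, demo"},
            {"pattern": "example.com/blog/*", "force_screenshot": True},
        ]
    }
    policy = resolve_url_policy(config, "https://www.example.com/blog/post-1")
    # Only the first rule matches here (the second expects the bare "example.com" host).
    print(policy.skip_download, policy.force_screenshot, policy.apply_tags(["title:Post 1"]))
    # -> True False ['site:example', 'demo', 'title:Post 1']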
@@ -1,102 +0,0 @@
|
||||
mime_maps = {
|
||||
"image": {
|
||||
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
|
||||
"png": { "ext": ".png", "mimes": ["image/png"] },
|
||||
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
|
||||
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
|
||||
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
|
||||
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
|
||||
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
|
||||
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
|
||||
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
|
||||
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
|
||||
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
|
||||
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
|
||||
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
|
||||
},
|
||||
"image_sequence": {
|
||||
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
|
||||
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
|
||||
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
|
||||
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
|
||||
},
|
||||
"video": {
|
||||
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
|
||||
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
|
||||
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
|
||||
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
|
||||
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
|
||||
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
|
||||
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
|
||||
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
|
||||
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
|
||||
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
|
||||
},
|
||||
"audio": {
|
||||
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
|
||||
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
|
||||
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
|
||||
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
|
||||
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
|
||||
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
|
||||
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
|
||||
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
|
||||
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
|
||||
},
|
||||
"document": {
|
||||
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
|
||||
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
|
||||
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
|
||||
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
|
||||
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
|
||||
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
|
||||
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
|
||||
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
|
||||
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
|
||||
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
|
||||
},
|
||||
"archive": {
|
||||
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
|
||||
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
|
||||
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
|
||||
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
|
||||
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
|
||||
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
|
||||
},
|
||||
"project": {
|
||||
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
|
||||
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
|
||||
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
|
||||
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
|
||||
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
|
||||
},
|
||||
"other": {
|
||||
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
|
||||
"json": { "ext": ".json", "mimes": ["application/json"] },
|
||||
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
|
||||
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def get_type_from_ext(ext: str) -> str:
|
||||
"""Determine the type (e.g., 'image', 'video', 'audio') from file extension.
|
||||
|
||||
Args:
|
||||
ext: File extension (with or without leading dot, e.g., 'jpg' or '.jpg')
|
||||
|
||||
Returns:
|
||||
Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown
|
||||
"""
|
||||
if not ext:
|
||||
return 'other'
|
||||
|
||||
# Normalize: remove leading dot and convert to lowercase
|
||||
ext_clean = ext.lstrip('.').lower()
|
||||
|
||||
# Search through mime_maps to find matching type
|
||||
for type_name, extensions_dict in mime_maps.items():
|
||||
if ext_clean in extensions_dict:
|
||||
return type_name
|
||||
|
||||
return 'other'
|
||||
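# Illustrative sketch (not part of the original module, which appears to be
# helper/utils_constant.py given the import in helper/utils.py): extension-to-type
# lookup against mime_maps above.
def _demo_type_lookup() -> None:
    for ext in (".jpg", "mkv", ".epub", ".xyz"):
        print(ext, get_type_from_ext(ext))  # -> image, video, document, other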
@@ -1,671 +0,0 @@
|
||||
"""Worker task management with persistent database storage.
|
||||
|
||||
Manages worker tasks for downloads, searches, imports, etc. with automatic
|
||||
persistence to database and optional auto-refresh callbacks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List, Callable
|
||||
from datetime import datetime
|
||||
from threading import Thread, Lock
|
||||
import time
|
||||
|
||||
from .folder_store import FolderDB
|
||||
from helper.logger import log
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Worker:
|
||||
"""Represents a single worker task with state management."""
|
||||
|
||||
def __init__(self, worker_id: str, worker_type: str, title: str = "",
|
||||
description: str = "", manager: Optional['WorkerManager'] = None):
|
||||
"""Initialize a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for this worker
|
||||
worker_type: Type of work (e.g., 'download', 'search', 'import')
|
||||
title: Human-readable title
|
||||
description: Detailed description
|
||||
manager: Reference to parent WorkerManager for state updates
|
||||
"""
|
||||
self.id = worker_id
|
||||
self.type = worker_type
|
||||
self.title = title or worker_type
|
||||
self.description = description
|
||||
self.manager = manager
|
||||
self.status = "running"
|
||||
self.progress = ""
|
||||
self.details = ""
|
||||
self.error_message = ""
|
||||
self.result = "pending"
|
||||
self._stdout_buffer = []
|
||||
self._steps_buffer = []
|
||||
|
||||
def log_step(self, step_text: str) -> None:
|
||||
"""Log a step for this worker.
|
||||
|
||||
Args:
|
||||
step_text: Text describing the step
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.log_step(self.id, step_text)
|
||||
else:
|
||||
logger.info(f"[{self.id}] {step_text}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error logging step for worker {self.id}: {e}")
|
||||
|
||||
def append_stdout(self, text: str) -> None:
|
||||
"""Append text to stdout log.
|
||||
|
||||
Args:
|
||||
text: Text to append
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.append_worker_stdout(self.id, text)
|
||||
else:
|
||||
self._stdout_buffer.append(text)
|
||||
except Exception as e:
|
||||
logger.error(f"Error appending stdout for worker {self.id}: {e}")
|
||||
|
||||
def get_stdout(self) -> str:
|
||||
"""Get all stdout for this worker.
|
||||
|
||||
Returns:
|
||||
Complete stdout text
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
return self.manager.get_stdout(self.id)
|
||||
else:
|
||||
return "\n".join(self._stdout_buffer)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting stdout for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def get_steps(self) -> str:
|
||||
"""Get all steps for this worker.
|
||||
|
||||
Returns:
|
||||
Complete steps text
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
return self.manager.get_steps(self.id)
|
||||
else:
|
||||
return "\n".join(self._steps_buffer)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting steps for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def update_progress(self, progress: str = "", details: str = "") -> None:
|
||||
"""Update worker progress.
|
||||
|
||||
Args:
|
||||
progress: Progress string (e.g., "50%")
|
||||
details: Additional details
|
||||
"""
|
||||
self.progress = progress
|
||||
self.details = details
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.update_worker(self.id, progress, details)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating worker {self.id}: {e}")
|
||||
|
||||
def finish(self, result: str = "completed", message: str = "") -> None:
|
||||
"""Mark worker as finished.
|
||||
|
||||
Args:
|
||||
result: Result status ('completed', 'error', 'cancelled')
|
||||
message: Result message/error details
|
||||
"""
|
||||
self.result = result
|
||||
self.status = "finished"
|
||||
self.error_message = message
|
||||
try:
|
||||
if self.manager:
|
||||
# Flush and disable logging handler before marking finished
|
||||
self.manager.disable_logging_for_worker(self.id)
|
||||
# Then mark as finished in database
|
||||
self.manager.finish_worker(self.id, result, message)
|
||||
except Exception as e:
|
||||
logger.error(f"Error finishing worker {self.id}: {e}")
|
||||
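# Illustrative sketch (not part of the original module): the Worker lifecycle
# without a WorkerManager attached. With manager=None, steps go to the module
# logger, stdout is kept in an in-memory buffer, and finish() only updates the
# local fields.
def _demo_worker_lifecycle() -> None:
    worker = Worker("demo-1", "download", title="Demo download")
    worker.log_step("resolving url")
    worker.append_stdout("fetching chunk 1/3")
    worker.update_progress("33%", details="chunk 1 of 3")
    worker.finish("completed", "done")
    print(worker.status, worker.result, worker.get_stdout())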
|
||||
|
||||
class WorkerLoggingHandler(logging.StreamHandler):
|
||||
"""Custom logging handler that captures logs for a worker."""
|
||||
|
||||
def __init__(self, worker_id: str, db: FolderDB,
|
||||
manager: Optional['WorkerManager'] = None,
|
||||
buffer_size: int = 50):
|
||||
"""Initialize the handler.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to capture logs for
|
||||
db: Reference to FolderDB for storing logs
manager: Optional reference to the parent WorkerManager
|
||||
buffer_size: Number of logs to buffer before flushing to DB
|
||||
"""
|
||||
super().__init__()
|
||||
self.worker_id = worker_id
|
||||
self.db = db
|
||||
self.manager = manager
|
||||
self.buffer_size = buffer_size
|
||||
self.buffer = []
|
||||
self._lock = Lock()
|
||||
|
||||
# Set a format that includes timestamp and level
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
self.setFormatter(formatter)
|
||||
|
||||
def emit(self, record):
|
||||
"""Emit a log record."""
|
||||
try:
|
||||
# Try to format the record normally
|
||||
try:
|
||||
msg = self.format(record)
|
||||
except (TypeError, ValueError):
|
||||
# If formatting fails (e.g., %d format with non-int arg),
|
||||
# build message manually without calling getMessage()
|
||||
try:
|
||||
# Try to format with args if possible
|
||||
if record.args:
|
||||
msg = record.msg % record.args
|
||||
else:
|
||||
msg = record.msg
|
||||
except (TypeError, ValueError):
|
||||
# If that fails too, just use the raw message string
|
||||
msg = str(record.msg)
|
||||
|
||||
# Add timestamp and level if not already in message
|
||||
import time
|
||||
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
|
||||
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
|
||||
|
||||
with self._lock:
|
||||
self.buffer.append(msg)
|
||||
|
||||
# Flush to DB when buffer reaches size
|
||||
if len(self.buffer) >= self.buffer_size:
|
||||
self._flush()
|
||||
except Exception:
|
||||
self.handleError(record)
|
||||
|
||||
def _flush(self):
|
||||
"""Flush buffered logs to database."""
|
||||
if self.buffer:
|
||||
log_text = '\n'.join(self.buffer)
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||
else:
|
||||
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||
except Exception as e:
|
||||
# If we can't write to DB, at least log it
|
||||
log(f"Error flushing worker logs: {e}")
|
||||
self.buffer = []
|
||||
|
||||
def flush(self):
|
||||
"""Flush any buffered records."""
|
||||
with self._lock:
|
||||
self._flush()
|
||||
super().flush()
|
||||
|
||||
def close(self):
|
||||
"""Close the handler."""
|
||||
self.flush()
|
||||
super().close()
|
||||
|
||||
|
||||
class WorkerManager:
|
||||
"""Manages persistent worker tasks with auto-refresh capability."""
|
||||
|
||||
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
|
||||
"""Initialize the worker manager.
|
||||
|
||||
Args:
|
||||
library_root: Root directory for the local library database
|
||||
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
|
||||
"""
|
||||
self.library_root = Path(library_root)
|
||||
self.db = FolderDB(library_root)
|
||||
self.auto_refresh_interval = auto_refresh_interval
|
||||
self.refresh_callbacks: List[Callable] = []
|
||||
self.refresh_thread: Optional[Thread] = None
|
||||
self._stop_refresh = False
|
||||
self._lock = Lock()
|
||||
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
|
||||
self._worker_last_step: Dict[str, str] = {}
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the database connection."""
|
||||
if self.db:
|
||||
try:
|
||||
self.db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit - close database."""
|
||||
self.close()
|
||||
|
||||
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
|
||||
"""Register a callback to be called on worker updates.
|
||||
|
||||
Args:
|
||||
callback: Function that receives list of active workers
|
||||
"""
|
||||
with self._lock:
|
||||
self.refresh_callbacks.append(callback)
|
||||
|
||||
def expire_running_workers(
|
||||
self,
|
||||
older_than_seconds: int = 300,
|
||||
worker_id_prefix: Optional[str] = None,
|
||||
reason: Optional[str] = None,
|
||||
status: str = "error",
|
||||
) -> int:
|
||||
"""Mark stale running workers as finished.
|
||||
|
||||
Args:
|
||||
older_than_seconds: Idle threshold before expiring.
|
||||
worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
|
||||
reason: Error message if none already exists.
|
||||
status: New status to apply.
|
||||
|
||||
Returns:
|
||||
Count of workers updated.
|
||||
"""
|
||||
try:
|
||||
return self.db.expire_running_workers(
|
||||
older_than_seconds=older_than_seconds,
|
||||
status=status,
|
||||
reason=reason,
|
||||
worker_id_prefix=worker_id_prefix,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
|
||||
return 0
|
||||
|
||||
def remove_refresh_callback(self, callback: Callable) -> None:
|
||||
"""Remove a refresh callback.
|
||||
|
||||
Args:
|
||||
callback: The callback function to remove
|
||||
"""
|
||||
with self._lock:
|
||||
if callback in self.refresh_callbacks:
|
||||
self.refresh_callbacks.remove(callback)
|
||||
|
||||
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
|
||||
"""Enable logging capture for a worker.
|
||||
|
||||
Creates a logging handler that captures all logs for this worker.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to capture logs for
|
||||
|
||||
Returns:
|
||||
The logging handler that was created, or None if there was an error
|
||||
"""
|
||||
try:
|
||||
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
|
||||
with self._lock:
|
||||
self.worker_handlers[worker_id] = handler
|
||||
|
||||
# Add the handler to the root logger so it captures all logs
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.addHandler(handler)
|
||||
root_logger.setLevel(logging.DEBUG) # Capture all levels
|
||||
|
||||
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
|
||||
return handler
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
def disable_logging_for_worker(self, worker_id: str) -> None:
|
||||
"""Disable logging capture for a worker and flush any pending logs.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to stop capturing logs for
|
||||
"""
|
||||
try:
|
||||
with self._lock:
|
||||
handler = self.worker_handlers.pop(worker_id, None)
|
||||
|
||||
if handler:
|
||||
# Flush and close the handler
|
||||
handler.flush()
|
||||
handler.close()
|
||||
|
||||
# Remove from root logger
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.removeHandler(handler)
|
||||
|
||||
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
|
||||
|
||||
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
|
||||
description: str = "", total_steps: int = 0,
|
||||
pipe: Optional[str] = None) -> bool:
|
||||
"""Start tracking a new worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
worker_type: Type of worker (e.g., 'download', 'search', 'import')
|
||||
title: Worker title/name
|
||||
description: Worker description
|
||||
total_steps: Total number of steps for progress tracking
|
||||
pipe: Text of the originating pipe/prompt, if any
|
||||
|
||||
Returns:
|
||||
True if worker was inserted successfully
|
||||
"""
|
||||
try:
|
||||
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
|
||||
if result > 0:
|
||||
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
|
||||
self._start_refresh_if_needed()
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
|
||||
details: str = "", error: str = "") -> bool:
|
||||
"""Update worker progress and status.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
progress: Progress percentage (0-100)
|
||||
current_step: Current step description
|
||||
details: Additional details
|
||||
error: Error message if any
|
||||
|
||||
Returns:
|
||||
True if update was successful
|
||||
"""
|
||||
try:
|
||||
kwargs = {}
|
||||
if progress > 0:
|
||||
kwargs['progress'] = progress
|
||||
if current_step:
|
||||
kwargs['current_step'] = current_step
|
||||
if details:
|
||||
kwargs['description'] = details
|
||||
if error:
|
||||
kwargs['error_message'] = error
|
||||
|
||||
if kwargs:
|
||||
kwargs['last_updated'] = datetime.now().isoformat()
|
||||
if 'current_step' in kwargs and kwargs['current_step']:
|
||||
self._worker_last_step[worker_id] = str(kwargs['current_step'])
|
||||
return self.db.update_worker(worker_id, **kwargs)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def finish_worker(self, worker_id: str, result: str = "completed",
|
||||
error_msg: str = "", result_data: str = "") -> bool:
|
||||
"""Mark a worker as finished.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
result: Result status ('completed', 'error', 'cancelled')
|
||||
error_msg: Error message if any
|
||||
result_data: Result data as JSON string
|
||||
|
||||
Returns:
|
||||
True if update was successful
|
||||
"""
|
||||
try:
|
||||
kwargs = {
|
||||
'status': result,
|
||||
'completed_at': datetime.now().isoformat()
|
||||
}
|
||||
if error_msg:
|
||||
kwargs['error_message'] = error_msg
|
||||
if result_data:
|
||||
kwargs['result_data'] = result_data
|
||||
|
||||
success = self.db.update_worker(worker_id, **kwargs)
|
||||
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
|
||||
self._worker_last_step.pop(worker_id, None)
|
||||
return success
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def get_active_workers(self) -> List[Dict[str, Any]]:
|
||||
"""Get all active (running) workers.
|
||||
|
||||
Returns:
|
||||
List of active worker dictionaries
|
||||
"""
|
||||
try:
|
||||
return self.db.get_active_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
|
||||
return []
|
||||
|
||||
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||||
"""Get all finished workers (completed, errored, or cancelled).
|
||||
|
||||
Args:
|
||||
limit: Maximum number of workers to retrieve
|
||||
|
||||
Returns:
|
||||
List of finished worker dictionaries
|
||||
"""
|
||||
try:
|
||||
all_workers = self.db.get_all_workers(limit=limit)
|
||||
# Filter to only finished workers
|
||||
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
|
||||
return finished
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
|
||||
return []
|
||||
|
||||
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a specific worker's data.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
Worker data or None if not found
|
||||
"""
|
||||
try:
|
||||
return self.db.get_worker(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
|
||||
"""Fetch recorded worker timeline events."""
|
||||
return self.db.get_worker_events(worker_id, limit)
|
||||
|
||||
def log_step(self, worker_id: str, step_text: str) -> bool:
|
||||
"""Log a step to a worker's step history.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
step_text: Step description to log
|
||||
|
||||
Returns:
|
||||
True if successful
|
||||
"""
|
||||
try:
|
||||
success = self.db.append_worker_steps(worker_id, step_text)
|
||||
if success:
|
||||
self._worker_last_step[worker_id] = step_text
|
||||
return success
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _get_last_step(self, worker_id: str) -> Optional[str]:
|
||||
"""Return the most recent step description for a worker."""
|
||||
return self._worker_last_step.get(worker_id)
|
||||
|
||||
def get_steps(self, worker_id: str) -> str:
|
||||
"""Get step logs for a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
Steps text or empty string if not found
|
||||
"""
|
||||
try:
|
||||
return self.db.get_worker_steps(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
|
||||
return ''
|
||||
|
||||
def start_auto_refresh(self) -> None:
|
||||
"""Start the auto-refresh thread for periodic worker updates."""
|
||||
if self.auto_refresh_interval <= 0:
|
||||
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
|
||||
return
|
||||
|
||||
if self.refresh_thread and self.refresh_thread.is_alive():
|
||||
logger.debug("[WorkerManager] Auto-refresh already running")
|
||||
return
|
||||
|
||||
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
|
||||
self._stop_refresh = False
|
||||
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
|
||||
self.refresh_thread.start()
|
||||
|
||||
def stop_auto_refresh(self) -> None:
|
||||
"""Stop the auto-refresh thread."""
|
||||
logger.info("[WorkerManager] Stopping auto-refresh")
|
||||
self._stop_refresh = True
|
||||
if self.refresh_thread:
|
||||
self.refresh_thread.join(timeout=5)
|
||||
self.refresh_thread = None
|
||||
|
||||
def _start_refresh_if_needed(self) -> None:
|
||||
"""Start auto-refresh if we have active workers and callbacks."""
|
||||
active = self.get_active_workers()
|
||||
if active and self.refresh_callbacks and not self._stop_refresh:
|
||||
self.start_auto_refresh()
|
||||
|
||||
def _auto_refresh_loop(self) -> None:
|
||||
"""Main auto-refresh loop that periodically queries and notifies."""
|
||||
try:
|
||||
while not self._stop_refresh:
|
||||
time.sleep(self.auto_refresh_interval)
|
||||
|
||||
# Check if there are active workers
|
||||
active = self.get_active_workers()
|
||||
|
||||
if not active:
|
||||
# No more active workers, stop refreshing
|
||||
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
|
||||
break
|
||||
|
||||
# Call all registered callbacks with the active workers
|
||||
with self._lock:
|
||||
for callback in self.refresh_callbacks:
|
||||
try:
|
||||
callback(active)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
|
||||
finally:
|
||||
logger.debug("[WorkerManager] Auto-refresh loop ended")
|
||||
|
||||
def cleanup_old_workers(self, days: int = 7) -> int:
|
||||
"""Clean up completed/errored workers older than specified days.
|
||||
|
||||
Args:
|
||||
days: Delete workers completed more than this many days ago
|
||||
|
||||
Returns:
|
||||
Number of workers deleted
|
||||
"""
|
||||
try:
|
||||
count = self.db.cleanup_old_workers(days)
|
||||
if count > 0:
|
||||
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
|
||||
return 0
|
||||
|
||||
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||||
"""Append text to a worker's stdout log.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
text: Text to append
|
||||
channel: Logical channel (stdout, stderr, log, etc.)
|
||||
|
||||
Returns:
|
||||
True if append was successful
|
||||
"""
|
||||
try:
|
||||
step_label = self._get_last_step(worker_id)
|
||||
return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def get_stdout(self, worker_id: str) -> str:
|
||||
"""Get stdout logs for a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
Worker's stdout or empty string
|
||||
"""
|
||||
try:
|
||||
return self.db.get_worker_stdout(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
|
||||
return ""
|
||||
|
||||
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||||
"""Compatibility wrapper for append_stdout."""
|
||||
return self.append_stdout(worker_id, text, channel=channel)
|
||||
|
||||
def clear_stdout(self, worker_id: str) -> bool:
|
||||
"""Clear stdout logs for a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
True if clear was successful
|
||||
"""
|
||||
try:
|
||||
return self.db.clear_worker_stdout(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the worker manager and database connection."""
|
||||
self.stop_auto_refresh()
|
||||
self.db.close()
|
||||
logger.info("[WorkerManager] Closed")
|
||||
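# Illustrative sketch (not part of the original module): an end-to-end
# WorkerManager round trip. The library_root argument is a placeholder directory;
# FolderDB opens or creates its database underneath it.
def _demo_worker_manager(library_root: Path) -> None:
    manager = WorkerManager(library_root, auto_refresh_interval=1.0)
    try:
        manager.add_refresh_callback(lambda workers: print(f"{len(workers)} active worker(s)"))
        if manager.track_worker("demo-dl-1", "download", title="Demo download"):
            manager.enable_logging_for_worker("demo-dl-1")   # capture root-logger output for this worker
            manager.log_step("demo-dl-1", "starting transfer")
            manager.update_worker("demo-dl-1", progress=50.0, current_step="halfway")
            manager.append_stdout("demo-dl-1", "50% complete")
            manager.disable_logging_for_worker("demo-dl-1")  # flush buffered logs before finishing
            manager.finish_worker("demo-dl-1", "completed")
        print(manager.get_finished_workers(limit=5))
    finally:
        manager.close()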