AST

2025-11-25 20:09:33 -08:00
parent d75c644a82
commit bd69119996
80 changed files with 39615 additions and 0 deletions
--- a/helper/unified_book_downloader.py
+++ b/helper/unified_book_downloader.py
@@ -0,0 +1,706 @@
+"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
+
+This module provides a single interface for downloading books from multiple sources:
+1. Try Archive.org direct download (if available)
+2. Try Archive.org borrowing (if user has credentials)
+3. Fallback to Libgen search by ISBN
+4. Attempt Libgen download
+
+All sources integrated with proper metadata scraping and error handling.
+"""
+
+import logging
+import asyncio
+import requests
+from typing import Optional, Dict, Any, Tuple, List, Callable, cast
+from pathlib import Path
+
+from helper.logger import debug
+
+logger = logging.getLogger(__name__)
+
+
+class UnifiedBookDownloader:
+    """Unified interface for downloading books from multiple sources."""
+    
+    def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
+        """Set up configuration, an HTTP session and the per-source helpers.
+
+        Args:
+            config: Settings dict (credentials etc.); an empty dict is used
+                when it is missing or falsy.
+            output_dir: Default directory for downloaded files (may be None).
+        """
+        self.session = requests.Session()
+        self.output_dir = output_dir
+        self.config = config if config else {}
+
+        # Bind the Archive.org / Libgen helper callables onto this instance.
+        self._init_downloaders()
+    
+    def _init_downloaders(self) -> None:
+        """Bind the Archive.org and Libgen helper callables onto the instance.
+
+        The two sources are loaded independently. When loading one of them
+        raises, a warning is logged and the attributes belonging to that
+        source are set to None so callers can feature-test them.
+        """
+        # --- Archive.org helpers ---
+        try:
+            from helper.archive_client import (
+                check_direct_download,
+                get_openlibrary_by_isbn,
+                loan
+            )
+        except Exception as exc:
+            logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {exc}")
+            self.check_direct_download = None
+            self.get_openlibrary_by_isbn = None
+            self.loan_func = None
+        else:
+            self.check_direct_download = check_direct_download
+            self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
+            self.loan_func = loan
+            logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
+
+        # --- Libgen helpers ---
+        try:
+            from helper.libgen_service import (
+                DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
+                download_from_mirror as _libgen_download,
+                search_libgen as _libgen_search,
+            )
+        except Exception as exc:
+            logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {exc}")
+            self.search_libgen = None
+            self.download_from_mirror = None
+            return
+
+        # Both Libgen helpers report through these two prefixed log sinks.
+        def _emit_info(message: str) -> None:
+            debug(f"[UnifiedBookDownloader] {message}")
+
+        def _emit_error(message: str) -> None:
+            logger.error(f"[UnifiedBookDownloader] {message}")
+
+        def _search(query, limit=_LIBGEN_DEFAULT_LIMIT):
+            return _libgen_search(
+                query,
+                limit=limit,
+                log_info=_emit_info,
+                log_error=_emit_error,
+            )
+
+        def _download(mirror_url, output_path):
+            return _libgen_download(
+                mirror_url,
+                output_path,
+                log_info=_emit_info,
+                log_error=_emit_error,
+            )
+
+        self.search_libgen = _search
+        self.download_from_mirror = _download
+        logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
+    
+    def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Collect every download method currently available for a book.
+
+        Methods are probed in priority order:
+        1. Archive.org direct download (no authentication)
+        2. Archive.org borrowing (only when credentials are configured and the
+           edition is reported as lendable)
+        3. Libgen search (needs an ISBN or a title)
+
+        Args:
+            book_data: Book metadata. Recognised keys are 'title', 'author',
+                'isbn' and 'openlibrary_id'; all are optional. A missing, None
+                or non-string 'openlibrary_id', or one without a numeric part,
+                simply disables the Archive.org checks.
+
+        Returns:
+            Dict with 'book_title', 'book_author', 'isbn', 'openlibrary_id',
+            'metadata' (left empty here) and 'methods', a list of method dicts
+            sorted by ascending 'priority' (1 is tried first).
+        """
+        options = {
+            'book_title': book_data.get('title', 'Unknown'),
+            'book_author': book_data.get('author', 'Unknown'),
+            'isbn': book_data.get('isbn', ''),
+            'openlibrary_id': book_data.get('openlibrary_id', ''),
+            'methods': [],  # Filled below, then sorted by priority
+            'metadata': {}
+        }
+
+        # The key may be present with a None (or otherwise non-string) value;
+        # that must not crash the lookup, it just rules out Archive.org.
+        ol_id = book_data.get('openlibrary_id')
+        if not isinstance(ol_id, str):
+            ol_id = ''
+
+        # The digits after the 'OL' prefix are what gets passed to the
+        # Archive.org helpers (e.g. OL8513721M -> 8513721, OL8513721W -> 8513721).
+        book_id = ''
+        if ol_id.startswith('OL'):
+            book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
+
+        if book_id:
+            # PRIORITY 1: Check direct download (fastest, no auth needed)
+            if self.check_direct_download:
+                try:
+                    can_download, pdf_url = self.check_direct_download(book_id)
+                    if can_download:
+                        options['methods'].append({
+                            'type': 'archive.org_direct',
+                            'label': 'Archive.org Direct Download',
+                            'requires_auth': False,
+                            'pdf_url': pdf_url,
+                            'book_id': book_id,
+                            'priority': 1  # Highest priority
+                        })
+                        logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
+                except Exception as e:
+                    # Best effort: a failed probe only removes this option.
+                    logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
+
+            # PRIORITY 2: Check borrowing option (requires auth)
+            # First verify the book is actually lendable via OpenLibrary API
+            if self._has_archive_credentials():
+                is_lendable, status = self._check_book_lendable_status(ol_id)
+
+                if is_lendable:
+                    options['methods'].append({
+                        'type': 'archive.org_borrow',
+                        'label': 'Archive.org Borrow',
+                        'requires_auth': True,
+                        'book_id': book_id,
+                        'priority': 2  # Second priority
+                    })
+                    logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
+                else:
+                    logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
+
+        # PRIORITY 3: Check Libgen fallback (no auth needed)
+        isbn = book_data.get('isbn', '')
+        title = book_data.get('title', '')
+        author = book_data.get('author', '')
+
+        # Libgen can be searched with an ISBN OR a title (or both)
+        if self.search_libgen and (isbn or title):
+            options['methods'].append({
+                'type': 'libgen',
+                'label': 'Libgen Search & Download',
+                'requires_auth': False,
+                'isbn': isbn,
+                'title': title,
+                'author': author,
+                'priority': 3  # Third priority (fallback)
+            })
+            logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
+
+        # Lowest priority number first, i.e. the preferred method leads the list
+        options['methods'].sort(key=lambda x: x.get('priority', 999))
+
+        return options
+    
+    def _has_archive_credentials(self) -> bool:
+        """Report whether both an Archive.org email and password are configured.
+
+        Any failure while importing or calling the credential helper is
+        treated as "no credentials".
+        """
+        try:
+            from helper.archive_client import credential_openlibrary
+            email, password = credential_openlibrary(self.config)
+            if not email:
+                return False
+            return bool(password)
+        except Exception:
+            return False
+    
+    def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
+        """Check whether an OpenLibrary edition can be borrowed.
+
+        Queries https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id} and
+        inspects the status of the first entry in the record's 'items' list.
+        Only Edition IDs (OL...M) are queried; Work IDs (OL...W) and any other
+        suffix are rejected without a network call.
+
+        Args:
+            ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
+
+        Returns:
+            Tuple of (is_lendable, reason). reason is "LENDABLE" on success.
+            Otherwise it is the status string reported for the first item
+            ("NOT_LENDABLE" when none is reported), or a short description of
+            why the lookup could not be performed. Request and parsing errors
+            never propagate; they yield (False, "API timeout") or
+            (False, "API error").
+        """
+        try:
+            if not ol_id.startswith('OL'):
+                return False, "Invalid OpenLibrary ID format"
+
+            # If this is a Work ID (ends with W), we can't query Volumes API
+            # Work IDs are abstract umbrella records, not specific editions
+            if ol_id.endswith('W'):
+                logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
+                return False, "Work ID not supported by Volumes API (not a specific edition)"
+
+            # Anything that is not an Edition ID (suffix M) is rejected as well
+            if not ol_id.endswith('M'):
+                logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
+                return False, "Invalid OpenLibrary ID type"
+
+            url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
+            response = self.session.get(url, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+
+            # Empty response means no records found
+            if not data:
+                logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
+                return False, "No availability data found"
+
+            # The response is wrapped in OLID key
+            olid_key = f"OLID:{ol_id}"
+            if olid_key not in data:
+                logger.debug("[UnifiedBookDownloader] OLID key not found in response")
+                return False, "No availability data found"
+
+            olid_data = data[olid_key]
+            items = olid_data['items'] if 'items' in olid_data else None
+            if not items:
+                # No items array or empty
+                logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
+                return False, "Not available for lending"
+
+            # Only the first item decides the lending status
+            first_item = items[0]
+            item_is_dict = isinstance(first_item, dict)
+
+            # Items may arrive as dicts or as an already-stringified value
+            if item_is_dict:
+                status = str(first_item.get('status', '') or '')
+            else:
+                status = str(first_item)
+
+            if 'lendable' in status.lower():
+                logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
+                return True, "LENDABLE"
+
+            # Report the status the API actually gave for dict items; the
+            # stringified form has no dedicated status field to surface.
+            status_str = status if (item_is_dict and status) else 'NOT_LENDABLE'
+            logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
+            return False, status_str
+
+        except requests.exceptions.Timeout:
+            logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
+            return False, "API timeout"
+        except Exception as e:
+            # Deliberately best-effort: any other failure means "not lendable"
+            logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
+            return False, "API error"
+    
+    
+    async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
+        """Run the download handler that matches ``method['type']``.
+
+        Args:
+            method: Download method dict from get_download_options()
+            output_dir: Target directory; falls back to the instance default
+                and then to ``~/Downloads``.
+
+        Returns:
+            Tuple of (success: bool, message: str)
+        """
+        target_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
+        method_type = method.get('type', '')
+
+        logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
+
+        # (type name, handler) pairs, checked in order by equality
+        routes = (
+            ('archive.org_direct', self._download_archive_direct),
+            ('archive.org_borrow', self._download_archive_borrow),
+            ('libgen', self._download_libgen),
+        )
+
+        try:
+            for type_name, handler in routes:
+                if method_type == type_name:
+                    return await handler(method, target_dir)
+            return False, f"Unknown download method: {method_type}"
+
+        except Exception as e:
+            logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
+            return False, f"Download failed: {str(e)}"
+    
+    async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
+        """Fetch the PDF referenced by ``method['pdf_url']`` into ``output_dir``.
+
+        The file is saved as ``<book_id>.pdf``. Returns (success, message).
+        """
+        try:
+            pdf_url = method.get('pdf_url', '')
+            if not pdf_url:
+                return False, "No PDF URL available"
+
+            # The output file is named after the book id
+            destination = Path(output_dir) / f"{method.get('book_id', '')}.pdf"
+
+            logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
+
+            # The blocking transfer runs on the default executor
+            downloaded = await asyncio.get_event_loop().run_in_executor(
+                None,
+                self._download_file,
+                pdf_url,
+                str(destination)
+            )
+
+            if not downloaded:
+                return False, "Failed to download PDF"
+
+            logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {destination}")
+            return True, f"Downloaded to: {destination}"
+
+        except Exception as e:
+            logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
+            return False, f"Archive download failed: {str(e)}"
+    
+    async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
+        """Borrow a book from Archive.org and download it (needs credentials).
+
+        Credentials are read from the config via credential_openlibrary().
+        The blocking work (login, loan, page download, PDF assembly) is done
+        by _archive_borrow_and_download() on the default executor.
+
+        Returns:
+            (True, path) on success, otherwise (False, reason).
+        """
+        try:
+            from helper.archive_client import credential_openlibrary
+
+            book_id = method.get('book_id', '')
+
+            email, password = credential_openlibrary(self.config)
+            if not (email and password):
+                return False, "Archive.org credentials not configured"
+
+            logger.info("[UnifiedBookDownloader] Logging into Archive.org...")
+
+            # Keep the event loop free while the borrow runs in a worker thread
+            outcome = await asyncio.get_event_loop().run_in_executor(
+                None,
+                self._archive_borrow_and_download,
+                email,
+                password,
+                book_id,
+                output_dir
+            )
+
+            if not (outcome and isinstance(outcome, tuple)):
+                return False, "Failed to borrow book from Archive.org"
+
+            success, filepath = outcome
+            if not success:
+                logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
+                return False, filepath
+
+            logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
+            return True, filepath
+
+        except Exception as e:
+            logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
+            return False, f"Archive borrow failed: {str(e)}"
+    
+    async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
+        """Download a book from Libgen, trying each candidate mirror in turn.
+
+        Args:
+            method: Method dict with 'isbn' and/or 'title'. An optional
+                'results' list of already-fetched Libgen result dicts is used
+                instead of running a new search; in that case no ISBN/title
+                is required.
+            output_dir: Directory the file is written to (created if missing).
+
+        Returns:
+            (True, path of the downloaded file) on success, otherwise
+            (False, human-readable reason).
+        """
+        try:
+            isbn = method.get('isbn', '')
+            title = method.get('title', '')
+            query = isbn or title
+
+            preloaded_results = method.get('results')
+            results = list(preloaded_results) if preloaded_results else []
+
+            # A search term is only needed when nothing was preloaded
+            if not results and not query:
+                return False, "Need ISBN or title for Libgen search"
+
+            search_func = self.search_libgen
+            if not search_func:
+                return False, "Libgen searcher not available"
+
+            loop = asyncio.get_event_loop()
+
+            if not results:
+                results = await loop.run_in_executor(None, lambda: search_func(query, 10))
+
+            if not results:
+                logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {query}")
+                return False, f"No Libgen results found for: {query}"
+
+            logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
+
+            download_func = self.download_from_mirror
+            if download_func is None:
+                return False, "Download function not available"
+            download_callable = cast(Callable[[str, str], bool], download_func)
+
+            # Name the file after the first result; fall back to 'book' when
+            # the title is missing or contains no usable characters.
+            first_result = results[0]
+            raw_title = first_result.get('title', 'book') if isinstance(first_result, dict) else ''
+            filename = "".join(c for c in str(raw_title or '') if c.isalnum() or c in (' ', '.', '-'))[:100]
+            if not filename.strip(' .'):
+                filename = 'book'
+
+            Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+            # Try each result's mirror until one succeeds
+            for idx, result in enumerate(results, 1):
+                mirror_url = result.get('mirror_url', '') if isinstance(result, dict) else ''
+
+                if not mirror_url:
+                    logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
+                    continue
+
+                # Use extension from this result if available
+                extension = result.get('extension', 'pdf')
+                if extension and not extension.startswith('.'):
+                    extension = f".{extension}"
+                elif not extension:
+                    extension = '.pdf'
+
+                output_path = Path(output_dir) / (filename + extension)
+
+                logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
+
+                # Bind the current values so the worker never sees a later iteration's
+                def download_wrapper(url=mirror_url, destination=str(output_path)):
+                    return download_callable(url, destination)
+
+                # Download (in thread)
+                try:
+                    success = await loop.run_in_executor(None, download_wrapper)
+                except Exception as e:
+                    logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
+                    continue
+
+                if not success:
+                    logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
+                    continue
+
+                # Validate downloaded file is not HTML (common Libgen issue)
+                is_html = False
+                if output_path.exists():
+                    try:
+                        with open(output_path, 'rb') as f:
+                            file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
+                        is_html = '<!doctype' in file_start or '<html' in file_start
+                    except Exception as e:
+                        # Unreadable file: keep the original best-effort behaviour
+                        logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
+
+                if is_html:
+                    logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
+                    # Delete only after the handle is closed: removing an open
+                    # file fails on Windows, which used to turn this HTML page
+                    # into a reported success.
+                    try:
+                        output_path.unlink()
+                    except OSError as e:
+                        logger.debug(f"[UnifiedBookDownloader] Could not remove HTML file {output_path}: {e}")
+                    continue
+
+                logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
+                return True, str(output_path)
+
+            return False, f"All {len(results)} mirrors failed"
+
+        except Exception as e:
+            logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
+            return False, f"Libgen download failed: {str(e)}"
+
+    async def download_libgen_selection(
+        self,
+        selected: Dict[str, Any],
+        remaining: Optional[List[Dict[str, Any]]] = None,
+        output_dir: Optional[str] = None,
+    ) -> Tuple[bool, str]:
+        """Download one chosen Libgen result, using the others as fallbacks.
+
+        ``selected`` is tried first; dict entries of ``remaining`` (except the
+        selected object itself) follow in their given order. The work is
+        delegated to download_book() with a 'libgen' method dict.
+        """
+        if not isinstance(selected, dict):
+            return False, "Selected result must be a dictionary"
+
+        fallbacks = [
+            candidate
+            for candidate in (remaining or [])
+            if isinstance(candidate, dict) and candidate is not selected
+        ]
+        ordered_results: List[Dict[str, Any]] = [selected, *fallbacks]
+
+        # None values are normalised to empty strings
+        isbn, title, author = (selected.get(key, '') or '' for key in ('isbn', 'title', 'author'))
+        method: Dict[str, Any] = {
+            'type': 'libgen',
+            'isbn': isbn,
+            'title': title,
+            'author': author,
+            'results': ordered_results,
+        }
+
+        return await self.download_book(method, output_dir)
+
+    def download_libgen_selection_sync(
+        self,
+        selected: Dict[str, Any],
+        remaining: Optional[List[Dict[str, Any]]] = None,
+        output_dir: Optional[str] = None,
+    ) -> Tuple[bool, str]:
+        """Blocking wrapper around download_libgen_selection().
+
+        Runs the coroutine to completion on a private event loop that is
+        closed afterwards. The calling thread's current event loop is left
+        untouched, so code that later calls asyncio.get_event_loop() in the
+        same thread keeps working.
+
+        Must not be called from a thread that is already running an event
+        loop; run_until_complete() raises RuntimeError in that case.
+
+        Returns:
+            Tuple of (success: bool, message or path: str)
+        """
+        # Not installed via asyncio.set_event_loop(): coroutines obtain the
+        # running loop on their own, and installing/clearing it here would
+        # clobber whatever loop the caller had set for this thread.
+        loop = asyncio.new_event_loop()
+        try:
+            return loop.run_until_complete(
+                self.download_libgen_selection(selected, remaining, output_dir)
+            )
+        finally:
+            loop.close()
+    
+    def _download_file(self, url: str, output_path: str) -> bool:
+        """Stream ``url`` to ``output_path``.
+
+        The parent directory is created when missing, the HTTP response is
+        always released, and a partially written file is removed on failure.
+
+        Args:
+            url: Address to fetch (30 second timeout).
+            output_path: Destination file path; an existing file is overwritten.
+
+        Returns:
+            True when the whole body was written, False on any error (the
+            error is logged, never raised).
+        """
+        opened_output = False
+        try:
+            target = Path(output_path)
+            target.parent.mkdir(parents=True, exist_ok=True)
+
+            # The context manager closes the streamed response even when
+            # writing fails, so the connection is not leaked.
+            with requests.get(url, stream=True, timeout=30) as response:
+                response.raise_for_status()
+
+                with open(target, 'wb') as f:
+                    opened_output = True
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+
+            return True
+        except Exception as e:
+            logger.error(f"[UnifiedBookDownloader] File download error: {e}")
+            # Only remove what this call created or truncated; a failure before
+            # the file was opened must not delete a pre-existing file.
+            if opened_output:
+                try:
+                    Path(output_path).unlink()
+                except OSError:
+                    pass
+            return False
+    
+    def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
+        """Borrow a book from Archive.org and save its pages locally.
+
+        Blocking; the visible caller runs it on an executor thread. Steps:
+        1. login() with the given credentials
+        2. loan() to borrow the book
+        3. get_book_infos() on the borrow page, then the details page, to
+           obtain the title, page links and metadata
+        4. download() all pages into a temporary directory inside output_dir
+        5. Merge the page images into one PDF with img2pdf; when img2pdf is
+           not installed, keep the images as a directory instead
+
+        Args:
+            email: Archive.org account email (also written to the info log).
+            password: Archive.org account password.
+            book_id: Identifier passed to loan()/download() and used to build
+                the archive.org URLs.
+            output_dir: Directory receiving the temporary page directory and
+                the final PDF / image directory.
+                NOTE(review): it is not created here - presumably it must
+                already exist; confirm against callers.
+
+        Returns:
+            Tuple of (success: bool, filepath/message: str). On success the
+            string is the PDF path (or the image directory path); on failure
+            it is a human-readable reason. Exceptions, including SystemExit,
+            are caught and reported through the tuple.
+        """
+        try:
+            from helper.archive_client import login, loan, get_book_infos, download
+            import tempfile
+            import shutil
+
+            logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
+            session = login(email, password)
+
+            logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
+            # loan() hands back the session that is used for all later requests
+            session = loan(session, book_id, verbose=True)
+
+            # If we get here, loan() neither raised nor exited
+            logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
+
+            # Now get the book info (page URLs and metadata)
+            logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
+            # Try both URL formats: with /borrow and without
+            book_urls = [
+                f"https://archive.org/borrow/{book_id}",  # Try borrow page first (for borrowed books)
+                f"https://archive.org/details/{book_id}"   # Fallback to details page
+            ]
+
+            # Filled by the first URL that yields book info; links staying None
+            # is the "nothing worked" signal checked after the loop.
+            title = None
+            links = None
+            metadata = None
+            last_error = None
+
+            for book_url in book_urls:
+                try:
+                    logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
+                    # Pre-flight request: skip URLs that do not answer 200
+                    response = session.get(book_url, timeout=10)
+
+                    # Log response status
+                    if response.status_code != 200:
+                        logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
+                        # Continue to try next URL
+                        continue
+
+                    # Try to parse the response
+                    title, links, metadata = get_book_infos(session, book_url)
+                    logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
+                    logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
+                    break
+                except Exception as e:
+                    # Remember the failure for the final error log, then try the next URL
+                    logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
+                    last_error = e
+                    continue
+
+            if links is None:
+                logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}")
+                # Borrow extraction failed - return False
+                return False, "Could not extract borrowed book pages"
+
+            # Create temporary directory for images
+            # NOTE(review): title is used verbatim in the directory prefix and
+            # in the output names below - presumably get_book_infos() returns a
+            # filesystem-safe title; verify.
+            temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
+            logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
+
+            try:
+                # Download all pages (n_threads=10 is passed to the helper)
+                images = download(
+                    session=session,
+                    n_threads=10,
+                    directory=temp_dir,
+                    links=links,
+                    scale=3,  # Resolution setting passed through to download()
+                    book_id=book_id
+                )
+
+                logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
+
+                # Try to merge pages into PDF
+                try:
+                    import img2pdf
+                    logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
+
+                    # Prepare PDF metadata
+                    pdfmeta = {}
+                    if metadata:
+                        if "title" in metadata:
+                            pdfmeta["title"] = metadata["title"]
+                        if "creator" in metadata:
+                            pdfmeta["author"] = metadata["creator"]
+                    pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
+                    pdfmeta["creationdate"] = None  # Avoid timezone issues
+
+                    # Convert images to PDF (skipped when no pages were downloaded)
+                    pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
+                    if not pdf_content:
+                        logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
+                        return False, "Failed to convert pages to PDF"
+
+                    # Save the PDF
+                    pdf_filename = f"{title}.pdf" if title else "book.pdf"
+                    pdf_path = Path(output_dir) / pdf_filename
+
+                    # Handle duplicate filenames by appending (1), (2), ...
+                    i = 1
+                    while pdf_path.exists():
+                        pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
+                        i += 1
+
+                    with open(pdf_path, 'wb') as f:
+                        f.write(pdf_content)
+
+                    logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
+
+                    return True, str(pdf_path)
+
+                except ImportError:
+                    logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
+
+                    # Create JPG collection directory
+                    if not title:
+                        title = f"book_{book_id}"
+                    jpg_dir = Path(output_dir) / title
+                    i = 1
+                    while jpg_dir.exists():
+                        jpg_dir = Path(output_dir) / f"{title}({i})"
+                        i += 1
+
+                    # Move temporary directory to final location
+                    shutil.move(temp_dir, str(jpg_dir))
+                    temp_dir = None  # Mark as already moved so the finally block skips it
+
+                    logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
+                    return True, str(jpg_dir)
+
+            finally:
+                # Clean up temporary directory if it still exists
+                if temp_dir and Path(temp_dir).exists():
+                    shutil.rmtree(temp_dir)
+
+        except SystemExit:
+            # Per the original author's note, loan() calls sys.exit on failure;
+            # catching it keeps a failed borrow from terminating the process.
+            logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
+            return False, "Book could not be borrowed (may not be available for borrowing)"
+        except Exception as e:
+            logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
+            return False, f"Borrow failed: {str(e)}"
+    
+    def close(self) -> None:
+        """Release the HTTP session owned by this downloader."""
+        session = self.session
+        session.close()