This commit is contained in:
nose
2025-12-11 12:47:30 -08:00
parent 6b05dc5552
commit 65d12411a2
92 changed files with 17447 additions and 14308 deletions

View File

@@ -555,7 +555,7 @@ class UnifiedBookDownloader:
This follows the exact process from archive_client.py:
1. Login with credentials
2. Call loan() to create 14-day borrow
3. Get book info (extract page URLs)
3. Get book info (extract page URLs)
4. Download all pages as images
5. Merge images into searchable PDF
@@ -576,10 +576,10 @@ class UnifiedBookDownloader:
# If we get here, borrowing succeeded
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
# Now get the book info (page URLs and metadata)
# Now get the book info (page URLs and metadata)
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
# Try both URL formats: with /borrow and without
book_urls = [
book_url = [
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
f"https://archive.org/details/{book_id}" # Fallback to details page
]
@@ -589,7 +589,7 @@ class UnifiedBookDownloader:
metadata = None
last_error = None
for book_url in book_urls:
for book_url in book_url:
try:
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
response = session.get(book_url, timeout=10)
@@ -611,7 +611,7 @@ class UnifiedBookDownloader:
continue
if links is None:
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}")
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}")
# Borrow extraction failed - return False
return False, "Could not extract borrowed book pages"