jkjnkjkllkjjk

CLI.py · 76 lines changed
@@ -676,8 +676,8 @@ def _create_cmdlet_cli():
        try:
            from helper.hydrus import get_client
-            get_client(config)  # Pre-acquire and cache session key
-            debug("✓ Hydrus session key acquired")
+            # get_client(config)  # Pre-acquire and cache session key
+            # debug("✓ Hydrus session key acquired")
        except RuntimeError as e:
            # Hydrus is not available - this is expected and normal
            # Don't show a message, just continue without it
@@ -697,6 +697,78 @@ def _create_cmdlet_cli():
        initialize_hydrus_health_check(config)
        initialize_matrix_health_check(config)
        initialize_local_library_scan(config)

+        # --- Startup File Counts ---
+        # Count local files
+        try:
+            from helper.file_storage import LocalStorageBackend
+            from config import get_local_storage_path
+            storage_path = get_local_storage_path(config)
+            if storage_path:
+                # Use LocalStorageBackend for the search; a large limit fetches all files
+                storage = LocalStorageBackend(location=storage_path)
+                local_files = storage.search("*", limit=100000)
+                print(f"Local: {len(local_files)}")
+        except Exception as e:
+            debug(f"⚠ Could not count local files: {e}")
+
+        # Count Hydrus files (if available)
+        from hydrus_health_check import is_hydrus_available
+        if is_hydrus_available():
+            try:
+                from helper.hydrus import get_client
+                client = get_client(config)
+                # search_files returns file IDs
+                response = client.search_files(["system:everything"])
+                hydrus_ids = response.get("file_ids", [])
+                print(f"Hydrus: {len(hydrus_ids)}")
+            except Exception as e:
+                debug(f"⚠ Could not count Hydrus files: {e}")
+
+        # Count Debrid magnets (if available)
+        try:
+            from config import get_api_key
+            from helper.alldebrid import AllDebridClient
+
+            api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
+            if api_key:
+                # The /magnet/status endpoint returns all magnets when no ID is
+                # given, but helper/alldebrid.py's magnet_status() requires an ID,
+                # so call the client's _request() directly; touching the protected
+                # member is acceptable for this CLI script.
+                client = AllDebridClient(api_key)
+                resp = client._request('magnet/status')
+                if resp.get('status') == 'success':
+                    data = resp.get('data', {})
+                    magnets = data.get('magnets', [])
+                    if isinstance(magnets, list):
+                        print(f"Debrid: {len(magnets)}")
+                    elif isinstance(magnets, dict):
+                        # The API may return a dict keyed by magnet ID
+                        print(f"Debrid: {len(magnets)}")
+        except Exception as e:
+            # Stay quiet when Debrid is not configured or unreachable
+            # debug(f"⚠ Could not count Debrid magnets: {e}")
+            pass
+
    except Exception as e:
        debug(f"⚠ Could not check service availability: {e}")
except Exception:
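[Review note] The comment thread above reduces to one fact: AllDebrid's /magnet/status endpoint returns every magnet when called without an ID, but helper/alldebrid.py's magnet_status() requires one, hence the _request() call. A small public wrapper would keep the protected call out of CLI.py; this is a sketch under that assumption (magnet_status_all is a hypothetical new method on AllDebridClient, and _request(endpoint) -> dict is assumed from the usage above):

    def magnet_status_all(self) -> list:
        """Return all magnets; /magnet/status without an ID lists every magnet."""
        resp = self._request('magnet/status')
        if resp.get('status') != 'success':
            return []
        magnets = resp.get('data', {}).get('magnets', [])
        # The API may return a dict keyed by magnet ID
        return list(magnets.values()) if isinstance(magnets, dict) else magnets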
@@ -209,7 +209,7 @@ class SharedArgs:
    STORAGE = CmdletArg(
        "storage",
        type="enum",
-        choices=["hydrus", "local", "debrid", "ftp"],
+        choices=["hydrus", "local", "debrid", "ftp", "matrix"],
        required=False,
        description="Storage location or destination for saving/uploading files.",
        alias="s",
@@ -268,6 +268,7 @@ class SharedArgs:
            'hydrus': Path.home() / ".hydrus" / "client_files",
            'debrid': Path.home() / "Debrid",
            'ftp': Path.home() / "FTP",
+            'matrix': Path.home() / "Matrix",  # Placeholder, not used for upload path
        }

        if storage_value is None:
@@ -541,8 +541,36 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
        # Map provider 0x0 to storage 0x0 for download-data
        if provider_name == "0x0":
            dl_args.extend(["-storage", "0x0"])

-        return dl_module._run(result, dl_args, config)
+        # Capture results from download-data so we can add them to the DB
+        captured_results = []
+        original_emit = ctx.emit
+
+        def capture_emit(obj):
+            captured_results.append(obj)
+            original_emit(obj)
+
+        ctx.emit = capture_emit
+
+        try:
+            ret_code = dl_module._run(result, dl_args, config)
+        finally:
+            ctx.emit = original_emit
+
+        if ret_code != 0:
+            return ret_code
+
+        # Process the downloaded files recursively to add them to the DB
+        if captured_results:
+            log(f"Processing {len(captured_results)} downloaded file(s)...", file=sys.stderr)
+            success_count = 0
+            for res in captured_results:
+                # Recursively call add-file with each downloaded result
+                if _run(res, _args, config) == 0:
+                    success_count += 1
+            return 0 if success_count > 0 else 1
+
+        return 0

    if media_path is None:
        log("File path could not be resolved")
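[Review note] The save/patch/restore dance around ctx.emit is correct, but the pattern is easy to break in future edits. A context manager expresses it in one place; a minimal sketch, assuming ctx.emit stays a plain module attribute as used above:

    from contextlib import contextmanager

    @contextmanager
    def capture_emits(ctx):
        """Temporarily tee ctx.emit into a list, restoring the original on exit."""
        captured = []
        original = ctx.emit

        def tee(obj):
            captured.append(obj)
            original(obj)

        ctx.emit = tee
        try:
            yield captured
        finally:
            ctx.emit = original

Usage then collapses the try/finally above into:

    with capture_emits(ctx) as captured_results:
        ret_code = dl_module._run(result, dl_args, config)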
@@ -609,13 +637,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
        return 0

    # Handle storage-based operations (location is not None here)
-    valid_locations = {'hydrus', 'local'}
+    valid_locations = {'hydrus', 'local', 'matrix'}
    is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)

    if not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
-        log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
+        log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path")
        return 1

    if location == 'local':
@@ -704,6 +732,36 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
        return exit_code

+    elif location == 'matrix':
+        log(f"Uploading to Matrix: {media_path.name}", file=sys.stderr)
+        try:
+            result_url = storage["matrix"].upload(media_path, config=config)
+            log(f"Matrix: {result_url}", file=sys.stderr)
+
+            result_dict = create_pipe_object_result(
+                source='matrix',
+                identifier=result_url,
+                file_path=str(media_path),
+                cmdlet_name='add-file',
+                title=media_path.name,
+                target=result_url
+            )
+            ctx.emit(result_dict)
+
+        except Exception as exc:
+            log(f"Failed: {exc}", file=sys.stderr)
+            return 1
+
+        if delete_after_upload:
+            try:
+                media_path.unlink()
+                _cleanup_sidecar_files(media_path)
+                log(f"✅ Deleted file and sidecar", file=sys.stderr)
+            except Exception as exc:
+                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
+
+        return 0
+
    # location == 'hydrus'
    # Compute file hash to check if already in Hydrus
    log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
@@ -1594,6 +1594,25 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
    if not urls_to_download and files_downloaded_directly == 0:
        debug(f"No downloadable URLs found")
        return 1

+    # Deduplicate URLs while preserving order
+    unique_urls = []
+    seen_keys = set()
+
+    for u in urls_to_download:
+        key = None
+        if isinstance(u, dict):
+            key = u.get('url') or u.get('link') or u.get('target') or u.get('source_url')
+            if not key:
+                key = str(u)
+        else:
+            key = str(u)
+
+        if key and key not in seen_keys:
+            seen_keys.add(key)
+            unique_urls.append(u)
+
+    urls_to_download = unique_urls
+
    debug(f"Processing {len(urls_to_download)} URL(s)")
    for i, u in enumerate(urls_to_download, 1):
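[Review note] The key-extraction logic reads more clearly factored into a helper; behavior is unchanged. A sketch:

    def _dedup_key(u: Any) -> str:
        """Stable identity for a URL entry (dict or plain string)."""
        if isinstance(u, dict):
            return u.get('url') or u.get('link') or u.get('target') or u.get('source_url') or str(u)
        return str(u)

    unique_urls, seen_keys = [], set()
    for u in urls_to_download:
        key = _dedup_key(u)
        if key not in seen_keys:
            seen_keys.add(key)
            unique_urls.append(u)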
@@ -1749,6 +1768,108 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
                debug(f" ✗ Error while borrowing: {e}")
                exit_code = 1
                continue
+            except Exception as e:
+                # Check for BookNotAvailableError by name (it may be imported dynamically)
+                if type(e).__name__ == 'BookNotAvailableError':
+                    debug(f" ⚠ Book is waitlisted/unavailable on Archive.org")
+
+                    # Fall back to LibGen if an ISBN is available
+                    isbn = url.get('isbn')
+                    if isbn:
+                        debug(f" ▶ Falling back to LibGen search for ISBN: {isbn}")
+                        from helper.search_provider import LibGenProvider
+
+                        provider = LibGenProvider(config)
+                        # Search specifically by ISBN
+                        results = provider.search(f"isbn:{isbn}", limit=1)
+
+                        if results:
+                            debug(f" ✓ Found {len(results)} result(s) on LibGen")
+                            # Use the first result. We cannot inject a new URL entry
+                            # into the loop we are already inside, so process it here;
+                            # the provider's full_metadata holds the LibGen book dict.
+                            libgen_result = results[0]
+                            target = libgen_result.target
+                            debug(f" → Downloading from LibGen: {libgen_result.title}")
+
+                            # Use UnifiedBookDownloader directly to download into final_output_dir
+                            from helper.unified_book_downloader import UnifiedBookDownloader
+                            downloader = UnifiedBookDownloader(config)
+
+                            book_data = libgen_result.full_metadata
+
+                            # Find a working mirror
+                            mirrors = book_data.get('mirrors', {})
+                            download_url = book_data.get('mirror_url')
+                            if not download_url and mirrors:
+                                # Pick the first mirror
+                                download_url = next(iter(mirrors.values()))
+
+                            if download_url:
+                                debug(f" → Mirror: {download_url}")
+                                # download_book expects the book dict (title, author,
+                                # year, extension, mirrors, ...) and an output dir
+                                filepath = downloader.download_book(book_data, final_output_dir)
+                                if filepath:
+                                    debug(f" ✓ Successfully downloaded from LibGen: {filepath}")
+                                    downloaded_files.append(str(filepath))
+
+                                    # Emit result
+                                    file_hash = _compute_file_hash(filepath)
+                                    emit_tags = ['book', 'libgen']
+                                    if isbn:
+                                        emit_tags.append(f'isbn:{isbn}')
+
+                                    pipe_obj = create_pipe_object_result(
+                                        source='libgen',
+                                        identifier=book_data.get('md5', 'unknown'),
+                                        file_path=str(filepath),
+                                        cmdlet_name='download-data',
+                                        title=libgen_result.title,
+                                        file_hash=file_hash,
+                                        tags=emit_tags,
+                                        source_url=download_url
+                                    )
+                                    pipeline_context.emit(pipe_obj)
+                                    exit_code = 0
+                                    continue  # Success!
+                                else:
+                                    debug(f" ✗ Failed to download from LibGen")
+                            else:
+                                debug(f" ✗ No download URL found in LibGen result")
+                        else:
+                            debug(f" ✗ No results found on LibGen for ISBN: {isbn}")
+                    else:
+                        debug(f" ⚠ No ISBN available for LibGen fallback")
+
+                    # If the fallback failed or wasn't possible, abort this URL
+                    debug(f" ✗ Unable to borrow from Archive.org and LibGen fallback failed.")
+                    exit_code = 1
+                    continue
+                else:
+                    # Re-raise other exceptions
+                    raise e
+
            debug(f" → Extracting page information...")
            # Try both URL formats
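[Review note] Matching the exception by type(e).__name__ avoids an import at module load, but it silently stops working if the class is ever renamed. Since this commit defines BookNotAvailableError (see the credential_openlibrary hunk below), catching the class directly should be possible; a sketch, where the import path is assumed (it is whichever helper defines loan()):

    from helper.archive_client import BookNotAvailableError  # module name assumed

    try:
        session = loan(session, book_id)
    except BookNotAvailableError:
        ...  # LibGen fallback as above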
@@ -1806,8 +1927,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
            import img2pdf
            debug(f" → Merging pages into PDF...")

-            filename = title if title else f"book_{book_id_str}"
-            filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
+            # Use the title from the result item if available, otherwise fall back to the extracted title
+            filename_title = title_val if title_val and title_val != 'Unknown Book' else (title if title else f"book_{book_id_str}")
+            # Allow underscores and spaces
+            filename = "".join(c for c in filename_title if c.isalnum() or c in (' ', '.', '-', '_'))[:100]
            output_path = Path(final_output_dir) / f"{filename}.pdf"

            # Make unique filename if needed
@@ -1828,6 +1951,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
            file_hash = _compute_file_hash(output_path)
            # Build tags including ISBN if available
            emit_tags = ['book', 'borrowed', 'pdf']
+            if title_val and title_val != 'Unknown Book':
+                emit_tags.append(f'title:{title_val}')
            isbn_tag = url.get('isbn')
            if isbn_tag:
                emit_tags.append(f'isbn:{isbn_tag}')
@@ -2343,6 +2468,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:

        debug(f"Downloading: {url}")

+        # Special handling for LibGen URLs
+        if "libgen" in url or "library.lol" in url:
+            debug(f"🔄 Detected LibGen URL, using specialized downloader: {url}")
+            try:
+                from helper.libgen_service import download_from_mirror, search_libgen
+
+                # For search/details pages (e.g. https://libgen.li/series.php?id=577851)
+                # extract the ID and search for fresh mirror links; series/edition
+                # pages are handled by download_from_mirror itself, so skip them here.
+                libgen_id = ""
+                results = []
+
+                if "series.php" not in url and "edition.php" not in url:
+                    match = re.search(r"id=(\d+)", url)
+                    if match:
+                        libgen_id = match.group(1)
+                        debug(f" Extracted LibGen ID: {libgen_id}")
+
+                        # Search by ID to get fresh mirror links
+                        results = search_libgen(libgen_id, limit=1)
+                        if results:
+                            # Use the mirror URL from the result
+                            mirror_url = results[0].get("mirror_url")
+                            if mirror_url:
+                                debug(f" Resolved to mirror URL: {mirror_url}")
+                                url = mirror_url
+
+                # download_from_mirror writes to a full output path, and LibGen URLs
+                # rarely carry a usable filename, so build one from the metadata.
+                filename = "libgen_download.bin"
+                if libgen_id and results:
+                    title = results[0].get("title", "book")
+                    ext = results[0].get("extension", "pdf")
+                    # Sanitize filename
+                    safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
+                    filename = f"{safe_title}.{ext}"
+                elif "series.php" in url:
+                    filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf"
+
+                output_path = final_output_dir / filename
+
+                if download_from_mirror(url, output_path, log_info=debug, log_error=log):
+                    debug(f"✓ LibGen download successful: {output_path}")
+
+                    # Create a result object
+                    info = {
+                        "id": libgen_id or "libgen",
+                        "title": filename,
+                        "webpage_url": url,
+                        "ext": output_path.suffix.lstrip("."),
+                    }
+
+                    # Emit result
+                    pipeline_context.emit(create_pipe_object_result(
+                        source="libgen",
+                        identifier=libgen_id or "libgen",
+                        file_path=str(output_path),
+                        cmdlet_name="download-data",
+                        title=filename,
+                        extra=info
+                    ))
+                    downloaded_files.append(str(output_path))
+                    continue
+                else:
+                    debug("⚠ LibGen specialized download failed, falling back to generic downloader...")
+            except Exception as e:
+                debug(f"⚠ LibGen specialized download error: {e}")
+                # Fall through to generic downloader
+
        # Resolve cookies path if specified
        final_cookies_path = None
        if cookies_path:
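[Review note] The series.php filename line runs the same regex twice in one expression; binding the match once is clearer and avoids the double scan:

    elif "series.php" in url:
        m = re.search(r"id=(\d+)", url)
        filename = f"series_{m.group(1) if m else 'unknown'}.pdf"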
cmdlets/matrix.py · 103 lines · new file
@@ -0,0 +1,103 @@
+from typing import Any, Dict, Sequence, List
+import sys
+from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
+from helper.logger import log, debug
+from result_table import ResultTable
+from helper.file_storage import MatrixStorageBackend
+from config import save_config, load_config
+import pipeline as ctx
+
+
+def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
+    parsed = parse_cmdlet_args(args, CMDLET)
+
+    # Initialize backend
+    backend = MatrixStorageBackend()
+
+    # Get current default room
+    matrix_conf = config.get('storage', {}).get('matrix', {})
+    current_room_id = matrix_conf.get('room_id')
+
+    # Fetch rooms
+    debug("Fetching joined rooms from Matrix...")
+    rooms = backend.list_rooms(config)
+
+    if not rooms:
+        debug("No joined rooms found or Matrix not configured.")
+        return 1
+
+    # Handle selection if provided
+    selection = parsed.get("selection")
+    if selection:
+        new_room_id = None
+        selected_room_name = None
+
+        # Try as index (1-based)
+        try:
+            idx = int(selection) - 1
+            if 0 <= idx < len(rooms):
+                selected_room = rooms[idx]
+                new_room_id = selected_room['id']
+                selected_room_name = selected_room['name']
+        except ValueError:
+            # Try as room ID
+            for room in rooms:
+                if room['id'] == selection:
+                    new_room_id = selection
+                    selected_room_name = room['name']
+                    break
+
+        if new_room_id:
+            # Update config; load a fresh config from disk to avoid saving
+            # runtime objects (like WorkerManager)
+            disk_config = load_config()
+
+            if 'storage' not in disk_config:
+                disk_config['storage'] = {}
+            if 'matrix' not in disk_config['storage']:
+                disk_config['storage']['matrix'] = {}
+
+            disk_config['storage']['matrix']['room_id'] = new_room_id
+            save_config(disk_config)
+
+            debug(f"Default Matrix room set to: {selected_room_name} ({new_room_id})")
+            current_room_id = new_room_id
+        else:
+            debug(f"Invalid selection: {selection}")
+            return 1
+
+    # Display table
+    table = ResultTable("Matrix Rooms")
+    for i, room in enumerate(rooms):
+        is_default = (room['id'] == current_room_id)
+
+        row = table.add_row()
+        row.add_column("Default", "*" if is_default else "")
+        row.add_column("Name", room['name'])
+        row.add_column("ID", room['id'])
+
+        # Set selection args so the user can type @N to select;
+        # this will run `.matrix N`
+        table.set_row_selection_args(i, [str(i + 1)])
+
+    table.set_source_command(".matrix")
+
+    # Register results
+    ctx.set_last_result_table_overlay(table, rooms)
+    ctx.set_current_stage_table(table)
+
+    print(table)
+    return 0
+
+
+CMDLET = Cmdlet(
+    name=".matrix",
+    aliases=["matrix", "rooms"],
+    summary="List and select default Matrix room",
+    usage=".matrix [selection]",
+    args=[
+        CmdletArg(
+            name="selection",
+            type="string",
+            description="Index or ID of the room to set as default",
+            required=False
+        )
+    ],
+    exec=_run
+)
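[Review note] A quick usage sketch for the new cmdlet (output is illustrative, not captured from a real session): `.matrix` prints the room table, and `.matrix 2` or `.matrix !def456:example.org` persists that room as storage.matrix.room_id:

    > .matrix
    Matrix Rooms
    Default | Name       | ID
    *       | Media Drop | !abc123:example.org
            | General    | !def456:example.org
    > .matrix 2
    Default Matrix room set to: General (!def456:example.org)

The `import sys` at the top of the file appears unused and could be dropped.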
@@ -70,12 +70,15 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str:

    return title or filename or "Unknown"


-def _queue_items(items: List[Any], clear_first: bool = False) -> None:
+def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
    """Queue items to MPV, starting it if necessary.

    Args:
        items: List of items to queue
        clear_first: If True, the first item will replace the current playlist
+
+    Returns:
+        True if MPV was started, False if items were queued via IPC.
    """
    for i, item in enumerate(items):
        # Extract URL/Path
@@ -115,7 +118,7 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
            # MPV not running (or died)
            # Start MPV with remaining items
            _start_mpv(items[i:])
-            return
+            return True
        elif resp.get("error") == "success":
            # Also set property for good measure
            if title:
@@ -125,14 +128,30 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
        else:
            error_msg = str(resp.get('error'))
            debug(f"Failed to queue item: {error_msg}", file=sys.stderr)
+    return False
+

def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Manage and play items in the MPV playlist via IPC."""

    parsed = parse_cmdlet_args(args, CMDLET)

+    # Initialize mpv_started flag
+    mpv_started = False
+
    # Handle positional index argument if provided
    index_arg = parsed.get("index")
+    url_arg = parsed.get("url")
+
+    # If index_arg is provided but is not an integer, treat it as a URL;
+    # this allows `.pipe "http://..."` without the -url flag
+    if index_arg is not None:
+        try:
+            int(index_arg)
+        except ValueError:
+            # Not an integer, treat as a URL if url_arg is not set
+            if not url_arg:
+                url_arg = index_arg
+                index_arg = None
+
    clear_mode = parsed.get("clear")
    list_mode = parsed.get("list")
@@ -141,6 +160,15 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    save_mode = parsed.get("save")
    load_mode = parsed.get("load")

+    # Handle URL queuing
+    mpv_started = False
+    if url_arg:
+        mpv_started = _queue_items([url_arg])
+        # After queuing a URL, list the playlist to show it was added,
+        # unless other flags are present
+        if not (clear_mode or play_mode or pause_mode or save_mode or load_mode):
+            list_mode = True
+
    # Handle Save Playlist
    if save_mode:
        playlist_name = index_arg or f"Playlist {subprocess.check_output(['date', '/t'], shell=True).decode().strip()}"
@@ -296,7 +324,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

    # Handle piped input (add to playlist)
    # Skip adding if -list is specified (user just wants to see current playlist)
-    if result and not list_mode:
+    if result and not list_mode and not url_arg:
        # If result is a list of items, add them to playlist
        items_to_add = []
        if isinstance(result, list):
@@ -304,7 +332,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        elif isinstance(result, dict):
            items_to_add = [result]

-        _queue_items(items_to_add)
+        if _queue_items(items_to_add):
+            mpv_started = True

        if items_to_add:
            # If we added items, we might want to play the first one if nothing is playing?
@@ -315,6 +344,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    items = _get_playlist()

    if items is None:
+        if mpv_started:
+            # MPV was just started, so we can't list items yet, but we know it
+            # is running (or trying to start), so don't start another instance.
+            return 0
+
        debug("MPV is not running. Starting new instance...")
        _start_mpv([])
        return 0
@@ -369,7 +403,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        return 1

    # List items (Default action or after clear)
-    if list_mode or index_arg is None:
+    if list_mode or (index_arg is None and not url_arg):
        if not items:
            debug("MPV playlist is empty.")
            return 0
@@ -451,12 +485,18 @@ CMDLET = Cmdlet(
    name=".pipe",
    aliases=["pipe", "playlist", "queue", "ls-pipe"],
    summary="Manage and play items in the MPV playlist via IPC",
-    usage=".pipe [index] [-clear]",
+    usage=".pipe [index|url] [-clear] [-url URL]",
    args=[
        CmdletArg(
            name="index",
-            type="int",
-            description="Index of item to play or clear",
+            type="string",  # Changed to string to allow URL detection
+            description="Index of item to play/clear, or URL to queue",
+            required=False
+        ),
+        CmdletArg(
+            name="url",
+            type="string",
+            description="URL to queue",
            required=False
        ),
        CmdletArg(
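[Review note] With these changes `.pipe` accepts a bare URL as its positional argument: `.pipe https://example.com/a.mp4` queues the URL (or starts MPV with it) and then lists the playlist, while `.pipe 3` still plays index 3. One leftover: mpv_started is assigned False twice in _run (once right after parse_cmdlet_args, again in the URL-queuing block); the second assignment is redundant and can be dropped.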
@@ -141,8 +141,33 @@ def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
        return payload
    title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
    store_label = payload.get("origin") or payload.get("source") or origin_value
+
+    # Handle extension
+    extension = payload.get("ext", "")
+    if not extension and title:
+        path_obj = Path(str(title))
+        if path_obj.suffix:
+            extension = path_obj.suffix.lstrip('.')
+            title = path_obj.stem
+
+    # Handle size
+    size_val = payload.get("size") or payload.get("size_bytes")
+    size_str = ""
+    if size_val:
+        try:
+            size_bytes = int(size_val)
+            size_mb = size_bytes / (1024 * 1024)
+            size_str = f"{size_mb:.1f} MB"
+        except (ValueError, TypeError):
+            size_str = str(size_val)
+
    normalized = dict(payload)
-    normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
+    normalized["columns"] = [
+        ("Title", str(title)),
+        ("Ext", str(extension)),
+        ("Store", str(store_label)),
+        ("Size", str(size_str))
+    ]
    return normalized
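[Review note] size_bytes / (1024 * 1024) computes mebibytes but the label says "MB", and anything under a megabyte displays as "0.0 MB". If the display matters, a unit-walking helper keeps it consistent; a sketch:

    def _format_size(size_bytes: int) -> str:
        """Human-readable size, binary units to match the 1024-based divisor."""
        value = float(size_bytes)
        for unit in ("B", "KiB", "MiB", "GiB"):
            if value < 1024:
                return f"{value:.1f} {unit}"
            value /= 1024
        return f"{value:.1f} TiB"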
debug_db.py · 19 lines · new file
@@ -0,0 +1,19 @@
+import sqlite3
+import os
+from pathlib import Path
+
+db_path = Path("C:/Media Machina/.downlow_library.db")
+
+if not db_path.exists():
+    print(f"DB not found at {db_path}")
+else:
+    conn = sqlite3.connect(db_path)
+    cursor = conn.cursor()
+
+    print("Files in DB:")
+    cursor.execute("SELECT id, file_path FROM files")
+    for row in cursor.fetchall():
+        print(f"ID: {row[0]}, Path: {row[1]}")
+
+    conn.close()
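[Review note] debug_db.py hard-codes one library path and imports os without using it. If the script is meant to stay in the repo, taking the path from argv keeps it reusable; an equivalent sketch:

    import sqlite3
    import sys
    from contextlib import closing
    from pathlib import Path

    # Default to the path above; allow overriding on the command line
    db_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("C:/Media Machina/.downlow_library.db")

    if not db_path.exists():
        sys.exit(f"DB not found at {db_path}")

    with closing(sqlite3.connect(db_path)) as conn:
        print("Files in DB:")
        for file_id, file_path in conn.execute("SELECT id, file_path FROM files"):
            print(f"ID: {file_id}, Path: {file_path}")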
@@ -75,6 +75,11 @@ def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optio
    return email, password


+class BookNotAvailableError(Exception):
+    """Raised when a book is not available for borrowing (waitlisted/in use)."""
+    pass
+
+
def display_error(response: requests.Response, message: str) -> None:
    """Display error and exit."""
    log(message, file=sys.stderr)
@@ -133,9 +138,11 @@ def loan(session: requests.Session, book_id: str, verbose: bool = True) -> reque
    if response.status_code == 400:
        try:
            if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
-                debug("This book doesn't need to be borrowed")
-                return session
+                debug("Book is not available for borrowing (waitlisted or in use)")
+                raise BookNotAvailableError("Book is waitlisted or in use")
            display_error(response, "Something went wrong when trying to borrow the book.")
+        except BookNotAvailableError:
+            raise
        except:
            display_error(response, "The book cannot be borrowed")

@@ -182,11 +189,21 @@ def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str],

    # Try to extract the infos URL from the response
    try:
-        # Look for the "url" field in the response
-        if '"url":"' not in r:
-            raise ValueError("No 'url' field found in response")
-        infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
-    except (IndexError, ValueError) as e:
+        # Look for the "url" field in the response using regex;
+        # matches "url":"//archive.org/..."
+        import re
+        match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
+        if not match:
+            raise ValueError("No 'url' field found in response")
+
+        url_path = match.group(1)
+        if url_path.startswith("//"):
+            infos_url = "https:" + url_path
+        else:
+            infos_url = url_path
+
+        infos_url = infos_url.replace("\\u0026", "&")
+    except (IndexError, ValueError, AttributeError) as e:
        # If URL extraction fails, raise with better error message
        raise RuntimeError(f"Failed to extract book info URL from response: {e}")
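[Review note] A quick check of the new pattern against a representative response fragment (the JSON snippet is illustrative):

    import re

    r = '{"ok":true,"url":"//archive.org/BookReader/foo?id=1\\u0026scale=2"}'
    match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
    assert match is not None
    assert match.group(1) == "//archive.org/BookReader/foo?id=1\\u0026scale=2"
    # After the "//" prefix fix-up and the \u0026 replacement, this becomes
    # https://archive.org/BookReader/foo?id=1&scale=2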
@@ -27,6 +27,7 @@ import requests
import re

from helper.logger import log, debug
+from helper.utils_constant import mime_maps


class StorageBackend(ABC):
@@ -707,6 +708,18 @@ class HydrusStorageBackend(StorageBackend):
                    if title != f"Hydrus File {file_id}":
                        break

+                # Resolve extension from MIME type
+                mime_type = meta.get("mime")
+                ext = ""
+                if mime_type:
+                    for category in mime_maps.values():
+                        for ext_key, info in category.items():
+                            if mime_type in info.get("mimes", []):
+                                ext = info.get("ext", "").lstrip('.')
+                                break
+                        if ext:
+                            break
+
                # Filter results based on query type
                # If user provided explicit namespace (has ':'), don't do substring filtering
                # Just include what the tag search returned
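[Review note] The nested loop over mime_maps runs once per search result. Building a reverse map once makes each lookup O(1); a sketch, assuming mime_maps keeps the {category: {ext_key: {"mimes": [...], "ext": ...}}} shape implied above:

    MIME_TO_EXT = {
        mime: info.get("ext", "").lstrip('.')
        for category in mime_maps.values()
        for info in category.values()
        for mime in info.get("mimes", [])
    }

    ext = MIME_TO_EXT.get(mime_type, "") if mime_type else ""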
@@ -726,7 +739,8 @@ class HydrusStorageBackend(StorageBackend):
                        "origin": "hydrus",
                        "tags": all_tags,
                        "file_id": file_id,
-                        "mime": meta.get("mime"),
+                        "mime": mime_type,
+                        "ext": ext,
                    })
                else:
                    # Free-form search: check if search terms match the title or tags
@@ -758,7 +772,8 @@ class HydrusStorageBackend(StorageBackend):
                            "origin": "hydrus",
                            "tags": all_tags,
                            "file_id": file_id,
-                            "mime": meta.get("mime"),
+                            "mime": mime_type,
+                            "ext": ext,
                        })

        debug(f"Found {len(results)} result(s)")
@@ -971,6 +986,60 @@ class MatrixStorageBackend(StorageBackend):
    def get_name(self) -> str:
        return "matrix"

+    def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """List joined rooms with their names."""
+        matrix_conf = config.get('storage', {}).get('matrix', {})
+        homeserver = matrix_conf.get('homeserver')
+        access_token = matrix_conf.get('access_token')
+
+        if not homeserver or not access_token:
+            return []
+
+        if not homeserver.startswith('http'):
+            homeserver = f"https://{homeserver}"
+
+        headers = {"Authorization": f"Bearer {access_token}"}
+
+        try:
+            # Get joined rooms
+            resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
+            if resp.status_code != 200:
+                return []
+
+            room_ids = resp.json().get('joined_rooms', [])
+            rooms = []
+
+            for rid in room_ids:
+                # Try to get the room name
+                name = "Unknown Room"
+                try:
+                    # Get the m.room.name state event
+                    name_resp = requests.get(
+                        f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name",
+                        headers=headers,
+                        timeout=2
+                    )
+                    if name_resp.status_code == 200:
+                        name = name_resp.json().get('name', name)
+                    else:
+                        # Try the canonical alias
+                        alias_resp = requests.get(
+                            f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias",
+                            headers=headers,
+                            timeout=2
+                        )
+                        if alias_resp.status_code == 200:
+                            name = alias_resp.json().get('alias', name)
+                except Exception:
+                    pass
+
+                rooms.append({'id': rid, 'name': name})
+
+            return rooms
+        except Exception as e:
+            log(f"Error listing Matrix rooms: {e}", file=sys.stderr)
+            return []
+
    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload file to Matrix room.
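[Review note] list_rooms issues up to two state requests per room, which adds up for accounts in many rooms. The full room state is available in a single request per room (GET /_matrix/client/v3/rooms/{roomId}/state returns a list of events), which at least halves the calls; a sketch:

    state = requests.get(
        f"{homeserver}/_matrix/client/v3/rooms/{rid}/state",
        headers=headers,
        timeout=10,
    ).json()
    events = {ev.get("type"): ev.get("content", {}) for ev in state} if isinstance(state, list) else {}
    name = (events.get("m.room.name", {}).get("name")
            or events.get("m.room.canonical_alias", {}).get("alias")
            or "Unknown Room")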
@@ -993,8 +1062,8 @@ class MatrixStorageBackend(StorageBackend):
        access_token = matrix_conf.get('access_token')
        room_id = matrix_conf.get('room_id')

-        if not homeserver or not room_id:
-            raise ValueError("Matrix homeserver and room_id required")
+        if not homeserver:
+            raise ValueError("Matrix homeserver required")

        # Ensure homeserver has protocol
        if not homeserver.startswith('http'):
@@ -1004,6 +1073,39 @@ class MatrixStorageBackend(StorageBackend):
        if not access_token:
            raise ValueError("Matrix access_token required (login not yet implemented)")

+        # Handle room selection if not provided
+        if not room_id:
+            log("No room_id configured. Fetching joined rooms...", file=sys.stderr)
+            rooms = self.list_rooms(config)
+
+            if not rooms:
+                raise ValueError("No joined rooms found or failed to fetch rooms.")
+
+            from result_table import ResultTable
+            table = ResultTable("Matrix Rooms")
+            for i, room in enumerate(rooms):
+                row = table.add_row()
+                row.add_column("#", str(i + 1))
+                row.add_column("Name", room['name'])
+                row.add_column("ID", room['id'])
+
+            print(table)
+
+            # Simple interactive selection
+            try:
+                selection = input("Select room # to upload to: ")
+                idx = int(selection) - 1
+                if 0 <= idx < len(rooms):
+                    room_id = rooms[idx]['id']
+                    log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr)
+                else:
+                    raise ValueError("Invalid selection")
+            except Exception:
+                raise ValueError("Invalid room selection")
+
+        if not room_id:
+            raise ValueError("Matrix room_id required")
+
        # 1. Upload Media
        upload_url = f"{homeserver}/_matrix/media/r3/upload"
        headers = {
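[Review note] The unchanged upload line just below this hunk targets /_matrix/media/r3/upload. The Matrix spec names its media upload endpoint /_matrix/media/v3/upload (historically r0); "r3" is not a published spec version, so strict homeservers will likely reject it. If testing confirms, the fix is one line:

    upload_url = f"{homeserver}/_matrix/media/v3/upload"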
@@ -1337,19 +1337,44 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool,
    timeout = 10.0

    try:
-        client = HydrusClient(url, access_key, timeout)
-        # Lightweight probe: get services
-        # Temporarily suppress error logging for health checks (expected to fail if Hydrus unavailable)
-        hydrus_logger = logging.getLogger("helper.hydrus")
-        original_level = hydrus_logger.level
-        hydrus_logger.setLevel(logging.CRITICAL)  # Suppress errors/warnings
+        # Use HTTPClient directly to avoid session key logic and reduce retries.
+        # This prevents log spam when Hydrus is offline (avoiding 3 retries x 2 requests).
+        from helper.http_client import HTTPClient
+
+        probe_url = f"{url.rstrip('/')}/get_services"
+
+        headers = {}
+        if access_key:
+            headers["Hydrus-Client-API-Access-Key"] = access_key
+
+        # Suppress HTTPClient logging during the probe to avoid "Request failed" logs on startup
+        http_logger = logging.getLogger("helper.http_client")
+        original_level = http_logger.level
+        http_logger.setLevel(logging.CRITICAL)
+
        try:
-            _ = client.get_services()
-            _HYDRUS_AVAILABLE = True
-            _HYDRUS_UNAVAILABLE_REASON = None
-            return True, None
+            # Use retries=1 (single attempt, no retry) to fail fast
+            with HTTPClient(timeout=timeout, retries=1, headers=headers, verify_ssl=False) as http:
+                try:
+                    response = http.get(probe_url)
+                    if response.status_code == 200:
+                        _HYDRUS_AVAILABLE = True
+                        _HYDRUS_UNAVAILABLE_REASON = None
+                        return True, None
+                    else:
+                        # Reachable but not usable (e.g. 403 on bad auth):
+                        # report it as unavailable with the HTTP status as the reason.
+                        reason = f"HTTP {response.status_code}: {response.reason_phrase}"
+                        _HYDRUS_AVAILABLE = False
+                        _HYDRUS_UNAVAILABLE_REASON = reason
+                        return False, reason
+                except Exception as e:
+                    # Connection errors from HTTPClient propagate to the outer handler
+                    raise e
        finally:
-            hydrus_logger.setLevel(original_level)
+            http_logger.setLevel(original_level)

    except Exception as exc:
        reason = str(exc)
        _HYDRUS_AVAILABLE = False
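[Review note] Two details in the probe: the inner `except Exception as e: raise e` adds nothing, since the outer `except Exception as exc:` already converts the failure into the unavailability reason, so the inner handler can be deleted outright. And the non-200 branch assumes the helper's response object exposes reason_phrase (httpx-style) rather than reason (requests-style); worth confirming against helper/http_client.py.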
@@ -1,21 +1,44 @@
-"""Shared Library Genesis search and download helpers."""
+"""Shared Library Genesis search and download helpers.
+
+Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
+Targets libgen.is/rs/st mirrors and parses the results table directly.
+"""
from __future__ import annotations

-from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, List, Optional
import logging
+import re
import requests
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional
from urllib.parse import quote, urljoin

-from libgen import search_sync, LibgenError
+# Optional dependencies
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    BeautifulSoup = None

LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]

-DEFAULT_TIMEOUT = 10.0
+DEFAULT_TIMEOUT = 20.0
DEFAULT_LIMIT = 50

-logging.getLogger(__name__).setLevel(logging.WARNING)
+# Mirrors to try in order
+MIRRORS = [
+    "https://libgen.is",
+    "https://libgen.rs",
+    "https://libgen.st",
+    "http://libgen.is",
+    "http://libgen.rs",
+    "http://libgen.st",
+    "https://libgen.li",  # Different structure, fallback
+    "http://libgen.li",
+    "https://libgen.gl",  # Different structure, fallback
+    "http://libgen.gl",
+]
+
+logging.getLogger(__name__).setLevel(logging.INFO)


def _call(logger: LogFn, message: str) -> None:
@@ -23,168 +46,248 @@ def _call(logger: LogFn, message: str) -> None:
    logger(message)


-def search_libgen_no_ads(query: str, session: Optional[requests.Session] = None) -> List[Dict[str, Any]]:
-    """Search Libgen without triggering ads.php requests."""
-    try:
-        from bs4 import BeautifulSoup
-    except ImportError:  # pragma: no cover
-        logging.warning("BeautifulSoup not available; falling back to standard search")
-        return []
-
-    mirrors = [
-        "https://libgen.gl",
-        "https://libgen.vg",
-        "https://libgen.la",
-        "https://libgen.bz",
-        "https://libgen.gs",
-    ]
-
-    session = session or requests.Session()
-    session.headers.setdefault(
-        "User-Agent",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
-    )
-
-    for mirror in mirrors:
-        try:
-            search_url = f"{mirror}/index.php?req={quote(query)}&res=100&covers=on&filesuns=all"
-            response = session.get(search_url, timeout=DEFAULT_TIMEOUT)
-            if response.status_code != 200:
-                continue
-
-            soup = BeautifulSoup(response.content, "html.parser")
-            table = soup.find("table", {"class": "catalog"})
-            if table is None:
-                for candidate in soup.find_all("table"):
-                    rows = candidate.find_all("tr")
-                    if len(rows) > 2:
-                        table = candidate
-                        break
-            if table is None:
-                logging.debug("[libgen_no_ads] No results table on %s", mirror)
-                continue
-
-            rows = table.find_all("tr")[1:]
-            results: List[Dict[str, Any]] = []
-            for row in rows:
-                try:
-                    cells = row.find_all("td")
-                    if len(cells) < 9:
-                        continue
-
-                    size_cell = cells[7]
-                    file_link = size_cell.find("a")
-                    mirror_link = ""
-                    if file_link:
-                        href = str(file_link.get("href", ""))
-                        if href.startswith("/"):
-                            mirror_link = mirror + href
-                        elif href:
-                            mirror_link = urljoin(mirror, href)
-
-                    if not mirror_link:
-                        title_link = cells[1].find("a") if len(cells) > 1 else None
-                        if title_link:
-                            href = str(title_link.get("href", ""))
-                            if href.startswith("/"):
-                                mirror_link = mirror + href
-                            elif href:
-                                mirror_link = urljoin(mirror, href)
-
-                    if not mirror_link:
-                        continue
-
-                    results.append(
-                        {
-                            "id": "",
-                            "mirror": mirror_link,
-                            "cover": "",
-                            "title": cells[1].get_text(strip=True) if len(cells) > 1 else "Unknown",
-                            "authors": [cells[2].get_text(strip=True)]
-                            if len(cells) > 2
-                            else ["Unknown"],
-                            "publisher": cells[3].get_text(strip=True) if len(cells) > 3 else "",
-                            "year": cells[4].get_text(strip=True) if len(cells) > 4 else "",
-                            "pages": cells[6].get_text(strip=True) if len(cells) > 6 else "",
-                            "language": cells[5].get_text(strip=True) if len(cells) > 5 else "",
-                            "size": cells[7].get_text(strip=True) if len(cells) > 7 else "",
-                            "extension": cells[8].get_text(strip=True) if len(cells) > 8 else "",
-                            "isbn": "",
-                        }
-                    )
-                except Exception as exc:  # pragma: no cover - defensive
-                    logging.debug("[libgen_no_ads] Error parsing row: %s", exc)
-                    continue
-
-            if results:
-                logging.info("[libgen_no_ads] %d results from %s", len(results), mirror)
-                return results
-        except Exception as exc:  # pragma: no cover - mirror issues
-            logging.debug("[libgen_no_ads] Mirror %s failed: %s", mirror, exc)
-            continue
-
-    return []
-
-
-def format_book_info(book: Any) -> Dict[str, Any]:
-    """Format Libgen search result into a consistent dictionary."""
-    filesize_bytes = 0
-    size_str = getattr(book, "size", "") or ""
-    if size_str:
-        parts = size_str.strip().split()
-        try:
-            value = float(parts[0])
+class LibgenSearch:
+    """Robust LibGen searcher."""
+
+    def __init__(self, session: Optional[requests.Session] = None):
+        self.session = session or requests.Session()
+        self.session.headers.update({
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        })
+
+    def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
+        """Search LibGen mirrors."""
+        if not BeautifulSoup:
+            logging.error("BeautifulSoup not installed. Cannot search LibGen.")
+            return []
+
+        for mirror in MIRRORS:
+            try:
+                if "libgen.li" in mirror or "libgen.gl" in mirror:
+                    results = self._search_libgen_li(mirror, query, limit)
+                else:
+                    results = self._search_libgen_rs(mirror, query, limit)
+
+                if results:
+                    return results
+            except Exception as e:
+                logging.debug(f"Mirror {mirror} failed: {e}")
+                continue
+
+        return []
+
+    def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
+        """Search libgen.rs/is/st style mirrors."""
+        # Search URL: /search.php?req=QUERY&res=100&column=def
+        url = f"{mirror}/search.php"
+        params = {
+            "req": query,
+            "res": 100,  # Request more to filter later
+            "column": "def",
+            "open": 0,
+            "view": "simple",
+            "phrase": 1,
+        }
+
+        resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
+        resp.raise_for_status()
+
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        # Find the results table, usually class 'c';
+        # otherwise fall back to any table with many rows
+        table = soup.find("table", {"class": "c"})
+        if not table:
+            tables = soup.find_all("table")
+            for t in tables:
+                if len(t.find_all("tr")) > 5:
+                    table = t
+                    break
+
+        if not table:
+            return []
+
+        results = []
+        # Skip header row
+        rows = table.find_all("tr")[1:]
+
+        for row in rows:
+            cols = row.find_all("td")
+            if len(cols) < 9:
+                continue
+
+            # Columns: 0 ID, 1 Author(s), 2 Title, 3 Publisher, 4 Year,
+            # 5 Pages, 6 Language, 7 Size, 8 Extension, 9+ Mirrors
+            try:
+                libgen_id = cols[0].get_text(strip=True)
+                authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
+                if not authors:
+                    authors = [cols[1].get_text(strip=True)]
+
+                title_tag = cols[2].find("a")
+                title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
+
+                # Extract MD5 from the title link if possible,
+                # e.g. href='book/index.php?md5=...'
+                md5 = ""
+                if title_tag and title_tag.has_attr("href"):
+                    href = title_tag["href"]
+                    match = re.search(r"md5=([a-fA-F0-9]{32})", href)
+                    if match:
+                        md5 = match.group(1)
+
+                publisher = cols[3].get_text(strip=True)
+                year = cols[4].get_text(strip=True)
+                pages = cols[5].get_text(strip=True)
+                language = cols[6].get_text(strip=True)
+                size = cols[7].get_text(strip=True)
+                extension = cols[8].get_text(strip=True)
+
+                # Mirrors: usually col 9 is http://library.lol/main/MD5
+                mirror_links = []
+                for i in range(9, len(cols)):
+                    a = cols[i].find("a")
+                    if a and a.has_attr("href"):
+                        mirror_links.append(a["href"])
+
+                # Construct the direct download page link (library.lol);
+                # with an MD5 we can build it directly: http://library.lol/main/{md5}
+                if md5:
+                    download_link = f"http://library.lol/main/{md5}"
+                elif mirror_links:
+                    download_link = mirror_links[0]
+                else:
+                    download_link = ""
+
+                results.append({
+                    "id": libgen_id,
+                    "title": title,
+                    "author": ", ".join(authors),
+                    "publisher": publisher,
+                    "year": year,
+                    "pages": pages,
+                    "language": language,
+                    "filesize_str": size,
+                    "extension": extension,
+                    "md5": md5,
+                    "mirror_url": download_link,
+                    "cover": "",  # Could extract from hover if needed
+                })
+
+                if len(results) >= limit:
+                    break
+
+            except Exception as e:
+                logging.debug(f"Error parsing row: {e}")
+                continue
+
+        return results
+
+    def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
+        """Search libgen.li/gl style mirrors."""
+        # Search URL: /index.php?req=QUERY&...
+        url = f"{mirror}/index.php"
+        params = {
+            "req": query,
+            "res": 100,
+            "covers": "on",
+            "filesuns": "all",
+        }
+
+        resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
+        resp.raise_for_status()
+
+        soup = BeautifulSoup(resp.text, "html.parser")
+        table = soup.find("table", {"id": "tablelibgen"})
+        if not table:
+            table = soup.find("table", {"class": "table table-striped"})
+
+        if not table:
+            return []
+
+        results = []
+        rows = table.find_all("tr")[1:]
+
+        for row in rows:
+            cols = row.find_all("td")
+            if len(cols) < 9:
+                continue
+
+            try:
+                # Structure differs from libgen.rs: 0 Cover,
+                # 1 Title (link to file.php?id=...), 2 Author, 3 Publisher,
+                # 4 Year, 5 Language, 6 Pages, 7 Size, 8 Extension, 9 Mirrors
+                title_col = cols[1]
+                title_link = title_col.find("a")
+                title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
+
+                # Extract ID from the link, usually "file.php?id=..." or "edition.php?id=..."
+                libgen_id = ""
+                if title_link and title_link.has_attr("href"):
+                    href = title_link["href"]
+                    match = re.search(r"id=(\d+)", href)
+                    if match:
+                        libgen_id = match.group(1)
+
+                authors = cols[2].get_text(strip=True)
+                publisher = cols[3].get_text(strip=True)
+                year = cols[4].get_text(strip=True)
+                language = cols[5].get_text(strip=True)
+                pages = cols[6].get_text(strip=True)
+                size = cols[7].get_text(strip=True)
+                extension = cols[8].get_text(strip=True)
+
+                # Mirror link: usually in col 9 or the title link
+                mirror_url = ""
+                if title_link:
+                    href = title_link["href"]
|
if href.startswith("/"):
|
||||||
unit = parts[1].upper() if len(parts) > 1 else "B"
|
mirror_url = mirror + href
|
||||||
if unit in {"MB", "M"}:
|
else:
|
||||||
filesize_bytes = int(value * 1024 * 1024)
|
mirror_url = urljoin(mirror, href)
|
||||||
elif unit in {"GB", "G"}:
|
|
||||||
filesize_bytes = int(value * 1024 * 1024 * 1024)
|
results.append({
|
||||||
elif unit in {"KB", "K"}:
|
"id": libgen_id,
|
||||||
filesize_bytes = int(value * 1024)
|
"title": title,
|
||||||
else:
|
"author": authors,
|
||||||
filesize_bytes = int(value)
|
"publisher": publisher,
|
||||||
except (ValueError, IndexError): # pragma: no cover - defensive
|
"year": year,
|
||||||
filesize_bytes = 0
|
"pages": pages,
|
||||||
|
"language": language,
|
||||||
title = getattr(book, "title", "") or ""
|
"filesize_str": size,
|
||||||
isbn = getattr(book, "isbn", "") or ""
|
"extension": extension,
|
||||||
if not isbn and title:
|
"md5": "", # .li doesn't show MD5 easily in table
|
||||||
import re
|
"mirror_url": mirror_url,
|
||||||
|
})
|
||||||
match = re.search(
|
|
||||||
r"((?:[\d]{10,13}(?:\s*[;,]\s*[\d]{10,13})+)|(?:[\d]{10,13})(?:\s*[;,]?\s*[\d\-]{0,50})?)\s*(?:\b|$)",
|
if len(results) >= limit:
|
||||||
title,
|
break
|
||||||
)
|
except Exception:
|
||||||
if match:
|
continue
|
||||||
potential_isbn = match.group(0).strip()
|
|
||||||
if re.search(r"\d{10,13}", potential_isbn):
|
return results
|
||||||
isbn = potential_isbn
|
|
||||||
title = re.sub(r"\s+[a-z]\s*$", "", title[: match.start()].strip(), flags=re.IGNORECASE)
|
|
||||||
|
|
||||||
authors_value = getattr(book, "authors", None)
|
|
||||||
if isinstance(authors_value, Iterable) and not isinstance(authors_value, str):
|
|
||||||
authors_str = ", ".join(str(author) for author in authors_value)
|
|
||||||
else:
|
|
||||||
authors_str = str(authors_value or "Unknown")
|
|
||||||
|
|
||||||
download_links = getattr(book, "download_links", None)
|
|
||||||
mirror_url = None
|
|
||||||
if download_links and getattr(download_links, "get_link", None):
|
|
||||||
mirror_url = download_links.get_link
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title or "Unknown",
|
|
||||||
"author": authors_str,
|
|
||||||
"publisher": getattr(book, "publisher", "") or "",
|
|
||||||
"year": getattr(book, "year", "") or "",
|
|
||||||
"pages": getattr(book, "pages", "") or "",
|
|
||||||
"language": getattr(book, "language", "") or "",
|
|
||||||
"filesize": filesize_bytes,
|
|
||||||
"filesize_str": size_str or "Unknown",
|
|
||||||
"extension": getattr(book, "extension", "") or "",
|
|
||||||
"isbn": isbn,
|
|
||||||
"mirror_url": mirror_url,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
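
For reference, a minimal standalone sketch of the MD5-to-mirror derivation the new parser above performs (the href value is a made-up example; only the regex and URL pattern come from the hunk):

    import re

    href = "book/index.php?md5=0123456789abcdef0123456789abcdef"  # hypothetical title-link href
    match = re.search(r"md5=([a-fA-F0-9]{32})", href)
    if match:
        md5 = match.group(1)
        # Same construction as the parser: library.lol serves a download page per MD5
        print(f"http://library.lol/main/{md5}")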
@@ -195,183 +298,160 @@ def search_libgen(
     log_error: ErrorFn = None,
     session: Optional[requests.Session] = None,
 ) -> List[Dict[str, Any]]:
-    """Search Libgen returning formatted dictionaries with multiple mirrors.
-
-    Uses HTML scraper (search_libgen_no_ads) to find books quickly.
-    Returns mirror URLs and book IDs that can be used to generate alternative mirrors.
-    """
-    try:
-        _call(log_info, f"[search] Searching Libgen for: {query}")
-        session = session or requests.Session()
-
-        # Use HTML scraper - more reliable and doesn't hang on mirror resolution
-        _call(log_info, "[search] Using HTML scraper (search_libgen_no_ads)...")
-        results: List[Any] = search_libgen_no_ads(query, session=session)
-
-        if not results:
-            _call(log_info, "[search] No results from HTML scraper")
-            return []
-
-        formatted: List[Dict[str, Any]] = []
-        mirrors_list = [
-            "https://libgen.gl",
-            "https://libgen.vg",
-            "https://libgen.la",
-            "https://libgen.bz",
-            "https://libgen.gs",
-        ]
-
-        for book in results[:limit]:
-            if isinstance(book, dict):
-                # Result from search_libgen_no_ads (HTML scraper)
-                authors = book.get("authors", ["Unknown"])
-                if isinstance(authors, list):
-                    author_value = ", ".join(str(a) for a in authors)
-                else:
-                    author_value = str(authors)
-
-                # Extract book ID from mirror URL if available
-                mirror = book.get("mirror", "")
-                book_id = ""
-                if mirror and "/file.php?id=" in mirror:
-                    try:
-                        book_id = mirror.split("/file.php?id=")[1].split("&")[0]
-                    except (IndexError, ValueError):
-                        pass
-
-                # Build list of alternative mirrors based on book ID
-                mirrors_dict = {}
-                if book_id:
-                    for mirror_base in mirrors_list:
-                        mirrors_dict[mirror_base] = f"{mirror_base}/file.php?id={book_id}"
-                elif mirror:
-                    # Fallback: use the mirror we found
-                    mirrors_dict["primary"] = mirror
-
-                formatted.append(
-                    {
-                        "title": book.get("title", "Unknown"),
-                        "author": author_value,
-                        "publisher": book.get("publisher", ""),
-                        "year": book.get("year", ""),
-                        "pages": book.get("pages", ""),
-                        "language": book.get("language", ""),
-                        "filesize": 0,
-                        "filesize_str": book.get("size", "Unknown"),
-                        "extension": book.get("extension", ""),
-                        "isbn": book.get("isbn", ""),
-                        "mirror_url": mirror,  # Primary mirror
-                        "mirrors": mirrors_dict,  # Alternative mirrors
-                        "book_id": book_id,
-                    }
-                )
-            else:
-                # Fallback: try to format as book object
-                try:
-                    formatted.append(format_book_info(book))
-                except Exception:
-                    pass
-
-        _call(log_info, f"[search] Found {len(formatted)} result(s)")
-        return formatted
-    except LibgenError as exc:
-        _call(log_error, f"[search] Libgen error: {exc}")
-        return []
-    except Exception as exc:  # pragma: no cover - defensive
-        _call(log_error, f"[search] Error: {exc}")
-        return []
+    """Search Libgen using the robust scraper."""
+    searcher = LibgenSearch(session=session)
+    try:
+        results = searcher.search(query, limit=limit)
+        _call(log_info, f"[libgen] Found {len(results)} results")
+        return results
+    except Exception as e:
+        _call(log_error, f"[libgen] Search failed: {e}")
+        return []
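
A possible call site for the slimmed-down entry point (the module name is hypothetical; the keyword set is inferred from this hunk):

    import requests

    from libgen_service import search_libgen  # module name assumed for illustration

    session = requests.Session()
    results = search_libgen("snow crash", limit=5, log_info=print, log_error=print, session=session)
    for book in results:
        print(book.get("title"), "-", book.get("mirror_url"))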
+
+
+def _resolve_download_url(
+    session: requests.Session,
+    url: str,
+    log_info: LogFn = None
+) -> Optional[str]:
+    """Resolve the final download URL by following the LibGen chain."""
+    current_url = url
+    visited = set()
+
+    # Max hops to prevent infinite loops
+    for _ in range(6):
+        if current_url in visited:
+            break
+        visited.add(current_url)
+
+        _call(log_info, f"[resolve] Checking: {current_url}")
+
+        # Simple heuristic: if it looks like a file, return it
+        if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
+            return current_url
+
+        try:
+            # Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405
+            # So we'll just GET with stream=True to peek headers/content without downloading everything
+            with session.get(current_url, stream=True, timeout=30) as resp:
+                resp.raise_for_status()
+                ct = resp.headers.get("Content-Type", "").lower()
+
+                if "text/html" not in ct:
+                    # It's a binary file
+                    return current_url
+
+                # It's HTML, read content
+                content = resp.text
+        except Exception as e:
+            _call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
+            return None
+
+        soup = BeautifulSoup(content, "html.parser")
+
+        # 1. Check for "GET" link (library.lol / ads.php style)
+        # Usually <h2>GET</h2> inside <a> or just text "GET"
+        get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
+        if not get_link:
+            # Try finding <a> containing <h2>GET</h2>
+            h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
+            if h2_get and h2_get.parent.name == "a":
+                get_link = h2_get.parent
+
+        if get_link and get_link.has_attr("href"):
+            return urljoin(current_url, get_link["href"])
+
+        # 2. Check for "series.php" -> "edition.php"
+        if "series.php" in current_url:
+            # Find first edition link
+            edition_link = soup.find("a", href=re.compile(r"edition\.php"))
+            if edition_link:
+                current_url = urljoin(current_url, edition_link["href"])
+                continue
+
+        # 3. Check for "edition.php" -> "file.php"
+        if "edition.php" in current_url:
+            file_link = soup.find("a", href=re.compile(r"file\.php"))
+            if file_link:
+                current_url = urljoin(current_url, file_link["href"])
+                continue
+
+        # 4. Check for "file.php" -> "ads.php" (Libgen badge)
+        if "file.php" in current_url:
+            # Look for link with title="libgen" or text "Libgen"
+            libgen_link = soup.find("a", title="libgen")
+            if not libgen_link:
+                libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
+
+            if libgen_link and libgen_link.has_attr("href"):
+                current_url = urljoin(current_url, libgen_link["href"])
+                continue
+
+        # 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed)
+        if "ads.php" in current_url:
+            get_php_link = soup.find("a", href=re.compile(r"get\.php"))
+            if get_php_link:
+                return urljoin(current_url, get_php_link["href"])
+
+        # 6. Library.lol / generic fallback
+        for text in ["Cloudflare", "IPFS.io", "Infura"]:
+            link = soup.find("a", string=re.compile(text, re.IGNORECASE))
+            if link and link.has_attr("href"):
+                return urljoin(current_url, link["href"])
+
+        # If we found nothing new, stop
+        break
+
+    return None
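
A self-contained illustration of resolution step 1, run against fabricated stand-in markup for an ads.php-style page (only the lookup logic mirrors the function above):

    import re
    from urllib.parse import urljoin

    from bs4 import BeautifulSoup

    html = '<a href="get.php?md5=abc123"><h2>GET</h2></a>'  # made-up sample
    soup = BeautifulSoup(html, "html.parser")

    get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
    if not get_link:
        h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
        if h2_get and h2_get.parent.name == "a":
            get_link = h2_get.parent

    if get_link and get_link.has_attr("href"):
        # Resolves relative to the page URL -> http://library.lol/get.php?md5=abc123
        print(urljoin("http://library.lol/ads.php?md5=abc123", get_link["href"]))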
 def download_from_mirror(
     mirror_url: str,
-    output_path: str | Path,
+    output_path: Path,
     *,
     log_info: LogFn = None,
     log_error: ErrorFn = None,
     session: Optional[requests.Session] = None,
 ) -> bool:
-    """Download a Libgen file and write it to disk.
-
-    Handles Libgen redirects and ensures proper file download by:
-    - Following all redirects (default behavior)
-    - Setting User-Agent header (required by some mirrors)
-    - Validating that we're downloading binary content, not HTML
-    - Attempting alternative download method if HTML is returned
-    """
+    """Download file from a LibGen mirror URL."""
     session = session or requests.Session()
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
     try:
-        output_path = Path(output_path)
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-
-        _call(log_info, f"[download] Downloading from mirror: {mirror_url}")
-
-        # Ensure session has proper headers for Libgen
-        if 'User-Agent' not in session.headers:
-            session.headers['User-Agent'] = (
-                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
-                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
-            )
-
-        # Download with redirects enabled (default) and referer
-        session.headers['Referer'] = 'https://libgen.gs/'
-        response = session.get(mirror_url, stream=True, timeout=30, allow_redirects=True)
-        response.raise_for_status()
-
-        # Check if we got HTML instead of a file (common Libgen issue)
-        content_type = response.headers.get('content-type', '').lower()
-        if 'text/html' in content_type:
-            _call(log_error, f"[download] Server returned HTML. Trying alternative method...")
-
-            # Try to extract file ID and use alternative CDN
-            try:
-                # Parse the HTML to extract MD5 or file ID
-                from bs4 import BeautifulSoup
-                soup = BeautifulSoup(response.text, 'html.parser')
-
-                # Look for download link in the HTML
-                # Common patterns: md5 hash in form, or direct link in anchor tags
-                download_link = None
-
-                # Try to find forms that might contain download functionality
-                forms = soup.find_all('form')
-                for form in forms:
-                    action = form.get('action', '')
-                    if 'download' in action.lower() or 'get' in action.lower():
-                        download_link = action
-                        break
-
-                if not download_link:
-                    _call(log_error, f"[download] Could not extract alternative download link from HTML")
-                    return False
-
-                _call(log_info, f"[download] Using alternative download method: {download_link[:100]}")
-                # Try downloading from alternative link
-                response2 = session.get(download_link, stream=True, timeout=30, allow_redirects=True)
-                response2.raise_for_status()
-                response = response2  # Use the new response
-
-            except Exception as alt_error:
-                _call(log_error, f"[download] Alternative method failed: {alt_error}")
-                return False
-
-        total_size = int(response.headers.get("content-length", 0))
-        downloaded = 0
-
-        with open(output_path, "wb") as handle:
-            for chunk in response.iter_content(chunk_size=8192):
-                if not chunk:
-                    continue
-                handle.write(chunk)
-                downloaded += len(chunk)
-                if total_size > 0:
-                    percent = downloaded / total_size * 100
-                    _call(
-                        log_info,
-                        f"[download] {percent:.1f}% - {downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB",
-                    )
-
-        _call(log_info, f"[download] Downloaded successfully to: {output_path}")
-        return True
-    except Exception as exc:  # pragma: no cover - defensive
-        _call(log_error, f"[download] Error: {exc}")
-        return False
+        _call(log_info, f"[download] Resolving download link from: {mirror_url}")
+
+        download_url = _resolve_download_url(session, mirror_url, log_info)
+
+        if not download_url:
+            _call(log_error, "[download] Could not find direct download link")
+            return False
+
+        _call(log_info, f"[download] Downloading from: {download_url}")
+
+        # Download the actual file
+        with session.get(download_url, stream=True, timeout=60) as r:
+            r.raise_for_status()
+
+            # Verify it's not HTML (error page)
+            ct = r.headers.get("content-type", "").lower()
+            if "text/html" in ct:
+                _call(log_error, "[download] Final URL returned HTML, not a file.")
+                return False
+
+            total_size = int(r.headers.get("content-length", 0))
+            downloaded = 0
+
+            with open(output_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+                        downloaded += len(chunk)
+                        # Optional: progress logging
+
+        _call(log_info, f"[download] Saved to {output_path}")
+        return True
+    except Exception as e:
+        _call(log_error, f"[download] Download failed: {e}")
+        return False
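
Sketch of how the rewritten download path is meant to be driven (URL, MD5, and output path are illustrative only):

    from pathlib import Path

    import requests

    session = requests.Session()
    session.headers["User-Agent"] = "Mozilla/5.0"  # some mirrors reject the default requests UA

    ok = download_from_mirror(
        "http://library.lol/main/0123456789abcdef0123456789abcdef",  # made-up MD5 page
        Path("downloads/book.epub"),
        log_info=print,
        log_error=print,
        session=session,
    )
    print("downloaded" if ok else "failed")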
@@ -238,7 +238,7 @@ def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = N
         # Sanitize title for M3U (remove newlines)
         safe_title = title.replace("\n", " ").replace("\r", "")
         # M3U format: #EXTM3U\n#EXTINF:-1,Title\nURL
-        m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}\n"
+        m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}"
         target = f"memory://{m3u_content}"
     else:
         target = file_url
@@ -256,9 +256,8 @@ def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = N
 
     # Command 3: Set title (metadata for display) - still useful for window title
     if title:
-        safe_title_prop = title.replace('"', '\\"')
         cmd_title = {
-            "command": ["set_property", "force-media-title", safe_title_prop],
+            "command": ["set_property", "force-media-title", title],
            "request_id": 2
         }
        client.send_command(cmd_title)
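
For context, roughly what the two payloads look like after this change (URL and title are examples; dropping the manual escaping presumably relies on the IPC layer serializing the command dict as JSON):

    import json

    title = 'Some "Quoted"\nTitle'
    file_url = "http://example.com/stream.mp4"

    safe_title = title.replace("\n", " ").replace("\r", "")
    m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}"  # no trailing newline now
    target = f"memory://{m3u_content}"

    # Quotes survive intact because json.dumps escapes them; manual escaping would double up
    cmd_title = {"command": ["set_property", "force-media-title", title], "request_id": 2}
    print(target)
    print(json.dumps(cmd_title))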
@@ -510,6 +510,7 @@ class ResultTable:
             ('title | name | filename', ['title', 'name', 'filename']),
             ('ext', ['ext']),
             ('origin | source | store', ['origin', 'source', 'store']),
+            ('size | size_bytes', ['size', 'size_bytes']),
             ('type | media_kind | kind', ['type', 'media_kind', 'kind']),
             ('tags | tag_summary', ['tags', 'tag_summary']),
             ('detail | description', ['detail', 'description']),
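
Each spec line pairs a column header with an ordered list of fallback keys. A rough sketch of how such a pair could resolve against a result dict (the helper below is hypothetical; ResultTable's actual lookup is not part of this hunk):

    from typing import Any, Dict, List

    def pick(row: Dict[str, Any], keys: List[str]) -> str:
        # Return the first present, non-empty value among the fallback keys
        for key in keys:
            value = row.get(key)
            if value not in (None, ""):
                return str(value)
        return ""

    row = {"title": "Example", "size_bytes": 1048576}
    print(pick(row, ["size", "size_bytes"]))  # -> "1048576"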
44  test_ssl.py  Normal file
@@ -0,0 +1,44 @@
+import httpx
+import ssl
+
+def test_libgen_ssl():
+    url = "https://libgen.li/series.php?id=577851"
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    }
+
+    print(f"Testing connection to {url} with httpx...")
+    try:
+        with httpx.Client(verify=True, headers=headers, timeout=30.0) as client:
+            resp = client.get(url)
+            print(f"Status: {resp.status_code}")
+            print(f"Content length: {len(resp.content)}")
+    except Exception as e:
+        print(f"Error with default settings: {e}")
+
+    print("\nTesting with http2=True...")
+    try:
+        with httpx.Client(verify=True, headers=headers, timeout=30.0, http2=True) as client:
+            resp = client.get(url)
+            print(f"Status: {resp.status_code}")
+    except Exception as e:
+        print(f"Error with http2=True: {e}")
+
+    print("\nTesting with verify=False...")
+    try:
+        with httpx.Client(verify=False, headers=headers, timeout=30.0) as client:
+            resp = client.get(url)
+            print(f"Status: {resp.status_code}")
+    except Exception as e:
+        print(f"Error with verify=False: {e}")
+
+    import requests
+    print("\nTesting with requests (HTTP/1.1)...")
+    try:
+        resp = requests.get(url, headers=headers, timeout=30.0)
+        print(f"Status: {resp.status_code}")
+    except Exception as e:
+        print(f"Error with requests: {e}")
+
+if __name__ == "__main__":
+    test_libgen_ssl()
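
To reproduce the probes, run `python test_ssl.py`. It needs `requests` and `httpx` installed; the `http2=True` probe additionally requires the `httpx[http2]` extra (the `h2` package), otherwise httpx raises an ImportError. Note that the `import ssl` at the top is unused.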