jkjnkjkllkjjk
CLI.py | 76
@@ -676,8 +676,8 @@ def _create_cmdlet_cli():
try:
    from helper.hydrus import get_client
    get_client(config)  # Pre-acquire and cache session key
    debug("✓ Hydrus session key acquired")
    # get_client(config)  # Pre-acquire and cache session key
    # debug("✓ Hydrus session key acquired")
except RuntimeError as e:
    # Hydrus is not available - this is expected and normal
    # Don't show a message, just continue without it
@@ -697,6 +697,78 @@ def _create_cmdlet_cli():
initialize_hydrus_health_check(config)
initialize_matrix_health_check(config)
initialize_local_library_scan(config)

# --- Startup File Counts ---
# Count Local Files
try:
    from helper.file_storage import LocalStorageBackend
    from config import get_local_storage_path
    storage_path = get_local_storage_path(config)
    if storage_path:
        # Use LocalStorageBackend to perform the search; pass a large limit to get all files
        storage = LocalStorageBackend(location=storage_path)
        local_files = storage.search("*", limit=100000)
        print(f"Local: {len(local_files)}")
except Exception as e:
    debug(f"⚠ Could not count local files: {e}")

# Count Hydrus Files (if available)
from hydrus_health_check import is_hydrus_available
if is_hydrus_available():
    try:
        from helper.hydrus import get_client
        client = get_client(config)
        # Hydrus search for all files; search_files returns file IDs
        response = client.search_files(["system:everything"])
        hydrus_ids = response.get("file_ids", [])
        print(f"Hydrus: {len(hydrus_ids)}")
    except Exception as e:
        debug(f"⚠ Could not count Hydrus files: {e}")

# Count Debrid Magnets (if available)
try:
    from config import get_api_key
    from helper.alldebrid import AllDebridClient

    api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
    if api_key:
        # helper/alldebrid.py's magnet_status() requires a magnet ID, but the
        # /magnet/status endpoint returns every magnet when called without one.
        # The helper exposes no "list all" method, so call its _request directly;
        # accessing the protected member is acceptable for this CLI script.
        client = AllDebridClient(api_key)

        # API: /magnet/status
        resp = client._request('magnet/status')
        if resp.get('status') == 'success':
            data = resp.get('data', {})
            magnets = data.get('magnets', [])
            if isinstance(magnets, list):
                print(f"Debrid: {len(magnets)}")
            elif isinstance(magnets, dict):
                # The endpoint may return a dict keyed by magnet ID instead of a list
                print(f"Debrid: {len(magnets)}")
except Exception as e:
    # Don't show an error if Debrid is simply not configured or the request failed
    # debug(f"⚠ Could not count Debrid magnets: {e}")
    pass

except Exception as e:
    debug(f"⚠ Could not check service availability: {e}")
except Exception:
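The comments above note that helper/alldebrid.py only exposes magnet_status(magnet_id), which is why the CLI reaches into the protected _request helper. A cleaner option would be a small wrapper on the helper itself; the sketch below is a suggestion under that assumption (magnet_status_all is a hypothetical name, not part of this commit):

    def magnet_status_all(self) -> list:
        """Hypothetical helper: return all magnets via /magnet/status with no ID."""
        resp = self._request('magnet/status')
        if resp.get('status') != 'success':
            return []
        magnets = resp.get('data', {}).get('magnets', [])
        # The endpoint may return a dict keyed by magnet ID; normalise to a list.
        return list(magnets.values()) if isinstance(magnets, dict) else magnets
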
@@ -209,7 +209,7 @@ class SharedArgs:
STORAGE = CmdletArg(
    "storage",
    type="enum",
    choices=["hydrus", "local", "debrid", "ftp"],
    choices=["hydrus", "local", "debrid", "ftp", "matrix"],
    required=False,
    description="Storage location or destination for saving/uploading files.",
    alias="s",
@@ -268,6 +268,7 @@ class SharedArgs:
    'hydrus': Path.home() / ".hydrus" / "client_files",
    'debrid': Path.home() / "Debrid",
    'ftp': Path.home() / "FTP",
    'matrix': Path.home() / "Matrix",  # Placeholder, not used for upload path
}

if storage_value is None:
@@ -541,8 +541,36 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Map provider 0x0 to storage 0x0 for download-data
|
||||
if provider_name == "0x0":
|
||||
dl_args.extend(["-storage", "0x0"])
|
||||
|
||||
return dl_module._run(result, dl_args, config)
|
||||
|
||||
# Capture results from download-data so we can add them to DB
|
||||
captured_results = []
|
||||
original_emit = ctx.emit
|
||||
|
||||
def capture_emit(obj):
|
||||
captured_results.append(obj)
|
||||
original_emit(obj)
|
||||
|
||||
ctx.emit = capture_emit
|
||||
|
||||
try:
|
||||
ret_code = dl_module._run(result, dl_args, config)
|
||||
finally:
|
||||
ctx.emit = original_emit
|
||||
|
||||
if ret_code != 0:
|
||||
return ret_code
|
||||
|
||||
# Process the downloaded files recursively to add them to DB
|
||||
if captured_results:
|
||||
log(f"Processing {len(captured_results)} downloaded file(s)...", file=sys.stderr)
|
||||
success_count = 0
|
||||
for res in captured_results:
|
||||
# Recursively call add-file with the downloaded result
|
||||
if _run(res, _args, config) == 0:
|
||||
success_count += 1
|
||||
return 0 if success_count > 0 else 1
|
||||
|
||||
return 0
|
||||
|
||||
if media_path is None:
|
||||
log("File path could not be resolved")
|
||||
@@ -609,13 +637,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
return 0
|
||||
|
||||
# Handle storage-based operations (location is not None here)
|
||||
valid_locations = {'hydrus', 'local'}
|
||||
valid_locations = {'hydrus', 'local', 'matrix'}
|
||||
is_valid_location = location in valid_locations
|
||||
is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
|
||||
|
||||
if not (is_valid_location or is_local_path):
|
||||
log(f"❌ Invalid location: {location}")
|
||||
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
|
||||
log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path")
|
||||
return 1
|
||||
|
||||
if location == 'local':
|
||||
@@ -704,6 +732,36 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
return exit_code
|
||||
|
||||
elif location == 'matrix':
|
||||
log(f"Uploading to Matrix: {media_path.name}", file=sys.stderr)
|
||||
try:
|
||||
result_url = storage["matrix"].upload(media_path, config=config)
|
||||
log(f"Matrix: {result_url}", file=sys.stderr)
|
||||
|
||||
result_dict = create_pipe_object_result(
|
||||
source='matrix',
|
||||
identifier=result_url,
|
||||
file_path=str(media_path),
|
||||
cmdlet_name='add-file',
|
||||
title=media_path.name,
|
||||
target=result_url
|
||||
)
|
||||
ctx.emit(result_dict)
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Failed: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if delete_after_upload:
|
||||
try:
|
||||
media_path.unlink()
|
||||
_cleanup_sidecar_files(media_path)
|
||||
log(f"✅ Deleted file and sidecar", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
|
||||
|
||||
return 0
|
||||
|
||||
# location == 'hydrus'
|
||||
# Compute file hash to check if already in Hydrus
|
||||
log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
|
||||
|
||||
@@ -1594,6 +1594,25 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
|
||||
if not urls_to_download and files_downloaded_directly == 0:
|
||||
debug(f"No downloadable URLs found")
|
||||
return 1
|
||||
|
||||
# Deduplicate URLs while preserving order
|
||||
unique_urls = []
|
||||
seen_keys = set()
|
||||
|
||||
for u in urls_to_download:
|
||||
key = None
|
||||
if isinstance(u, dict):
|
||||
key = u.get('url') or u.get('link') or u.get('target') or u.get('source_url')
|
||||
if not key:
|
||||
key = str(u)
|
||||
else:
|
||||
key = str(u)
|
||||
|
||||
if key and key not in seen_keys:
|
||||
seen_keys.add(key)
|
||||
unique_urls.append(u)
|
||||
|
||||
urls_to_download = unique_urls
|
||||
|
||||
debug(f"Processing {len(urls_to_download)} URL(s)")
|
||||
for i, u in enumerate(urls_to_download, 1):
|
||||
@@ -1749,6 +1768,108 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
|
||||
debug(f" ✗ Error while borrowing: {e}")
|
||||
exit_code = 1
|
||||
continue
|
||||
except Exception as e:
|
||||
# Check for BookNotAvailableError (imported dynamically or by name)
|
||||
if type(e).__name__ == 'BookNotAvailableError':
|
||||
debug(f" ⚠ Book is waitlisted/unavailable on Archive.org")
|
||||
|
||||
# Fallback to LibGen if ISBN is available
|
||||
isbn = url.get('isbn')
|
||||
if isbn:
|
||||
debug(f" ▶ Falling back to LibGen search for ISBN: {isbn}")
|
||||
from helper.search_provider import LibGenProvider
|
||||
|
||||
provider = LibGenProvider(config)
|
||||
# Search specifically by ISBN
|
||||
results = provider.search(f"isbn:{isbn}", limit=1)
|
||||
|
||||
if results:
|
||||
debug(f" ✓ Found {len(results)} result(s) on LibGen")
|
||||
# Use the first result
|
||||
libgen_result = results[0]
|
||||
|
||||
# Process the LibGen result inline (it can't easily be injected back into the URL loop);
# provider results carry 'target' as a mirror URL or libgen:ID
|
||||
|
||||
target = libgen_result.target
|
||||
debug(f" → Downloading from LibGen: {libgen_result.title}")
|
||||
|
||||
# Use UnifiedBookDownloader directly to download into final_output_dir
# (simpler than delegating to the 'libgen' origin handler from inside this loop)
|
||||
from helper.unified_book_downloader import UnifiedBookDownloader
|
||||
downloader = UnifiedBookDownloader(config)
|
||||
|
||||
# The provider result's full_metadata is the LibGen book dict that
# UnifiedBookDownloader works with, so process it right here
book_data = libgen_result.full_metadata
|
||||
|
||||
# Download the book
|
||||
# We need to find a working mirror
|
||||
mirrors = book_data.get('mirrors', {})
|
||||
download_url = book_data.get('mirror_url')
|
||||
|
||||
if not download_url and mirrors:
|
||||
# Pick first mirror
|
||||
download_url = next(iter(mirrors.values()))
|
||||
|
||||
if download_url:
|
||||
debug(f" → Mirror: {download_url}")
|
||||
# UnifiedBookDownloader.download_book(book, output_dir) expects the book dict
# (title, author, year, extension, mirrors, ...); book_data should already carry these
filepath = downloader.download_book(book_data, final_output_dir)
|
||||
if filepath:
|
||||
debug(f" ✓ Successfully downloaded from LibGen: {filepath}")
|
||||
downloaded_files.append(str(filepath))
|
||||
|
||||
# Emit result
|
||||
file_hash = _compute_file_hash(filepath)
|
||||
emit_tags = ['book', 'libgen']
|
||||
if isbn: emit_tags.append(f'isbn:{isbn}')
|
||||
|
||||
pipe_obj = create_pipe_object_result(
|
||||
source='libgen',
|
||||
identifier=book_data.get('md5', 'unknown'),
|
||||
file_path=str(filepath),
|
||||
cmdlet_name='download-data',
|
||||
title=libgen_result.title,
|
||||
file_hash=file_hash,
|
||||
tags=emit_tags,
|
||||
source_url=download_url
|
||||
)
|
||||
pipeline_context.emit(pipe_obj)
|
||||
exit_code = 0
|
||||
continue # Success!
|
||||
else:
|
||||
debug(f" ✗ Failed to download from LibGen")
|
||||
else:
|
||||
debug(f" ✗ No download URL found in LibGen result")
|
||||
else:
|
||||
debug(f" ✗ No results found on LibGen for ISBN: {isbn}")
|
||||
else:
|
||||
debug(f" ⚠ No ISBN available for LibGen fallback")
|
||||
|
||||
# If fallback failed or wasn't possible, abort
|
||||
debug(f" ✗ Unable to borrow from Archive.org and LibGen fallback failed.")
|
||||
exit_code = 1
|
||||
continue
|
||||
else:
|
||||
# Re-raise other exceptions
|
||||
raise e
|
||||
|
||||
debug(f" → Extracting page information...")
|
||||
# Try both URL formats
|
||||
@@ -1806,8 +1927,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
|
||||
import img2pdf
|
||||
debug(f" → Merging pages into PDF...")
|
||||
|
||||
filename = title if title else f"book_{book_id_str}"
|
||||
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||||
# Use title from result item if available, otherwise fallback to extracted title
|
||||
filename_title = title_val if title_val and title_val != 'Unknown Book' else (title if title else f"book_{book_id_str}")
|
||||
# Allow underscores and spaces
|
||||
filename = "".join(c for c in filename_title if c.isalnum() or c in (' ', '.', '-', '_'))[:100]
|
||||
output_path = Path(final_output_dir) / f"{filename}.pdf"
|
||||
|
||||
# Make unique filename if needed
|
||||
@@ -1828,6 +1951,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
|
||||
file_hash = _compute_file_hash(output_path)
|
||||
# Build tags including ISBN if available
|
||||
emit_tags = ['book', 'borrowed', 'pdf']
|
||||
if title_val and title_val != 'Unknown Book':
|
||||
emit_tags.append(f'title:{title_val}')
|
||||
isbn_tag = url.get('isbn')
|
||||
if isbn_tag:
|
||||
emit_tags.append(f'isbn:{isbn_tag}')
|
||||
@@ -2343,6 +2468,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
|
||||
|
||||
debug(f"Downloading: {url}")
|
||||
|
||||
# Special handling for LibGen URLs
|
||||
if "libgen" in url or "library.lol" in url:
|
||||
debug(f"🔄 Detected LibGen URL, using specialized downloader: {url}")
|
||||
try:
|
||||
from helper.libgen_service import download_from_mirror, search_libgen
|
||||
|
||||
# If this is a search/details page (e.g. https://libgen.li/series.php?id=577851),
# extract the ID and search for fresh mirror links; otherwise treat it as a mirror URL
|
||||
|
||||
# Extract ID if possible, BUT skip for series/edition pages which are handled by download_from_mirror
|
||||
libgen_id = ""
|
||||
results = []
|
||||
|
||||
if "series.php" not in url and "edition.php" not in url:
|
||||
match = re.search(r"id=(\d+)", url)
|
||||
if match:
|
||||
libgen_id = match.group(1)
|
||||
debug(f" Extracted LibGen ID: {libgen_id}")
|
||||
|
||||
# Search by ID to get fresh mirror links
|
||||
results = search_libgen(libgen_id, limit=1)
|
||||
if results:
|
||||
# Use the mirror URL from the result
|
||||
mirror_url = results[0].get("mirror_url")
|
||||
if mirror_url:
|
||||
debug(f" Resolved to mirror URL: {mirror_url}")
|
||||
url = mirror_url
|
||||
|
||||
# Attempt download with the specialized function. download_from_mirror()
# writes to a full output path, and LibGen URLs rarely include a usable
# filename, so build one from the search metadata where possible.
filename = "libgen_download.bin"
|
||||
if libgen_id and results:
|
||||
title = results[0].get("title", "book")
|
||||
ext = results[0].get("extension", "pdf")
|
||||
# Sanitize filename
|
||||
safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
|
||||
filename = f"{safe_title}.{ext}"
|
||||
elif "series.php" in url:
|
||||
filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf"
|
||||
|
||||
output_path = final_output_dir / filename
|
||||
|
||||
if download_from_mirror(url, output_path, log_info=debug, log_error=log):
|
||||
debug(f"✓ LibGen download successful: {output_path}")
|
||||
|
||||
# Create a result object
|
||||
info = {
|
||||
"id": libgen_id or "libgen",
|
||||
"title": filename,
|
||||
"webpage_url": url,
|
||||
"ext": output_path.suffix.lstrip("."),
|
||||
}
|
||||
|
||||
# Emit result
|
||||
pipeline_context.emit(create_pipe_object_result(
|
||||
source="libgen",
|
||||
identifier=libgen_id or "libgen",
|
||||
file_path=str(output_path),
|
||||
cmdlet_name="download-data",
|
||||
title=filename,
|
||||
extra=info
|
||||
))
|
||||
downloaded_files.append(str(output_path))
|
||||
continue
|
||||
else:
|
||||
debug("⚠ LibGen specialized download failed, falling back to generic downloader...")
|
||||
except Exception as e:
|
||||
debug(f"⚠ LibGen specialized download error: {e}")
|
||||
# Fall through to generic downloader
|
||||
|
||||
# Resolve cookies path if specified
|
||||
final_cookies_path = None
|
||||
if cookies_path:
|
||||
|
||||
cmdlets/matrix.py | 103 (new file)
@@ -0,0 +1,103 @@
|
||||
from typing import Any, Dict, Sequence, List
|
||||
import sys
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from helper.logger import log, debug
|
||||
from result_table import ResultTable
|
||||
from helper.file_storage import MatrixStorageBackend
|
||||
from config import save_config, load_config
|
||||
import pipeline as ctx
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
|
||||
# Initialize backend
|
||||
backend = MatrixStorageBackend()
|
||||
|
||||
# Get current default room
|
||||
matrix_conf = config.get('storage', {}).get('matrix', {})
|
||||
current_room_id = matrix_conf.get('room_id')
|
||||
|
||||
# Fetch rooms
|
||||
debug("Fetching joined rooms from Matrix...")
|
||||
rooms = backend.list_rooms(config)
|
||||
|
||||
if not rooms:
|
||||
debug("No joined rooms found or Matrix not configured.")
|
||||
return 1
|
||||
|
||||
# Handle selection if provided
|
||||
selection = parsed.get("selection")
|
||||
if selection:
|
||||
new_room_id = None
|
||||
selected_room_name = None
|
||||
|
||||
# Try as index (1-based)
|
||||
try:
|
||||
idx = int(selection) - 1
|
||||
if 0 <= idx < len(rooms):
|
||||
selected_room = rooms[idx]
|
||||
new_room_id = selected_room['id']
|
||||
selected_room_name = selected_room['name']
|
||||
except ValueError:
|
||||
# Try as Room ID
|
||||
for room in rooms:
|
||||
if room['id'] == selection:
|
||||
new_room_id = selection
|
||||
selected_room_name = room['name']
|
||||
break
|
||||
|
||||
if new_room_id:
|
||||
# Update config
|
||||
# Load fresh config from disk to avoid saving runtime objects (like WorkerManager)
|
||||
disk_config = load_config()
|
||||
|
||||
if 'storage' not in disk_config: disk_config['storage'] = {}
|
||||
if 'matrix' not in disk_config['storage']: disk_config['storage']['matrix'] = {}
|
||||
|
||||
disk_config['storage']['matrix']['room_id'] = new_room_id
|
||||
save_config(disk_config)
|
||||
|
||||
debug(f"Default Matrix room set to: {selected_room_name} ({new_room_id})")
|
||||
current_room_id = new_room_id
|
||||
else:
|
||||
debug(f"Invalid selection: {selection}")
|
||||
return 1
|
||||
|
||||
# Display table
|
||||
table = ResultTable("Matrix Rooms")
|
||||
for i, room in enumerate(rooms):
|
||||
is_default = (room['id'] == current_room_id)
|
||||
|
||||
row = table.add_row()
|
||||
row.add_column("Default", "*" if is_default else "")
|
||||
row.add_column("Name", room['name'])
|
||||
row.add_column("ID", room['id'])
|
||||
|
||||
# Set selection args so user can type @N to select
|
||||
# This will run .matrix N
|
||||
table.set_row_selection_args(i, [str(i + 1)])
|
||||
|
||||
table.set_source_command(".matrix")
|
||||
|
||||
# Register results
|
||||
ctx.set_last_result_table_overlay(table, rooms)
|
||||
ctx.set_current_stage_table(table)
|
||||
|
||||
print(table)
|
||||
return 0
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name=".matrix",
|
||||
aliases=["matrix", "rooms"],
|
||||
summary="List and select default Matrix room",
|
||||
usage=".matrix [selection]",
|
||||
args=[
|
||||
CmdletArg(
|
||||
name="selection",
|
||||
type="string",
|
||||
description="Index or ID of the room to set as default",
|
||||
required=False
|
||||
)
|
||||
],
|
||||
exec=_run
|
||||
)
|
||||
@@ -70,12 +70,15 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str:
|
||||
|
||||
return title or filename or "Unknown"
|
||||
|
||||
def _queue_items(items: List[Any], clear_first: bool = False) -> None:
|
||||
def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
|
||||
"""Queue items to MPV, starting it if necessary.
|
||||
|
||||
Args:
|
||||
items: List of items to queue
|
||||
clear_first: If True, the first item will replace the current playlist
|
||||
|
||||
Returns:
|
||||
True if MPV was started, False if items were queued via IPC.
|
||||
"""
|
||||
for i, item in enumerate(items):
|
||||
# Extract URL/Path
|
||||
@@ -115,7 +118,7 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> None:
|
||||
# MPV not running (or died)
|
||||
# Start MPV with remaining items
|
||||
_start_mpv(items[i:])
|
||||
return
|
||||
return True
|
||||
elif resp.get("error") == "success":
|
||||
# Also set property for good measure
|
||||
if title:
|
||||
@@ -125,14 +128,30 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> None:
|
||||
else:
|
||||
error_msg = str(resp.get('error'))
|
||||
debug(f"Failed to queue item: {error_msg}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Manage and play items in the MPV playlist via IPC."""
|
||||
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
|
||||
# Initialize mpv_started flag
|
||||
mpv_started = False
|
||||
|
||||
# Handle positional index argument if provided
|
||||
index_arg = parsed.get("index")
|
||||
url_arg = parsed.get("url")
|
||||
|
||||
# If index_arg is provided but is not an integer, treat it as a URL
|
||||
# This allows .pipe "http://..." without -url flag
|
||||
if index_arg is not None:
|
||||
try:
|
||||
int(index_arg)
|
||||
except ValueError:
|
||||
# Not an integer, treat as URL if url_arg is not set
|
||||
if not url_arg:
|
||||
url_arg = index_arg
|
||||
index_arg = None
|
||||
|
||||
clear_mode = parsed.get("clear")
|
||||
list_mode = parsed.get("list")
|
||||
@@ -141,6 +160,15 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
save_mode = parsed.get("save")
|
||||
load_mode = parsed.get("load")
|
||||
|
||||
# Handle URL queuing
|
||||
mpv_started = False
|
||||
if url_arg:
|
||||
mpv_started = _queue_items([url_arg])
|
||||
# If we just queued a URL, we probably want to list the playlist to show it was added
|
||||
# unless other flags are present
|
||||
if not (clear_mode or play_mode or pause_mode or save_mode or load_mode):
|
||||
list_mode = True
|
||||
|
||||
# Handle Save Playlist
|
||||
if save_mode:
|
||||
playlist_name = index_arg or f"Playlist {subprocess.check_output(['date', '/t'], shell=True).decode().strip()}"
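Note that the default playlist name above shells out to the Windows-only `date /t` command. A portable sketch (an alternative, not what this commit does) would use the standard library:

    from datetime import datetime
    playlist_name = index_arg or f"Playlist {datetime.now():%Y-%m-%d %H:%M}"
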
|
||||
@@ -296,7 +324,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
# Handle piped input (add to playlist)
|
||||
# Skip adding if -list is specified (user just wants to see current playlist)
|
||||
if result and not list_mode:
|
||||
if result and not list_mode and not url_arg:
|
||||
# If result is a list of items, add them to playlist
|
||||
items_to_add = []
|
||||
if isinstance(result, list):
|
||||
@@ -304,7 +332,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
elif isinstance(result, dict):
|
||||
items_to_add = [result]
|
||||
|
||||
_queue_items(items_to_add)
|
||||
if _queue_items(items_to_add):
|
||||
mpv_started = True
|
||||
|
||||
if items_to_add:
|
||||
# If we added items, we might want to play the first one if nothing is playing?
|
||||
@@ -315,6 +344,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
items = _get_playlist()
|
||||
|
||||
if items is None:
|
||||
if mpv_started:
|
||||
# MPV was just started, so we can't list items yet.
|
||||
# But we know it's running (or trying to start), so don't start another instance.
|
||||
return 0
|
||||
|
||||
debug("MPV is not running. Starting new instance...")
|
||||
_start_mpv([])
|
||||
return 0
|
||||
@@ -369,7 +403,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
return 1
|
||||
|
||||
# List items (Default action or after clear)
|
||||
if list_mode or index_arg is None:
|
||||
if list_mode or (index_arg is None and not url_arg):
|
||||
if not items:
|
||||
debug("MPV playlist is empty.")
|
||||
return 0
|
||||
@@ -451,12 +485,18 @@ CMDLET = Cmdlet(
|
||||
name=".pipe",
|
||||
aliases=["pipe", "playlist", "queue", "ls-pipe"],
|
||||
summary="Manage and play items in the MPV playlist via IPC",
|
||||
usage=".pipe [index] [-clear]",
|
||||
usage=".pipe [index|url] [-clear] [-url URL]",
|
||||
args=[
|
||||
CmdletArg(
|
||||
name="index",
|
||||
type="int",
|
||||
description="Index of item to play or clear",
|
||||
type="string", # Changed to string to allow URL detection
|
||||
description="Index of item to play/clear, or URL to queue",
|
||||
required=False
|
||||
),
|
||||
CmdletArg(
|
||||
name="url",
|
||||
type="string",
|
||||
description="URL to queue",
|
||||
required=False
|
||||
),
|
||||
CmdletArg(
|
||||
|
||||
@@ -141,8 +141,33 @@ def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return payload
|
||||
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
|
||||
store_label = payload.get("origin") or payload.get("source") or origin_value
|
||||
|
||||
# Handle extension
|
||||
extension = payload.get("ext", "")
|
||||
if not extension and title:
|
||||
path_obj = Path(str(title))
|
||||
if path_obj.suffix:
|
||||
extension = path_obj.suffix.lstrip('.')
|
||||
title = path_obj.stem
|
||||
|
||||
# Handle size
|
||||
size_val = payload.get("size") or payload.get("size_bytes")
|
||||
size_str = ""
|
||||
if size_val:
|
||||
try:
|
||||
size_bytes = int(size_val)
|
||||
size_mb = size_bytes / (1024 * 1024)
|
||||
size_str = f"{size_mb:.1f} MB"
|
||||
except (ValueError, TypeError):
|
||||
size_str = str(size_val)
|
||||
|
||||
normalized = dict(payload)
|
||||
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
|
||||
normalized["columns"] = [
|
||||
("Title", str(title)),
|
||||
("Ext", str(extension)),
|
||||
("Store", str(store_label)),
|
||||
("Size", str(size_str))
|
||||
]
|
||||
return normalized
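For example, assuming a payload such as {"title": "demo.mp3", "size": 3145728, "origin": "local"}, this normalisation would presumably yield columns [("Title", "demo"), ("Ext", "mp3"), ("Store", "local"), ("Size", "3.0 MB")].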
|
||||
|
||||
|
||||
|
||||
debug_db.py | 19 (new file)
@@ -0,0 +1,19 @@
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
db_path = Path("C:/Media Machina/.downlow_library.db")
|
||||
|
||||
if not db_path.exists():
|
||||
print(f"DB not found at {db_path}")
|
||||
else:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
print("Files in DB:")
|
||||
cursor.execute("SELECT id, file_path FROM files")
|
||||
for row in cursor.fetchall():
|
||||
print(f"ID: {row[0]}, Path: {row[1]}")
|
||||
|
||||
conn.close()
|
||||
@@ -75,6 +75,11 @@ def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optio
|
||||
return email, password
|
||||
|
||||
|
||||
class BookNotAvailableError(Exception):
|
||||
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
|
||||
pass
|
||||
|
||||
|
||||
def display_error(response: requests.Response, message: str) -> None:
|
||||
"""Display error and exit."""
|
||||
log(message, file=sys.stderr)
|
||||
@@ -133,9 +138,11 @@ def loan(session: requests.Session, book_id: str, verbose: bool = True) -> reque
|
||||
if response.status_code == 400:
|
||||
try:
|
||||
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
|
||||
debug("This book doesn't need to be borrowed")
|
||||
return session
|
||||
debug("Book is not available for borrowing (waitlisted or in use)")
|
||||
raise BookNotAvailableError("Book is waitlisted or in use")
|
||||
display_error(response, "Something went wrong when trying to borrow the book.")
|
||||
except BookNotAvailableError:
|
||||
raise
|
||||
except:
|
||||
display_error(response, "The book cannot be borrowed")
|
||||
|
||||
@@ -182,11 +189,21 @@ def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str],
|
||||
|
||||
# Try to extract the infos URL from the response
|
||||
try:
|
||||
# Look for the "url" field in the response
|
||||
if '"url":"' not in r:
|
||||
raise ValueError("No 'url' field found in response")
|
||||
infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
|
||||
except (IndexError, ValueError) as e:
|
||||
# Look for the "url" field in the response using regex
|
||||
# Matches "url":"//archive.org/..."
|
||||
import re
|
||||
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
|
||||
if not match:
|
||||
raise ValueError("No 'url' field found in response")
|
||||
|
||||
url_path = match.group(1)
|
||||
if url_path.startswith("//"):
|
||||
infos_url = "https:" + url_path
|
||||
else:
|
||||
infos_url = url_path
|
||||
|
||||
infos_url = infos_url.replace("\\u0026", "&")
|
||||
except (IndexError, ValueError, AttributeError) as e:
|
||||
# If URL extraction fails, raise with better error message
|
||||
raise RuntimeError(f"Failed to extract book info URL from response: {e}")
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ import requests
|
||||
import re
|
||||
|
||||
from helper.logger import log, debug
|
||||
from helper.utils_constant import mime_maps
|
||||
|
||||
|
||||
class StorageBackend(ABC):
|
||||
@@ -707,6 +708,18 @@ class HydrusStorageBackend(StorageBackend):
|
||||
if title != f"Hydrus File {file_id}":
|
||||
break
|
||||
|
||||
# Resolve extension from MIME type
|
||||
mime_type = meta.get("mime")
|
||||
ext = ""
|
||||
if mime_type:
|
||||
for category in mime_maps.values():
|
||||
for ext_key, info in category.items():
|
||||
if mime_type in info.get("mimes", []):
|
||||
ext = info.get("ext", "").lstrip('.')
|
||||
break
|
||||
if ext:
|
||||
break
|
||||
|
||||
# Filter results based on query type
|
||||
# If user provided explicit namespace (has ':'), don't do substring filtering
|
||||
# Just include what the tag search returned
|
||||
@@ -726,7 +739,8 @@ class HydrusStorageBackend(StorageBackend):
|
||||
"origin": "hydrus",
|
||||
"tags": all_tags,
|
||||
"file_id": file_id,
|
||||
"mime": meta.get("mime"),
|
||||
"mime": mime_type,
|
||||
"ext": ext,
|
||||
})
|
||||
else:
|
||||
# Free-form search: check if search terms match the title or tags
|
||||
@@ -758,7 +772,8 @@ class HydrusStorageBackend(StorageBackend):
|
||||
"origin": "hydrus",
|
||||
"tags": all_tags,
|
||||
"file_id": file_id,
|
||||
"mime": meta.get("mime"),
|
||||
"mime": mime_type,
|
||||
"ext": ext,
|
||||
})
|
||||
|
||||
debug(f"Found {len(results)} result(s)")
|
||||
@@ -971,6 +986,60 @@ class MatrixStorageBackend(StorageBackend):
|
||||
def get_name(self) -> str:
|
||||
return "matrix"
|
||||
|
||||
def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""List joined rooms with their names."""
|
||||
matrix_conf = config.get('storage', {}).get('matrix', {})
|
||||
homeserver = matrix_conf.get('homeserver')
|
||||
access_token = matrix_conf.get('access_token')
|
||||
|
||||
if not homeserver or not access_token:
|
||||
return []
|
||||
|
||||
if not homeserver.startswith('http'):
|
||||
homeserver = f"https://{homeserver}"
|
||||
|
||||
headers = {"Authorization": f"Bearer {access_token}"}
|
||||
|
||||
try:
|
||||
# Get joined rooms
|
||||
resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
|
||||
if resp.status_code != 200:
|
||||
return []
|
||||
|
||||
room_ids = resp.json().get('joined_rooms', [])
|
||||
rooms = []
|
||||
|
||||
for rid in room_ids:
|
||||
# Try to get room name
|
||||
name = "Unknown Room"
|
||||
try:
|
||||
# Get state event for name
|
||||
name_resp = requests.get(
|
||||
f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name",
|
||||
headers=headers,
|
||||
timeout=2
|
||||
)
|
||||
if name_resp.status_code == 200:
|
||||
name = name_resp.json().get('name', name)
|
||||
else:
|
||||
# Try canonical alias
|
||||
alias_resp = requests.get(
|
||||
f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias",
|
||||
headers=headers,
|
||||
timeout=2
|
||||
)
|
||||
if alias_resp.status_code == 200:
|
||||
name = alias_resp.json().get('alias', name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
rooms.append({'id': rid, 'name': name})
|
||||
|
||||
return rooms
|
||||
except Exception as e:
|
||||
log(f"Error listing Matrix rooms: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def upload(self, file_path: Path, **kwargs: Any) -> str:
|
||||
"""Upload file to Matrix room.
|
||||
|
||||
@@ -993,8 +1062,8 @@ class MatrixStorageBackend(StorageBackend):
|
||||
access_token = matrix_conf.get('access_token')
|
||||
room_id = matrix_conf.get('room_id')
|
||||
|
||||
if not homeserver or not room_id:
|
||||
raise ValueError("Matrix homeserver and room_id required")
|
||||
if not homeserver:
|
||||
raise ValueError("Matrix homeserver required")
|
||||
|
||||
# Ensure homeserver has protocol
|
||||
if not homeserver.startswith('http'):
|
||||
@@ -1004,6 +1073,39 @@ class MatrixStorageBackend(StorageBackend):
|
||||
if not access_token:
|
||||
raise ValueError("Matrix access_token required (login not yet implemented)")
|
||||
|
||||
# Handle room selection if not provided
|
||||
if not room_id:
|
||||
log("No room_id configured. Fetching joined rooms...", file=sys.stderr)
|
||||
rooms = self.list_rooms(config)
|
||||
|
||||
if not rooms:
|
||||
raise ValueError("No joined rooms found or failed to fetch rooms.")
|
||||
|
||||
from result_table import ResultTable
|
||||
table = ResultTable("Matrix Rooms")
|
||||
for i, room in enumerate(rooms):
|
||||
row = table.add_row()
|
||||
row.add_column("#", str(i + 1))
|
||||
row.add_column("Name", room['name'])
|
||||
row.add_column("ID", room['id'])
|
||||
|
||||
print(table)
|
||||
|
||||
# Simple interactive selection
|
||||
try:
|
||||
selection = input("Select room # to upload to: ")
|
||||
idx = int(selection) - 1
|
||||
if 0 <= idx < len(rooms):
|
||||
room_id = rooms[idx]['id']
|
||||
log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr)
|
||||
else:
|
||||
raise ValueError("Invalid selection")
|
||||
except Exception:
|
||||
raise ValueError("Invalid room selection")
|
||||
|
||||
if not room_id:
|
||||
raise ValueError("Matrix room_id required")
|
||||
|
||||
# 1. Upload Media
|
||||
upload_url = f"{homeserver}/_matrix/media/r3/upload"
|
||||
headers = {
|
||||
|
||||
@@ -1337,19 +1337,44 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool,
|
||||
timeout = 10.0
|
||||
|
||||
try:
|
||||
client = HydrusClient(url, access_key, timeout)
|
||||
# Lightweight probe: get services
|
||||
# Temporarily suppress error logging for health checks (expected to fail if Hydrus unavailable)
|
||||
hydrus_logger = logging.getLogger("helper.hydrus")
|
||||
original_level = hydrus_logger.level
|
||||
hydrus_logger.setLevel(logging.CRITICAL) # Suppress errors/warnings
|
||||
# Use HTTPClient directly to avoid session key logic and reduce retries
|
||||
# This prevents log spam when Hydrus is offline (avoiding 3 retries x 2 requests)
|
||||
from helper.http_client import HTTPClient
|
||||
|
||||
probe_url = f"{url.rstrip('/')}/get_services"
|
||||
|
||||
headers = {}
|
||||
if access_key:
|
||||
headers["Hydrus-Client-API-Access-Key"] = access_key
|
||||
|
||||
# Suppress HTTPClient logging during probe to avoid "Request failed" logs on startup
|
||||
http_logger = logging.getLogger("helper.http_client")
|
||||
original_level = http_logger.level
|
||||
http_logger.setLevel(logging.CRITICAL)
|
||||
|
||||
try:
|
||||
_ = client.get_services()
|
||||
_HYDRUS_AVAILABLE = True
|
||||
_HYDRUS_UNAVAILABLE_REASON = None
|
||||
return True, None
|
||||
# Use retries=1 (single attempt, no retry) to fail fast
|
||||
with HTTPClient(timeout=timeout, retries=1, headers=headers, verify_ssl=False) as http:
|
||||
try:
|
||||
response = http.get(probe_url)
|
||||
if response.status_code == 200:
|
||||
_HYDRUS_AVAILABLE = True
|
||||
_HYDRUS_UNAVAILABLE_REASON = None
|
||||
return True, None
|
||||
else:
|
||||
# A non-200 response (e.g. 403 auth failure) means Hydrus is reachable but not
# usable, so report it as unavailable.
reason = f"HTTP {response.status_code}: {response.reason_phrase}"
|
||||
_HYDRUS_AVAILABLE = False
|
||||
_HYDRUS_UNAVAILABLE_REASON = reason
|
||||
return False, reason
|
||||
except Exception as e:
|
||||
# This catches connection errors from HTTPClient
|
||||
raise e
|
||||
finally:
|
||||
hydrus_logger.setLevel(original_level)
|
||||
http_logger.setLevel(original_level)
|
||||
|
||||
except Exception as exc:
|
||||
reason = str(exc)
|
||||
_HYDRUS_AVAILABLE = False
|
||||
|
||||
@@ -1,21 +1,44 @@
|
||||
"""Shared Library Genesis search and download helpers."""
|
||||
"""Shared Library Genesis search and download helpers.
|
||||
|
||||
Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
|
||||
Targets libgen.is/rs/st mirrors and parses the results table directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional
|
||||
import logging
|
||||
import re
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from urllib.parse import quote, urljoin
|
||||
|
||||
from libgen import search_sync, LibgenError
|
||||
# Optional dependencies
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
BeautifulSoup = None
|
||||
|
||||
LogFn = Optional[Callable[[str], None]]
|
||||
ErrorFn = Optional[Callable[[str], None]]
|
||||
|
||||
DEFAULT_TIMEOUT = 10.0
|
||||
DEFAULT_TIMEOUT = 20.0
|
||||
DEFAULT_LIMIT = 50
|
||||
|
||||
logging.getLogger(__name__).setLevel(logging.WARNING)
|
||||
# Mirrors to try in order
|
||||
MIRRORS = [
|
||||
"https://libgen.is",
|
||||
"https://libgen.rs",
|
||||
"https://libgen.st",
|
||||
"http://libgen.is",
|
||||
"http://libgen.rs",
|
||||
"http://libgen.st",
|
||||
"https://libgen.li", # Different structure, fallback
|
||||
"http://libgen.li",
|
||||
"https://libgen.gl", # Different structure, fallback
|
||||
"http://libgen.gl",
|
||||
]
|
||||
|
||||
logging.getLogger(__name__).setLevel(logging.INFO)
|
||||
|
||||
|
||||
def _call(logger: LogFn, message: str) -> None:
|
||||
@@ -23,168 +46,248 @@ def _call(logger: LogFn, message: str) -> None:
|
||||
logger(message)
|
||||
|
||||
|
||||
def search_libgen_no_ads(query: str, session: Optional[requests.Session] = None) -> List[Dict[str, Any]]:
|
||||
"""Search Libgen without triggering ads.php requests."""
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError: # pragma: no cover
|
||||
logging.warning("BeautifulSoup not available; falling back to standard search")
|
||||
class LibgenSearch:
|
||||
"""Robust LibGen searcher."""
|
||||
|
||||
def __init__(self, session: Optional[requests.Session] = None):
|
||||
self.session = session or requests.Session()
|
||||
self.session.headers.update({
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
||||
})
|
||||
|
||||
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
|
||||
"""Search LibGen mirrors."""
|
||||
if not BeautifulSoup:
|
||||
logging.error("BeautifulSoup not installed. Cannot search LibGen.")
|
||||
return []
|
||||
|
||||
for mirror in MIRRORS:
|
||||
try:
|
||||
if "libgen.li" in mirror or "libgen.gl" in mirror:
|
||||
results = self._search_libgen_li(mirror, query, limit)
|
||||
else:
|
||||
results = self._search_libgen_rs(mirror, query, limit)
|
||||
|
||||
if results:
|
||||
return results
|
||||
except Exception as e:
|
||||
logging.debug(f"Mirror {mirror} failed: {e}")
|
||||
continue
|
||||
|
||||
return []
|
||||
|
||||
mirrors = [
|
||||
"https://libgen.gl",
|
||||
"https://libgen.vg",
|
||||
"https://libgen.la",
|
||||
"https://libgen.bz",
|
||||
"https://libgen.gs",
|
||||
]
|
||||
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
|
||||
"""Search libgen.rs/is/st style mirrors."""
|
||||
# Search URL: /search.php?req=QUERY&res=100&column=def
|
||||
url = f"{mirror}/search.php"
|
||||
params = {
|
||||
"req": query,
|
||||
"res": 100, # Request more to filter later
|
||||
"column": "def",
|
||||
"open": 0,
|
||||
"view": "simple",
|
||||
"phrase": 1,
|
||||
}
|
||||
|
||||
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
|
||||
# Find the table with results (usually class 'c')
|
||||
table = soup.find("table", {"class": "c"})
|
||||
if not table:
|
||||
# Try finding by structure (table with many rows)
|
||||
tables = soup.find_all("table")
|
||||
for t in tables:
|
||||
if len(t.find_all("tr")) > 5:
|
||||
table = t
|
||||
break
|
||||
|
||||
if not table:
|
||||
return []
|
||||
|
||||
session = session or requests.Session()
|
||||
session.headers.setdefault(
|
||||
"User-Agent",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
)
|
||||
|
||||
for mirror in mirrors:
|
||||
try:
|
||||
search_url = f"{mirror}/index.php?req={quote(query)}&res=100&covers=on&filesuns=all"
|
||||
response = session.get(search_url, timeout=DEFAULT_TIMEOUT)
|
||||
if response.status_code != 200:
|
||||
results = []
|
||||
# Skip header row
|
||||
rows = table.find_all("tr")[1:]
|
||||
|
||||
for row in rows:
|
||||
cols = row.find_all("td")
|
||||
if len(cols) < 9:
|
||||
continue
|
||||
|
||||
# Columns:
|
||||
# 0: ID
|
||||
# 1: Author(s)
|
||||
# 2: Title
|
||||
# 3: Publisher
|
||||
# 4: Year
|
||||
# 5: Pages
|
||||
# 6: Language
|
||||
# 7: Size
|
||||
# 8: Extension
|
||||
# 9+: Mirrors
|
||||
|
||||
try:
|
||||
libgen_id = cols[0].get_text(strip=True)
|
||||
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
|
||||
if not authors:
|
||||
authors = [cols[1].get_text(strip=True)]
|
||||
|
||||
title_tag = cols[2].find("a")
|
||||
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
|
||||
|
||||
# Extract MD5 from title link if possible (often in href)
|
||||
# href='book/index.php?md5=...'
|
||||
md5 = ""
|
||||
if title_tag and title_tag.has_attr("href"):
|
||||
href = title_tag["href"]
|
||||
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
|
||||
if match:
|
||||
md5 = match.group(1)
|
||||
|
||||
publisher = cols[3].get_text(strip=True)
|
||||
year = cols[4].get_text(strip=True)
|
||||
pages = cols[5].get_text(strip=True)
|
||||
language = cols[6].get_text(strip=True)
|
||||
size = cols[7].get_text(strip=True)
|
||||
extension = cols[8].get_text(strip=True)
|
||||
|
||||
# Mirrors
|
||||
# Usually col 9 is http://library.lol/main/MD5
|
||||
mirror_links = []
|
||||
for i in range(9, len(cols)):
|
||||
a = cols[i].find("a")
|
||||
if a and a.has_attr("href"):
|
||||
mirror_links.append(a["href"])
|
||||
|
||||
# Construct direct download page link (library.lol)
|
||||
# If we have MD5, we can guess it: http://library.lol/main/{md5}
|
||||
if md5:
|
||||
download_link = f"http://library.lol/main/{md5}"
|
||||
elif mirror_links:
|
||||
download_link = mirror_links[0]
|
||||
else:
|
||||
download_link = ""
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
table = soup.find("table", {"class": "catalog"})
|
||||
if table is None:
|
||||
for candidate in soup.find_all("table"):
|
||||
rows = candidate.find_all("tr")
|
||||
if len(rows) > 2:
|
||||
table = candidate
|
||||
break
|
||||
if table is None:
|
||||
logging.debug("[libgen_no_ads] No results table on %s", mirror)
|
||||
results.append({
|
||||
"id": libgen_id,
|
||||
"title": title,
|
||||
"author": ", ".join(authors),
|
||||
"publisher": publisher,
|
||||
"year": year,
|
||||
"pages": pages,
|
||||
"language": language,
|
||||
"filesize_str": size,
|
||||
"extension": extension,
|
||||
"md5": md5,
|
||||
"mirror_url": download_link,
|
||||
"cover": "", # Could extract from hover if needed
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logging.debug(f"Error parsing row: {e}")
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
rows = table.find_all("tr")[1:]
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
try:
|
||||
cells = row.find_all("td")
|
||||
if len(cells) < 9:
|
||||
continue
|
||||
|
||||
size_cell = cells[7]
|
||||
file_link = size_cell.find("a")
|
||||
mirror_link = ""
|
||||
if file_link:
|
||||
href = str(file_link.get("href", ""))
|
||||
if href.startswith("/"):
|
||||
mirror_link = mirror + href
|
||||
elif href:
|
||||
mirror_link = urljoin(mirror, href)
|
||||
|
||||
if not mirror_link:
|
||||
title_link = cells[1].find("a") if len(cells) > 1 else None
|
||||
if title_link:
|
||||
href = str(title_link.get("href", ""))
|
||||
if href.startswith("/"):
|
||||
mirror_link = mirror + href
|
||||
elif href:
|
||||
mirror_link = urljoin(mirror, href)
|
||||
|
||||
if not mirror_link:
|
||||
continue
|
||||
|
||||
results.append(
|
||||
{
|
||||
"id": "",
|
||||
"mirror": mirror_link,
|
||||
"cover": "",
|
||||
"title": cells[1].get_text(strip=True) if len(cells) > 1 else "Unknown",
|
||||
"authors": [cells[2].get_text(strip=True)]
|
||||
if len(cells) > 2
|
||||
else ["Unknown"],
|
||||
"publisher": cells[3].get_text(strip=True) if len(cells) > 3 else "",
|
||||
"year": cells[4].get_text(strip=True) if len(cells) > 4 else "",
|
||||
"pages": cells[6].get_text(strip=True) if len(cells) > 6 else "",
|
||||
"language": cells[5].get_text(strip=True) if len(cells) > 5 else "",
|
||||
"size": cells[7].get_text(strip=True) if len(cells) > 7 else "",
|
||||
"extension": cells[8].get_text(strip=True) if len(cells) > 8 else "",
|
||||
"isbn": "",
|
||||
}
|
||||
)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logging.debug("[libgen_no_ads] Error parsing row: %s", exc)
|
||||
continue
|
||||
|
||||
if results:
|
||||
logging.info("[libgen_no_ads] %d results from %s", len(results), mirror)
|
||||
return results
|
||||
except Exception as exc: # pragma: no cover - mirror issues
|
||||
logging.debug("[libgen_no_ads] Mirror %s failed: %s", mirror, exc)
|
||||
continue
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def format_book_info(book: Any) -> Dict[str, Any]:
|
||||
"""Format Libgen search result into a consistent dictionary."""
|
||||
filesize_bytes = 0
|
||||
size_str = getattr(book, "size", "") or ""
|
||||
if size_str:
|
||||
parts = size_str.strip().split()
|
||||
try:
|
||||
value = float(parts[0])
|
||||
unit = parts[1].upper() if len(parts) > 1 else "B"
|
||||
if unit in {"MB", "M"}:
|
||||
filesize_bytes = int(value * 1024 * 1024)
|
||||
elif unit in {"GB", "G"}:
|
||||
filesize_bytes = int(value * 1024 * 1024 * 1024)
|
||||
elif unit in {"KB", "K"}:
|
||||
filesize_bytes = int(value * 1024)
|
||||
else:
|
||||
filesize_bytes = int(value)
|
||||
except (ValueError, IndexError): # pragma: no cover - defensive
|
||||
filesize_bytes = 0
|
||||
|
||||
title = getattr(book, "title", "") or ""
|
||||
isbn = getattr(book, "isbn", "") or ""
|
||||
if not isbn and title:
|
||||
import re
|
||||
|
||||
match = re.search(
|
||||
r"((?:[\d]{10,13}(?:\s*[;,]\s*[\d]{10,13})+)|(?:[\d]{10,13})(?:\s*[;,]?\s*[\d\-]{0,50})?)\s*(?:\b|$)",
|
||||
title,
|
||||
)
|
||||
if match:
|
||||
potential_isbn = match.group(0).strip()
|
||||
if re.search(r"\d{10,13}", potential_isbn):
|
||||
isbn = potential_isbn
|
||||
title = re.sub(r"\s+[a-z]\s*$", "", title[: match.start()].strip(), flags=re.IGNORECASE)
|
||||
|
||||
authors_value = getattr(book, "authors", None)
|
||||
if isinstance(authors_value, Iterable) and not isinstance(authors_value, str):
|
||||
authors_str = ", ".join(str(author) for author in authors_value)
|
||||
else:
|
||||
authors_str = str(authors_value or "Unknown")
|
||||
|
||||
download_links = getattr(book, "download_links", None)
|
||||
mirror_url = None
|
||||
if download_links and getattr(download_links, "get_link", None):
|
||||
mirror_url = download_links.get_link
|
||||
|
||||
return {
|
||||
"title": title or "Unknown",
|
||||
"author": authors_str,
|
||||
"publisher": getattr(book, "publisher", "") or "",
|
||||
"year": getattr(book, "year", "") or "",
|
||||
"pages": getattr(book, "pages", "") or "",
|
||||
"language": getattr(book, "language", "") or "",
|
||||
"filesize": filesize_bytes,
|
||||
"filesize_str": size_str or "Unknown",
|
||||
"extension": getattr(book, "extension", "") or "",
|
||||
"isbn": isbn,
|
||||
"mirror_url": mirror_url,
|
||||
}
|
||||
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
|
||||
"""Search libgen.li/gl style mirrors."""
|
||||
# Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a...
|
||||
url = f"{mirror}/index.php"
|
||||
params = {
|
||||
"req": query,
|
||||
"res": 100,
|
||||
"covers": "on",
|
||||
"filesuns": "all",
|
||||
}
|
||||
|
||||
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
table = soup.find("table", {"id": "tablelibgen"})
|
||||
if not table:
|
||||
table = soup.find("table", {"class": "table table-striped"})
|
||||
|
||||
if not table:
|
||||
return []
|
||||
|
||||
results = []
|
||||
rows = table.find_all("tr")[1:]
|
||||
|
||||
for row in rows:
|
||||
cols = row.find_all("td")
|
||||
if len(cols) < 9:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Structure is different
|
||||
# 0: Cover
|
||||
# 1: Title (with link to file.php?id=...)
|
||||
# 2: Author
|
||||
# 3: Publisher
|
||||
# 4: Year
|
||||
# 5: Language
|
||||
# 6: Pages
|
||||
# 7: Size
|
||||
# 8: Extension
|
||||
# 9: Mirrors
|
||||
|
||||
title_col = cols[1]
|
||||
title_link = title_col.find("a")
|
||||
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
|
||||
|
||||
# Extract ID from link
|
||||
libgen_id = ""
|
||||
if title_link and title_link.has_attr("href"):
|
||||
href = title_link["href"]
|
||||
# href is usually "file.php?id=..." or "edition.php?id=..."
|
||||
match = re.search(r"id=(\d+)", href)
|
||||
if match:
|
||||
libgen_id = match.group(1)
|
||||
|
||||
authors = cols[2].get_text(strip=True)
|
||||
publisher = cols[3].get_text(strip=True)
|
||||
year = cols[4].get_text(strip=True)
|
||||
language = cols[5].get_text(strip=True)
|
||||
pages = cols[6].get_text(strip=True)
|
||||
size = cols[7].get_text(strip=True)
|
||||
extension = cols[8].get_text(strip=True)
|
||||
|
||||
# Mirror link
|
||||
# Usually in col 9 or title link
|
||||
mirror_url = ""
|
||||
if title_link:
|
||||
href = title_link["href"]
|
||||
if href.startswith("/"):
|
||||
mirror_url = mirror + href
|
||||
else:
|
||||
mirror_url = urljoin(mirror, href)
|
||||
|
||||
results.append({
|
||||
"id": libgen_id,
|
||||
"title": title,
|
||||
"author": authors,
|
||||
"publisher": publisher,
|
||||
"year": year,
|
||||
"pages": pages,
|
||||
"language": language,
|
||||
"filesize_str": size,
|
||||
"extension": extension,
|
||||
"md5": "", # .li doesn't show MD5 easily in table
|
||||
"mirror_url": mirror_url,
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def search_libgen(
|
||||
@@ -195,183 +298,160 @@ def search_libgen(
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Search Libgen returning formatted dictionaries with multiple mirrors.
|
||||
|
||||
Uses HTML scraper (search_libgen_no_ads) to find books quickly.
|
||||
Returns mirror URLs and book IDs that can be used to generate alternative mirrors.
|
||||
"""
|
||||
"""Search Libgen using the robust scraper."""
|
||||
searcher = LibgenSearch(session=session)
|
||||
try:
|
||||
_call(log_info, f"[search] Searching Libgen for: {query}")
|
||||
session = session or requests.Session()
|
||||
|
||||
# Use HTML scraper - more reliable and doesn't hang on mirror resolution
|
||||
_call(log_info, "[search] Using HTML scraper (search_libgen_no_ads)...")
|
||||
results: List[Any] = search_libgen_no_ads(query, session=session)
|
||||
|
||||
if not results:
|
||||
_call(log_info, "[search] No results from HTML scraper")
|
||||
return []
|
||||
|
||||
formatted: List[Dict[str, Any]] = []
|
||||
mirrors_list = [
|
||||
"https://libgen.gl",
|
||||
"https://libgen.vg",
|
||||
"https://libgen.la",
|
||||
"https://libgen.bz",
|
||||
"https://libgen.gs",
|
||||
]
|
||||
|
||||
for book in results[:limit]:
|
||||
if isinstance(book, dict):
|
||||
# Result from search_libgen_no_ads (HTML scraper)
|
||||
authors = book.get("authors", ["Unknown"])
|
||||
if isinstance(authors, list):
|
||||
author_value = ", ".join(str(a) for a in authors)
|
||||
else:
|
||||
author_value = str(authors)
|
||||
|
||||
# Extract book ID from mirror URL if available
|
||||
mirror = book.get("mirror", "")
|
||||
book_id = ""
|
||||
if mirror and "/file.php?id=" in mirror:
|
||||
try:
|
||||
book_id = mirror.split("/file.php?id=")[1].split("&")[0]
|
||||
except (IndexError, ValueError):
|
||||
pass
|
||||
|
||||
# Build list of alternative mirrors based on book ID
|
||||
mirrors_dict = {}
|
||||
if book_id:
|
||||
for mirror_base in mirrors_list:
|
||||
mirrors_dict[mirror_base] = f"{mirror_base}/file.php?id={book_id}"
|
||||
elif mirror:
|
||||
# Fallback: use the mirror we found
|
||||
mirrors_dict["primary"] = mirror
|
||||
|
||||
formatted.append(
|
||||
{
|
||||
"title": book.get("title", "Unknown"),
|
||||
"author": author_value,
|
||||
"publisher": book.get("publisher", ""),
|
||||
"year": book.get("year", ""),
|
||||
"pages": book.get("pages", ""),
|
||||
"language": book.get("language", ""),
|
||||
"filesize": 0,
|
||||
"filesize_str": book.get("size", "Unknown"),
|
||||
"extension": book.get("extension", ""),
|
||||
"isbn": book.get("isbn", ""),
|
||||
"mirror_url": mirror, # Primary mirror
|
||||
"mirrors": mirrors_dict, # Alternative mirrors
|
||||
"book_id": book_id,
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Fallback: try to format as book object
|
||||
try:
|
||||
formatted.append(format_book_info(book))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_call(log_info, f"[search] Found {len(formatted)} result(s)")
|
||||
return formatted
|
||||
except LibgenError as exc:
|
||||
_call(log_error, f"[search] Libgen error: {exc}")
|
||||
return []
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
_call(log_error, f"[search] Error: {exc}")
|
||||
results = searcher.search(query, limit=limit)
|
||||
_call(log_info, f"[libgen] Found {len(results)} results")
|
||||
return results
|
||||
except Exception as e:
|
||||
_call(log_error, f"[libgen] Search failed: {e}")
|
||||
return []
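A minimal usage sketch (illustrative only; the dict keys come from the row parsing above):

    hits = search_libgen("distributed systems", limit=5)
    for hit in hits:
        # Each hit is a parsed results-table row plus a best-effort download page URL.
        print(hit["title"], hit["extension"], hit["mirror_url"])
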
|
||||
|
||||
|
||||
def _resolve_download_url(
    session: requests.Session,
    url: str,
    log_info: LogFn = None
) -> Optional[str]:
    """Resolve the final download URL by following the LibGen chain."""
    current_url = url
    visited = set()

    # Max hops to prevent infinite loops
    for _ in range(6):
        if current_url in visited:
            break
        visited.add(current_url)

        _call(log_info, f"[resolve] Checking: {current_url}")

        # Simple heuristic: if it looks like a file, return it
        if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
            return current_url

        try:
            # HEAD would be cheaper, but some mirrors block it or return 405,
            # so GET with stream=True to peek at headers without downloading everything
            with session.get(current_url, stream=True, timeout=30) as resp:
                resp.raise_for_status()
                ct = resp.headers.get("Content-Type", "").lower()

                if "text/html" not in ct:
                    # It's a binary file
                    return current_url

                # It's HTML, read content
                content = resp.text
        except Exception as e:
            _call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
            return None

        soup = BeautifulSoup(content, "html.parser")

        # 1. Check for "GET" link (library.lol / ads.php style)
        # Usually <h2>GET</h2> inside <a>, or just the text "GET"
        get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
        if not get_link:
            # Try finding <a> containing <h2>GET</h2>
            h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
            if h2_get and h2_get.parent.name == "a":
                get_link = h2_get.parent

        if get_link and get_link.has_attr("href"):
            return urljoin(current_url, get_link["href"])

        # 2. Check for "series.php" -> "edition.php"
        if "series.php" in current_url:
            # Find first edition link
            edition_link = soup.find("a", href=re.compile(r"edition\.php"))
            if edition_link:
                current_url = urljoin(current_url, edition_link["href"])
                continue

        # 3. Check for "edition.php" -> "file.php"
        if "edition.php" in current_url:
            file_link = soup.find("a", href=re.compile(r"file\.php"))
            if file_link:
                current_url = urljoin(current_url, file_link["href"])
                continue

        # 4. Check for "file.php" -> "ads.php" (Libgen badge)
        if "file.php" in current_url:
            # Look for a link with title="libgen" or text "Libgen"
            libgen_link = soup.find("a", title="libgen")
            if not libgen_link:
                libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))

            if libgen_link and libgen_link.has_attr("href"):
                current_url = urljoin(current_url, libgen_link["href"])
                continue

        # 5. Check for "ads.php" -> "get.php" (fallback if the GET link logic above failed)
        if "ads.php" in current_url:
            get_php_link = soup.find("a", href=re.compile(r"get\.php"))
            if get_php_link:
                return urljoin(current_url, get_php_link["href"])

        # 6. Library.lol / generic fallback
        for text in ["Cloudflare", "IPFS.io", "Infura"]:
            link = soup.find("a", string=re.compile(text, re.IGNORECASE))
            if link and link.has_attr("href"):
                return urljoin(current_url, link["href"])

        # If we found nothing new, stop
        break

    return None


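# --- Illustrative sketch (not part of this commit) ---------------------------
# A minimal sketch of driving the resolver above: start from a series.php /
# ads.php style mirror URL and follow the chain until a direct file URL (or
# None) comes back. The URL below is a made-up example.
def _example_resolve() -> None:
    import requests  # the module already imports requests; repeated so the sketch stands alone
    session = requests.Session()
    session.headers["User-Agent"] = "Mozilla/5.0"  # some mirrors reject the default UA
    final_url = _resolve_download_url(session, "https://libgen.gl/ads.php?md5=abc123")
    if final_url:
        print(f"direct link: {final_url}")
    else:
        print("no direct link found")

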
def download_from_mirror(
    mirror_url: str,
    output_path: str | Path,
    *,
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
) -> bool:
    """Download a Libgen file and write it to disk.

    Resolves the final download URL by following the Libgen mirror chain,
    sets a browser User-Agent (required by some mirrors), and validates that
    the response is binary content rather than an HTML error page.
    """
    session = session or requests.Session()
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        _call(log_info, f"[download] Resolving download link from: {mirror_url}")

        # Ensure the session has proper headers for Libgen
        if 'User-Agent' not in session.headers:
            session.headers['User-Agent'] = (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )

        download_url = _resolve_download_url(session, mirror_url, log_info)
        if not download_url:
            _call(log_error, "[download] Could not find direct download link")
            return False

        _call(log_info, f"[download] Downloading from: {download_url}")

        # Download the actual file
        with session.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()

            # Verify it's not HTML (error page)
            ct = r.headers.get("content-type", "").lower()
            if "text/html" in ct:
                _call(log_error, "[download] Final URL returned HTML, not a file.")
                return False

            total_size = int(r.headers.get("content-length", 0))
            downloaded = 0

            with open(output_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        # Optional: progress logging using downloaded / total_size

        _call(log_info, f"[download] Saved to {output_path}")
        return True
    except Exception as e:
        _call(log_error, f"[download] Download failed: {e}")
        return False


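# --- Illustrative sketch (not part of this commit) ---------------------------
# End-to-end usage under the assumption that the search helper above is the
# module's public entry point and is named search_libgen (name and call shape
# assumed, not shown in this diff); the query and output path are examples.
def _example_search_and_download() -> None:
    results = search_libgen("python programming", limit=3)  # assumed name/signature
    if not results:
        return
    first = results[0]
    # Try the primary mirror first, then any alternative mirror candidates.
    candidates = [first["mirror_url"], *first["mirrors"].values()]
    for url in candidates:
        if url and download_from_mirror(url, f"downloads/book.{first['extension'] or 'bin'}"):
            break

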
@@ -238,7 +238,7 @@ def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = N
        # Sanitize title for M3U (remove newlines)
        safe_title = title.replace("\n", " ").replace("\r", "")
        # M3U format: #EXTM3U\n#EXTINF:-1,Title\nURL
        m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}"
        target = f"memory://{m3u_content}"
    else:
        target = file_url

@@ -256,9 +256,8 @@ def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = N

    # Command 3: Set title (metadata for display) - still useful for the window title
    if title:
        cmd_title = {
            "command": ["set_property", "force-media-title", title],
            "request_id": 2
        }
        client.send_command(cmd_title)

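# --- Illustrative sketch (not part of this commit) ---------------------------
# What the memory:// target built in the hunk above expands to: mpv can read
# an in-memory M3U playlist, so the display title travels with the URL without
# writing a temp file. Sample URL and title only.
def _example_mpv_target() -> str:
    file_url = "https://example.org/stream.mp4"
    title = "Example title"
    safe_title = title.replace("\n", " ").replace("\r", "")
    m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}"
    return f"memory://{m3u_content}"

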
@@ -510,6 +510,7 @@ class ResultTable:
            ('title | name | filename', ['title', 'name', 'filename']),
            ('ext', ['ext']),
            ('origin | source | store', ['origin', 'source', 'store']),
            ('size | size_bytes', ['size', 'size_bytes']),
            ('type | media_kind | kind', ['type', 'media_kind', 'kind']),
            ('tags | tag_summary', ['tags', 'tag_summary']),
            ('detail | description', ['detail', 'description']),

44
test_ssl.py
Normal file
@@ -0,0 +1,44 @@
import httpx
import ssl


def test_libgen_ssl():
    url = "https://libgen.li/series.php?id=577851"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    print(f"Testing connection to {url} with httpx...")
    try:
        with httpx.Client(verify=True, headers=headers, timeout=30.0) as client:
            resp = client.get(url)
            print(f"Status: {resp.status_code}")
            print(f"Content length: {len(resp.content)}")
    except Exception as e:
        print(f"Error with default settings: {e}")

    print("\nTesting with http2=True...")
    try:
        with httpx.Client(verify=True, headers=headers, timeout=30.0, http2=True) as client:
            resp = client.get(url)
            print(f"Status: {resp.status_code}")
    except Exception as e:
        print(f"Error with http2=True: {e}")

    print("\nTesting with verify=False...")
    try:
        with httpx.Client(verify=False, headers=headers, timeout=30.0) as client:
            resp = client.get(url)
            print(f"Status: {resp.status_code}")
    except Exception as e:
        print(f"Error with verify=False: {e}")

    import requests
    print("\nTesting with requests (HTTP/1.1)...")
    try:
        resp = requests.get(url, headers=headers, timeout=30.0)
        print(f"Status: {resp.status_code}")
    except Exception as e:
        print(f"Error with requests: {e}")


if __name__ == "__main__":
    test_libgen_ssl()