Add startup storage counts, Matrix room selection/upload, MPV URL queuing, and a rewritten LibGen scraper

This commit is contained in:
nose
2025-11-30 11:39:04 -08:00
parent ed417c8200
commit 7a13af9a1f
15 changed files with 1150 additions and 363 deletions

76
CLI.py
View File

@@ -676,8 +676,8 @@ def _create_cmdlet_cli():
try:
from helper.hydrus import get_client
get_client(config) # Pre-acquire and cache session key
debug("✓ Hydrus session key acquired")
# get_client(config) # Pre-acquire and cache session key
# debug("✓ Hydrus session key acquired")
except RuntimeError as e:
# Hydrus is not available - this is expected and normal
# Don't show a message, just continue without it
@@ -697,6 +697,78 @@ def _create_cmdlet_cli():
initialize_hydrus_health_check(config)
initialize_matrix_health_check(config)
initialize_local_library_scan(config)
# --- Startup File Counts ---
# Count Local Files
try:
from helper.file_storage import LocalStorageBackend
from config import get_local_storage_path
storage_path = get_local_storage_path(config)
if storage_path:
# Use LocalStorageBackend to perform the search as requested
# Pass a large limit to get all files
storage = LocalStorageBackend(location=storage_path)
local_files = storage.search("*", limit=100000)
print(f"Local: {len(local_files)}")
except Exception as e:
debug(f"⚠ Could not count local files: {e}")
# Count Hydrus Files (if available)
from hydrus_health_check import is_hydrus_available
if is_hydrus_available():
try:
from helper.hydrus import get_client
client = get_client(config)
# Hydrus search for all files
# search_files returns IDs.
response = client.search_files(["system:everything"])
hydrus_ids = response.get("file_ids", [])
print(f"Hydrus: {len(hydrus_ids)}")
except Exception as e:
debug(f"⚠ Could not count Hydrus files: {e}")
# Count Debrid Magnets (if available)
try:
from config import get_api_key
from helper.alldebrid import AllDebridClient
api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid")
if api_key:
# The v4 magnet/status endpoint returns all magnets when no id is
# supplied, but helper/alldebrid.py's magnet_status() requires an id,
# so call the raw endpoint through the client's _request. Accessing
# the protected member is acceptable for this CLI startup probe.
client = AllDebridClient(api_key)
resp = client._request('magnet/status')
if resp.get('status') == 'success':
data = resp.get('data', {})
magnets = data.get('magnets', [])
# The API may return a list or a dict keyed by magnet id; len() works for both
if isinstance(magnets, (list, dict)):
print(f"Debrid: {len(magnets)}")
except Exception as e:
# Don't show error if just not configured or failed
# debug(f"⚠ Could not count Debrid magnets: {e}")
pass
except Exception as e:
debug(f"⚠ Could not check service availability: {e}")
except Exception:
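
For reference, a sketch of a public wrapper that could live in helper/alldebrid.py so callers need not reach into the protected _request; it assumes the _request(endpoint) call shown above and the v4 magnet/status payload shape:

class AllDebridClientWithList(AllDebridClient):
    """Hypothetical extension exposing a list-all-magnets call."""

    def magnets_list(self) -> list:
        resp = self._request('magnet/status')  # no id -> all magnets (assumed)
        if resp.get('status') != 'success':
            return []
        magnets = resp.get('data', {}).get('magnets', [])
        # The API may return a dict keyed by magnet id
        return magnets if isinstance(magnets, list) else list(magnets.values())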

View File

@@ -209,7 +209,7 @@ class SharedArgs:
STORAGE = CmdletArg(
"storage",
type="enum",
choices=["hydrus", "local", "debrid", "ftp"],
choices=["hydrus", "local", "debrid", "ftp", "matrix"],
required=False,
description="Storage location or destination for saving/uploading files.",
alias="s",
@@ -268,6 +268,7 @@ class SharedArgs:
'hydrus': Path.home() / ".hydrus" / "client_files",
'debrid': Path.home() / "Debrid",
'ftp': Path.home() / "FTP",
'matrix': Path.home() / "Matrix", # Placeholder, not used for upload path
}
if storage_value is None:

View File

@@ -542,7 +542,35 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if provider_name == "0x0":
dl_args.extend(["-storage", "0x0"])
return dl_module._run(result, dl_args, config)
# Capture results from download-data so we can add them to DB
captured_results = []
original_emit = ctx.emit
def capture_emit(obj):
captured_results.append(obj)
original_emit(obj)
ctx.emit = capture_emit
try:
ret_code = dl_module._run(result, dl_args, config)
finally:
ctx.emit = original_emit
if ret_code != 0:
return ret_code
# Process the downloaded files recursively to add them to DB
if captured_results:
log(f"Processing {len(captured_results)} downloaded file(s)...", file=sys.stderr)
success_count = 0
for res in captured_results:
# Recursively call add-file with the downloaded result
if _run(res, _args, config) == 0:
success_count += 1
return 0 if success_count > 0 else 1
return 0
if media_path is None:
log("File path could not be resolved")
@@ -609,13 +637,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
# Handle storage-based operations (location is not None here)
valid_locations = {'hydrus', 'local'}
valid_locations = {'hydrus', 'local', 'matrix'}
is_valid_location = location in valid_locations
is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
if not (is_valid_location or is_local_path):
log(f"❌ Invalid location: {location}")
log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path")
return 1
if location == 'local':
@@ -704,6 +732,36 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return exit_code
elif location == 'matrix':
log(f"Uploading to Matrix: {media_path.name}", file=sys.stderr)
try:
result_url = storage["matrix"].upload(media_path, config=config)
log(f"Matrix: {result_url}", file=sys.stderr)
result_dict = create_pipe_object_result(
source='matrix',
identifier=result_url,
file_path=str(media_path),
cmdlet_name='add-file',
title=media_path.name,
target=result_url
)
ctx.emit(result_dict)
except Exception as exc:
log(f"Failed: {exc}", file=sys.stderr)
return 1
if delete_after_upload:
try:
media_path.unlink()
_cleanup_sidecar_files(media_path)
log(f"✅ Deleted file and sidecar", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
return 0
# location == 'hydrus'
# Compute file hash to check if already in Hydrus
log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)

View File

@@ -1595,6 +1595,25 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
debug(f"No downloadable URLs found")
return 1
# Deduplicate URLs while preserving order
unique_urls = []
seen_keys = set()
for u in urls_to_download:
key = None
if isinstance(u, dict):
key = u.get('url') or u.get('link') or u.get('target') or u.get('source_url')
if not key:
key = str(u)
else:
key = str(u)
if key and key not in seen_keys:
seen_keys.add(key)
unique_urls.append(u)
urls_to_download = unique_urls
debug(f"Processing {len(urls_to_download)} URL(s)")
for i, u in enumerate(urls_to_download, 1):
if isinstance(u, dict):
@@ -1749,6 +1768,108 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
debug(f" ✗ Error while borrowing: {e}")
exit_code = 1
continue
except Exception as e:
# Check for BookNotAvailableError (imported dynamically or by name)
if type(e).__name__ == 'BookNotAvailableError':
debug(f" ⚠ Book is waitlisted/unavailable on Archive.org")
# Fallback to LibGen if ISBN is available
isbn = url.get('isbn')
if isbn:
debug(f" ▶ Falling back to LibGen search for ISBN: {isbn}")
from helper.search_provider import LibGenProvider
provider = LibGenProvider(config)
# Search specifically by ISBN
results = provider.search(f"isbn:{isbn}", limit=1)
if results:
debug(f" ✓ Found {len(results)} result(s) on LibGen")
# Use the first result
libgen_result = results[0]
# The loop can't be re-entered mid-iteration, so process the fallback
# inline. Provider results carry the mirror URL (or a libgen:ID) in
# 'target' and the raw book dict in 'full_metadata'.
target = libgen_result.target
debug(f" → Downloading from LibGen: {libgen_result.title}")
# Use UnifiedBookDownloader directly to fetch into final_output_dir
from helper.unified_book_downloader import UnifiedBookDownloader
downloader = UnifiedBookDownloader(config)
# download_book() takes the raw book dict, available as full_metadata
book_data = libgen_result.full_metadata
# Pick a working mirror for the download
mirrors = book_data.get('mirrors', {})
download_url = book_data.get('mirror_url')
if not download_url and mirrors:
# Pick first mirror
download_url = next(iter(mirrors.values()))
if download_url:
debug(f" → Mirror: {download_url}")
# download_book(book, output_dir) expects title, author, year,
# extension, mirrors, etc.; book_data already carries these fields.
filepath = downloader.download_book(book_data, final_output_dir)
if filepath:
debug(f" ✓ Successfully downloaded from LibGen: {filepath}")
downloaded_files.append(str(filepath))
# Emit result
file_hash = _compute_file_hash(filepath)
emit_tags = ['book', 'libgen']
if isbn: emit_tags.append(f'isbn:{isbn}')
pipe_obj = create_pipe_object_result(
source='libgen',
identifier=book_data.get('md5', 'unknown'),
file_path=str(filepath),
cmdlet_name='download-data',
title=libgen_result.title,
file_hash=file_hash,
tags=emit_tags,
source_url=download_url
)
pipeline_context.emit(pipe_obj)
exit_code = 0
continue # Success!
else:
debug(f" ✗ Failed to download from LibGen")
else:
debug(f" ✗ No download URL found in LibGen result")
else:
debug(f" ✗ No results found on LibGen for ISBN: {isbn}")
else:
debug(f" ⚠ No ISBN available for LibGen fallback")
# If fallback failed or wasn't possible, abort
debug(f" ✗ Unable to borrow from Archive.org and LibGen fallback failed.")
exit_code = 1
continue
else:
# Re-raise other exceptions
raise e
debug(f" → Extracting page information...")
# Try both URL formats
@@ -1806,8 +1927,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
import img2pdf
debug(f" → Merging pages into PDF...")
filename = title if title else f"book_{book_id_str}"
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
# Use title from result item if available, otherwise fallback to extracted title
filename_title = title_val if title_val and title_val != 'Unknown Book' else (title if title else f"book_{book_id_str}")
# Allow underscores and spaces
filename = "".join(c for c in filename_title if c.isalnum() or c in (' ', '.', '-', '_'))[:100]
output_path = Path(final_output_dir) / f"{filename}.pdf"
# Make unique filename if needed
@@ -1828,6 +1951,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
file_hash = _compute_file_hash(output_path)
# Build tags including ISBN if available
emit_tags = ['book', 'borrowed', 'pdf']
if title_val and title_val != 'Unknown Book':
emit_tags.append(f'title:{title_val}')
isbn_tag = url.get('isbn')
if isbn_tag:
emit_tags.append(f'isbn:{isbn_tag}')
@@ -2343,6 +2468,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
debug(f"Downloading: {url}")
# Special handling for LibGen URLs
if "libgen" in url or "library.lol" in url:
debug(f"🔄 Detected LibGen URL, using specialized downloader: {url}")
try:
from helper.libgen_service import download_from_mirror, search_libgen
# If this is a search/details page (e.g. https://libgen.li/series.php?id=577851),
# extract the ID and search for fresh mirror links. Skip series/edition
# pages, which download_from_mirror resolves itself.
libgen_id = ""
results = []
if "series.php" not in url and "edition.php" not in url:
match = re.search(r"id=(\d+)", url)
if match:
libgen_id = match.group(1)
debug(f" Extracted LibGen ID: {libgen_id}")
# Search by ID to get fresh mirror links
results = search_libgen(libgen_id, limit=1)
if results:
# Use the mirror URL from the result
mirror_url = results[0].get("mirror_url")
if mirror_url:
debug(f" Resolved to mirror URL: {mirror_url}")
url = mirror_url
# Attempt download with specialized function
# download_from_mirror writes to a full output path, and LibGen URLs
# rarely carry a usable filename, so build one from search metadata.
filename = "libgen_download.bin"
if libgen_id and results:
title = results[0].get("title", "book")
ext = results[0].get("extension", "pdf")
# Sanitize filename
safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
filename = f"{safe_title}.{ext}"
elif "series.php" in url:
filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf"
output_path = final_output_dir / filename
if download_from_mirror(url, output_path, log_info=debug, log_error=log):
debug(f"✓ LibGen download successful: {output_path}")
# Create a result object
info = {
"id": libgen_id or "libgen",
"title": filename,
"webpage_url": url,
"ext": output_path.suffix.lstrip("."),
}
# Emit result
pipeline_context.emit(create_pipe_object_result(
source="libgen",
identifier=libgen_id or "libgen",
file_path=str(output_path),
cmdlet_name="download-data",
title=filename,
extra=info
))
downloaded_files.append(str(output_path))
continue
else:
debug("⚠ LibGen specialized download failed, falling back to generic downloader...")
except Exception as e:
debug(f"⚠ LibGen specialized download error: {e}")
# Fall through to generic downloader
# Resolve cookies path if specified
final_cookies_path = None
if cookies_path:

103
cmdlets/matrix.py Normal file
View File

@@ -0,0 +1,103 @@
from typing import Any, Dict, Sequence, List
import sys
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log, debug
from result_table import ResultTable
from helper.file_storage import MatrixStorageBackend
from config import save_config, load_config
import pipeline as ctx
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parsed = parse_cmdlet_args(args, CMDLET)
# Initialize backend
backend = MatrixStorageBackend()
# Get current default room
matrix_conf = config.get('storage', {}).get('matrix', {})
current_room_id = matrix_conf.get('room_id')
# Fetch rooms
debug("Fetching joined rooms from Matrix...")
rooms = backend.list_rooms(config)
if not rooms:
debug("No joined rooms found or Matrix not configured.")
return 1
# Handle selection if provided
selection = parsed.get("selection")
if selection:
new_room_id = None
selected_room_name = None
# Try as index (1-based)
try:
idx = int(selection) - 1
if 0 <= idx < len(rooms):
selected_room = rooms[idx]
new_room_id = selected_room['id']
selected_room_name = selected_room['name']
except ValueError:
# Try as Room ID
for room in rooms:
if room['id'] == selection:
new_room_id = selection
selected_room_name = room['name']
break
if new_room_id:
# Update config
# Load fresh config from disk to avoid saving runtime objects (like WorkerManager)
disk_config = load_config()
if 'storage' not in disk_config: disk_config['storage'] = {}
if 'matrix' not in disk_config['storage']: disk_config['storage']['matrix'] = {}
disk_config['storage']['matrix']['room_id'] = new_room_id
save_config(disk_config)
debug(f"Default Matrix room set to: {selected_room_name} ({new_room_id})")
current_room_id = new_room_id
else:
debug(f"Invalid selection: {selection}")
return 1
# Display table
table = ResultTable("Matrix Rooms")
for i, room in enumerate(rooms):
is_default = (room['id'] == current_room_id)
row = table.add_row()
row.add_column("Default", "*" if is_default else "")
row.add_column("Name", room['name'])
row.add_column("ID", room['id'])
# Set selection args so user can type @N to select
# This will run .matrix N
table.set_row_selection_args(i, [str(i + 1)])
table.set_source_command(".matrix")
# Register results
ctx.set_last_result_table_overlay(table, rooms)
ctx.set_current_stage_table(table)
print(table)
return 0
CMDLET = Cmdlet(
name=".matrix",
aliases=["matrix", "rooms"],
summary="List and select default Matrix room",
usage=".matrix [selection]",
args=[
CmdletArg(
name="selection",
type="string",
description="Index or ID of the room to set as default",
required=False
)
],
exec=_run
)
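
A quick usage sketch (selection semantics per _run above; the room ID form is illustrative):

.matrix                      # list joined rooms; default marked with *
.matrix 2                    # set row 2 as the default room
.matrix !abc123:example.org  # or select by full room ID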

View File

@@ -70,12 +70,15 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str:
return title or filename or "Unknown"
def _queue_items(items: List[Any], clear_first: bool = False) -> None:
def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
"""Queue items to MPV, starting it if necessary.
Args:
items: List of items to queue
clear_first: If True, the first item will replace the current playlist
Returns:
True if MPV was started, False if items were queued via IPC.
"""
for i, item in enumerate(items):
# Extract URL/Path
@@ -115,7 +118,7 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> None:
# MPV not running (or died)
# Start MPV with remaining items
_start_mpv(items[i:])
return
return True
elif resp.get("error") == "success":
# Also set property for good measure
if title:
@@ -125,14 +128,30 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> None:
else:
error_msg = str(resp.get('error'))
debug(f"Failed to queue item: {error_msg}", file=sys.stderr)
return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Manage and play items in the MPV playlist via IPC."""
parsed = parse_cmdlet_args(args, CMDLET)
# Initialize mpv_started flag
mpv_started = False
# Handle positional index argument if provided
index_arg = parsed.get("index")
url_arg = parsed.get("url")
# If index_arg is provided but is not an integer, treat it as a URL
# This allows .pipe "http://..." without -url flag
if index_arg is not None:
try:
int(index_arg)
except ValueError:
# Not an integer, treat as URL if url_arg is not set
if not url_arg:
url_arg = index_arg
index_arg = None
clear_mode = parsed.get("clear")
list_mode = parsed.get("list")
@@ -141,6 +160,15 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
save_mode = parsed.get("save")
load_mode = parsed.get("load")
# Handle URL queuing
mpv_started = False
if url_arg:
mpv_started = _queue_items([url_arg])
# If we just queued a URL, we probably want to list the playlist to show it was added
# unless other flags are present
if not (clear_mode or play_mode or pause_mode or save_mode or load_mode):
list_mode = True
# Handle Save Playlist
if save_mode:
from datetime import datetime
playlist_name = index_arg or f"Playlist {datetime.now():%Y-%m-%d %H:%M}"
@@ -296,7 +324,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Handle piped input (add to playlist)
# Skip adding if -list is specified (user just wants to see current playlist)
if result and not list_mode:
if result and not list_mode and not url_arg:
# If result is a list of items, add them to playlist
items_to_add = []
if isinstance(result, list):
@@ -304,7 +332,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
elif isinstance(result, dict):
items_to_add = [result]
_queue_items(items_to_add)
if _queue_items(items_to_add):
mpv_started = True
if items_to_add:
# If we added items, we might want to play the first one if nothing is playing?
@@ -315,6 +344,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
items = _get_playlist()
if items is None:
if mpv_started:
# MPV was just started, so we can't list items yet.
# But we know it's running (or trying to start), so don't start another instance.
return 0
debug("MPV is not running. Starting new instance...")
_start_mpv([])
return 0
@@ -369,7 +403,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# List items (Default action or after clear)
if list_mode or index_arg is None:
if list_mode or (index_arg is None and not url_arg):
if not items:
debug("MPV playlist is empty.")
return 0
@@ -451,12 +485,18 @@ CMDLET = Cmdlet(
name=".pipe",
aliases=["pipe", "playlist", "queue", "ls-pipe"],
summary="Manage and play items in the MPV playlist via IPC",
usage=".pipe [index] [-clear]",
usage=".pipe [index|url] [-clear] [-url URL]",
args=[
CmdletArg(
name="index",
type="int",
description="Index of item to play or clear",
type="string", # Changed to string to allow URL detection
description="Index of item to play/clear, or URL to queue",
required=False
),
CmdletArg(
name="url",
type="string",
description="URL to queue",
required=False
),
CmdletArg(

View File

@@ -141,8 +141,33 @@ def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
return payload
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
store_label = payload.get("origin") or payload.get("source") or origin_value
# Handle extension
extension = payload.get("ext", "")
if not extension and title:
path_obj = Path(str(title))
if path_obj.suffix:
extension = path_obj.suffix.lstrip('.')
title = path_obj.stem
# Handle size (render human-readable; avoid "0.0 MB" for small files)
size_val = payload.get("size") or payload.get("size_bytes")
size_str = ""
if size_val:
try:
size_bytes = int(size_val)
if size_bytes >= 1024 * 1024:
size_str = f"{size_bytes / (1024 * 1024):.1f} MB"
else:
size_str = f"{size_bytes / 1024:.1f} KB"
except (ValueError, TypeError):
size_str = str(size_val)
normalized = dict(payload)
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
normalized["columns"] = [
("Title", str(title)),
("Ext", str(extension)),
("Store", str(store_label)),
("Size", str(size_str))
]
return normalized

19
debug_db.py Normal file
View File

@@ -0,0 +1,19 @@
import sqlite3
import os
from pathlib import Path
db_path = Path("C:/Media Machina/.downlow_library.db")
if not db_path.exists():
print(f"DB not found at {db_path}")
else:
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print("Files in DB:")
cursor.execute("SELECT id, file_path FROM files")
for row in cursor.fetchall():
print(f"ID: {row[0]}, Path: {row[1]}")
conn.close()

View File

@@ -75,6 +75,11 @@ def credential_openlibrary(config: Dict[str, Any]) -> Tuple[Optional[str], Optio
return email, password
class BookNotAvailableError(Exception):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
pass
def display_error(response: requests.Response, message: str) -> None:
"""Display error and exit."""
log(message, file=sys.stderr)
@@ -133,9 +138,11 @@ def loan(session: requests.Session, book_id: str, verbose: bool = True) -> reque
if response.status_code == 400:
try:
if response.json()["error"] == "This book is not available to borrow at this time. Please try again later.":
debug("This book doesn't need to be borrowed")
return session
debug("Book is not available for borrowing (waitlisted or in use)")
raise BookNotAvailableError("Book is waitlisted or in use")
display_error(response, "Something went wrong when trying to borrow the book.")
except BookNotAvailableError:
raise
except:
display_error(response, "The book cannot be borrowed")
@@ -182,11 +189,21 @@ def get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str],
# Try to extract the infos URL from the response
try:
# Look for the "url" field in the response
if '"url":"' not in r:
# Look for the "url" field in the response using regex
# Matches "url":"//archive.org/..."
import re
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
if not match:
raise ValueError("No 'url' field found in response")
infos_url = "https:" + r.split('"url":"')[1].split('"')[0].replace("\\u0026", "&")
except (IndexError, ValueError) as e:
url_path = match.group(1)
if url_path.startswith("//"):
infos_url = "https:" + url_path
else:
infos_url = url_path
infos_url = infos_url.replace("\\u0026", "&")
except (IndexError, ValueError, AttributeError) as e:
# If URL extraction fails, raise with better error message
raise RuntimeError(f"Failed to extract book info URL from response: {e}")

View File

@@ -27,6 +27,7 @@ import requests
import re
from helper.logger import log, debug
from helper.utils_constant import mime_maps
class StorageBackend(ABC):
@@ -707,6 +708,18 @@ class HydrusStorageBackend(StorageBackend):
if title != f"Hydrus File {file_id}":
break
# Resolve extension from MIME type
mime_type = meta.get("mime")
ext = ""
if mime_type:
for category in mime_maps.values():
for ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = info.get("ext", "").lstrip('.')
break
if ext:
break
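# Note: a reverse map built once would avoid this nested scan per file.
# A sketch, assuming mime_maps maps categories to {key: {"ext", "mimes"}}:
#   _MIME_TO_EXT = {m: info.get("ext", "").lstrip(".")
#                   for cat in mime_maps.values()
#                   for info in cat.values()
#                   for m in info.get("mimes", [])}
#   ext = _MIME_TO_EXT.get(mime_type, "")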
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
@@ -726,7 +739,8 @@ class HydrusStorageBackend(StorageBackend):
"origin": "hydrus",
"tags": all_tags,
"file_id": file_id,
"mime": meta.get("mime"),
"mime": mime_type,
"ext": ext,
})
else:
# Free-form search: check if search terms match the title or tags
@@ -758,7 +772,8 @@ class HydrusStorageBackend(StorageBackend):
"origin": "hydrus",
"tags": all_tags,
"file_id": file_id,
"mime": meta.get("mime"),
"mime": mime_type,
"ext": ext,
})
debug(f"Found {len(results)} result(s)")
@@ -971,6 +986,60 @@ class MatrixStorageBackend(StorageBackend):
def get_name(self) -> str:
return "matrix"
def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]:
"""List joined rooms with their names."""
matrix_conf = config.get('storage', {}).get('matrix', {})
homeserver = matrix_conf.get('homeserver')
access_token = matrix_conf.get('access_token')
if not homeserver or not access_token:
return []
if not homeserver.startswith('http'):
homeserver = f"https://{homeserver}"
headers = {"Authorization": f"Bearer {access_token}"}
try:
# Get joined rooms
resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
if resp.status_code != 200:
return []
room_ids = resp.json().get('joined_rooms', [])
rooms = []
for rid in room_ids:
# Try to get room name
name = "Unknown Room"
try:
# Get state event for name
name_resp = requests.get(
f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name",
headers=headers,
timeout=2
)
if name_resp.status_code == 200:
name = name_resp.json().get('name', name)
else:
# Try canonical alias
alias_resp = requests.get(
f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias",
headers=headers,
timeout=2
)
if alias_resp.status_code == 200:
name = alias_resp.json().get('alias', name)
except Exception:
pass
rooms.append({'id': rid, 'name': name})
return rooms
except Exception as e:
log(f"Error listing Matrix rooms: {e}", file=sys.stderr)
return []
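# Usage sketch (assuming a config with storage.matrix credentials):
#   from config import load_config
#   backend = MatrixStorageBackend()
#   for room in backend.list_rooms(load_config()):
#       print(room['id'], room['name'])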
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Matrix room.
@@ -993,8 +1062,8 @@ class MatrixStorageBackend(StorageBackend):
access_token = matrix_conf.get('access_token')
room_id = matrix_conf.get('room_id')
if not homeserver or not room_id:
raise ValueError("Matrix homeserver and room_id required")
if not homeserver:
raise ValueError("Matrix homeserver required")
# Ensure homeserver has protocol
if not homeserver.startswith('http'):
@@ -1004,6 +1073,39 @@ class MatrixStorageBackend(StorageBackend):
if not access_token:
raise ValueError("Matrix access_token required (login not yet implemented)")
# Handle room selection if not provided
if not room_id:
log("No room_id configured. Fetching joined rooms...", file=sys.stderr)
rooms = self.list_rooms(config)
if not rooms:
raise ValueError("No joined rooms found or failed to fetch rooms.")
from result_table import ResultTable
table = ResultTable("Matrix Rooms")
for i, room in enumerate(rooms):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Name", room['name'])
row.add_column("ID", room['id'])
print(table)
# Simple interactive selection
try:
selection = input("Select room # to upload to: ")
idx = int(selection) - 1
if 0 <= idx < len(rooms):
room_id = rooms[idx]['id']
log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr)
else:
raise ValueError("Invalid selection")
except Exception:
raise ValueError("Invalid room selection")
if not room_id:
raise ValueError("Matrix room_id required")
# 1. Upload Media
upload_url = f"{homeserver}/_matrix/media/v3/upload"
headers = {

View File

@@ -1337,19 +1337,44 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool,
timeout = 10.0
try:
client = HydrusClient(url, access_key, timeout)
# Lightweight probe: get services
# Temporarily suppress error logging for health checks (expected to fail if Hydrus unavailable)
hydrus_logger = logging.getLogger("helper.hydrus")
original_level = hydrus_logger.level
hydrus_logger.setLevel(logging.CRITICAL) # Suppress errors/warnings
# Use HTTPClient directly to avoid session key logic and reduce retries
# This prevents log spam when Hydrus is offline (avoiding 3 retries x 2 requests)
from helper.http_client import HTTPClient
probe_url = f"{url.rstrip('/')}/get_services"
headers = {}
if access_key:
headers["Hydrus-Client-API-Access-Key"] = access_key
# Suppress HTTPClient logging during probe to avoid "Request failed" logs on startup
http_logger = logging.getLogger("helper.http_client")
original_level = http_logger.level
http_logger.setLevel(logging.CRITICAL)
try:
_ = client.get_services()
# Use retries=1 (single attempt, no retry) to fail fast
with HTTPClient(timeout=timeout, retries=1, headers=headers, verify_ssl=False) as http:
try:
response = http.get(probe_url)
if response.status_code == 200:
_HYDRUS_AVAILABLE = True
_HYDRUS_UNAVAILABLE_REASON = None
return True, None
else:
# A 4xx/5xx means the service is reachable but not usable
# (e.g. 403 auth failure), so report it as unavailable.
reason = f"HTTP {response.status_code}: {response.reason_phrase}"
_HYDRUS_AVAILABLE = False
_HYDRUS_UNAVAILABLE_REASON = reason
return False, reason
except Exception:
# Propagate connection errors from HTTPClient to the outer handler
raise
finally:
hydrus_logger.setLevel(original_level)
http_logger.setLevel(original_level)
except Exception as exc:
reason = str(exc)
_HYDRUS_AVAILABLE = False

View File

@@ -1,21 +1,44 @@
"""Shared Library Genesis search and download helpers."""
"""Shared Library Genesis search and download helpers.
Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
Targets libgen.is/rs/st mirrors and parses the results table directly.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional
import logging
import re
import requests
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
from urllib.parse import quote, urljoin
from libgen import search_sync, LibgenError
# Optional dependencies
try:
from bs4 import BeautifulSoup
except ImportError:
BeautifulSoup = None
LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]
DEFAULT_TIMEOUT = 10.0
DEFAULT_TIMEOUT = 20.0
DEFAULT_LIMIT = 50
logging.getLogger(__name__).setLevel(logging.WARNING)
# Mirrors to try in order
MIRRORS = [
"https://libgen.is",
"https://libgen.rs",
"https://libgen.st",
"http://libgen.is",
"http://libgen.rs",
"http://libgen.st",
"https://libgen.li", # Different structure, fallback
"http://libgen.li",
"https://libgen.gl", # Different structure, fallback
"http://libgen.gl",
]
logging.getLogger(__name__).setLevel(logging.INFO)
def _call(logger: LogFn, message: str) -> None:
@@ -23,169 +46,249 @@ def _call(logger: LogFn, message: str) -> None:
logger(message)
def search_libgen_no_ads(query: str, session: Optional[requests.Session] = None) -> List[Dict[str, Any]]:
"""Search Libgen without triggering ads.php requests."""
try:
from bs4 import BeautifulSoup
except ImportError: # pragma: no cover
logging.warning("BeautifulSoup not available; falling back to standard search")
class LibgenSearch:
"""Robust LibGen searcher."""
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
"""Search LibGen mirrors."""
if not BeautifulSoup:
logging.error("BeautifulSoup not installed. Cannot search LibGen.")
return []
mirrors = [
"https://libgen.gl",
"https://libgen.vg",
"https://libgen.la",
"https://libgen.bz",
"https://libgen.gs",
]
session = session or requests.Session()
session.headers.setdefault(
"User-Agent",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
)
for mirror in mirrors:
for mirror in MIRRORS:
try:
search_url = f"{mirror}/index.php?req={quote(query)}&res=100&covers=on&filesuns=all"
response = session.get(search_url, timeout=DEFAULT_TIMEOUT)
if response.status_code != 200:
continue
soup = BeautifulSoup(response.content, "html.parser")
table = soup.find("table", {"class": "catalog"})
if table is None:
for candidate in soup.find_all("table"):
rows = candidate.find_all("tr")
if len(rows) > 2:
table = candidate
break
if table is None:
logging.debug("[libgen_no_ads] No results table on %s", mirror)
continue
rows = table.find_all("tr")[1:]
results: List[Dict[str, Any]] = []
for row in rows:
try:
cells = row.find_all("td")
if len(cells) < 9:
continue
size_cell = cells[7]
file_link = size_cell.find("a")
mirror_link = ""
if file_link:
href = str(file_link.get("href", ""))
if href.startswith("/"):
mirror_link = mirror + href
elif href:
mirror_link = urljoin(mirror, href)
if not mirror_link:
title_link = cells[1].find("a") if len(cells) > 1 else None
if title_link:
href = str(title_link.get("href", ""))
if href.startswith("/"):
mirror_link = mirror + href
elif href:
mirror_link = urljoin(mirror, href)
if not mirror_link:
continue
results.append(
{
"id": "",
"mirror": mirror_link,
"cover": "",
"title": cells[1].get_text(strip=True) if len(cells) > 1 else "Unknown",
"authors": [cells[2].get_text(strip=True)]
if len(cells) > 2
else ["Unknown"],
"publisher": cells[3].get_text(strip=True) if len(cells) > 3 else "",
"year": cells[4].get_text(strip=True) if len(cells) > 4 else "",
"pages": cells[6].get_text(strip=True) if len(cells) > 6 else "",
"language": cells[5].get_text(strip=True) if len(cells) > 5 else "",
"size": cells[7].get_text(strip=True) if len(cells) > 7 else "",
"extension": cells[8].get_text(strip=True) if len(cells) > 8 else "",
"isbn": "",
}
)
except Exception as exc: # pragma: no cover - defensive
logging.debug("[libgen_no_ads] Error parsing row: %s", exc)
continue
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit)
else:
results = self._search_libgen_rs(mirror, query, limit)
if results:
logging.info("[libgen_no_ads] %d results from %s", len(results), mirror)
return results
except Exception as exc: # pragma: no cover - mirror issues
logging.debug("[libgen_no_ads] Mirror %s failed: %s", mirror, exc)
except Exception as e:
logging.debug(f"Mirror {mirror} failed: {e}")
continue
return []
def format_book_info(book: Any) -> Dict[str, Any]:
"""Format Libgen search result into a consistent dictionary."""
filesize_bytes = 0
size_str = getattr(book, "size", "") or ""
if size_str:
parts = size_str.strip().split()
try:
value = float(parts[0])
unit = parts[1].upper() if len(parts) > 1 else "B"
if unit in {"MB", "M"}:
filesize_bytes = int(value * 1024 * 1024)
elif unit in {"GB", "G"}:
filesize_bytes = int(value * 1024 * 1024 * 1024)
elif unit in {"KB", "K"}:
filesize_bytes = int(value * 1024)
else:
filesize_bytes = int(value)
except (ValueError, IndexError): # pragma: no cover - defensive
filesize_bytes = 0
title = getattr(book, "title", "") or ""
isbn = getattr(book, "isbn", "") or ""
if not isbn and title:
import re
match = re.search(
r"((?:[\d]{10,13}(?:\s*[;,]\s*[\d]{10,13})+)|(?:[\d]{10,13})(?:\s*[;,]?\s*[\d\-]{0,50})?)\s*(?:\b|$)",
title,
)
if match:
potential_isbn = match.group(0).strip()
if re.search(r"\d{10,13}", potential_isbn):
isbn = potential_isbn
title = re.sub(r"\s+[a-z]\s*$", "", title[: match.start()].strip(), flags=re.IGNORECASE)
authors_value = getattr(book, "authors", None)
if isinstance(authors_value, Iterable) and not isinstance(authors_value, str):
authors_str = ", ".join(str(author) for author in authors_value)
else:
authors_str = str(authors_value or "Unknown")
download_links = getattr(book, "download_links", None)
mirror_url = None
if download_links and getattr(download_links, "get_link", None):
mirror_url = download_links.get_link
return {
"title": title or "Unknown",
"author": authors_str,
"publisher": getattr(book, "publisher", "") or "",
"year": getattr(book, "year", "") or "",
"pages": getattr(book, "pages", "") or "",
"language": getattr(book, "language", "") or "",
"filesize": filesize_bytes,
"filesize_str": size_str or "Unknown",
"extension": getattr(book, "extension", "") or "",
"isbn": isbn,
"mirror_url": mirror_url,
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.rs/is/st style mirrors."""
# Search URL: /search.php?req=QUERY&res=100&column=def
url = f"{mirror}/search.php"
params = {
"req": query,
"res": 100, # Request more to filter later
"column": "def",
"open": 0,
"view": "simple",
"phrase": 1,
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# Find the results table (usually class 'c')
table = soup.find("table", {"class": "c"})
if not table:
# Try finding by structure (table with many rows)
tables = soup.find_all("table")
for t in tables:
if len(t.find_all("tr")) > 5:
table = t
break
if not table:
return []
results = []
# Skip header row
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
# Columns:
# 0: ID
# 1: Author(s)
# 2: Title
# 3: Publisher
# 4: Year
# 5: Pages
# 6: Language
# 7: Size
# 8: Extension
# 9+: Mirrors
try:
libgen_id = cols[0].get_text(strip=True)
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
if not authors:
authors = [cols[1].get_text(strip=True)]
title_tag = cols[2].find("a")
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
# Extract MD5 from title link if possible (often in href)
# href='book/index.php?md5=...'
md5 = ""
if title_tag and title_tag.has_attr("href"):
href = title_tag["href"]
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
if match:
md5 = match.group(1)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
pages = cols[5].get_text(strip=True)
language = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
# Mirrors
# Usually col 9 is http://library.lol/main/MD5
mirror_links = []
for i in range(9, len(cols)):
a = cols[i].find("a")
if a and a.has_attr("href"):
mirror_links.append(a["href"])
# Construct direct download page link (library.lol)
# If we have MD5, we can guess it: http://library.lol/main/{md5}
if md5:
download_link = f"http://library.lol/main/{md5}"
elif mirror_links:
download_link = mirror_links[0]
else:
download_link = ""
results.append({
"id": libgen_id,
"title": title,
"author": ", ".join(authors),
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": md5,
"mirror_url": download_link,
"cover": "", # Could extract from hover if needed
})
if len(results) >= limit:
break
except Exception as e:
logging.debug(f"Error parsing row: {e}")
continue
return results
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.li/gl style mirrors."""
# Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a...
url = f"{mirror}/index.php"
params = {
"req": query,
"res": 100,
"covers": "on",
"filesuns": "all",
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"id": "tablelibgen"})
if not table:
table = soup.find("table", {"class": "table table-striped"})
if not table:
return []
results = []
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
try:
# Structure is different
# 0: Cover
# 1: Title (with link to file.php?id=...)
# 2: Author
# 3: Publisher
# 4: Year
# 5: Language
# 6: Pages
# 7: Size
# 8: Extension
# 9: Mirrors
title_col = cols[1]
title_link = title_col.find("a")
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
# Extract ID from link
libgen_id = ""
if title_link and title_link.has_attr("href"):
href = title_link["href"]
# href is usually "file.php?id=..." or "edition.php?id=..."
match = re.search(r"id=(\d+)", href)
if match:
libgen_id = match.group(1)
authors = cols[2].get_text(strip=True)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
language = cols[5].get_text(strip=True)
pages = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
# Mirror link
# Usually in col 9 or title link
mirror_url = ""
if title_link:
href = title_link["href"]
if href.startswith("/"):
mirror_url = mirror + href
else:
mirror_url = urljoin(mirror, href)
results.append({
"id": libgen_id,
"title": title,
"author": authors,
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": "", # .li doesn't show MD5 easily in table
"mirror_url": mirror_url,
})
if len(results) >= limit:
break
except Exception:
continue
return results
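# Usage sketch for the scraper (field names per the dicts built above):
#   searcher = LibgenSearch()
#   for book in searcher.search("clean code", limit=3):
#       print(book["title"], book["extension"], book["mirror_url"])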
def search_libgen(
query: str,
@@ -195,183 +298,160 @@ def search_libgen(
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
"""Search Libgen returning formatted dictionaries with multiple mirrors.
Uses HTML scraper (search_libgen_no_ads) to find books quickly.
Returns mirror URLs and book IDs that can be used to generate alternative mirrors.
"""
"""Search Libgen using the robust scraper."""
searcher = LibgenSearch(session=session)
try:
_call(log_info, f"[search] Searching Libgen for: {query}")
session = session or requests.Session()
# Use HTML scraper - more reliable and doesn't hang on mirror resolution
_call(log_info, "[search] Using HTML scraper (search_libgen_no_ads)...")
results: List[Any] = search_libgen_no_ads(query, session=session)
if not results:
_call(log_info, "[search] No results from HTML scraper")
results = searcher.search(query, limit=limit)
_call(log_info, f"[libgen] Found {len(results)} results")
return results
except Exception as e:
_call(log_error, f"[libgen] Search failed: {e}")
return []
formatted: List[Dict[str, Any]] = []
mirrors_list = [
"https://libgen.gl",
"https://libgen.vg",
"https://libgen.la",
"https://libgen.bz",
"https://libgen.gs",
]
for book in results[:limit]:
if isinstance(book, dict):
# Result from search_libgen_no_ads (HTML scraper)
authors = book.get("authors", ["Unknown"])
if isinstance(authors, list):
author_value = ", ".join(str(a) for a in authors)
else:
author_value = str(authors)
def _resolve_download_url(
session: requests.Session,
url: str,
log_info: LogFn = None
) -> Optional[str]:
"""Resolve the final download URL by following the LibGen chain."""
current_url = url
visited = set()
# Max hops to prevent infinite loops
for _ in range(6):
if current_url in visited:
break
visited.add(current_url)
_call(log_info, f"[resolve] Checking: {current_url}")
# Simple heuristic: if it looks like a file, return it
if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
return current_url
# Extract book ID from mirror URL if available
mirror = book.get("mirror", "")
book_id = ""
if mirror and "/file.php?id=" in mirror:
try:
book_id = mirror.split("/file.php?id=")[1].split("&")[0]
except (IndexError, ValueError):
pass
# Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405
# So we'll just GET with stream=True to peek headers/content without downloading everything
with session.get(current_url, stream=True, timeout=30) as resp:
resp.raise_for_status()
ct = resp.headers.get("Content-Type", "").lower()
# Build list of alternative mirrors based on book ID
mirrors_dict = {}
if book_id:
for mirror_base in mirrors_list:
mirrors_dict[mirror_base] = f"{mirror_base}/file.php?id={book_id}"
elif mirror:
# Fallback: use the mirror we found
mirrors_dict["primary"] = mirror
if "text/html" not in ct:
# It's a binary file
return current_url
formatted.append(
{
"title": book.get("title", "Unknown"),
"author": author_value,
"publisher": book.get("publisher", ""),
"year": book.get("year", ""),
"pages": book.get("pages", ""),
"language": book.get("language", ""),
"filesize": 0,
"filesize_str": book.get("size", "Unknown"),
"extension": book.get("extension", ""),
"isbn": book.get("isbn", ""),
"mirror_url": mirror, # Primary mirror
"mirrors": mirrors_dict, # Alternative mirrors
"book_id": book_id,
}
)
else:
# Fallback: try to format as book object
try:
formatted.append(format_book_info(book))
except Exception:
pass
# It's HTML, read content
content = resp.text
except Exception as e:
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
return None
_call(log_info, f"[search] Found {len(formatted)} result(s)")
return formatted
except LibgenError as exc:
_call(log_error, f"[search] Libgen error: {exc}")
return []
except Exception as exc: # pragma: no cover - defensive
_call(log_error, f"[search] Error: {exc}")
return []
soup = BeautifulSoup(content, "html.parser")
# 1. Check for "GET" link (library.lol / ads.php style)
# Usually <h2>GET</h2> inside <a> or just text "GET"
get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
if not get_link:
# Try finding <a> containing <h2>GET</h2>
h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
if h2_get and h2_get.parent.name == "a":
get_link = h2_get.parent
if get_link and get_link.has_attr("href"):
return urljoin(current_url, get_link["href"])
# 2. Check for "series.php" -> "edition.php"
if "series.php" in current_url:
# Find first edition link
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
if edition_link:
current_url = urljoin(current_url, edition_link["href"])
continue
# 3. Check for "edition.php" -> "file.php"
if "edition.php" in current_url:
file_link = soup.find("a", href=re.compile(r"file\.php"))
if file_link:
current_url = urljoin(current_url, file_link["href"])
continue
# 4. Check for "file.php" -> "ads.php" (Libgen badge)
if "file.php" in current_url:
# Look for link with title="libgen" or text "Libgen"
libgen_link = soup.find("a", title="libgen")
if not libgen_link:
libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
if libgen_link and libgen_link.has_attr("href"):
current_url = urljoin(current_url, libgen_link["href"])
continue
# 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed)
if "ads.php" in current_url:
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
if get_php_link:
return urljoin(current_url, get_php_link["href"])
# 6. Library.lol / generic fallback
for text in ["Cloudflare", "IPFS.io", "Infura"]:
link = soup.find("a", string=re.compile(text, re.IGNORECASE))
if link and link.has_attr("href"):
return urljoin(current_url, link["href"])
# If we found nothing new, stop
break
return None
def download_from_mirror(
mirror_url: str,
output_path: str | Path,
output_path: Path,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> bool:
"""Download a Libgen file and write it to disk.
Handles Libgen redirects and ensures proper file download by:
- Following all redirects (default behavior)
- Setting User-Agent header (required by some mirrors)
- Validating that we're downloading binary content, not HTML
- Attempting alternative download method if HTML is returned
"""
"""Download file from a LibGen mirror URL."""
session = session or requests.Session()
try:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
_call(log_info, f"[download] Downloading from mirror: {mirror_url}")
# Ensure session has proper headers for Libgen
if 'User-Agent' not in session.headers:
session.headers['User-Agent'] = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Download with redirects enabled (default) and referer
session.headers['Referer'] = 'https://libgen.gs/'
response = session.get(mirror_url, stream=True, timeout=30, allow_redirects=True)
response.raise_for_status()
# Check if we got HTML instead of a file (common Libgen issue)
content_type = response.headers.get('content-type', '').lower()
if 'text/html' in content_type:
_call(log_error, f"[download] Server returned HTML. Trying alternative method...")
# Try to extract file ID and use alternative CDN
try:
# Parse the HTML to extract MD5 or file ID
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')
_call(log_info, f"[download] Resolving download link from: {mirror_url}")
# Look for download link in the HTML
# Common patterns: md5 hash in form, or direct link in anchor tags
download_link = None
download_url = _resolve_download_url(session, mirror_url, log_info)
# Try to find forms that might contain download functionality
forms = soup.find_all('form')
for form in forms:
action = form.get('action', '')
if 'download' in action.lower() or 'get' in action.lower():
download_link = action
break
if not download_link:
_call(log_error, f"[download] Could not extract alternative download link from HTML")
if not download_url:
_call(log_error, "[download] Could not find direct download link")
return False
_call(log_info, f"[download] Using alternative download method: {download_link[:100]}")
# Try downloading from alternative link
response2 = session.get(download_link, stream=True, timeout=30, allow_redirects=True)
response2.raise_for_status()
response = response2 # Use the new response
_call(log_info, f"[download] Downloading from: {download_url}")
except Exception as alt_error:
_call(log_error, f"[download] Alternative method failed: {alt_error}")
# Download the actual file
with session.get(download_url, stream=True, timeout=60) as r:
r.raise_for_status()
# Verify it's not HTML (error page)
ct = r.headers.get("content-type", "").lower()
if "text/html" in ct:
_call(log_error, "[download] Final URL returned HTML, not a file.")
return False
total_size = int(response.headers.get("content-length", 0))
total_size = int(r.headers.get("content-length", 0))
downloaded = 0
with open(output_path, "wb") as handle:
for chunk in response.iter_content(chunk_size=8192):
if not chunk:
continue
handle.write(chunk)
with open(output_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
downloaded += len(chunk)
if total_size > 0:
percent = downloaded / total_size * 100
_call(
log_info,
f"[download] {percent:.1f}% - {downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB",
)
# Optional: progress logging
_call(log_info, f"[download] Downloaded successfully to: {output_path}")
_call(log_info, f"[download] Saved to {output_path}")
return True
except Exception as exc: # pragma: no cover - defensive
_call(log_error, f"[download] Error: {exc}")
except Exception as e:
_call(log_error, f"[download] Download failed: {e}")
return False
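
A minimal end-to-end sketch tying the two helpers together (field names per the result dicts built above; a log_info parameter on search_libgen, network access, and a reachable mirror are assumed):

from pathlib import Path
from helper.libgen_service import search_libgen, download_from_mirror

results = search_libgen("python programming", limit=5, log_info=print, log_error=print)
if results:
    book = results[0]
    target = Path(f"{book['title'][:50]}.{book['extension'] or 'pdf'}")
    download_from_mirror(book["mirror_url"], target, log_info=print, log_error=print)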

View File

@@ -238,7 +238,7 @@ def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = N
# Sanitize title for M3U (remove newlines)
safe_title = title.replace("\n", " ").replace("\r", "")
# M3U format: #EXTM3U\n#EXTINF:-1,Title\nURL
m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}\n"
m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{file_url}"
target = f"memory://{m3u_content}"
else:
target = file_url
@@ -256,9 +256,8 @@ def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = N
# Command 3: Set title (metadata for display) - still useful for window title
if title:
safe_title_prop = title.replace('"', '\\"')
cmd_title = {
"command": ["set_property", "force-media-title", safe_title_prop],
"command": ["set_property", "force-media-title", title],
"request_id": 2
}
client.send_command(cmd_title)

View File

@@ -510,6 +510,7 @@ class ResultTable:
('title | name | filename', ['title', 'name', 'filename']),
('ext', ['ext']),
('origin | source | store', ['origin', 'source', 'store']),
('size | size_bytes', ['size', 'size_bytes']),
('type | media_kind | kind', ['type', 'media_kind', 'kind']),
('tags | tag_summary', ['tags', 'tag_summary']),
('detail | description', ['detail', 'description']),

44
test_ssl.py Normal file
View File

@@ -0,0 +1,44 @@
import httpx
import requests
def test_libgen_ssl():
url = "https://libgen.li/series.php?id=577851"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
print(f"Testing connection to {url} with httpx...")
try:
with httpx.Client(verify=True, headers=headers, timeout=30.0) as client:
resp = client.get(url)
print(f"Status: {resp.status_code}")
print(f"Content length: {len(resp.content)}")
except Exception as e:
print(f"Error with default settings: {e}")
print("\nTesting with http2=True...")
try:
with httpx.Client(verify=True, headers=headers, timeout=30.0, http2=True) as client:
resp = client.get(url)
print(f"Status: {resp.status_code}")
except Exception as e:
print(f"Error with http2=True: {e}")
print("\nTesting with verify=False...")
try:
with httpx.Client(verify=False, headers=headers, timeout=30.0) as client:
resp = client.get(url)
print(f"Status: {resp.status_code}")
except Exception as e:
print(f"Error with verify=False: {e}")
print("\nTesting with requests (HTTP/1.1)...")
try:
resp = requests.get(url, headers=headers, timeout=30.0)
print(f"Status: {resp.status_code}")
except Exception as e:
print(f"Error with requests: {e}")
if __name__ == "__main__":
test_libgen_ssl()