This commit is contained in:
nose
2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions

View File

@@ -282,15 +282,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
else:
# Check global cookies file lazily to avoid import cycles
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
base_options["cookiefile"] = global_cookies
else:
# Fallback to browser cookies
base_options["cookiesfrombrowser"] = ("chrome",)
# Fallback to browser cookies
base_options["cookiesfrombrowser"] = ("chrome",)
# Add no-playlist option if specified (for single video from playlist url)
if opts.no_playlist:
@@ -453,21 +446,40 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
# Try to find actual download link in the page
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
# Look for download links - LibGen typically has forms with download buttons
# Look for all links and forms that might lead to download
for link in soup.find_all('a'):
href = link.get('href')
if href and isinstance(href, str):
# Look for direct file links or get.php redirects
if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
download_url = href if href.startswith('http') else urljoin(final_url, href)
try:
from lxml import html as lxml_html
except ImportError:
lxml_html = None
if lxml_html is not None:
doc = lxml_html.fromstring(response.content)
for a in doc.xpath("//a[@href]"):
href = str(a.get("href") or "").strip()
if not href:
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
debug(f"Found download link: {download_url}")
return download_url
except ImportError:
pass # BeautifulSoup not available
else:
# Regex fallback
for m in re.finditer(
r"href=[\"\']([^\"\']+)[\"\']",
response.text or "",
flags=re.IGNORECASE,
):
href = str(m.group(1) or "").strip()
if not href or href.lower().startswith("javascript:"):
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
debug(f"Found download link: {download_url}")
return download_url
except Exception:
pass
# If we followed redirects successfully, return the final URL
# This handles cases where libgen redirects to a direct download mirror
@@ -708,12 +720,7 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"noprogress": True, # No progress bars
}
# Add cookies if available (lazy import to avoid circular dependency)
from hydrus_health_check import get_cookies_file_path # local import
global_cookies = get_cookies_file_path()
if global_cookies:
ydl_opts["cookiefile"] = global_cookies
# Cookies are optional for probing; callers should pass cookiefile via DownloadOptions when needed.
# Add no_playlist option if specified
if no_playlist: