jkj

2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions
--- a/SYS/download.py
+++ b/SYS/download.py
@@ -282,15 +282,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
    if opts.cookies_path and opts.cookies_path.is_file():
        base_options["cookiefile"] = str(opts.cookies_path)
    else:
-        # Check global cookies file lazily to avoid import cycles
-        from hydrus_health_check import get_cookies_file_path  # local import
-
-        global_cookies = get_cookies_file_path()
-        if global_cookies:
-            base_options["cookiefile"] = global_cookies
-        else:
-            # Fallback to browser cookies
-            base_options["cookiesfrombrowser"] = ("chrome",)
+        # Fallback to browser cookies
+        base_options["cookiesfrombrowser"] = ("chrome",)

    # Add no-playlist option if specified (for single video from playlist url)
    if opts.no_playlist:
@@ -453,21 +446,40 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
            
            # Try to find actual download link in the page
            try:
-                from bs4 import BeautifulSoup
-                soup = BeautifulSoup(response.content, 'html.parser')
-                
-                # Look for download links - LibGen typically has forms with download buttons
-                # Look for all links and forms that might lead to download
-                for link in soup.find_all('a'):
-                    href = link.get('href')
-                    if href and isinstance(href, str):
-                        # Look for direct file links or get.php redirects
-                        if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
-                            download_url = href if href.startswith('http') else urljoin(final_url, href)
+                try:
+                    from lxml import html as lxml_html
+                except ImportError:
+                    lxml_html = None
+
+                if lxml_html is not None:
+                    doc = lxml_html.fromstring(response.content)
+                    for a in doc.xpath("//a[@href]"):
+                        href = str(a.get("href") or "").strip()
+                        if not href:
+                            continue
+
+                        href_lower = href.lower()
+                        if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
+                            download_url = href if href.startswith("http") else urljoin(final_url, href)
                            debug(f"Found download link: {download_url}")
                            return download_url
-            except ImportError:
-                pass  # BeautifulSoup not available
+                else:
+                    # Regex fallback
+                    for m in re.finditer(
+                        r"href=[\"\']([^\"\']+)[\"\']",
+                        response.text or "",
+                        flags=re.IGNORECASE,
+                    ):
+                        href = str(m.group(1) or "").strip()
+                        if not href or href.lower().startswith("javascript:"):
+                            continue
+                        href_lower = href.lower()
+                        if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
+                            download_url = href if href.startswith("http") else urljoin(final_url, href)
+                            debug(f"Found download link: {download_url}")
+                            return download_url
+            except Exception:
+                pass
            
            # If we followed redirects successfully, return the final URL
            # This handles cases where libgen redirects to a direct download mirror
@@ -708,12 +720,7 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
                "noprogress": True,  # No progress bars
            }
            
-            # Add cookies if available (lazy import to avoid circular dependency)
-            from hydrus_health_check import get_cookies_file_path  # local import
-
-            global_cookies = get_cookies_file_path()
-            if global_cookies:
-                ydl_opts["cookiefile"] = global_cookies
+            # Cookies are optional for probing and are not set here; downloads that need them pass a cookie file via DownloadOptions.cookies_path.
            
            # Add no_playlist option if specified
            if no_playlist: