refactor(download): remove ProviderCore/download.py, move sanitize_filename to SYS.utils, replace callers to use API.HTTP.HTTPClient

This commit is contained in:
2026-01-06 01:38:59 -08:00
parent 3b363dd536
commit 41c11d39fd
38 changed files with 2640 additions and 526 deletions

View File

@@ -11,7 +11,7 @@ from urllib.parse import urlparse
from API.HTTP import HTTPClient, _download_direct_file
from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_torrent_file
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.utils import sanitize_filename
from SYS.logger import log, debug
from SYS.models import DownloadError
@@ -495,7 +495,7 @@ def adjust_output_dir_for_alldebrid(
full_metadata: Optional[Dict[str, Any]],
item: Any,
) -> Path:
from ProviderCore.download import sanitize_filename as _sf
from SYS.utils import sanitize_filename as _sf
output_dir = base_output_dir
md = full_metadata if isinstance(full_metadata, dict) else {}

View File

@@ -7,10 +7,7 @@ from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
try:
from playwright.sync_api import sync_playwright
except ImportError: # pragma: no cover
sync_playwright = None
from tool.playwright import PlaywrightTool
class Bandcamp(Provider):
@@ -137,8 +134,7 @@ class Bandcamp(Provider):
if not stage_is_last:
return False
if sync_playwright is None:
return False
# Playwright is required; proceed to handle artist selection
# Only handle artist selections.
chosen: List[Dict[str, Any]] = []
@@ -219,11 +215,10 @@ class Bandcamp(Provider):
artist_url = chosen[0].get("url") or ""
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page()
tool = PlaywrightTool({})
tool.require()
with tool.open_page(headless=True) as page:
discography = self._scrape_artist_page(page, artist_url, limit=50)
browser.close()
except Exception as exc:
print(f"bandcamp artist lookup failed: {exc}\n")
return True
@@ -275,18 +270,10 @@ class Bandcamp(Provider):
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
if sync_playwright is None:
log(
"[bandcamp] Playwright not available. Install with: pip install playwright",
file=sys.stderr,
)
return []
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page()
tool = PlaywrightTool({})
tool.require()
with tool.open_page(headless=True) as page:
if query.strip().lower().startswith("artist:"):
artist_name = query[7:].strip().strip('"')
search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b"
@@ -294,8 +281,6 @@ class Bandcamp(Provider):
search_url = f"https://bandcamp.com/search?q={query}&item_type=a"
results = self._scrape_url(page, search_url, limit)
browser.close()
return results
except Exception as exc:
@@ -366,4 +351,5 @@ class Bandcamp(Provider):
return results
def validate(self) -> bool:
return sync_playwright is not None
# Playwright is required for the provider to function
return True

View File

@@ -10,7 +10,7 @@ from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlparse
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.utils import sanitize_filename
from SYS.logger import log
# Helper for download-file: render selectable formats for a details URL.

View File

@@ -11,7 +11,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import urljoin, urlparse, unquote
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import sanitize_filename
from SYS.utils import sanitize_filename
from SYS.logger import log
from SYS.models import ProgressBar

View File

@@ -18,7 +18,7 @@ import requests
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import download_file, sanitize_filename
from SYS.utils import sanitize_filename
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log
from SYS.utils import unique_path
@@ -1541,21 +1541,25 @@ class OpenLibrary(Provider):
except Exception:
pass
out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file(
pdf_url,
out_path,
session=self._session,
progress_callback=(
(
lambda downloaded, total, label:
progress_callback("bytes", downloaded, total, label)
) if progress_callback is not None else None
),
)
if ok:
return out_path
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
try:
with HTTPClient(timeout=30.0) as client:
path = client.download(
pdf_url,
str(out_path),
chunk_size=1024 * 256,
progress_callback=(
(lambda downloaded, total: progress_callback("bytes", downloaded, total, safe_title))
if progress_callback is not None
else None
),
)
if path and path.exists():
return path
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
except Exception:
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
# 2) Borrow flow (credentials required).
try:

File diff suppressed because it is too large. (Load Diff)