d
This commit is contained in:
@@ -9,8 +9,8 @@ import logging
|
||||
import re
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from urllib.parse import quote, urljoin
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote, urljoin, urlparse, unquote
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
@@ -405,6 +405,61 @@ def _resolve_download_url(
|
||||
return None
|
||||
|
||||
|
||||
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
|
||||
"""Guess the file extension from headers or the download URL."""
|
||||
content_disposition = headers.get("content-disposition", "")
|
||||
if content_disposition:
|
||||
match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
|
||||
if match:
|
||||
filename = unquote(match.group(1).strip('"'))
|
||||
suffix = Path(filename).suffix
|
||||
if suffix:
|
||||
return suffix.lstrip('.')
|
||||
|
||||
parsed = urlparse(download_url)
|
||||
suffix = Path(parsed.path).suffix
|
||||
if suffix:
|
||||
return suffix.lstrip('.')
|
||||
|
||||
content_type = headers.get('content-type', '').lower()
|
||||
mime_map = {
|
||||
'application/pdf': 'pdf',
|
||||
'application/epub+zip': 'epub',
|
||||
'application/x-mobipocket-ebook': 'mobi',
|
||||
'application/x-cbr': 'cbr',
|
||||
'application/x-cbz': 'cbz',
|
||||
'application/zip': 'zip',
|
||||
}
|
||||
|
||||
for mime, ext in mime_map.items():
|
||||
if mime in content_type:
|
||||
return ext
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
|
||||
"""Rename the path to match the detected extension, if needed."""
|
||||
if not extension:
|
||||
return path
|
||||
|
||||
suffix = extension if extension.startswith('.') else f'.{extension}'
|
||||
if path.suffix.lower() == suffix.lower():
|
||||
return path
|
||||
|
||||
candidate = path.with_suffix(suffix)
|
||||
base_stem = path.stem
|
||||
counter = 1
|
||||
while candidate.exists() and counter < 100:
|
||||
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
|
||||
counter += 1
|
||||
|
||||
try:
|
||||
path.replace(candidate)
|
||||
return candidate
|
||||
except Exception:
|
||||
return path
|
||||
|
||||
def download_from_mirror(
|
||||
mirror_url: str,
|
||||
output_path: Path,
|
||||
@@ -412,8 +467,9 @@ def download_from_mirror(
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
) -> bool:
|
||||
"""Download file from a LibGen mirror URL."""
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
) -> Tuple[bool, Optional[Path]]:
|
||||
"""Download file from a LibGen mirror URL with optional progress tracking."""
|
||||
session = session or requests.Session()
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -425,33 +481,43 @@ def download_from_mirror(
|
||||
|
||||
if not download_url:
|
||||
_call(log_error, "[download] Could not find direct download link")
|
||||
return False
|
||||
return False, None
|
||||
|
||||
_call(log_info, f"[download] Downloading from: {download_url}")
|
||||
|
||||
# Download the actual file
|
||||
downloaded = 0
|
||||
total_size = 0
|
||||
headers: Dict[str, str] = {}
|
||||
|
||||
with session.get(download_url, stream=True, timeout=60) as r:
|
||||
r.raise_for_status()
|
||||
headers = dict(r.headers)
|
||||
|
||||
# Verify it's not HTML (error page)
|
||||
ct = r.headers.get("content-type", "").lower()
|
||||
ct = headers.get("content-type", "").lower()
|
||||
if "text/html" in ct:
|
||||
_call(log_error, "[download] Final URL returned HTML, not a file.")
|
||||
return False
|
||||
return False, None
|
||||
|
||||
total_size = int(r.headers.get("content-length", 0))
|
||||
downloaded = 0
|
||||
total_size = int(headers.get("content-length", 0) or 0)
|
||||
|
||||
with open(output_path, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
downloaded += len(chunk)
|
||||
# Optional: progress logging
|
||||
|
||||
_call(log_info, f"[download] Saved to {output_path}")
|
||||
return True
|
||||
if progress_callback:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
final_extension = _guess_filename_extension(download_url, headers)
|
||||
final_path = _apply_extension(output_path, final_extension)
|
||||
|
||||
if progress_callback and total_size > 0:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
_call(log_info, f"[download] Saved to {final_path}")
|
||||
return True, final_path
|
||||
|
||||
except Exception as e:
|
||||
_call(log_error, f"[download] Download failed: {e}")
|
||||
return False
|
||||
return False, None
|
||||
|
||||
Reference in New Issue
Block a user