h
This commit is contained in:
@@ -7,8 +7,9 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from urllib.parse import quote, urlparse
|
||||
from urllib.parse import quote, unquote, urlparse
|
||||
|
||||
from API.HTTP import _download_direct_file
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.utils import sanitize_filename
|
||||
from SYS.logger import log
|
||||
@@ -394,9 +395,11 @@ def _extract_download_filename_from_url(url: str) -> str:
|
||||
return ""
|
||||
|
||||
parts = [x for x in path.split("/") if x]
|
||||
# /download/<identifier>/<filename>
|
||||
# /download/<identifier>/<filename...>
|
||||
if len(parts) >= 3 and parts[0].lower() == "download":
|
||||
return str(parts[2]).strip()
|
||||
# Keep subpath segments if present and decode URL encoding.
|
||||
encoded_name = "/".join(parts[2:]).strip()
|
||||
return unquote(encoded_name)
|
||||
|
||||
return ""
|
||||
|
||||
@@ -667,6 +670,52 @@ class InternetArchive(Provider):
|
||||
return self.download(sr, output_dir)
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
raw_path = str(getattr(result, "path", "") or "").strip()
|
||||
|
||||
# Fast path for explicit IA file URLs.
|
||||
# This uses the shared direct downloader, which already integrates with
|
||||
# pipeline transfer progress bars.
|
||||
if is_download_file_url(raw_path):
|
||||
try:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
suggested_filename: Optional[str] = None
|
||||
try:
|
||||
extracted = _extract_download_filename_from_url(raw_path)
|
||||
if extracted:
|
||||
suggested_filename = extracted
|
||||
except Exception:
|
||||
suggested_filename = None
|
||||
|
||||
quiet_mode = False
|
||||
pipeline_progress = None
|
||||
try:
|
||||
if isinstance(self.config, dict):
|
||||
quiet_mode = bool(self.config.get("_quiet_background_output"))
|
||||
pipeline_progress = self.config.get("_pipeline_progress")
|
||||
except Exception:
|
||||
quiet_mode = False
|
||||
pipeline_progress = None
|
||||
|
||||
try:
|
||||
direct_result = _download_direct_file(
|
||||
raw_path,
|
||||
output_dir,
|
||||
quiet=quiet_mode,
|
||||
suggested_filename=suggested_filename,
|
||||
pipeline_progress=pipeline_progress,
|
||||
)
|
||||
direct_path = getattr(direct_result, "path", None)
|
||||
if direct_path is not None:
|
||||
return Path(str(direct_path))
|
||||
if isinstance(direct_result, (str, Path)):
|
||||
return Path(str(direct_result))
|
||||
return None
|
||||
except Exception as exc:
|
||||
log(f"[internetarchive] direct file download failed, falling back to IA API: {exc}", file=sys.stderr)
|
||||
|
||||
ia = _ia()
|
||||
get_item = getattr(ia, "get_item", None)
|
||||
download_fn = getattr(ia, "download", None)
|
||||
@@ -736,7 +785,35 @@ class InternetArchive(Provider):
|
||||
|
||||
chosen_name = ""
|
||||
if requested_filename:
|
||||
chosen_name = requested_filename
|
||||
requested_variants: List[str] = []
|
||||
req_raw = str(requested_filename or "").strip()
|
||||
req_decoded = unquote(req_raw).strip() if req_raw else ""
|
||||
for candidate in (req_decoded, req_raw):
|
||||
if candidate and candidate not in requested_variants:
|
||||
requested_variants.append(candidate)
|
||||
|
||||
available_names = [
|
||||
str(f.get("name") or "").strip()
|
||||
for f in files
|
||||
if isinstance(f, dict) and str(f.get("name") or "").strip()
|
||||
]
|
||||
available_set = {name for name in available_names}
|
||||
|
||||
for candidate in requested_variants:
|
||||
if candidate in available_set:
|
||||
chosen_name = candidate
|
||||
break
|
||||
|
||||
if not chosen_name:
|
||||
available_lower = {name.lower(): name for name in available_names}
|
||||
for candidate in requested_variants:
|
||||
hit = available_lower.get(candidate.lower())
|
||||
if hit:
|
||||
chosen_name = hit
|
||||
break
|
||||
|
||||
if not chosen_name and requested_variants:
|
||||
chosen_name = requested_variants[0]
|
||||
else:
|
||||
chosen = _best_file_candidate(files)
|
||||
if chosen is not None:
|
||||
@@ -746,29 +823,44 @@ class InternetArchive(Provider):
|
||||
raise Exception("Internet Archive item has no downloadable files")
|
||||
|
||||
# Download the selected file.
|
||||
def _download_one(file_name: str) -> None:
    """Ask the IA download function for a single file of the current item.

    The first attempt passes the full flag set (``no_directory``,
    ``ignore_existing``, ``verbose``). If the call raises ``TypeError``,
    it is repeated with only ``files`` and ``destdir``.

    Args:
        file_name: Name of the file within the item to download.
    """
    target_dir = str(output_dir)
    extended_flags = {
        "no_directory": True,
        "ignore_existing": True,
        "verbose": False,
    }
    try:
        download_fn(
            identifier,
            files=[file_name],
            destdir=target_dir,
            **extended_flags,
        )
    except TypeError:
        # Older versions may not support some flags.
        download_fn(identifier, files=[file_name], destdir=target_dir)
|
||||
|
||||
try:
|
||||
download_fn(
|
||||
identifier,
|
||||
files=[chosen_name],
|
||||
destdir=str(output_dir),
|
||||
no_directory=True,
|
||||
ignore_existing=True,
|
||||
verbose=False,
|
||||
)
|
||||
except TypeError:
|
||||
# Older versions may not support some flags.
|
||||
download_fn(
|
||||
identifier,
|
||||
files=[chosen_name],
|
||||
destdir=str(output_dir),
|
||||
)
|
||||
_download_one(chosen_name)
|
||||
except Exception as exc:
|
||||
raise Exception(f"Internet Archive download failed: {exc}")
|
||||
retry_name = unquote(str(chosen_name or "")).strip()
|
||||
if retry_name and retry_name != chosen_name:
|
||||
try:
|
||||
_download_one(retry_name)
|
||||
chosen_name = retry_name
|
||||
except Exception as retry_exc:
|
||||
raise Exception(f"Internet Archive download failed: {retry_exc}")
|
||||
else:
|
||||
raise Exception(f"Internet Archive download failed: {exc}")
|
||||
|
||||
# Resolve downloaded path (library behavior varies by version/flags).
|
||||
chosen_basename = Path(chosen_name).name if chosen_name else ""
|
||||
candidates = [
|
||||
output_dir / chosen_name,
|
||||
output_dir / chosen_basename,
|
||||
output_dir / identifier / chosen_name,
|
||||
output_dir / identifier / chosen_basename,
|
||||
]
|
||||
for p in candidates:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user