This commit is contained in:
2026-02-15 22:22:48 -08:00
parent 14871a1f02
commit c86e5e67cf
2 changed files with 114 additions and 22 deletions

View File

@@ -92,7 +92,7 @@
"(hitfile\\.net/[a-z0-9A-Z]{4,9})"
],
"regexp": "(hitf\\.(to|cc)/([a-z0-9A-Z]{4,9}))|(htfl\\.(net|to|cc)/([a-z0-9A-Z]{4,9}))|(hitfile\\.(net)/download/free/([a-z0-9A-Z]{4,9}))|((hitfile\\.net/[a-z0-9A-Z]{4,9}))",
"status": true
"status": false
},
"mega": {
"name": "mega",
@@ -375,7 +375,7 @@
"(filespace\\.com/[a-zA-Z0-9]{12})"
],
"regexp": "(filespace\\.com/fd/([a-zA-Z0-9]{12}))|((filespace\\.com/[a-zA-Z0-9]{12}))",
"status": true
"status": false
},
"filezip": {
"name": "filezip",

View File

@@ -7,8 +7,9 @@ import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlparse
from urllib.parse import quote, unquote, urlparse
from API.HTTP import _download_direct_file
from ProviderCore.base import Provider, SearchResult
from SYS.utils import sanitize_filename
from SYS.logger import log
@@ -394,9 +395,11 @@ def _extract_download_filename_from_url(url: str) -> str:
return ""
parts = [x for x in path.split("/") if x]
# /download/<identifier>/<filename>
# /download/<identifier>/<filename...>
if len(parts) >= 3 and parts[0].lower() == "download":
return str(parts[2]).strip()
# Keep subpath segments if present and decode URL encoding.
encoded_name = "/".join(parts[2:]).strip()
return unquote(encoded_name)
return ""
@@ -667,6 +670,52 @@ class InternetArchive(Provider):
return self.download(sr, output_dir)
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
raw_path = str(getattr(result, "path", "") or "").strip()
# Fast path for explicit IA file URLs.
# This uses the shared direct downloader, which already integrates with
# pipeline transfer progress bars.
if is_download_file_url(raw_path):
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
suggested_filename: Optional[str] = None
try:
extracted = _extract_download_filename_from_url(raw_path)
if extracted:
suggested_filename = extracted
except Exception:
suggested_filename = None
quiet_mode = False
pipeline_progress = None
try:
if isinstance(self.config, dict):
quiet_mode = bool(self.config.get("_quiet_background_output"))
pipeline_progress = self.config.get("_pipeline_progress")
except Exception:
quiet_mode = False
pipeline_progress = None
try:
direct_result = _download_direct_file(
raw_path,
output_dir,
quiet=quiet_mode,
suggested_filename=suggested_filename,
pipeline_progress=pipeline_progress,
)
direct_path = getattr(direct_result, "path", None)
if direct_path is not None:
return Path(str(direct_path))
if isinstance(direct_result, (str, Path)):
return Path(str(direct_result))
return None
except Exception as exc:
log(f"[internetarchive] direct file download failed, falling back to IA API: {exc}", file=sys.stderr)
ia = _ia()
get_item = getattr(ia, "get_item", None)
download_fn = getattr(ia, "download", None)
@@ -736,7 +785,35 @@ class InternetArchive(Provider):
chosen_name = ""
if requested_filename:
chosen_name = requested_filename
requested_variants: List[str] = []
req_raw = str(requested_filename or "").strip()
req_decoded = unquote(req_raw).strip() if req_raw else ""
for candidate in (req_decoded, req_raw):
if candidate and candidate not in requested_variants:
requested_variants.append(candidate)
available_names = [
str(f.get("name") or "").strip()
for f in files
if isinstance(f, dict) and str(f.get("name") or "").strip()
]
available_set = {name for name in available_names}
for candidate in requested_variants:
if candidate in available_set:
chosen_name = candidate
break
if not chosen_name:
available_lower = {name.lower(): name for name in available_names}
for candidate in requested_variants:
hit = available_lower.get(candidate.lower())
if hit:
chosen_name = hit
break
if not chosen_name and requested_variants:
chosen_name = requested_variants[0]
else:
chosen = _best_file_candidate(files)
if chosen is not None:
@@ -746,29 +823,44 @@ class InternetArchive(Provider):
raise Exception("Internet Archive item has no downloadable files")
# Download the selected file.
def _download_one(file_name: str) -> None:
    """Download a single file of the current item into the output directory.

    The first attempt passes the full flag set (``no_directory``,
    ``ignore_existing``, ``verbose``). If that call raises ``TypeError``,
    the download is attempted again with only the ``files`` and
    ``destdir`` arguments.

    Args:
        file_name: Name of the file within the item to download.

    Note:
        Any ``TypeError`` raised by the first call triggers the reduced
        retry, not only one caused by an unsupported keyword argument.
    """
    target_dir = str(output_dir)
    try:
        download_fn(
            identifier,
            files=[file_name],
            destdir=target_dir,
            no_directory=True,
            ignore_existing=True,
            verbose=False,
        )
    except TypeError:
        # Older library versions may not accept the extra flags; retry
        # with the minimal argument set.
        download_fn(identifier, files=[file_name], destdir=target_dir)
try:
download_fn(
identifier,
files=[chosen_name],
destdir=str(output_dir),
no_directory=True,
ignore_existing=True,
verbose=False,
)
except TypeError:
# Older versions may not support some flags.
download_fn(
identifier,
files=[chosen_name],
destdir=str(output_dir),
)
_download_one(chosen_name)
except Exception as exc:
raise Exception(f"Internet Archive download failed: {exc}")
retry_name = unquote(str(chosen_name or "")).strip()
if retry_name and retry_name != chosen_name:
try:
_download_one(retry_name)
chosen_name = retry_name
except Exception as retry_exc:
raise Exception(f"Internet Archive download failed: {retry_exc}")
else:
raise Exception(f"Internet Archive download failed: {exc}")
# Resolve downloaded path (library behavior varies by version/flags).
chosen_basename = Path(chosen_name).name if chosen_name else ""
candidates = [
output_dir / chosen_name,
output_dir / chosen_basename,
output_dir / identifier / chosen_name,
output_dir / identifier / chosen_basename,
]
for p in candidates:
try: