This commit is contained in:
2026-02-15 22:22:48 -08:00
parent 14871a1f02
commit c86e5e67cf
2 changed files with 114 additions and 22 deletions

View File

@@ -92,7 +92,7 @@
"(hitfile\\.net/[a-z0-9A-Z]{4,9})" "(hitfile\\.net/[a-z0-9A-Z]{4,9})"
], ],
"regexp": "(hitf\\.(to|cc)/([a-z0-9A-Z]{4,9}))|(htfl\\.(net|to|cc)/([a-z0-9A-Z]{4,9}))|(hitfile\\.(net)/download/free/([a-z0-9A-Z]{4,9}))|((hitfile\\.net/[a-z0-9A-Z]{4,9}))", "regexp": "(hitf\\.(to|cc)/([a-z0-9A-Z]{4,9}))|(htfl\\.(net|to|cc)/([a-z0-9A-Z]{4,9}))|(hitfile\\.(net)/download/free/([a-z0-9A-Z]{4,9}))|((hitfile\\.net/[a-z0-9A-Z]{4,9}))",
"status": true "status": false
}, },
"mega": { "mega": {
"name": "mega", "name": "mega",
@@ -375,7 +375,7 @@
"(filespace\\.com/[a-zA-Z0-9]{12})" "(filespace\\.com/[a-zA-Z0-9]{12})"
], ],
"regexp": "(filespace\\.com/fd/([a-zA-Z0-9]{12}))|((filespace\\.com/[a-zA-Z0-9]{12}))", "regexp": "(filespace\\.com/fd/([a-zA-Z0-9]{12}))|((filespace\\.com/[a-zA-Z0-9]{12}))",
"status": true "status": false
}, },
"filezip": { "filezip": {
"name": "filezip", "name": "filezip",

View File

@@ -7,8 +7,9 @@ import sys
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from urllib.parse import quote, urlparse from urllib.parse import quote, unquote, urlparse
from API.HTTP import _download_direct_file
from ProviderCore.base import Provider, SearchResult from ProviderCore.base import Provider, SearchResult
from SYS.utils import sanitize_filename from SYS.utils import sanitize_filename
from SYS.logger import log from SYS.logger import log
@@ -394,9 +395,11 @@ def _extract_download_filename_from_url(url: str) -> str:
return "" return ""
parts = [x for x in path.split("/") if x] parts = [x for x in path.split("/") if x]
# /download/<identifier>/<filename> # /download/<identifier>/<filename...>
if len(parts) >= 3 and parts[0].lower() == "download": if len(parts) >= 3 and parts[0].lower() == "download":
return str(parts[2]).strip() # Keep subpath segments if present and decode URL encoding.
encoded_name = "/".join(parts[2:]).strip()
return unquote(encoded_name)
return "" return ""
@@ -667,6 +670,52 @@ class InternetArchive(Provider):
return self.download(sr, output_dir) return self.download(sr, output_dir)
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
raw_path = str(getattr(result, "path", "") or "").strip()
# Fast path for explicit IA file URLs.
# This uses the shared direct downloader, which already integrates with
# pipeline transfer progress bars.
if is_download_file_url(raw_path):
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
suggested_filename: Optional[str] = None
try:
extracted = _extract_download_filename_from_url(raw_path)
if extracted:
suggested_filename = extracted
except Exception:
suggested_filename = None
quiet_mode = False
pipeline_progress = None
try:
if isinstance(self.config, dict):
quiet_mode = bool(self.config.get("_quiet_background_output"))
pipeline_progress = self.config.get("_pipeline_progress")
except Exception:
quiet_mode = False
pipeline_progress = None
try:
direct_result = _download_direct_file(
raw_path,
output_dir,
quiet=quiet_mode,
suggested_filename=suggested_filename,
pipeline_progress=pipeline_progress,
)
direct_path = getattr(direct_result, "path", None)
if direct_path is not None:
return Path(str(direct_path))
if isinstance(direct_result, (str, Path)):
return Path(str(direct_result))
return None
except Exception as exc:
log(f"[internetarchive] direct file download failed, falling back to IA API: {exc}", file=sys.stderr)
ia = _ia() ia = _ia()
get_item = getattr(ia, "get_item", None) get_item = getattr(ia, "get_item", None)
download_fn = getattr(ia, "download", None) download_fn = getattr(ia, "download", None)
@@ -736,7 +785,35 @@ class InternetArchive(Provider):
chosen_name = "" chosen_name = ""
if requested_filename: if requested_filename:
chosen_name = requested_filename requested_variants: List[str] = []
req_raw = str(requested_filename or "").strip()
req_decoded = unquote(req_raw).strip() if req_raw else ""
for candidate in (req_decoded, req_raw):
if candidate and candidate not in requested_variants:
requested_variants.append(candidate)
available_names = [
str(f.get("name") or "").strip()
for f in files
if isinstance(f, dict) and str(f.get("name") or "").strip()
]
available_set = {name for name in available_names}
for candidate in requested_variants:
if candidate in available_set:
chosen_name = candidate
break
if not chosen_name:
available_lower = {name.lower(): name for name in available_names}
for candidate in requested_variants:
hit = available_lower.get(candidate.lower())
if hit:
chosen_name = hit
break
if not chosen_name and requested_variants:
chosen_name = requested_variants[0]
else: else:
chosen = _best_file_candidate(files) chosen = _best_file_candidate(files)
if chosen is not None: if chosen is not None:
@@ -746,29 +823,44 @@ class InternetArchive(Provider):
raise Exception("Internet Archive item has no downloadable files") raise Exception("Internet Archive item has no downloadable files")
# Download the selected file. # Download the selected file.
def _download_one(file_name: str) -> None:
    """Fetch a single named file of the current item into ``output_dir``.

    The call is first attempted with the ``no_directory``,
    ``ignore_existing`` and ``verbose`` flags. If that call raises
    ``TypeError``, it is repeated with only the file list and the
    destination directory. Any other exception, or a failure of the
    second attempt, propagates to the caller.
    """

    def _required_kwargs() -> Dict[str, Any]:
        # Rebuilt for every attempt so each call receives fresh objects.
        return {"files": [file_name], "destdir": str(output_dir)}

    optional_flags = {
        "no_directory": True,
        "ignore_existing": True,
        "verbose": False,
    }
    try:
        download_fn(identifier, **_required_kwargs(), **optional_flags)
    except TypeError:
        # Older versions may not support some flags.
        download_fn(identifier, **_required_kwargs())
try: try:
download_fn( _download_one(chosen_name)
identifier,
files=[chosen_name],
destdir=str(output_dir),
no_directory=True,
ignore_existing=True,
verbose=False,
)
except TypeError:
# Older versions may not support some flags.
download_fn(
identifier,
files=[chosen_name],
destdir=str(output_dir),
)
except Exception as exc: except Exception as exc:
raise Exception(f"Internet Archive download failed: {exc}") retry_name = unquote(str(chosen_name or "")).strip()
if retry_name and retry_name != chosen_name:
try:
_download_one(retry_name)
chosen_name = retry_name
except Exception as retry_exc:
raise Exception(f"Internet Archive download failed: {retry_exc}")
else:
raise Exception(f"Internet Archive download failed: {exc}")
# Resolve downloaded path (library behavior varies by version/flags). # Resolve downloaded path (library behavior varies by version/flags).
chosen_basename = Path(chosen_name).name if chosen_name else ""
candidates = [ candidates = [
output_dir / chosen_name, output_dir / chosen_name,
output_dir / chosen_basename,
output_dir / identifier / chosen_name, output_dir / identifier / chosen_name,
output_dir / identifier / chosen_basename,
] ]
for p in candidates: for p in candidates:
try: try: