API/HTTP.py

"""
Unified HTTP client for downlow using httpx.

Provides synchronous and asynchronous HTTP operations with:
- Automatic retries on transient failures
- Configurable timeouts and headers
- Built-in progress tracking for downloads
- Request/response logging support
"""

import httpx
import asyncio
import sys
import time
import traceback
import re
import os
import json
from typing import Optional, Dict, Any, Callable, List, Union
from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs
import logging

from SYS.logger import debug, is_debug_enabled, log
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
from SYS.utils import ensure_directory, sha256_file

try:  # Optional; used for metadata extraction when available
    from SYS.yt_metadata import extract_ytdlp_tags
except Exception:  # pragma: no cover - optional dependency
    extract_ytdlp_tags = None  # type: ignore[assignment]

logger = logging.getLogger(__name__)

from API.ssl_certs import resolve_verify_value as _resolve_verify_value
from API.ssl_certs import get_requests_verify_value
from API.httpx_shared import get_shared_httpx_client

# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"


class HTTPClient:
    """Unified HTTP client with sync support."""

    def __init__(
        self,
        timeout: float = DEFAULT_TIMEOUT,
        retries: int = DEFAULT_RETRIES,
        user_agent: str = DEFAULT_USER_AGENT,
        verify_ssl: bool = True,
        headers: Optional[Dict[str,
                               str]] = None,
    ):
        """
        Initialize HTTP client.

        Args:
            timeout: Request timeout in seconds
            retries: Number of retries on transient failures
            user_agent: User-Agent header value
            verify_ssl: Whether to verify SSL certificates
            headers: Additional headers to include in all requests
        """
        self.timeout = timeout
        self.retries = retries
        self.user_agent = user_agent
        self.verify_ssl = verify_ssl
        self.base_headers = headers or {}
        self._client: Optional[httpx.Client] = None

        self._httpx_verify = _resolve_verify_value(verify_ssl)

    # Debug helpers
    def _debug_panel(self, title: str, rows: List[tuple[str, Any]]) -> None:
        if not is_debug_enabled():
            return
        try:
            from rich.table import Table as RichTable
            from rich.panel import Panel

            grid = RichTable.grid(padding=(0, 1))
            grid.add_column("Key", style="cyan", no_wrap=True)
            grid.add_column("Value")
            for key, val in rows:
                try:
                    grid.add_row(str(key), str(val))
                except Exception:
                    grid.add_row(str(key), "<unprintable>")

            debug(Panel(grid, title=title, expand=False))
        except Exception:
            # Fallback to simple debug output
            debug(title, rows)

    def __enter__(self):
        """Context manager entry."""
        self._client = httpx.Client(
            timeout=self.timeout,
            verify=self._httpx_verify,
            headers=self._get_headers(),
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        if self._client:
            self._client.close()
            self._client = None

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with user-agent."""
        headers = {
            "User-Agent": self.user_agent
        }
        headers.update(self.base_headers)
        return headers

    def get(
        self,
        url: str,
        params: Optional[Dict[str,
                              Any]] = None,
        headers: Optional[Dict[str,
                               str]] = None,
        allow_redirects: bool = True,
    ) -> httpx.Response:
        """
        Make a GET request.

        Args:
            url: Request URL
            params: Query parameters
            headers: Additional headers
            allow_redirects: Follow redirects

        Returns:
            httpx.Response object
        """
        return self._request(
            "GET",
            url,
            params=params,
            headers=headers,
            follow_redirects=allow_redirects,
        )

    def post(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        files: Optional[Dict] = None,
        headers: Optional[Dict[str,
                               str]] = None,
    ) -> httpx.Response:
        """
        Make a POST request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            files: Files to upload
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return self._request(
            "POST",
            url,
            data=data,
            json=json,
            files=files,
            headers=headers,
        )

    def put(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        content: Optional[Any] = None,
        files: Optional[Dict] = None,
        headers: Optional[Dict[str,
                               str]] = None,
    ) -> httpx.Response:
        """
        Make a PUT request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            content: Raw content
            files: Files to upload
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return self._request(
            "PUT",
            url,
            data=data,
            json=json,
            content=content,
            files=files,
            headers=headers,
        )

    def delete(
        self,
        url: str,
        headers: Optional[Dict[str,
                               str]] = None,
    ) -> httpx.Response:
        """
        Make a DELETE request.

        Args:
            url: Request URL
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return self._request(
            "DELETE",
            url,
            headers=headers,
        )

    def request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """
        Make a generic HTTP request.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments

        Returns:
            httpx.Response object
        """
        return self._request(method, url, **kwargs)

    def download(
        self,
        url: str,
        file_path: str,
        chunk_size: int = 8192,
        progress_callback: Optional[Callable[[int,
                                              int],
                                             None]] = None,
        headers: Optional[Dict[str,
                               str]] = None,
    ) -> Path:
        """
        Download a file from URL with optional progress tracking.

        Args:
            url: File URL
            file_path: Local file path to save to
            chunk_size: Download chunk size
            progress_callback: Callback(bytes_downloaded, total_bytes)
            headers: Additional headers

        Returns:
            Path object of downloaded file
        """
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        with self._request_stream("GET",
                                  url,
                                  headers=headers,
                                  follow_redirects=True) as response:
            response.raise_for_status()
            total_bytes = int(response.headers.get("content-length", 0))
            bytes_downloaded = 0

            # Render progress immediately (even if the transfer is very fast)
            if progress_callback:
                try:
                    progress_callback(0, total_bytes)
                except Exception:
                    logger.exception("Error in progress_callback initial call")

            with open(path, "wb") as f:
                for chunk in response.iter_bytes(chunk_size):
                    if chunk:
                        f.write(chunk)
                        bytes_downloaded += len(chunk)
                        if progress_callback:
                            progress_callback(bytes_downloaded, total_bytes)

            # Ensure a final callback is emitted.
            if progress_callback:
                try:
                    progress_callback(bytes_downloaded, total_bytes)
                except Exception:
                    logger.exception("Error in progress_callback final call")

        return path

    def _request(
        self,
        method: str,
        url: str,
        raise_for_status: bool = True,
        log_http_errors: bool = True,
        **kwargs,
    ) -> httpx.Response:
        """
        Make an HTTP request with automatic retries.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.Client.request()

        Returns:
            httpx.Response object
        """
        if not self._client:
            raise RuntimeError(
                "HTTPClient must be used with context manager (with statement)"
            )

        # Merge headers once per call (do not rebuild for every retry attempt).
        merged_headers = self._get_headers()
        extra_headers = kwargs.get("headers")
        if extra_headers:
            try:
                merged_headers.update(extra_headers)
            except Exception:
                # If headers is not a mapping, keep it as-is and let httpx raise.
                merged_headers = extra_headers
        kwargs["headers"] = merged_headers

        last_exception: Exception | None = None

        def _raw_debug_enabled() -> bool:
            try:
                val = str(os.environ.get("MM_HTTP_RAW", "")).strip().lower()
                if val in {"0", "false", "no", "off"}:
                    return False
                return is_debug_enabled()
            except Exception:
                return is_debug_enabled()

        def _preview(value: Any, *, limit: int = 2000) -> str:
            if value is None:
                return "<not provided>"
            try:
                # File-like objects (uploads/streams) - show compact summary instead of raw contents
                if hasattr(value, "read") or hasattr(value, "fileno"):
                    name = getattr(value, "name", None)
                    total = getattr(value, "_total", None)
                    label = getattr(value, "_label", None)
                    try:
                        pos = value.tell() if hasattr(value, "tell") else None
                    except Exception:
                        pos = None
                    parts = []
                    if name is not None:
                        parts.append(f"name={name!r}")
                    if total is not None:
                        parts.append(f"total={total}")
                    if label is not None:
                        parts.append(f"label={label!r}")
                    if pos is not None:
                        parts.append(f"pos={pos}")
                    summary = " ".join(parts) if parts else None
                    return f"<file-like {summary}>" if summary else "<file-like>"

                if isinstance(value, (dict, list, tuple)):
                    # Use default=str to avoid failing on non-serializable objects (e.g., file-like)
                    text = json.dumps(value, ensure_ascii=False, default=str)
                elif isinstance(value, (bytes, bytearray)):
                    text = value.decode("utf-8", errors="replace")
                else:
                    text = str(value)
            except Exception:
                try:
                    text = repr(value)
                except Exception:
                    text = "<unprintable>"
            if len(text) > limit:
                return text[:limit] + "..."
            return text

        for attempt in range(self.retries):
            self._debug_panel(
                "HTTP request",
                [
                    ("method", method),
                    ("url", url),
                    ("attempt", f"{attempt + 1}/{self.retries}"),
                    ("params", kwargs.get("params")),
                    ("headers", kwargs.get("headers")),
                    ("verify", self._httpx_verify),
                    ("follow_redirects", kwargs.get("follow_redirects", False)),
                ],
            )
            if _raw_debug_enabled():
                self._debug_panel(
                    "HTTP request raw",
                    [
                        ("params", _preview(kwargs.get("params"))),
                        ("data", _preview(kwargs.get("data"))),
                        ("json", _preview(kwargs.get("json"))),
                        ("content", _preview(kwargs.get("content"))),
                        ("files", _preview(kwargs.get("files"))),
                    ],
                )
            try:
                response = self._client.request(method, url, **kwargs)
                self._debug_panel(
                    "HTTP response",
                    [
                        ("method", method),
                        ("url", url),
                        ("status", getattr(response, "status_code", "")),
                        ("elapsed", getattr(response, "elapsed", "")),
                        (
                            "content_length",
                            response.headers.get("content-length") if hasattr(response, "headers") else "",
                        ),
                    ],
                )
                if _raw_debug_enabled():
                    content_type = ""
                    try:
                        content_type = response.headers.get("content-type", "")
                    except Exception:
                        content_type = ""
                    body_preview = ""
                    try:
                        if isinstance(content_type, str) and (
                            content_type.startswith("application/json")
                            or content_type.startswith("text/")
                        ):
                            body_preview = _preview(response.text, limit=4000)
                        else:
                            raw = response.content
                            if raw is None:
                                body_preview = "<no content>"
                            else:
                                body_preview = raw[:400].hex()
                    except Exception:
                        body_preview = "<unavailable>"
                    self._debug_panel(
                        "HTTP response raw",
                        [
                            ("content_type", content_type),
                            ("body_preview", body_preview),
                            ("body_length", len(response.content) if response is not None else ""),
                        ],
                    )
                if raise_for_status:
                    response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(
                    f"Timeout on attempt {attempt + 1}/{self.retries}: {url}"
                )
                if attempt < self.retries - 1:
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except Exception:
                        response_text = "<unable to read response>"
                    if log_http_errors:
                        logger.error(
                            f"HTTP {e.response.status_code} from {url}: {response_text}"
                        )
                    raise
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except Exception:
                    response_text = "<unable to read response>"
                logger.warning(
                    f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
                )
                if attempt < self.retries - 1:
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(
                    f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
                )

                # Detect certificate verification failures in the underlying error
                msg = str(e or "").lower()
                if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
                    logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
                    try:
                        temp_client = get_shared_httpx_client(
                            timeout=self.timeout,
                            verify_ssl=self._httpx_verify,
                            headers=self._get_headers(),
                        )
                        try:
                            response = temp_client.request(method, url, **kwargs)
                            if raise_for_status:
                                response.raise_for_status()
                            return response
                        except Exception as e2:
                            last_exception = e2
                    except Exception:
                        # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
                        pass

                if attempt < self.retries - 1:
                    continue
            except Exception as e:
                # Catch-all to handle non-httpx exceptions that may represent
                # certificate verification failures from underlying transports.
                last_exception = e
                logger.warning(f"Request exception on attempt {attempt + 1}/{self.retries}: {url} - {e}")
                msg = str(e or "").lower()
                if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
                    logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
                    try:
                        temp_client = get_shared_httpx_client(
                            timeout=self.timeout,
                            verify_ssl=self._httpx_verify,
                            headers=self._get_headers(),
                        )
                        try:
                            response = temp_client.request(method, url, **kwargs)
                            if raise_for_status:
                                response.raise_for_status()
                            return response
                        except Exception as e2:
                            last_exception = e2
                    except Exception:
                        # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
                        pass

                if attempt < self.retries - 1:
                    continue

        if last_exception:
            logger.error(
                f"Request failed after {self.retries} attempts: {url} - {last_exception}"
            )
            raise last_exception

        raise RuntimeError("Request failed after retries")

    def _request_stream(self, method: str, url: str, **kwargs):
        """Make a streaming request."""
        if not self._client:
            raise RuntimeError(
                "HTTPClient must be used with context manager (with statement)"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        self._debug_panel(
            "HTTP stream",
            [
                ("method", method),
                ("url", url),
                ("headers", kwargs.get("headers")),
                ("follow_redirects", kwargs.get("follow_redirects", False)),
            ],
        )

        return self._client.stream(method, url, **kwargs)


def download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
    suggested_filename: Optional[str] = None,
    pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks."""

    ensure_directory(output_dir)

    def _sanitize_filename(name: str) -> str:
        # Windows-safe filename sanitization.
        text = str(name or "").strip()
        if not text:
            return ""
        text = text.replace("/", "\\")
        text = text.split("\\")[-1]

        invalid = set('<>:"/\\|?*')
        cleaned_chars: List[str] = []
        for ch in text:
            o = ord(ch)
            if o < 32 or ch in invalid:
                cleaned_chars.append(" ")
                continue
            cleaned_chars.append(ch)
        cleaned = " ".join("".join(cleaned_chars).split()).strip()
        cleaned = cleaned.rstrip(" .")
        return cleaned

    def _unique_path(path: Path) -> Path:
        if not path.exists():
            return path
        stem = path.stem
        suffix = path.suffix
        parent = path.parent
        for i in range(1, 10_000):
            candidate = parent / f"{stem} ({i}){suffix}"
            if not candidate.exists():
                return candidate
        return parent / f"{stem} ({int(time.time())}){suffix}"

    parsed_url = urlparse(url)
    url_path = parsed_url.path

    filename: Optional[str] = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ("filename", "download", "file", "name"):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break

    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)

    if "?" in filename:
        filename = filename.split("?")[0]

    content_type = ""
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            try:
                content_type = str(response.headers.get("content-type", "") or "").strip().lower()
            except Exception:
                content_type = ""

            if content_disposition:
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as exc:
        if not quiet:
            log(f"Could not get filename from headers: {exc}", file=sys.stderr)

    try:
        page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
        ext = ""
        try:
            ext = Path(str(filename or "")).suffix.lower()
        except Exception:
            ext = ""

        ct0 = (content_type or "").split(";", 1)[0].strip().lower()
        must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)

        if must_probe:
            with HTTPClient(timeout=10.0) as client:
                with client._request_stream("GET", url, follow_redirects=True) as resp:
                    resp.raise_for_status()
                    ct = (
                        str(resp.headers.get("content-type", "") or "")
                        .split(";", 1)[0]
                        .strip()
                        .lower()
                    )
                    if ct.startswith("text/html"):
                        raise DownloadError("URL appears to be an HTML page, not a direct file")
    except DownloadError:
        raise
    except Exception:
        logger.exception("Unexpected error while probing URL content")

    suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
    if suggested:
        suggested_path = Path(suggested)
        if suggested_path.suffix:
            filename = suggested
        else:
            detected_ext = ""
            try:
                detected_ext = Path(str(filename)).suffix
            except Exception:
                logger.exception("Failed to detect file extension from filename")
                detected_ext = ""
            filename = suggested + detected_ext if detected_ext else suggested

    try:
        has_ext = bool(filename and Path(str(filename)).suffix)
    except Exception:
        logger.exception("Failed to determine if filename has extension")
        has_ext = False

    if filename and (not has_ext):
        ct = (content_type or "").split(";", 1)[0].strip().lower()
        ext_by_ct = {
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/x-mobipocket-ebook": ".mobi",
            "image/jpeg": ".jpg",
            "image/png": ".png",
            "image/webp": ".webp",
            "image/gif": ".gif",
            "text/plain": ".txt",
            "application/zip": ".zip",
        }

        if ct in ext_by_ct:
            filename = f"{filename}{ext_by_ct[ct]}"
        elif ct.startswith("text/html"):
            raise DownloadError("URL appears to be an HTML page, not a direct file")

    if not filename or not str(filename).strip():
        raise DownloadError(
            "Could not determine filename for URL (no Content-Disposition and no path filename)"
        )

    file_path = _unique_path(output_dir / str(filename))

    use_pipeline_transfer = False
    try:
        if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
            ui = None
            if hasattr(pipeline_progress, "ui_and_pipe_index"):
                ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
            use_pipeline_transfer = ui is not None
    except Exception:
        use_pipeline_transfer = False

    progress_bar: Optional[ProgressBar] = None
    if (not quiet) and (not use_pipeline_transfer):
        progress_bar = ProgressBar()

    transfer_started = [False]

    if not quiet:
        debug(f"Direct download: {filename}")

    try:
        start_time = time.time()
        downloaded_bytes = [0]
        transfer_started[0] = False

        def _maybe_begin_transfer(content_length: int) -> None:
            if pipeline_progress is None or transfer_started[0]:
                return
            try:
                total_val: Optional[int] = (
                    int(content_length)
                    if isinstance(content_length, int) and content_length > 0
                    else None
                )
            except Exception:
                total_val = None
            try:
                if hasattr(pipeline_progress, "begin_transfer"):
                    pipeline_progress.begin_transfer(
                        label=str(filename or "download"),
                        total=total_val,
                    )
                    transfer_started[0] = True
            except Exception:
                return

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            downloaded_bytes[0] = int(bytes_downloaded or 0)

            try:
                if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
                    _maybe_begin_transfer(content_length)
                    total_val: Optional[int] = (
                        int(content_length)
                        if isinstance(content_length, int) and content_length > 0
                        else None
                    )
                    pipeline_progress.update_transfer(
                        label=str(filename or "download"),
                        completed=int(bytes_downloaded or 0),
                        total=total_val,
                    )
            except Exception:
                logger.exception("Error updating pipeline progress transfer")

            if progress_bar is not None:
                progress_bar.update(
                    downloaded=int(bytes_downloaded or 0),
                    total=int(content_length) if content_length and content_length > 0 else None,
                    label=str(filename or "download"),
                    file=sys.stderr,
                )

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)

        elapsed = time.time() - start_time

        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            logger.exception("Failed to finish progress bar")

        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            logger.exception("Failed to finish pipeline transfer")

        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s")

        ext_out = ""
        try:
            ext_out = Path(str(filename)).suffix.lstrip(".")
        except Exception:
            ext_out = ""

        info: Dict[str, Any] = {
            "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
            "ext": ext_out,
            "webpage_url": url,
        }

        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            logger.exception("Failed to compute SHA256 of downloaded file")

        tags: List[str] = []
        if extract_ytdlp_tags is not None:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as exc:
                log(f"Error extracting tags: {exc}", file=sys.stderr)

        if not any(str(t).startswith("title:") for t in tags):
            info["title"] = str(filename)
            tags = []
            if extract_ytdlp_tags is not None:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as exc:
                    log(f"Error extracting tags with filename: {exc}", file=sys.stderr)

        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )

        return DownloadMediaResult(
            path=file_path,
            info=info,
            tag=tags,
            source_url=url,
            hash_value=hash_value,
        )

    except (httpx.HTTPError, httpx.RequestError) as exc:
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            logger.exception("Failed to finish progress bar during HTTP error handling")
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            logger.exception("Failed to finish pipeline transfer during HTTP error handling")

        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc

    except Exception as exc:
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            logger.exception("Failed to finish progress bar during error handling")
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            logger.exception("Failed to finish pipeline transfer during error handling")

        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc


# Back-compat alias
_download_direct_file = download_direct_file


class AsyncHTTPClient:
    """Unified async HTTP client with asyncio support."""

    def __init__(
        self,
        timeout: float = DEFAULT_TIMEOUT,
        retries: int = DEFAULT_RETRIES,
        user_agent: str = DEFAULT_USER_AGENT,
        verify_ssl: bool = True,
        headers: Optional[Dict[str,
                               str]] = None,
    ):
        """
        Initialize async HTTP client.

        Args:
            timeout: Request timeout in seconds
            retries: Number of retries on transient failures
            user_agent: User-Agent header value
            verify_ssl: Whether to verify SSL certificates
            headers: Additional headers to include in all requests
        """
        self.timeout = timeout
        self.retries = retries
        self.user_agent = user_agent
        self.verify_ssl = verify_ssl
        self.base_headers = headers or {}
        self._client: Optional[httpx.AsyncClient] = None
        self._httpx_verify = _resolve_verify_value(verify_ssl)

    async def __aenter__(self):
        """Async context manager entry."""
        self._client = httpx.AsyncClient(
            timeout=self.timeout,
            verify=self._httpx_verify,
            headers=self._get_headers(),
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with user-agent."""
        headers = {
            "User-Agent": self.user_agent
        }
        headers.update(self.base_headers)
        return headers

    async def get(
        self,
        url: str,
        params: Optional[Dict[str,
                              Any]] = None,
        headers: Optional[Dict[str,
                               str]] = None,
        allow_redirects: bool = True,
    ) -> httpx.Response:
        """
        Make an async GET request.

        Args:
            url: Request URL
            params: Query parameters
            headers: Additional headers
            allow_redirects: Follow redirects

        Returns:
            httpx.Response object
        """
        return await self._request(
            "GET",
            url,
            params=params,
            headers=headers,
            follow_redirects=allow_redirects,
        )

    async def post(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        headers: Optional[Dict[str,
                               str]] = None,
    ) -> httpx.Response:
        """
        Make an async POST request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return await self._request(
            "POST",
            url,
            data=data,
            json=json,
            headers=headers,
        )

    async def download(
        self,
        url: str,
        file_path: str,
        chunk_size: int = 8192,
        progress_callback: Optional[Callable[[int,
                                              int],
                                             None]] = None,
        headers: Optional[Dict[str,
                               str]] = None,
    ) -> Path:
        """
        Download a file from URL asynchronously with optional progress tracking.

        Args:
            url: File URL
            file_path: Local file path to save to
            chunk_size: Download chunk size
            progress_callback: Callback(bytes_downloaded, total_bytes)
            headers: Additional headers

        Returns:
            Path object of downloaded file
        """
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        async with self._request_stream("GET", url, headers=headers) as response:
            response.raise_for_status()
            total_bytes = int(response.headers.get("content-length", 0))
            bytes_downloaded = 0

            with open(path, "wb") as f:
                async for chunk in response.aiter_bytes(chunk_size):
                    if chunk:
                        f.write(chunk)
                        bytes_downloaded += len(chunk)
                        if progress_callback:
                            progress_callback(bytes_downloaded, total_bytes)

        return path

    async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """
        Make an async HTTP request with automatic retries.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.AsyncClient.request()

        Returns:
            httpx.Response object
        """
        if not self._client:
            raise RuntimeError(
                "AsyncHTTPClient must be used with async context manager"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        last_exception: Exception | None = None

        for attempt in range(self.retries):
            try:
                response = await self._client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(
                    f"Timeout on attempt {attempt + 1}/{self.retries}: {url}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)  # Brief delay before retry
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except Exception:
                        response_text = "<unable to read response>"
                    logger.error(
                        f"HTTP {e.response.status_code} from {url}: {response_text}"
                    )
                    raise
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except Exception:
                    response_text = "<unable to read response>"
                logger.warning(
                    f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(
                    f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue

        if last_exception:
            logger.error(
                f"Request failed after {self.retries} attempts: {url} - {last_exception}"
            )
            raise last_exception

        raise RuntimeError("Request failed after retries")

    def _request_stream(self, method: str, url: str, **kwargs):
        """Make a streaming request."""
        if not self._client:
            raise RuntimeError(
                "AsyncHTTPClient must be used with async context manager"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        return self._client.stream(method, url, **kwargs)


# Convenience function for quick sync requests
def get(url: str, **kwargs) -> httpx.Response:
    """Quick GET request without context manager."""
    with HTTPClient() as client:
        return client.get(url, **kwargs)


def post(url: str, **kwargs) -> httpx.Response:
    """Quick POST request without context manager."""
    with HTTPClient() as client:
        return client.post(url, **kwargs)


def download(
    url: str,
    file_path: str,
    progress_callback: Optional[Callable[[int,
                                          int],
                                         None]] = None,
    **kwargs,
) -> Path:
    """Quick file download without context manager."""
    with HTTPClient() as client:
        return client.download(
            url,
            file_path,
            progress_callback=progress_callback,
            **kwargs
        )