Files
Medios-Macina/API/HTTP.py
2026-02-11 18:16:07 -08:00

1220 lines
42 KiB
Python

"""
Unified HTTP client for downlow using httpx.
Provides synchronous and asynchronous HTTP operations with:
- Automatic retries on transient failures
- Configurable timeouts and headers
- Built-in progress tracking for downloads
- Request/response logging support
"""
import httpx
import asyncio
import sys
import time
import traceback
import re
import os
import json
from typing import Optional, Dict, Any, Callable, List, Union
from pathlib import Path
from urllib.parse import unquote, urlparse, parse_qs
import logging
from SYS.logger import debug, is_debug_enabled, log
from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar
from SYS.utils import ensure_directory, sha256_file
try: # Optional; used for metadata extraction when available
from SYS.yt_metadata import extract_ytdlp_tags
except Exception: # pragma: no cover - optional dependency
extract_ytdlp_tags = None # type: ignore[assignment]
logger = logging.getLogger(__name__)
from API.ssl_certs import resolve_verify_value as _resolve_verify_value
from API.ssl_certs import get_requests_verify_value
# Default configuration
DEFAULT_TIMEOUT = 30.0  # seconds; per-request timeout handed to httpx
DEFAULT_RETRIES = 3  # attempts for transient failures (timeouts, 5xx, connection errors)
# Desktop-browser User-Agent; some hosts reject requests with no/obvious-bot UA.
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
class HTTPClient:
"""Unified HTTP client with sync support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
retries: int = DEFAULT_RETRIES,
user_agent: str = DEFAULT_USER_AGENT,
verify_ssl: bool = True,
headers: Optional[Dict[str,
str]] = None,
):
"""
Initialize HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
user_agent: User-Agent header value
verify_ssl: Whether to verify SSL certificates
headers: Additional headers to include in all requests
"""
self.timeout = timeout
self.retries = retries
self.user_agent = user_agent
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.Client] = None
self._httpx_verify = _resolve_verify_value(verify_ssl)
# Debug helpers
def _debug_panel(self, title: str, rows: List[tuple[str, Any]]) -> None:
if not is_debug_enabled():
return
try:
from rich.table import Table as RichTable
from rich.panel import Panel
grid = RichTable.grid(padding=(0, 1))
grid.add_column("Key", style="cyan", no_wrap=True)
grid.add_column("Value")
for key, val in rows:
try:
grid.add_row(str(key), str(val))
except Exception:
grid.add_row(str(key), "<unprintable>")
debug(Panel(grid, title=title, expand=False))
except Exception:
# Fallback to simple debug output
debug(title, rows)
def __enter__(self):
"""Context manager entry."""
self._client = httpx.Client(
timeout=self.timeout,
verify=self._httpx_verify,
headers=self._get_headers(),
)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
if self._client:
self._client.close()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
headers = {
"User-Agent": self.user_agent
}
headers.update(self.base_headers)
return headers
def get(
self,
url: str,
params: Optional[Dict[str,
Any]] = None,
headers: Optional[Dict[str,
str]] = None,
allow_redirects: bool = True,
) -> httpx.Response:
"""
Make a GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
return self._request(
"GET",
url,
params=params,
headers=headers,
follow_redirects=allow_redirects,
)
def post(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
files: Optional[Dict] = None,
headers: Optional[Dict[str,
str]] = None,
) -> httpx.Response:
"""
Make a POST request.
Args:
url: Request URL
data: Form data
json: JSON data
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"POST",
url,
data=data,
json=json,
files=files,
headers=headers,
)
def put(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
content: Optional[Any] = None,
files: Optional[Dict] = None,
headers: Optional[Dict[str,
str]] = None,
) -> httpx.Response:
"""
Make a PUT request.
Args:
url: Request URL
data: Form data
json: JSON data
content: Raw content
files: Files to upload
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"PUT",
url,
data=data,
json=json,
content=content,
files=files,
headers=headers,
)
def delete(
self,
url: str,
headers: Optional[Dict[str,
str]] = None,
) -> httpx.Response:
"""
Make a DELETE request.
Args:
url: Request URL
headers: Additional headers
Returns:
httpx.Response object
"""
return self._request(
"DELETE",
url,
headers=headers,
)
def request(self, method: str, url: str, **kwargs) -> httpx.Response:
"""
Make a generic HTTP request.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments
Returns:
httpx.Response object
"""
return self._request(method, url, **kwargs)
    def download(
        self,
        url: str,
        file_path: str,
        chunk_size: int = 8192,
        progress_callback: Optional[Callable[[int, int], None]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Path:
        """
        Download a file from URL with optional progress tracking.

        Args:
            url: File URL
            file_path: Local file path to save to
            chunk_size: Download chunk size
            progress_callback: Callback(bytes_downloaded, total_bytes)
            headers: Additional headers

        Returns:
            Path object of downloaded file
        """
        path = Path(file_path)
        # Create parent directories so the open() below cannot fail on a missing dir.
        path.parent.mkdir(parents=True, exist_ok=True)
        with self._request_stream(
            "GET", url, headers=headers, follow_redirects=True
        ) as response:
            response.raise_for_status()
            # content-length may be absent; 0 then means "total unknown" to callbacks.
            total_bytes = int(response.headers.get("content-length", 0))
            bytes_downloaded = 0
            # Render progress immediately (even if the transfer is very fast)
            if progress_callback:
                try:
                    progress_callback(0, total_bytes)
                except Exception:
                    logger.exception("Error in progress_callback initial call")
            with open(path, "wb") as f:
                for chunk in response.iter_bytes(chunk_size):
                    if chunk:
                        f.write(chunk)
                        bytes_downloaded += len(chunk)
                        # Mid-transfer callbacks are deliberately unguarded:
                        # a failing callback aborts the download.
                        if progress_callback:
                            progress_callback(bytes_downloaded, total_bytes)
            # Ensure a final callback is emitted.
            if progress_callback:
                try:
                    progress_callback(bytes_downloaded, total_bytes)
                except Exception:
                    logger.exception("Error in progress_callback final call")
        return path
def _request(
self,
method: str,
url: str,
raise_for_status: bool = True,
log_http_errors: bool = True,
**kwargs,
) -> httpx.Response:
"""
Make an HTTP request with automatic retries.
Args:
method: HTTP method
url: Request URL
**kwargs: Additional arguments for httpx.Client.request()
Returns:
httpx.Response object
"""
if not self._client:
raise RuntimeError(
"HTTPClient must be used with context manager (with statement)"
)
# Merge headers once per call (do not rebuild for every retry attempt).
merged_headers = self._get_headers()
extra_headers = kwargs.get("headers")
if extra_headers:
try:
merged_headers.update(extra_headers)
except Exception:
# If headers is not a mapping, keep it as-is and let httpx raise.
merged_headers = extra_headers
kwargs["headers"] = merged_headers
last_exception: Exception | None = None
def _raw_debug_enabled() -> bool:
try:
val = str(os.environ.get("MM_HTTP_RAW", "")).strip().lower()
if val in {"0", "false", "no", "off"}:
return False
return is_debug_enabled()
except Exception:
return is_debug_enabled()
def _preview(value: Any, *, limit: int = 2000) -> str:
if value is None:
return "<not provided>"
try:
# File-like objects (uploads/streams) - show compact summary instead of raw contents
if hasattr(value, "read") or hasattr(value, "fileno"):
name = getattr(value, "name", None)
total = getattr(value, "_total", None)
label = getattr(value, "_label", None)
try:
pos = value.tell() if hasattr(value, "tell") else None
except Exception:
pos = None
parts = []
if name is not None:
parts.append(f"name={name!r}")
if total is not None:
parts.append(f"total={total}")
if label is not None:
parts.append(f"label={label!r}")
if pos is not None:
parts.append(f"pos={pos}")
summary = " ".join(parts) if parts else None
return f"<file-like {summary}>" if summary else "<file-like>"
if isinstance(value, (dict, list, tuple)):
# Use default=str to avoid failing on non-serializable objects (e.g., file-like)
text = json.dumps(value, ensure_ascii=False, default=str)
elif isinstance(value, (bytes, bytearray)):
text = value.decode("utf-8", errors="replace")
else:
text = str(value)
except Exception:
try:
text = repr(value)
except Exception:
text = "<unprintable>"
if len(text) > limit:
return text[:limit] + "..."
return text
for attempt in range(self.retries):
self._debug_panel(
"HTTP request",
[
("method", method),
("url", url),
("attempt", f"{attempt + 1}/{self.retries}"),
("params", kwargs.get("params")),
("headers", kwargs.get("headers")),
("verify", self._httpx_verify),
("follow_redirects", kwargs.get("follow_redirects", False)),
],
)
if _raw_debug_enabled():
self._debug_panel(
"HTTP request raw",
[
("params", _preview(kwargs.get("params"))),
("data", _preview(kwargs.get("data"))),
("json", _preview(kwargs.get("json"))),
("content", _preview(kwargs.get("content"))),
("files", _preview(kwargs.get("files"))),
],
)
try:
response = self._client.request(method, url, **kwargs)
self._debug_panel(
"HTTP response",
[
("method", method),
("url", url),
("status", getattr(response, "status_code", "")),
("elapsed", getattr(response, "elapsed", "")),
(
"content_length",
response.headers.get("content-length") if hasattr(response, "headers") else "",
),
],
)
if _raw_debug_enabled():
content_type = ""
try:
content_type = response.headers.get("content-type", "")
except Exception:
content_type = ""
body_preview = ""
try:
if isinstance(content_type, str) and (
content_type.startswith("application/json")
or content_type.startswith("text/")
):
body_preview = _preview(response.text, limit=4000)
else:
raw = response.content
if raw is None:
body_preview = "<no content>"
else:
body_preview = raw[:400].hex()
except Exception:
body_preview = "<unavailable>"
self._debug_panel(
"HTTP response raw",
[
("content_type", content_type),
("body_preview", body_preview),
("body_length", len(response.content) if response is not None else ""),
],
)
if raise_for_status:
response.raise_for_status()
return response
except httpx.TimeoutException as e:
last_exception = e
logger.warning(
f"Timeout on attempt {attempt + 1}/{self.retries}: {url}"
)
if attempt < self.retries - 1:
continue
except httpx.HTTPStatusError as e:
# Don't retry on 4xx errors
if 400 <= e.response.status_code < 500:
try:
response_text = e.response.text[:500]
except Exception:
response_text = "<unable to read response>"
if log_http_errors:
logger.error(
f"HTTP {e.response.status_code} from {url}: {response_text}"
)
raise
last_exception = e
try:
response_text = e.response.text[:200]
except Exception:
response_text = "<unable to read response>"
logger.warning(
f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
)
if attempt < self.retries - 1:
continue
except (httpx.RequestError, httpx.ConnectError) as e:
last_exception = e
logger.warning(
f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
)
# Detect certificate verification failures in the underlying error
msg = str(e or "").lower()
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try:
import httpx as _httpx
# Use the client's precomputed verify argument (set at init)
verify_override = self._httpx_verify
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client:
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass
if attempt < self.retries - 1:
continue
except Exception as e:
# Catch-all to handle non-httpx exceptions that may represent
# certificate verification failures from underlying transports.
last_exception = e
logger.warning(f"Request exception on attempt {attempt + 1}/{self.retries}: {url} - {e}")
msg = str(e or "").lower()
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try:
import httpx as _httpx
# Use the client's precomputed verify argument (set at init)
verify_override = self._httpx_verify
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client:
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass
if attempt < self.retries - 1:
continue
if last_exception:
logger.error(
f"Request failed after {self.retries} attempts: {url} - {last_exception}"
)
raise last_exception
raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError(
"HTTPClient must be used with context manager (with statement)"
)
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
self._debug_panel(
"HTTP stream",
[
("method", method),
("url", url),
("headers", kwargs.get("headers")),
("follow_redirects", kwargs.get("follow_redirects", False)),
],
)
return self._client.stream(method, url, **kwargs)
def download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
    suggested_filename: Optional[str] = None,
    pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks.

    Args:
        url: Direct URL to the file.
        output_dir: Directory the file is saved into (created if missing).
        debug_logger: Optional structured debug-record sink.
        quiet: Suppress progress/debug output.
        suggested_filename: Preferred name (sanitized; extension inferred if absent).
        pipeline_progress: Optional progress reporter exposing
            begin_transfer/update_transfer/finish_transfer hooks.

    Returns:
        DownloadMediaResult with the saved path, a yt-dlp-style info dict,
        extracted tags, and the file's SHA-256.

    Raises:
        DownloadError: If the URL looks like an HTML page, no filename can be
            determined, or the transfer fails.
    """
    ensure_directory(output_dir)

    def _sanitize_filename(name: str) -> str:
        # Windows-safe filename sanitization.
        text = str(name or "").strip()
        if not text:
            return ""
        text = text.replace("/", "\\")
        text = text.split("\\")[-1]
        invalid = set('<>:"/\\|?*')
        cleaned_chars: List[str] = []
        for ch in text:
            o = ord(ch)
            if o < 32 or ch in invalid:
                cleaned_chars.append(" ")
                continue
            cleaned_chars.append(ch)
        cleaned = " ".join("".join(cleaned_chars).split()).strip()
        cleaned = cleaned.rstrip(" .")
        return cleaned

    def _unique_path(path: Path) -> Path:
        # Append " (N)" before the suffix until an unused name is found.
        if not path.exists():
            return path
        stem = path.stem
        suffix = path.suffix
        parent = path.parent
        for i in range(1, 10_000):
            candidate = parent / f"{stem} ({i}){suffix}"
            if not candidate.exists():
                return candidate
        return parent / f"{stem} ({int(time.time())}){suffix}"

    # --- Determine a filename: query params first, then the URL path. ---
    parsed_url = urlparse(url)
    url_path = parsed_url.path
    filename: Optional[str] = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ("filename", "download", "file", "name"):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break
    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)
        if "?" in filename:
            filename = filename.split("?")[0]
    # --- HEAD probe: Content-Disposition filename and Content-Type. ---
    content_type = ""
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            try:
                content_type = str(response.headers.get("content-type", "") or "").strip().lower()
            except Exception:
                content_type = ""
            if content_disposition:
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            # FIX: previously logged the literal placeholder "(unknown)".
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as exc:
        if not quiet:
            log(f"Could not get filename from headers: {exc}", file=sys.stderr)
    # --- GET probe for suspicious URLs (HTML content-type or script-like ext). ---
    try:
        page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
        ext = ""
        try:
            ext = Path(str(filename or "")).suffix.lower()
        except Exception:
            ext = ""
        ct0 = (content_type or "").split(";", 1)[0].strip().lower()
        must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)
        if must_probe:
            with HTTPClient(timeout=10.0) as client:
                with client._request_stream("GET", url, follow_redirects=True) as resp:
                    resp.raise_for_status()
                    ct = (
                        str(resp.headers.get("content-type", "") or "")
                        .split(";", 1)[0]
                        .strip()
                        .lower()
                    )
                    if ct.startswith("text/html"):
                        raise DownloadError("URL appears to be an HTML page, not a direct file")
    except DownloadError:
        raise
    except Exception:
        logger.exception("Unexpected error while probing URL content")
    # --- Apply the caller-suggested filename, preserving a detected extension. ---
    suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
    if suggested:
        suggested_path = Path(suggested)
        if suggested_path.suffix:
            filename = suggested
        else:
            detected_ext = ""
            try:
                detected_ext = Path(str(filename)).suffix
            except Exception:
                logger.exception("Failed to detect file extension from filename")
                detected_ext = ""
            filename = suggested + detected_ext if detected_ext else suggested
    try:
        has_ext = bool(filename and Path(str(filename)).suffix)
    except Exception:
        logger.exception("Failed to determine if filename has extension")
        has_ext = False
    if filename and (not has_ext):
        # No extension: infer one from the Content-Type header.
        ct = (content_type or "").split(";", 1)[0].strip().lower()
        ext_by_ct = {
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/x-mobipocket-ebook": ".mobi",
            "image/jpeg": ".jpg",
            "image/png": ".png",
            "image/webp": ".webp",
            "image/gif": ".gif",
            "text/plain": ".txt",
            "application/zip": ".zip",
        }
        if ct in ext_by_ct:
            # FIX: previously the whole name was replaced with the literal
            # "(unknown)"; append the inferred extension to the real name.
            filename = f"{filename}{ext_by_ct[ct]}"
        elif ct.startswith("text/html"):
            raise DownloadError("URL appears to be an HTML page, not a direct file")
    if not filename or not str(filename).strip():
        raise DownloadError(
            "Could not determine filename for URL (no Content-Disposition and no path filename)"
        )
    file_path = _unique_path(output_dir / str(filename))
    # --- Decide whether the pipeline UI handles progress (vs. a local bar). ---
    use_pipeline_transfer = False
    try:
        if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
            ui = None
            if hasattr(pipeline_progress, "ui_and_pipe_index"):
                ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
            use_pipeline_transfer = ui is not None
    except Exception:
        use_pipeline_transfer = False
    progress_bar: Optional[ProgressBar] = None
    if (not quiet) and (not use_pipeline_transfer):
        progress_bar = ProgressBar()
    # Single-element lists so nested callbacks can mutate state.
    transfer_started = [False]
    if not quiet:
        # FIX: previously logged the literal placeholder "(unknown)".
        debug(f"Direct download: {url} -> {file_path}")
    try:
        start_time = time.time()
        downloaded_bytes = [0]
        transfer_started[0] = False

        def _maybe_begin_transfer(content_length: int) -> None:
            # Start the pipeline transfer lazily, once, when the first progress arrives.
            if pipeline_progress is None or transfer_started[0]:
                return
            try:
                total_val: Optional[int] = (
                    int(content_length)
                    if isinstance(content_length, int) and content_length > 0
                    else None
                )
            except Exception:
                total_val = None
            try:
                if hasattr(pipeline_progress, "begin_transfer"):
                    pipeline_progress.begin_transfer(
                        label=str(filename or "download"),
                        total=total_val,
                    )
                    transfer_started[0] = True
            except Exception:
                return

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            downloaded_bytes[0] = int(bytes_downloaded or 0)
            try:
                if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
                    _maybe_begin_transfer(content_length)
                    total_val: Optional[int] = (
                        int(content_length)
                        if isinstance(content_length, int) and content_length > 0
                        else None
                    )
                    pipeline_progress.update_transfer(
                        label=str(filename or "download"),
                        completed=int(bytes_downloaded or 0),
                        total=total_val,
                    )
            except Exception:
                logger.exception("Error updating pipeline progress transfer")
            if progress_bar is not None:
                progress_bar.update(
                    downloaded=int(bytes_downloaded or 0),
                    total=int(content_length) if content_length and content_length > 0 else None,
                    label=str(filename or "download"),
                    file=sys.stderr,
                )

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)
        elapsed = time.time() - start_time
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            logger.exception("Failed to finish progress bar")
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            logger.exception("Failed to finish pipeline transfer")
        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s")
        # --- Build a yt-dlp-style info dict and extract tags. ---
        ext_out = ""
        try:
            ext_out = Path(str(filename)).suffix.lstrip(".")
        except Exception:
            ext_out = ""
        info: Dict[str, Any] = {
            "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
            "ext": ext_out,
            "webpage_url": url,
        }
        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            logger.exception("Failed to compute SHA256 of downloaded file")
        tags: List[str] = []
        if extract_ytdlp_tags is not None:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as exc:
                log(f"Error extracting tags: {exc}", file=sys.stderr)
        # If no title tag was produced, retry with the filename as title.
        if not any(str(t).startswith("title:") for t in tags):
            info["title"] = str(filename)
            tags = []
            if extract_ytdlp_tags is not None:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as exc:
                    log(f"Error extracting tags with filename: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )
        return DownloadMediaResult(
            path=file_path,
            info=info,
            tag=tags,
            source_url=url,
            hash_value=hash_value,
        )
    except (httpx.HTTPError, httpx.RequestError) as exc:
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            logger.exception("Failed to finish progress bar during HTTP error handling")
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            logger.exception("Failed to finish pipeline transfer during HTTP error handling")
        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc
    except Exception as exc:
        try:
            if progress_bar is not None:
                progress_bar.finish()
        except Exception:
            logger.exception("Failed to finish progress bar during error handling")
        try:
            if pipeline_progress is not None and transfer_started[0] and hasattr(
                pipeline_progress, "finish_transfer"
            ):
                pipeline_progress.finish_transfer(label=str(filename or "download"))
        except Exception:
            logger.exception("Failed to finish pipeline transfer during error handling")
        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc
# Back-compat alias: older call sites import the underscore-prefixed name.
_download_direct_file = download_direct_file
class AsyncHTTPClient:
"""Unified async HTTP client with asyncio support."""
def __init__(
self,
timeout: float = DEFAULT_TIMEOUT,
retries: int = DEFAULT_RETRIES,
user_agent: str = DEFAULT_USER_AGENT,
verify_ssl: bool = True,
headers: Optional[Dict[str,
str]] = None,
):
"""
Initialize async HTTP client.
Args:
timeout: Request timeout in seconds
retries: Number of retries on transient failures
user_agent: User-Agent header value
verify_ssl: Whether to verify SSL certificates
headers: Additional headers to include in all requests
"""
self.timeout = timeout
self.retries = retries
self.user_agent = user_agent
self.verify_ssl = verify_ssl
self.base_headers = headers or {}
self._client: Optional[httpx.AsyncClient] = None
self._httpx_verify = _resolve_verify_value(verify_ssl)
async def __aenter__(self):
"""Async context manager entry."""
self._client = httpx.AsyncClient(
timeout=self.timeout,
verify=self._httpx_verify,
headers=self._get_headers(),
)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit."""
if self._client:
await self._client.aclose()
self._client = None
def _get_headers(self) -> Dict[str, str]:
"""Get request headers with user-agent."""
headers = {
"User-Agent": self.user_agent
}
headers.update(self.base_headers)
return headers
async def get(
self,
url: str,
params: Optional[Dict[str,
Any]] = None,
headers: Optional[Dict[str,
str]] = None,
allow_redirects: bool = True,
) -> httpx.Response:
"""
Make an async GET request.
Args:
url: Request URL
params: Query parameters
headers: Additional headers
allow_redirects: Follow redirects
Returns:
httpx.Response object
"""
return await self._request(
"GET",
url,
params=params,
headers=headers,
follow_redirects=allow_redirects,
)
async def post(
self,
url: str,
data: Optional[Any] = None,
json: Optional[Dict] = None,
headers: Optional[Dict[str,
str]] = None,
) -> httpx.Response:
"""
Make an async POST request.
Args:
url: Request URL
data: Form data
json: JSON data
headers: Additional headers
Returns:
httpx.Response object
"""
return await self._request(
"POST",
url,
data=data,
json=json,
headers=headers,
)
async def download(
self,
url: str,
file_path: str,
chunk_size: int = 8192,
progress_callback: Optional[Callable[[int,
int],
None]] = None,
headers: Optional[Dict[str,
str]] = None,
) -> Path:
"""
Download a file from URL asynchronously with optional progress tracking.
Args:
url: File URL
file_path: Local file path to save to
chunk_size: Download chunk size
progress_callback: Callback(bytes_downloaded, total_bytes)
headers: Additional headers
Returns:
Path object of downloaded file
"""
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
async with self._request_stream("GET", url, headers=headers) as response:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
with open(path, "wb") as f:
async for chunk in response.aiter_bytes(chunk_size):
if chunk:
f.write(chunk)
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
return path
    async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """
        Make an async HTTP request with automatic retries.

        Retries on timeouts, 5xx responses and connection errors with a short
        fixed delay; 4xx responses are raised immediately.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.AsyncClient.request()

        Returns:
            httpx.Response object
        """
        # The client only exists inside `async with`; fail fast otherwise.
        if not self._client:
            raise RuntimeError(
                "AsyncHTTPClient must be used with async context manager"
            )
        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()
        last_exception: Exception | None = None
        for attempt in range(self.retries):
            try:
                response = await self._client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(
                    f"Timeout on attempt {attempt + 1}/{self.retries}: {url}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)  # Brief delay before retry
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except Exception:
                        response_text = "<unable to read response>"
                    logger.error(
                        f"HTTP {e.response.status_code} from {url}: {response_text}"
                    )
                    raise
                # 5xx: record and retry after a short delay.
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except Exception:
                    response_text = "<unable to read response>"
                logger.warning(
                    f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(
                    f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue
        # All attempts exhausted: surface the most recent failure.
        if last_exception:
            logger.error(
                f"Request failed after {self.retries} attempts: {url} - {last_exception}"
            )
            raise last_exception
        raise RuntimeError("Request failed after retries")
def _request_stream(self, method: str, url: str, **kwargs):
"""Make a streaming request."""
if not self._client:
raise RuntimeError(
"AsyncHTTPClient must be used with async context manager"
)
# Merge headers
if "headers" in kwargs and kwargs["headers"]:
headers = self._get_headers()
headers.update(kwargs["headers"])
kwargs["headers"] = headers
else:
kwargs["headers"] = self._get_headers()
return self._client.stream(method, url, **kwargs)
# Convenience functions for quick sync requests
def get(url: str, **kwargs) -> httpx.Response:
    """One-shot GET request using a short-lived HTTPClient."""
    with HTTPClient() as session:
        return session.get(url, **kwargs)
def post(url: str, **kwargs) -> httpx.Response:
    """One-shot POST request using a short-lived HTTPClient."""
    with HTTPClient() as session:
        return session.post(url, **kwargs)
def download(
    url: str,
    file_path: str,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    **kwargs,
) -> Path:
    """One-shot file download using a short-lived HTTPClient."""
    with HTTPClient() as session:
        return session.download(
            url, file_path, progress_callback=progress_callback, **kwargs
        )