""" Unified HTTP client for downlow using httpx. Provides synchronous and asynchronous HTTP operations with: - Automatic retries on transient failures - Configurable timeouts and headers - Built-in progress tracking for downloads - Request/response logging support """ import httpx import asyncio import sys import time import traceback import re import os from typing import Optional, Dict, Any, Callable, BinaryIO, List, Iterable, Set, Union from pathlib import Path from urllib.parse import unquote, urlparse, parse_qs import logging from SYS.logger import debug, is_debug_enabled, log from SYS.models import DebugLogger, DownloadError, DownloadMediaResult, ProgressBar from SYS.utils import ensure_directory, sha256_file try: # Optional; used for metadata extraction when available from SYS.metadata import extract_ytdlp_tags except Exception: # pragma: no cover - optional dependency extract_ytdlp_tags = None # type: ignore[assignment] logger = logging.getLogger(__name__) def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]: """Return the httpx verify argument, preferring system-aware bundles. Order of precedence: 1. If verify_ssl is not True (False or path), return it. 2. Respect existing SSL_CERT_FILE env var if present. 3. Prefer `pip_system_certs` if present and it exposes a bundle path. 4. Prefer `certifi_win32`/similar helpers by invoking them and reading certifi.where(). 5. Fall back to `certifi.where()` if available. 6. Otherwise, return True to let httpx use system defaults. """ if verify_ssl is not True: return verify_ssl env_cert = os.environ.get("SSL_CERT_FILE") if env_cert: return env_cert def _try_module_bundle(mod_name: str) -> Optional[str]: # Prefer checking sys.modules first (helps test injection / monkeypatching) try: mod = sys.modules.get(mod_name) if mod is None: mod = __import__(mod_name) except Exception: return None # Common APIs that return a bundle path for attr in ("where", "get_ca_bundle", "bundle_path", "get_bundle_path", "get_bundle"): fn = getattr(mod, attr, None) if callable(fn): try: res = fn() if res: return res except Exception: continue elif isinstance(fn, str) and fn: return fn # Some helpers (e.g., certifi_win32) expose an action to merge system certs for call_attr in ("add_windows_store_certs", "add_system_certs", "merge_system_certs"): fn = getattr(mod, call_attr, None) if callable(fn): try: fn() try: import certifi as _certifi res = _certifi.where() if res: return res except Exception: pass except Exception: pass return None # Prefer pip_system_certs if available for mod_name in ("pip_system_certs",): path = _try_module_bundle(mod_name) if path: try: os.environ["SSL_CERT_FILE"] = path except Exception: pass logger.info(f"SSL_CERT_FILE not set; using bundle from {mod_name}: {path}") return path # Special-case helpers that merge system certs (eg. 


# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"


class HTTPClient:
    """Unified synchronous HTTP client."""

    def __init__(
        self,
        timeout: float = DEFAULT_TIMEOUT,
        retries: int = DEFAULT_RETRIES,
        user_agent: str = DEFAULT_USER_AGENT,
        verify_ssl: bool = True,
        headers: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize HTTP client.

        Args:
            timeout: Request timeout in seconds
            retries: Number of retries on transient failures
            user_agent: User-Agent header value
            verify_ssl: Whether to verify SSL certificates
            headers: Additional headers to include in all requests
        """
        self.timeout = timeout
        self.retries = retries
        self.user_agent = user_agent
        self.verify_ssl = verify_ssl
        self.base_headers = headers or {}
        self._client: Optional[httpx.Client] = None
        self._httpx_verify = _resolve_verify_value(verify_ssl)

    # Debug helpers
    def _debug_panel(self, title: str, rows: List[tuple[str, Any]]) -> None:
        if not is_debug_enabled():
            return
        try:
            from rich.table import Table
            from rich.panel import Panel

            grid = Table.grid(padding=(0, 1))
            grid.add_column("Key", style="cyan", no_wrap=True)
            grid.add_column("Value")
            for key, val in rows:
                try:
                    grid.add_row(str(key), str(val))
                except Exception:
                    grid.add_row(str(key), "")
            debug(Panel(grid, title=title, expand=False))
        except Exception:
            # Fallback to simple debug output
            debug(title, rows)

    def __enter__(self):
        """Context manager entry."""
        self._client = httpx.Client(
            timeout=self.timeout,
            verify=self._httpx_verify,
            headers=self._get_headers(),
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        if self._client:
            self._client.close()
            self._client = None

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with user-agent."""
        headers = {"User-Agent": self.user_agent}
        headers.update(self.base_headers)
        return headers

    def get(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        allow_redirects: bool = True,
    ) -> httpx.Response:
        """
        Make a GET request.

        Args:
            url: Request URL
            params: Query parameters
            headers: Additional headers
            allow_redirects: Follow redirects

        Returns:
            httpx.Response object
        """
        return self._request(
            "GET",
            url,
            params=params,
            headers=headers,
            follow_redirects=allow_redirects,
        )

    def post(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        files: Optional[Dict] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> httpx.Response:
        """
        Make a POST request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            files: Files to upload
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return self._request(
            "POST",
            url,
            data=data,
            json=json,
            files=files,
            headers=headers,
        )

    def put(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        content: Optional[Any] = None,
        files: Optional[Dict] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> httpx.Response:
        """
        Make a PUT request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            content: Raw content
            files: Files to upload
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return self._request(
            "PUT",
            url,
            data=data,
            json=json,
            content=content,
            files=files,
            headers=headers,
        )

    def delete(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
    ) -> httpx.Response:
        """
        Make a DELETE request.

        Args:
            url: Request URL
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return self._request(
            "DELETE",
            url,
            headers=headers,
        )

    def request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """
        Make a generic HTTP request.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments

        Returns:
            httpx.Response object
        """
        return self._request(method, url, **kwargs)

    def download(
        self,
        url: str,
        file_path: str,
        chunk_size: int = 8192,
        progress_callback: Optional[Callable[[int, int], None]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Path:
        """
        Download a file from URL with optional progress tracking.

        Args:
            url: File URL
            file_path: Local file path to save to
            chunk_size: Download chunk size
            progress_callback: Callback(bytes_downloaded, total_bytes)
            headers: Additional headers

        Returns:
            Path object of downloaded file
        """
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
            response.raise_for_status()
            total_bytes = int(response.headers.get("content-length", 0))
            bytes_downloaded = 0

            # Render progress immediately (even if the transfer is very fast)
            if progress_callback:
                try:
                    progress_callback(0, total_bytes)
                except Exception:
                    pass

            with open(path, "wb") as f:
                for chunk in response.iter_bytes(chunk_size):
                    if chunk:
                        f.write(chunk)
                        bytes_downloaded += len(chunk)
                        if progress_callback:
                            progress_callback(bytes_downloaded, total_bytes)

            # Ensure a final callback is emitted.
            if progress_callback:
                try:
                    progress_callback(bytes_downloaded, total_bytes)
                except Exception:
                    pass

        return path

    def _request(
        self,
        method: str,
        url: str,
        raise_for_status: bool = True,
        log_http_errors: bool = True,
        **kwargs,
    ) -> httpx.Response:
        """
        Make an HTTP request with automatic retries.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.Client.request()

        Returns:
            httpx.Response object
        """
        if not self._client:
            raise RuntimeError(
                "HTTPClient must be used with context manager (with statement)"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        last_exception = None
        for attempt in range(self.retries):
            self._debug_panel(
                "HTTP request",
                [
                    ("method", method),
                    ("url", url),
                    ("attempt", f"{attempt + 1}/{self.retries}"),
                    ("params", kwargs.get("params")),
                    ("headers", kwargs.get("headers")),
                    ("verify", self._httpx_verify),
                    ("follow_redirects", kwargs.get("follow_redirects", False)),
                ],
            )
            try:
                response = self._client.request(method, url, **kwargs)
                self._debug_panel(
                    "HTTP response",
                    [
                        ("method", method),
                        ("url", url),
                        ("status", getattr(response, "status_code", "")),
                        ("elapsed", getattr(response, "elapsed", "")),
                        (
                            "content_length",
                            response.headers.get("content-length")
                            if hasattr(response, "headers")
                            else "",
                        ),
                    ],
                )
                if raise_for_status:
                    response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(
                    f"Timeout on attempt {attempt + 1}/{self.retries}: {url}"
                )
                if attempt < self.retries - 1:
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except Exception:
                        response_text = ""
                    if log_http_errors:
                        logger.error(
                            f"HTTP {e.response.status_code} from {url}: {response_text}"
                        )
                    raise
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except Exception:
                    response_text = ""
                logger.warning(
                    f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
                )
                if attempt < self.retries - 1:
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(
                    f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
                )
                # Detect certificate verification failures in the underlying error
                msg = str(e or "").lower()
                if (
                    "certificate verify failed" in msg
                    or "unable to get local issuer certificate" in msg
                ):
                    logger.info(
                        "Certificate verification failed; attempting to retry with a system-aware CA bundle"
                    )
                    try:
                        import httpx as _httpx

                        # Use the client's precomputed verify argument (set at init)
                        verify_override = self._httpx_verify
                        with _httpx.Client(
                            timeout=self.timeout,
                            verify=verify_override,
                            headers=self._get_headers(),
                        ) as temp_client:
                            try:
                                response = temp_client.request(method, url, **kwargs)
                                if raise_for_status:
                                    response.raise_for_status()
                                return response
                            except Exception as e2:
                                last_exception = e2
                    except Exception:
                        # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
                        pass
                if attempt < self.retries - 1:
                    continue
            except Exception as e:
                # Catch-all to handle non-httpx exceptions that may represent
                # certificate verification failures from underlying transports.
                last_exception = e
                logger.warning(
                    f"Request exception on attempt {attempt + 1}/{self.retries}: {url} - {e}"
                )
                msg = str(e or "").lower()
                if (
                    "certificate verify failed" in msg
                    or "unable to get local issuer certificate" in msg
                ):
                    logger.info(
                        "Certificate verification failed; attempting to retry with a system-aware CA bundle"
                    )
                    try:
                        import httpx as _httpx

                        # Use the client's precomputed verify argument (set at init)
                        verify_override = self._httpx_verify
                        with _httpx.Client(
                            timeout=self.timeout,
                            verify=verify_override,
                            headers=self._get_headers(),
                        ) as temp_client:
                            try:
                                response = temp_client.request(method, url, **kwargs)
                                if raise_for_status:
                                    response.raise_for_status()
                                return response
                            except Exception as e2:
                                last_exception = e2
                    except Exception:
                        # httpx not available here; fall back to existing retry behavior
                        pass
                if attempt < self.retries - 1:
                    continue

        if last_exception:
            logger.error(
                f"Request failed after {self.retries} attempts: {url} - {last_exception}"
            )
            raise last_exception
        raise RuntimeError("Request failed after retries")

    def _request_stream(self, method: str, url: str, **kwargs):
        """Make a streaming request."""
        if not self._client:
            raise RuntimeError(
                "HTTPClient must be used with context manager (with statement)"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        self._debug_panel(
            "HTTP stream",
            [
                ("method", method),
                ("url", url),
                ("headers", kwargs.get("headers")),
                ("follow_redirects", kwargs.get("follow_redirects", False)),
            ],
        )
        return self._client.stream(method, url, **kwargs)
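

# Usage sketch for HTTPClient (illustrative only; the URL and destination path
# below are placeholders, not values used elsewhere in this module):
def _example_fetch_and_download() -> None:  # pragma: no cover - example
    with HTTPClient(timeout=15.0, retries=2) as client:
        # Plain GET with query parameters; raises on HTTP errors by default.
        resp = client.get("https://example.com/api", params={"q": "term"})
        _ = resp.json()
        # Streamed download; the callback receives (bytes_downloaded, total_bytes).
        client.download(
            "https://example.com/file.bin",
            "downloads/file.bin",
            progress_callback=lambda done, total: None,
        )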


def download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
    suggested_filename: Optional[str] = None,
    pipeline_progress: Optional[Any] = None,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) with guardrails and metadata hooks."""
    ensure_directory(output_dir)

    def _sanitize_filename(name: str) -> str:
        # Windows-safe filename sanitization.
        text = str(name or "").strip()
        if not text:
            return ""
        text = text.replace("/", "\\")
        text = text.split("\\")[-1]
        invalid = set('<>:"/\\|?*')
        cleaned_chars: List[str] = []
        for ch in text:
            o = ord(ch)
            if o < 32 or ch in invalid:
                cleaned_chars.append(" ")
                continue
            cleaned_chars.append(ch)
        cleaned = " ".join("".join(cleaned_chars).split()).strip()
        cleaned = cleaned.rstrip(" .")
        return cleaned

    def _unique_path(path: Path) -> Path:
        if not path.exists():
            return path
        stem = path.stem
        suffix = path.suffix
        parent = path.parent
        for i in range(1, 10_000):
            candidate = parent / f"{stem} ({i}){suffix}"
            if not candidate.exists():
                return candidate
        return parent / f"{stem} ({int(time.time())}){suffix}"

    parsed_url = urlparse(url)
    url_path = parsed_url.path
    filename: Optional[str] = None

    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ("filename", "download", "file", "name"):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break

    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)
        if "?" in filename:
            filename = filename.split("?")[0]

    content_type = ""
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            try:
                content_type = str(response.headers.get("content-type", "") or "").strip().lower()
            except Exception:
                content_type = ""
            if content_disposition:
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as exc:
        if not quiet:
            log(f"Could not get filename from headers: {exc}", file=sys.stderr)

    try:
        page_like_exts = {".php", ".asp", ".aspx", ".jsp", ".cgi"}
        ext = ""
        try:
            ext = Path(str(filename or "")).suffix.lower()
        except Exception:
            ext = ""
        ct0 = (content_type or "").split(";", 1)[0].strip().lower()
        must_probe = bool(ct0.startswith("text/html") or ext in page_like_exts)
        if must_probe:
            with HTTPClient(timeout=10.0) as client:
                with client._request_stream("GET", url, follow_redirects=True) as resp:
                    resp.raise_for_status()
                    ct = (
                        str(resp.headers.get("content-type", "") or "")
                        .split(";", 1)[0]
                        .strip()
                        .lower()
                    )
                    if ct.startswith("text/html"):
                        raise DownloadError("URL appears to be an HTML page, not a direct file")
    except DownloadError:
        raise
    except Exception:
        pass

    suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
    if suggested:
        suggested_path = Path(suggested)
        if suggested_path.suffix:
            filename = suggested
        else:
            detected_ext = ""
            try:
                detected_ext = Path(str(filename)).suffix
            except Exception:
                detected_ext = ""
            filename = suggested + detected_ext if detected_ext else suggested

    try:
        has_ext = bool(filename and Path(str(filename)).suffix)
    except Exception:
        has_ext = False
    if filename and (not has_ext):
        ct = (content_type or "").split(";", 1)[0].strip().lower()
        ext_by_ct = {
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/x-mobipocket-ebook": ".mobi",
            "image/jpeg": ".jpg",
            "image/png": ".png",
            "image/webp": ".webp",
            "image/gif": ".gif",
            "text/plain": ".txt",
            "application/zip": ".zip",
        }
        if ct in ext_by_ct:
            filename = f"{filename}{ext_by_ct[ct]}"
        elif ct.startswith("text/html"):
            raise DownloadError("URL appears to be an HTML page, not a direct file")

    if not filename or not str(filename).strip():
        raise DownloadError(
            "Could not determine filename for URL (no Content-Disposition and no path filename)"
        )

    file_path = _unique_path(output_dir / str(filename))

    use_pipeline_transfer = False
    try:
        if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
            ui = None
            if hasattr(pipeline_progress, "ui_and_pipe_index"):
                ui, _ = pipeline_progress.ui_and_pipe_index()  # type: ignore[attr-defined]
            use_pipeline_transfer = ui is not None
    except Exception:
        use_pipeline_transfer = False

    progress_bar: Optional[ProgressBar] = None
    if (not quiet) and (not use_pipeline_transfer):
        progress_bar = ProgressBar()
    transfer_started = [False]

    if not quiet:
        debug(f"Direct download: {filename}")

    try:
        start_time = time.time()
        downloaded_bytes = [0]
        transfer_started[0] = False
"begin_transfer"): pipeline_progress.begin_transfer( label=str(filename or "download"), total=total_val, ) transfer_started[0] = True except Exception: return def progress_callback(bytes_downloaded: int, content_length: int) -> None: downloaded_bytes[0] = int(bytes_downloaded or 0) try: if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"): _maybe_begin_transfer(content_length) total_val: Optional[int] = ( int(content_length) if isinstance(content_length, int) and content_length > 0 else None ) pipeline_progress.update_transfer( label=str(filename or "download"), completed=int(bytes_downloaded or 0), total=total_val, ) except Exception: pass if progress_bar is not None: progress_bar.update( downloaded=int(bytes_downloaded or 0), total=int(content_length) if content_length and content_length > 0 else None, label=str(filename or "download"), file=sys.stderr, ) with HTTPClient(timeout=30.0) as client: client.download(url, str(file_path), progress_callback=progress_callback) elapsed = time.time() - start_time try: if progress_bar is not None: progress_bar.finish() except Exception: pass try: if pipeline_progress is not None and transfer_started[0] and hasattr( pipeline_progress, "finish_transfer" ): pipeline_progress.finish_transfer(label=str(filename or "download")) except Exception: pass if not quiet: debug(f"✓ Downloaded in {elapsed:.1f}s") ext_out = "" try: ext_out = Path(str(filename)).suffix.lstrip(".") except Exception: ext_out = "" info: Dict[str, Any] = { "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename), "ext": ext_out, "webpage_url": url, } hash_value = None try: hash_value = sha256_file(file_path) except Exception: pass tags: List[str] = [] if extract_ytdlp_tags: try: tags = extract_ytdlp_tags(info) except Exception as exc: log(f"Error extracting tags: {exc}", file=sys.stderr) if not any(str(t).startswith("title:") for t in tags): info["title"] = str(filename) tags = [] if extract_ytdlp_tags: try: tags = extract_ytdlp_tags(info) except Exception as exc: log(f"Error extracting tags with filename: {exc}", file=sys.stderr) if debug_logger is not None: debug_logger.write_record( "direct-file-downloaded", {"url": url, "path": str(file_path), "hash": hash_value}, ) return DownloadMediaResult( path=file_path, info=info, tag=tags, source_url=url, hash_value=hash_value, ) except (httpx.HTTPError, httpx.RequestError) as exc: try: if progress_bar is not None: progress_bar.finish() except Exception: pass try: if pipeline_progress is not None and transfer_started[0] and hasattr( pipeline_progress, "finish_transfer" ): pipeline_progress.finish_transfer(label=str(filename or "download")) except Exception: pass log(f"Download error: {exc}", file=sys.stderr) if debug_logger is not None: debug_logger.write_record( "exception", {"phase": "direct-file", "url": url, "error": str(exc)}, ) raise DownloadError(f"Failed to download {url}: {exc}") from exc except Exception as exc: try: if progress_bar is not None: progress_bar.finish() except Exception: pass try: if pipeline_progress is not None and transfer_started[0] and hasattr( pipeline_progress, "finish_transfer" ): pipeline_progress.finish_transfer(label=str(filename or "download")) except Exception: pass log(f"Error downloading file: {exc}", file=sys.stderr) if debug_logger is not None: debug_logger.write_record( "exception", { "phase": "direct-file", "url": url, "error": str(exc), "traceback": traceback.format_exc(), }, ) raise DownloadError(f"Error downloading file: {exc}") from exc # 

# Back-compat alias
_download_direct_file = download_direct_file


class AsyncHTTPClient:
    """Unified async HTTP client with asyncio support."""

    def __init__(
        self,
        timeout: float = DEFAULT_TIMEOUT,
        retries: int = DEFAULT_RETRIES,
        user_agent: str = DEFAULT_USER_AGENT,
        verify_ssl: bool = True,
        headers: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize async HTTP client.

        Args:
            timeout: Request timeout in seconds
            retries: Number of retries on transient failures
            user_agent: User-Agent header value
            verify_ssl: Whether to verify SSL certificates
            headers: Additional headers to include in all requests
        """
        self.timeout = timeout
        self.retries = retries
        self.user_agent = user_agent
        self.verify_ssl = verify_ssl
        self.base_headers = headers or {}
        self._client: Optional[httpx.AsyncClient] = None
        self._httpx_verify = _resolve_verify_value(verify_ssl)

    async def __aenter__(self):
        """Async context manager entry."""
        self._client = httpx.AsyncClient(
            timeout=self.timeout,
            verify=self._httpx_verify,
            headers=self._get_headers(),
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _get_headers(self) -> Dict[str, str]:
        """Get request headers with user-agent."""
        headers = {"User-Agent": self.user_agent}
        headers.update(self.base_headers)
        return headers

    async def get(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        allow_redirects: bool = True,
    ) -> httpx.Response:
        """
        Make an async GET request.

        Args:
            url: Request URL
            params: Query parameters
            headers: Additional headers
            allow_redirects: Follow redirects

        Returns:
            httpx.Response object
        """
        return await self._request(
            "GET",
            url,
            params=params,
            headers=headers,
            follow_redirects=allow_redirects,
        )

    async def post(
        self,
        url: str,
        data: Optional[Any] = None,
        json: Optional[Dict] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> httpx.Response:
        """
        Make an async POST request.

        Args:
            url: Request URL
            data: Form data
            json: JSON data
            headers: Additional headers

        Returns:
            httpx.Response object
        """
        return await self._request(
            "POST",
            url,
            data=data,
            json=json,
            headers=headers,
        )

    async def download(
        self,
        url: str,
        file_path: str,
        chunk_size: int = 8192,
        progress_callback: Optional[Callable[[int, int], None]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Path:
        """
        Download a file from URL asynchronously with optional progress tracking.

        Args:
            url: File URL
            file_path: Local file path to save to
            chunk_size: Download chunk size
            progress_callback: Callback(bytes_downloaded, total_bytes)
            headers: Additional headers

        Returns:
            Path object of downloaded file
        """
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        async with self._request_stream("GET", url, headers=headers) as response:
            response.raise_for_status()
            total_bytes = int(response.headers.get("content-length", 0))
            bytes_downloaded = 0

            with open(path, "wb") as f:
                async for chunk in response.aiter_bytes(chunk_size):
                    if chunk:
                        f.write(chunk)
                        bytes_downloaded += len(chunk)
                        if progress_callback:
                            progress_callback(bytes_downloaded, total_bytes)

        return path

    async def _request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """
        Make an async HTTP request with automatic retries.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional arguments for httpx.AsyncClient.request()

        Returns:
            httpx.Response object
        """
        if not self._client:
            raise RuntimeError(
                "AsyncHTTPClient must be used with async context manager"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        last_exception = None
        for attempt in range(self.retries):
            try:
                response = await self._client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.TimeoutException as e:
                last_exception = e
                logger.warning(
                    f"Timeout on attempt {attempt + 1}/{self.retries}: {url}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)  # Brief delay before retry
                    continue
            except httpx.HTTPStatusError as e:
                # Don't retry on 4xx errors
                if 400 <= e.response.status_code < 500:
                    try:
                        response_text = e.response.text[:500]
                    except Exception:
                        response_text = ""
                    logger.error(
                        f"HTTP {e.response.status_code} from {url}: {response_text}"
                    )
                    raise
                last_exception = e
                try:
                    response_text = e.response.text[:200]
                except Exception:
                    response_text = ""
                logger.warning(
                    f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue
            except (httpx.RequestError, httpx.ConnectError) as e:
                last_exception = e
                logger.warning(
                    f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}"
                )
                if attempt < self.retries - 1:
                    await asyncio.sleep(0.5)
                    continue

        if last_exception:
            logger.error(
                f"Request failed after {self.retries} attempts: {url} - {last_exception}"
            )
            raise last_exception
        raise RuntimeError("Request failed after retries")

    def _request_stream(self, method: str, url: str, **kwargs):
        """Make a streaming request."""
        if not self._client:
            raise RuntimeError(
                "AsyncHTTPClient must be used with async context manager"
            )

        # Merge headers
        if "headers" in kwargs and kwargs["headers"]:
            headers = self._get_headers()
            headers.update(kwargs["headers"])
            kwargs["headers"] = headers
        else:
            kwargs["headers"] = self._get_headers()

        return self._client.stream(method, url, **kwargs)


# Convenience functions for quick sync requests
def get(url: str, **kwargs) -> httpx.Response:
    """Quick GET request without context manager."""
    with HTTPClient() as client:
        return client.get(url, **kwargs)


def post(url: str, **kwargs) -> httpx.Response:
    """Quick POST request without context manager."""
    with HTTPClient() as client:
        return client.post(url, **kwargs)


def download(
    url: str,
    file_path: str,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    **kwargs,
) -> Path:
    """Quick file download without context manager."""
    with HTTPClient() as client:
        return client.download(
            url, file_path, progress_callback=progress_callback, **kwargs
        )
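

# Async usage sketch (illustrative only; the URL and destination path are
# placeholders). Each async method must be awaited inside the async context
# manager, e.g. via asyncio.run(_example_async_fetch()).
async def _example_async_fetch() -> None:  # pragma: no cover - example
    async with AsyncHTTPClient(timeout=15.0) as client:
        resp = await client.get("https://example.com/api")
        _ = resp.status_code
        # Streams the body with aiter_bytes under the hood.
        await client.download("https://example.com/file.bin", "downloads/file.bin")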