""" Unified HTTP client for downlow using httpx. Provides synchronous and asynchronous HTTP operations with: - Automatic retries on transient failures - Configurable timeouts and headers - Built-in progress tracking for downloads - Request/response logging support """ import httpx import asyncio from typing import Optional, Dict, Any, Callable, BinaryIO from pathlib import Path import logging logger = logging.getLogger(__name__) # Default configuration DEFAULT_TIMEOUT = 30.0 DEFAULT_RETRIES = 3 DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" class HTTPClient: """Unified HTTP client with sync support.""" def __init__( self, timeout: float = DEFAULT_TIMEOUT, retries: int = DEFAULT_RETRIES, user_agent: str = DEFAULT_USER_AGENT, verify_ssl: bool = True, headers: Optional[Dict[str, str]] = None, ): """ Initialize HTTP client. Args: timeout: Request timeout in seconds retries: Number of retries on transient failures user_agent: User-Agent header value verify_ssl: Whether to verify SSL certificates headers: Additional headers to include in all requests """ self.timeout = timeout self.retries = retries self.user_agent = user_agent self.verify_ssl = verify_ssl self.base_headers = headers or {} self._client: Optional[httpx.Client] = None def __enter__(self): """Context manager entry.""" self._client = httpx.Client( timeout=self.timeout, verify=self.verify_ssl, headers=self._get_headers(), ) return self def __exit__(self, exc_type, exc_val, exc_tb): """Context manager exit.""" if self._client: self._client.close() self._client = None def _get_headers(self) -> Dict[str, str]: """Get request headers with user-agent.""" headers = {"User-Agent": self.user_agent} headers.update(self.base_headers) return headers def get( self, url: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, allow_redirects: bool = True, ) -> httpx.Response: """ Make a GET request. Args: url: Request URL params: Query parameters headers: Additional headers allow_redirects: Follow redirects Returns: httpx.Response object """ return self._request( "GET", url, params=params, headers=headers, follow_redirects=allow_redirects, ) def post( self, url: str, data: Optional[Any] = None, json: Optional[Dict] = None, files: Optional[Dict] = None, headers: Optional[Dict[str, str]] = None, ) -> httpx.Response: """ Make a POST request. Args: url: Request URL data: Form data json: JSON data files: Files to upload headers: Additional headers Returns: httpx.Response object """ return self._request( "POST", url, data=data, json=json, files=files, headers=headers, ) def put( self, url: str, data: Optional[Any] = None, json: Optional[Dict] = None, content: Optional[Any] = None, files: Optional[Dict] = None, headers: Optional[Dict[str, str]] = None, ) -> httpx.Response: """ Make a PUT request. Args: url: Request URL data: Form data json: JSON data content: Raw content files: Files to upload headers: Additional headers Returns: httpx.Response object """ return self._request( "PUT", url, data=data, json=json, content=content, files=files, headers=headers, ) def delete( self, url: str, headers: Optional[Dict[str, str]] = None, ) -> httpx.Response: """ Make a DELETE request. Args: url: Request URL headers: Additional headers Returns: httpx.Response object """ return self._request( "DELETE", url, headers=headers, ) def request( self, method: str, url: str, **kwargs ) -> httpx.Response: """ Make a generic HTTP request. Args: method: HTTP method url: Request URL **kwargs: Additional arguments Returns: httpx.Response object """ return self._request(method, url, **kwargs) def download( self, url: str, file_path: str, chunk_size: int = 8192, progress_callback: Optional[Callable[[int, int], None]] = None, headers: Optional[Dict[str, str]] = None, ) -> Path: """ Download a file from URL with optional progress tracking. Args: url: File URL file_path: Local file path to save to chunk_size: Download chunk size progress_callback: Callback(bytes_downloaded, total_bytes) headers: Additional headers Returns: Path object of downloaded file """ path = Path(file_path) path.parent.mkdir(parents=True, exist_ok=True) with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response: response.raise_for_status() total_bytes = int(response.headers.get("content-length", 0)) bytes_downloaded = 0 with open(path, "wb") as f: for chunk in response.iter_bytes(chunk_size): if chunk: f.write(chunk) bytes_downloaded += len(chunk) if progress_callback: progress_callback(bytes_downloaded, total_bytes) return path def _request( self, method: str, url: str, **kwargs ) -> httpx.Response: """ Make an HTTP request with automatic retries. Args: method: HTTP method url: Request URL **kwargs: Additional arguments for httpx.Client.request() Returns: httpx.Response object """ if not self._client: raise RuntimeError("HTTPClient must be used with context manager (with statement)") # Merge headers if "headers" in kwargs and kwargs["headers"]: headers = self._get_headers() headers.update(kwargs["headers"]) kwargs["headers"] = headers else: kwargs["headers"] = self._get_headers() last_exception = None for attempt in range(self.retries): try: response = self._client.request(method, url, **kwargs) response.raise_for_status() return response except httpx.TimeoutException as e: last_exception = e logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}") if attempt < self.retries - 1: continue except httpx.HTTPStatusError as e: # Don't retry on 4xx errors if 400 <= e.response.status_code < 500: try: response_text = e.response.text[:500] except: response_text = "" logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}") raise last_exception = e try: response_text = e.response.text[:200] except: response_text = "" logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}") if attempt < self.retries - 1: continue except (httpx.RequestError, httpx.ConnectError) as e: last_exception = e logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}") if attempt < self.retries - 1: continue if last_exception: logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}") raise last_exception raise RuntimeError("Request failed after retries") def _request_stream(self, method: str, url: str, **kwargs): """Make a streaming request.""" if not self._client: raise RuntimeError("HTTPClient must be used with context manager (with statement)") # Merge headers if "headers" in kwargs and kwargs["headers"]: headers = self._get_headers() headers.update(kwargs["headers"]) kwargs["headers"] = headers else: kwargs["headers"] = self._get_headers() return self._client.stream(method, url, **kwargs) class AsyncHTTPClient: """Unified async HTTP client with asyncio support.""" def __init__( self, timeout: float = DEFAULT_TIMEOUT, retries: int = DEFAULT_RETRIES, user_agent: str = DEFAULT_USER_AGENT, verify_ssl: bool = True, headers: Optional[Dict[str, str]] = None, ): """ Initialize async HTTP client. Args: timeout: Request timeout in seconds retries: Number of retries on transient failures user_agent: User-Agent header value verify_ssl: Whether to verify SSL certificates headers: Additional headers to include in all requests """ self.timeout = timeout self.retries = retries self.user_agent = user_agent self.verify_ssl = verify_ssl self.base_headers = headers or {} self._client: Optional[httpx.AsyncClient] = None async def __aenter__(self): """Async context manager entry.""" self._client = httpx.AsyncClient( timeout=self.timeout, verify=self.verify_ssl, headers=self._get_headers(), ) return self async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit.""" if self._client: await self._client.aclose() self._client = None def _get_headers(self) -> Dict[str, str]: """Get request headers with user-agent.""" headers = {"User-Agent": self.user_agent} headers.update(self.base_headers) return headers async def get( self, url: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, allow_redirects: bool = True, ) -> httpx.Response: """ Make an async GET request. Args: url: Request URL params: Query parameters headers: Additional headers allow_redirects: Follow redirects Returns: httpx.Response object """ return await self._request( "GET", url, params=params, headers=headers, follow_redirects=allow_redirects, ) async def post( self, url: str, data: Optional[Any] = None, json: Optional[Dict] = None, headers: Optional[Dict[str, str]] = None, ) -> httpx.Response: """ Make an async POST request. Args: url: Request URL data: Form data json: JSON data headers: Additional headers Returns: httpx.Response object """ return await self._request( "POST", url, data=data, json=json, headers=headers, ) async def download( self, url: str, file_path: str, chunk_size: int = 8192, progress_callback: Optional[Callable[[int, int], None]] = None, headers: Optional[Dict[str, str]] = None, ) -> Path: """ Download a file from URL asynchronously with optional progress tracking. Args: url: File URL file_path: Local file path to save to chunk_size: Download chunk size progress_callback: Callback(bytes_downloaded, total_bytes) headers: Additional headers Returns: Path object of downloaded file """ path = Path(file_path) path.parent.mkdir(parents=True, exist_ok=True) async with self._request_stream("GET", url, headers=headers) as response: response.raise_for_status() total_bytes = int(response.headers.get("content-length", 0)) bytes_downloaded = 0 with open(path, "wb") as f: async for chunk in response.aiter_bytes(chunk_size): if chunk: f.write(chunk) bytes_downloaded += len(chunk) if progress_callback: progress_callback(bytes_downloaded, total_bytes) return path async def _request( self, method: str, url: str, **kwargs ) -> httpx.Response: """ Make an async HTTP request with automatic retries. Args: method: HTTP method url: Request URL **kwargs: Additional arguments for httpx.AsyncClient.request() Returns: httpx.Response object """ if not self._client: raise RuntimeError("AsyncHTTPClient must be used with async context manager") # Merge headers if "headers" in kwargs and kwargs["headers"]: headers = self._get_headers() headers.update(kwargs["headers"]) kwargs["headers"] = headers else: kwargs["headers"] = self._get_headers() last_exception = None for attempt in range(self.retries): try: response = await self._client.request(method, url, **kwargs) response.raise_for_status() return response except httpx.TimeoutException as e: last_exception = e logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}") if attempt < self.retries - 1: await asyncio.sleep(0.5) # Brief delay before retry continue except httpx.HTTPStatusError as e: # Don't retry on 4xx errors if 400 <= e.response.status_code < 500: try: response_text = e.response.text[:500] except: response_text = "" logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}") raise last_exception = e try: response_text = e.response.text[:200] except: response_text = "" logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}") if attempt < self.retries - 1: await asyncio.sleep(0.5) continue except (httpx.RequestError, httpx.ConnectError) as e: last_exception = e logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}") if attempt < self.retries - 1: await asyncio.sleep(0.5) continue if last_exception: logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}") raise last_exception raise RuntimeError("Request failed after retries") def _request_stream(self, method: str, url: str, **kwargs): """Make a streaming request.""" if not self._client: raise RuntimeError("AsyncHTTPClient must be used with async context manager") # Merge headers if "headers" in kwargs and kwargs["headers"]: headers = self._get_headers() headers.update(kwargs["headers"]) kwargs["headers"] = headers else: kwargs["headers"] = self._get_headers() return self._client.stream(method, url, **kwargs) # Convenience function for quick sync requests def get(url: str, **kwargs) -> httpx.Response: """Quick GET request without context manager.""" with HTTPClient() as client: return client.get(url, **kwargs) def post(url: str, **kwargs) -> httpx.Response: """Quick POST request without context manager.""" with HTTPClient() as client: return client.post(url, **kwargs) def download( url: str, file_path: str, progress_callback: Optional[Callable[[int, int], None]] = None, **kwargs ) -> Path: """Quick file download without context manager.""" with HTTPClient() as client: return client.download(url, file_path, progress_callback=progress_callback, **kwargs)