AST
This commit is contained in:
579
helper/http_client.py
Normal file
579
helper/http_client.py
Normal file
@@ -0,0 +1,579 @@
|
||||
"""
|
||||
Unified HTTP client for downlow using httpx.
|
||||
|
||||
Provides synchronous and asynchronous HTTP operations with:
|
||||
- Automatic retries on transient failures
|
||||
- Configurable timeouts and headers
|
||||
- Built-in progress tracking for downloads
|
||||
- Request/response logging support
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import asyncio
|
||||
from typing import Optional, Dict, Any, Callable, BinaryIO
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default configuration
|
||||
DEFAULT_TIMEOUT = 30.0
|
||||
DEFAULT_RETRIES = 3
|
||||
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
|
||||
|
||||
class HTTPClient:
|
||||
"""Unified HTTP client with sync support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: float = DEFAULT_TIMEOUT,
|
||||
retries: int = DEFAULT_RETRIES,
|
||||
user_agent: str = DEFAULT_USER_AGENT,
|
||||
verify_ssl: bool = True,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize HTTP client.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds
|
||||
retries: Number of retries on transient failures
|
||||
user_agent: User-Agent header value
|
||||
verify_ssl: Whether to verify SSL certificates
|
||||
headers: Additional headers to include in all requests
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
self.user_agent = user_agent
|
||||
self.verify_ssl = verify_ssl
|
||||
self.base_headers = headers or {}
|
||||
self._client: Optional[httpx.Client] = None
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
self._client = httpx.Client(
|
||||
timeout=self.timeout,
|
||||
verify=self.verify_ssl,
|
||||
headers=self._get_headers(),
|
||||
)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit."""
|
||||
if self._client:
|
||||
self._client.close()
|
||||
self._client = None
|
||||
|
||||
def _get_headers(self) -> Dict[str, str]:
|
||||
"""Get request headers with user-agent."""
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
headers.update(self.base_headers)
|
||||
return headers
|
||||
|
||||
def get(
|
||||
self,
|
||||
url: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a GET request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: Query parameters
|
||||
headers: Additional headers
|
||||
allow_redirects: Follow redirects
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"GET",
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
follow_redirects=allow_redirects,
|
||||
)
|
||||
|
||||
def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
files: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a POST request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
files: Files to upload
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"POST",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
files=files,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def put(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
content: Optional[Any] = None,
|
||||
files: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a PUT request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
content: Raw content
|
||||
files: Files to upload
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"PUT",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
content=content,
|
||||
files=files,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def delete(
|
||||
self,
|
||||
url: str,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a DELETE request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(
|
||||
"DELETE",
|
||||
url,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make a generic HTTP request.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return self._request(method, url, **kwargs)
|
||||
|
||||
def download(
|
||||
self,
|
||||
url: str,
|
||||
file_path: str,
|
||||
chunk_size: int = 8192,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download a file from URL with optional progress tracking.
|
||||
|
||||
Args:
|
||||
url: File URL
|
||||
file_path: Local file path to save to
|
||||
chunk_size: Download chunk size
|
||||
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
Path object of downloaded file
|
||||
"""
|
||||
path = Path(file_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with self._request_stream("GET", url, headers=headers, follow_redirects=True) as response:
|
||||
response.raise_for_status()
|
||||
total_bytes = int(response.headers.get("content-length", 0))
|
||||
bytes_downloaded = 0
|
||||
|
||||
with open(path, "wb") as f:
|
||||
for chunk in response.iter_bytes(chunk_size):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
bytes_downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(bytes_downloaded, total_bytes)
|
||||
|
||||
return path
|
||||
|
||||
def _request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an HTTP request with automatic retries.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments for httpx.Client.request()
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
if not self._client:
|
||||
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
response = self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.TimeoutException as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Don't retry on 4xx errors
|
||||
if 400 <= e.response.status_code < 500:
|
||||
try:
|
||||
response_text = e.response.text[:500]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
|
||||
raise
|
||||
last_exception = e
|
||||
try:
|
||||
response_text = e.response.text[:200]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
except (httpx.RequestError, httpx.ConnectError) as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
|
||||
if last_exception:
|
||||
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
|
||||
raise last_exception
|
||||
|
||||
raise RuntimeError("Request failed after retries")
|
||||
|
||||
def _request_stream(self, method: str, url: str, **kwargs):
|
||||
"""Make a streaming request."""
|
||||
if not self._client:
|
||||
raise RuntimeError("HTTPClient must be used with context manager (with statement)")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
return self._client.stream(method, url, **kwargs)
|
||||
|
||||
|
||||
class AsyncHTTPClient:
|
||||
"""Unified async HTTP client with asyncio support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: float = DEFAULT_TIMEOUT,
|
||||
retries: int = DEFAULT_RETRIES,
|
||||
user_agent: str = DEFAULT_USER_AGENT,
|
||||
verify_ssl: bool = True,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize async HTTP client.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds
|
||||
retries: Number of retries on transient failures
|
||||
user_agent: User-Agent header value
|
||||
verify_ssl: Whether to verify SSL certificates
|
||||
headers: Additional headers to include in all requests
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.retries = retries
|
||||
self.user_agent = user_agent
|
||||
self.verify_ssl = verify_ssl
|
||||
self.base_headers = headers or {}
|
||||
self._client: Optional[httpx.AsyncClient] = None
|
||||
|
||||
async def __aenter__(self):
|
||||
"""Async context manager entry."""
|
||||
self._client = httpx.AsyncClient(
|
||||
timeout=self.timeout,
|
||||
verify=self.verify_ssl,
|
||||
headers=self._get_headers(),
|
||||
)
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Async context manager exit."""
|
||||
if self._client:
|
||||
await self._client.aclose()
|
||||
self._client = None
|
||||
|
||||
def _get_headers(self) -> Dict[str, str]:
|
||||
"""Get request headers with user-agent."""
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
headers.update(self.base_headers)
|
||||
return headers
|
||||
|
||||
async def get(
|
||||
self,
|
||||
url: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async GET request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: Query parameters
|
||||
headers: Additional headers
|
||||
allow_redirects: Follow redirects
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return await self._request(
|
||||
"GET",
|
||||
url,
|
||||
params=params,
|
||||
headers=headers,
|
||||
follow_redirects=allow_redirects,
|
||||
)
|
||||
|
||||
async def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Any] = None,
|
||||
json: Optional[Dict] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async POST request.
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
data: Form data
|
||||
json: JSON data
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
return await self._request(
|
||||
"POST",
|
||||
url,
|
||||
data=data,
|
||||
json=json,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
async def download(
|
||||
self,
|
||||
url: str,
|
||||
file_path: str,
|
||||
chunk_size: int = 8192,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
) -> Path:
|
||||
"""
|
||||
Download a file from URL asynchronously with optional progress tracking.
|
||||
|
||||
Args:
|
||||
url: File URL
|
||||
file_path: Local file path to save to
|
||||
chunk_size: Download chunk size
|
||||
progress_callback: Callback(bytes_downloaded, total_bytes)
|
||||
headers: Additional headers
|
||||
|
||||
Returns:
|
||||
Path object of downloaded file
|
||||
"""
|
||||
path = Path(file_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
async with self._request_stream("GET", url, headers=headers) as response:
|
||||
response.raise_for_status()
|
||||
total_bytes = int(response.headers.get("content-length", 0))
|
||||
bytes_downloaded = 0
|
||||
|
||||
with open(path, "wb") as f:
|
||||
async for chunk in response.aiter_bytes(chunk_size):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
bytes_downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(bytes_downloaded, total_bytes)
|
||||
|
||||
return path
|
||||
|
||||
async def _request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
**kwargs
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Make an async HTTP request with automatic retries.
|
||||
|
||||
Args:
|
||||
method: HTTP method
|
||||
url: Request URL
|
||||
**kwargs: Additional arguments for httpx.AsyncClient.request()
|
||||
|
||||
Returns:
|
||||
httpx.Response object
|
||||
"""
|
||||
if not self._client:
|
||||
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
response = await self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except httpx.TimeoutException as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Timeout on attempt {attempt + 1}/{self.retries}: {url}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5) # Brief delay before retry
|
||||
continue
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Don't retry on 4xx errors
|
||||
if 400 <= e.response.status_code < 500:
|
||||
try:
|
||||
response_text = e.response.text[:500]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.error(f"HTTP {e.response.status_code} from {url}: {response_text}")
|
||||
raise
|
||||
last_exception = e
|
||||
try:
|
||||
response_text = e.response.text[:200]
|
||||
except:
|
||||
response_text = "<unable to read response>"
|
||||
logger.warning(f"HTTP {e.response.status_code} on attempt {attempt + 1}/{self.retries}: {url} - {response_text}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
except (httpx.RequestError, httpx.ConnectError) as e:
|
||||
last_exception = e
|
||||
logger.warning(f"Connection error on attempt {attempt + 1}/{self.retries}: {url} - {e}")
|
||||
if attempt < self.retries - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
|
||||
if last_exception:
|
||||
logger.error(f"Request failed after {self.retries} attempts: {url} - {last_exception}")
|
||||
raise last_exception
|
||||
|
||||
raise RuntimeError("Request failed after retries")
|
||||
|
||||
def _request_stream(self, method: str, url: str, **kwargs):
|
||||
"""Make a streaming request."""
|
||||
if not self._client:
|
||||
raise RuntimeError("AsyncHTTPClient must be used with async context manager")
|
||||
|
||||
# Merge headers
|
||||
if "headers" in kwargs and kwargs["headers"]:
|
||||
headers = self._get_headers()
|
||||
headers.update(kwargs["headers"])
|
||||
kwargs["headers"] = headers
|
||||
else:
|
||||
kwargs["headers"] = self._get_headers()
|
||||
|
||||
return self._client.stream(method, url, **kwargs)
|
||||
|
||||
|
||||
# Convenience function for quick sync requests
|
||||
def get(url: str, **kwargs) -> httpx.Response:
|
||||
"""Quick GET request without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.get(url, **kwargs)
|
||||
|
||||
|
||||
def post(url: str, **kwargs) -> httpx.Response:
|
||||
"""Quick POST request without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.post(url, **kwargs)
|
||||
|
||||
|
||||
def download(
|
||||
url: str,
|
||||
file_path: str,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
**kwargs
|
||||
) -> Path:
|
||||
"""Quick file download without context manager."""
|
||||
with HTTPClient() as client:
|
||||
return client.download(url, file_path, progress_callback=progress_callback, **kwargs)
|
||||
Reference in New Issue
Block a user