2025-12-25 16:02:46 -08:00
|
|
|
"""Library of Congress (LoC) API helpers.
|
|
|
|
|
|
|
|
|
|
This module currently focuses on the LoC JSON API endpoint for the
Chronicling America collection.

Docs:
- https://www.loc.gov/apis/
- https://www.loc.gov/apis/json-and-yaml/

The LoC JSON API does not require an API key.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
from typing import Any, Dict, Optional
|
|
|
|
|
|
|
|
|
|
from API.HTTP import HTTPClient
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LOCError(Exception):
    """Error raised when a Library of Congress API request fails.

    The client in this module wraps any exception raised while fetching or
    decoding a response in this type, so callers can catch a single
    exception class.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LOCClient:
    """Minimal client for the public LoC JSON API."""

    # Root of the LoC site; endpoint paths are appended to this.
    BASE_URL = "https://www.loc.gov"

    def __init__(self, *, timeout: float = 20.0):
        """Create a client.

        Args:
            timeout: Value passed as ``timeout`` to ``HTTPClient`` for every
                request; coerced to ``float``.
        """
        self.timeout = float(timeout)

    def _get_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``BASE_URL`` + ``path`` and return the decoded JSON body.

        Args:
            path: Endpoint path; leading slashes are normalized so exactly
                one ``/`` separates it from ``BASE_URL``. A falsy value
                requests the site root.
            params: Query-string parameters forwarded to ``client.get``.

        Returns:
            The parsed JSON payload.

        Raises:
            LOCError: If anything raised inside the request/decode step
                (transport failure, error status, invalid UTF-8 or JSON).
                The original exception is chained as ``__cause__``.
        """
        url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
        try:
            with HTTPClient(timeout=self.timeout) as client:
                resp = client.get(url, params=params)
                resp.raise_for_status()
                # httpx.Response.json() exists but keep decoding consistent
                return json.loads(resp.content.decode("utf-8"))
        except Exception as exc:
            # Deliberately broad: callers only need to handle LOCError.
            raise LOCError(str(exc)) from exc

    def search_chronicling_america(
        self,
        query: str,
        *,
        start: int = 1,
        count: int = 25,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search the Chronicling America collection via LoC JSON API.

        Args:
            query: Free-text query. A blank or whitespace-only query performs
                no request and yields ``{"results": []}``.
            start: 1-based start index (LoC uses `sp`). Values that are not
                positive fall back to 1.
            count: Results per page (LoC uses `c`). Values that are not
                positive fall back to 25.
            extra_params: Additional LoC API params (facets, filters, etc.).
                Entries whose value is ``None`` are dropped; keys are
                stringified; entries are applied after the built-in params,
                so they override `q`, `fo`, `c` and `sp` on a key clash.

        Returns:
            Parsed JSON response.

        Raises:
            LOCError: If the request or response decoding fails.
        """
        q = str(query or "").strip()
        if not q:
            return {"results": []}

        # Convert each paging value once, then apply the fallback default.
        page_size = int(count)
        start_index = int(start)

        params: Dict[str, Any] = {
            "q": q,
            "fo": "json",
            "c": page_size if page_size > 0 else 25,
            "sp": start_index if start_index > 0 else 1,
        }
        if extra_params:
            params.update(
                {str(k): v for k, v in extra_params.items() if v is not None}
            )

        return self._get_json("/collections/chronicling-america/", params)
|