Files
Medios-Macina/API/loc.py

81 lines
2.2 KiB
Python
Raw Normal View History

2025-12-25 16:02:46 -08:00
"""Library of Congress (LoC) API helpers.
This module currently focuses on the LoC JSON API endpoint for the
Chronicling America collection.
Docs:
- https://www.loc.gov/apis/
- https://www.loc.gov/apis/json-and-yaml/
The LoC JSON API does not require an API key.
"""
from __future__ import annotations
import json
from typing import Any, Dict, Optional
from API.HTTP import HTTPClient
class LOCError(Exception):
    """Error raised by the LoC client when a request or its decoding fails."""
class LOCClient:
    """Minimal client for the public LoC JSON API."""

    BASE_URL = "https://www.loc.gov"

    def __init__(self, *, timeout: float = 20.0):
        """Create a client.

        Args:
            timeout: Value handed to ``HTTPClient`` as its ``timeout`` for
                every request made by this client.
        """
        self.timeout = float(timeout)

    @staticmethod
    def _positive_int(value: Any, default: int) -> int:
        """Coerce *value* to a positive int, falling back to *default*.

        Values that cannot be converted (``None``, non-numeric strings,
        infinities) and values that are zero or negative all yield
        *default*, so callers never see a conversion error.
        """
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return number if number > 0 else default

    def _get_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``BASE_URL`` + *path* with *params* and return the parsed JSON.

        Args:
            path: URL path; leading slashes are normalised and a falsy value
                is treated as the empty path.
            params: Query-string parameters passed through to the HTTP client.

        Returns:
            The decoded JSON body.

        Raises:
            LOCError: On any failure while requesting, checking the status
                of, or decoding the response. The original exception is
                chained as ``__cause__``.
        """
        url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
        try:
            with HTTPClient(timeout=self.timeout) as client:
                resp = client.get(url, params=params)
                resp.raise_for_status()
                # httpx.Response.json() exists but keep decoding consistent
                return json.loads(resp.content.decode("utf-8"))
        except Exception as exc:
            raise LOCError(str(exc)) from exc

    def search_chronicling_america(
        self,
        query: str,
        *,
        start: int = 1,
        count: int = 25,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search the Chronicling America collection via LoC JSON API.

        Args:
            query: Free-text query. An empty or whitespace-only query returns
                ``{"results": []}`` without making a request.
            start: 1-based start index (LoC uses `sp`). Values that are
                non-positive or not convertible to int fall back to 1.
            count: Results per page (LoC uses `c`). Values that are
                non-positive or not convertible to int fall back to 25.
            extra_params: Additional LoC API params (facets, filters, etc.).
                Entries whose value is ``None`` are skipped. They are applied
                after the defaults, so they can override them.

        Returns:
            Parsed JSON response.

        Raises:
            LOCError: If the request or response decoding fails.
        """
        q = str(query or "").strip()
        if not q:
            return {"results": []}

        params: Dict[str, Any] = {
            "q": q,
            "fo": "json",
            "c": self._positive_int(count, 25),
            "sp": self._positive_int(start, 1),
        }
        if extra_params:
            params.update(
                (str(key), value)
                for key, value in extra_params.items()
                if value is not None
            )

        return self._get_json("/collections/chronicling-america/", params)