rdf
This commit is contained in:
80
API/loc.py
Normal file
80
API/loc.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Library of Congress (LoC) API helpers.
|
||||
|
||||
This module currently focuses on the LoC JSON API endpoint for the
|
||||
Chronicling America collection.
|
||||
|
||||
Docs:
|
||||
- https://www.loc.gov/apis/
|
||||
- https://www.loc.gov/apis/json-and-yaml/
|
||||
|
||||
The LoC JSON API does not require an API key.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
|
||||
|
||||
class LOCError(Exception):
    """Raised when a Library of Congress API request or its decoding fails."""
|
||||
|
||||
|
||||
class LOCClient:
    """Minimal client for the public LoC JSON API."""

    BASE_URL = "https://www.loc.gov"

    def __init__(self, *, timeout: float = 20.0):
        """Remember the timeout that is passed to ``HTTPClient`` per request."""
        self.timeout = float(timeout)

    def _get_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``BASE_URL``/``path`` and return the decoded JSON object.

        Args:
            path: Endpoint path; leading slashes are normalised.
            params: Query-string parameters sent with the request.

        Returns:
            The decoded top-level JSON object.

        Raises:
            LOCError: On any transport, HTTP-status or decoding failure, or
                when the decoded payload is not a JSON object.
        """
        url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
        try:
            with HTTPClient(timeout=self.timeout) as client:
                resp = client.get(url, params=params)
                resp.raise_for_status()
                # httpx.Response.json() exists but keep decoding consistent
                payload = json.loads(resp.content.decode("utf-8"))
        except Exception as exc:
            raise LOCError(str(exc)) from exc

        # Callers rely on mapping access (``payload.get(...)``); fail loudly
        # here instead of letting an AttributeError surface further away.
        if not isinstance(payload, dict):
            raise LOCError(
                f"Unexpected JSON payload type: {type(payload).__name__}"
            )
        return payload

    def search_chronicling_america(
        self,
        query: str,
        *,
        start: int = 1,
        count: int = 25,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search the Chronicling America collection via LoC JSON API.

        Args:
            query: Free-text query. A blank query short-circuits without
                issuing a request.
            start: 1-based results page, sent as ``sp``. The LoC API treats
                ``sp`` as a page number, not an item offset. Non-positive
                values fall back to 1.
            count: Results per page, sent as ``c``. Non-positive values fall
                back to 25.
            extra_params: Additional LoC API params (facets, filters, etc.).
                Entries whose value is ``None`` are dropped. ``fo`` is
                ignored because this client only decodes JSON.

        Returns:
            Parsed JSON response, or ``{"results": []}`` for a blank query.

        Raises:
            LOCError: If the request or decoding fails.
        """
        q = str(query or "").strip()
        if not q:
            return {"results": []}

        per_page = int(count)
        page = int(start)
        params: Dict[str, Any] = {
            "q": q,
            "fo": "json",
            "c": per_page if per_page > 0 else 25,
            "sp": page if page > 0 else 1,
        }
        if extra_params:
            params.update(
                (str(k), v) for k, v in extra_params.items() if v is not None
            )
        # The response is always decoded as JSON, so the output format must
        # not be overridable through extra_params.
        params["fo"] = "json"

        return self._get_json("/collections/chronicling-america/", params)
|
||||
129
Provider/loc.py
Normal file
129
Provider/loc.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from API.loc import LOCClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from cli_syntax import get_free_text, parse_query
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
class LOC(Provider):
    """LoC search provider.

    Currently implements Chronicling America collection search via the LoC
    JSON API.
    """

    URL_DOMAINS = ["www.loc.gov"]

    # Largest number of results requested from the API in a single call.
    _MAX_PAGE_SIZE = 50
    # Fallback used when the caller passes ``limit=None``.
    _DEFAULT_LIMIT = 50

    def validate(self) -> bool:
        """Always valid: this provider needs no credentials or configuration."""
        return True

    @staticmethod
    def _to_search_result(item: Dict[str, Any]) -> SearchResult:
        """Map one LoC result object onto a ``SearchResult`` row."""
        title = str(item.get("title") or "").strip() or "(untitled)"
        date = str(item.get("date") or "").strip()

        # Fall back to the first alternate identifier when ``url`` is absent.
        url = str(item.get("url") or "").strip()
        aka = item.get("aka")
        if not url and isinstance(aka, list) and aka:
            url = str(aka[0] or "").strip()

        formats = item.get("online_format")
        if isinstance(formats, list):
            fmt_text = ", ".join(str(x) for x in formats if x)
        else:
            fmt_text = str(formats or "").strip()

        partof = item.get("partof")
        if isinstance(partof, list) and partof:
            source = str(partof[-1] or "").strip()
        else:
            source = "Chronicling America"

        detail = " — ".join(part for part in (date, source) if part)
        annotations: List[str] = [part for part in (date, fmt_text) if part]

        return SearchResult(
            table="loc",
            title=title,
            path=url or title,
            detail=detail,
            annotations=annotations,
            media_kind="document",
            columns=[
                ("Title", title),
                ("Date", date),
                ("Format", fmt_text),
                ("URL", url),
            ],
            full_metadata=item,
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search Chronicling America and return up to ``limit`` results.

        Args:
            query: Provider query string. An explicit ``q:`` field overrides
                the free text; every other parsed field is forwarded to the
                API as an extra parameter.
            limit: Maximum number of results. ``None`` falls back to the
                default of 50; a non-positive value yields no results.
            filters: Extra LoC API parameters. Parsed query fields take
                precedence over entries with the same key.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            The collected results. Failures are logged rather than raised;
            results gathered before a failure are still returned.
        """
        _ = kwargs
        parsed = parse_query(query or "")
        text = get_free_text(parsed).strip()
        fields = parsed.get("fields", {}) if isinstance(parsed, dict) else {}
        if not isinstance(fields, dict):
            fields = {}

        # Allow explicit q: override.
        q = str(fields.get("q") or text or "").strip()
        if not q:
            return []

        try:
            max_results = self._DEFAULT_LIMIT if limit is None else int(limit)
        except (TypeError, ValueError) as exc:
            log(f"[loc] search failed: {exc}")
            return []
        if max_results <= 0:
            return []

        # Pass through any extra filters supported by the LoC API.
        extra: Dict[str, Any] = {}
        if isinstance(filters, dict):
            extra.update(filters)
        extra.update({str(k): v for k, v in fields.items() if k != "q"})

        client = LOCClient()
        page_size = min(max_results, self._MAX_PAGE_SIZE)
        results: List[SearchResult] = []
        # LoC pagination: ``sp`` is a 1-based *page number* (with ``c`` items
        # per page), so advance one page at a time rather than by item count.
        page = 1
        try:
            while len(results) < max_results:
                payload = client.search_chronicling_america(
                    q, start=page, count=page_size, extra_params=extra
                )
                items = payload.get("results")
                if not isinstance(items, list) or not items:
                    break

                for item in items:
                    if not isinstance(item, dict):
                        continue
                    results.append(self._to_search_result(item))
                    if len(results) >= max_results:
                        break

                # A short page means the result set is exhausted.
                if len(items) < page_size:
                    break
                page += 1
        except Exception as exc:
            # Best effort: report the failure but keep what was already
            # fetched from earlier pages.
            log(f"[loc] search failed: {exc}")

        return results
|
||||
@@ -23,6 +23,7 @@ from Provider.telegram import Telegram
|
||||
from Provider.youtube import YouTube
|
||||
from Provider.fileio import FileIO
|
||||
from Provider.zeroxzero import ZeroXZero
|
||||
from Provider.loc import LOC
|
||||
|
||||
|
||||
_PROVIDERS: Dict[str, Type[Provider]] = {
|
||||
@@ -34,6 +35,7 @@ _PROVIDERS: Dict[str, Type[Provider]] = {
|
||||
"bandcamp": Bandcamp,
|
||||
"youtube": YouTube,
|
||||
"telegram": Telegram,
|
||||
"loc": LOC,
|
||||
# Upload-capable providers
|
||||
"0x0": ZeroXZero,
|
||||
"file.io": FileIO,
|
||||
|
||||
@@ -31,10 +31,10 @@ class Search_Provider(Cmdlet):
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
name="search-provider",
|
||||
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)",
|
||||
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc)",
|
||||
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
|
||||
arg=[
|
||||
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid"),
|
||||
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc"),
|
||||
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
|
||||
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
|
||||
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
|
||||
@@ -48,6 +48,8 @@ class Search_Provider(Cmdlet):
|
||||
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
|
||||
"- libgen: Search Library Genesis for books",
|
||||
" Example: search-provider -provider libgen \"python programming\"",
|
||||
"- loc: Search Library of Congress (Chronicling America)",
|
||||
" Example: search-provider -provider loc \"lincoln\"",
|
||||
"- soulseek: Search P2P network for music",
|
||||
" Example: search-provider -provider soulseek \"pink floyd\"",
|
||||
"- youtube: Search YouTube for videos",
|
||||
@@ -182,6 +184,8 @@ class Search_Provider(Cmdlet):
|
||||
provider_label = "Youtube"
|
||||
elif provider_lower == "openlibrary":
|
||||
provider_label = "OpenLibrary"
|
||||
elif provider_lower == "loc":
|
||||
provider_label = "LoC"
|
||||
else:
|
||||
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
|
||||
|
||||
@@ -189,7 +193,7 @@ class Search_Provider(Cmdlet):
|
||||
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
|
||||
else:
|
||||
table_title = f"{provider_label}: {query}".strip().rstrip(":")
|
||||
preserve_order = provider_name.lower() in ("youtube", "openlibrary")
|
||||
preserve_order = provider_name.lower() in ("youtube", "openlibrary", "loc")
|
||||
table = ResultTable(table_title).set_preserve_order(preserve_order)
|
||||
table.set_table(provider_name)
|
||||
table.set_source_command("search-provider", list(args))
|
||||
|
||||
@@ -31,7 +31,11 @@ click apply
|
||||
--------------------------------------------
|
||||
Edit the snippet below and place it in your config.conf
|
||||
|
||||
[store=hydrusnetwork]
|
||||
<figure>
|
||||
<figcaption>config.conf</figcaption>
|
||||
<pre><code class="language-powershell">[store=hydrusnetwork]
|
||||
NAME="shortnamenospacesorsymbols"
|
||||
API="apiaccesskeygoeshere"
|
||||
URL="apibaseurlgoeshere"
|
||||
</code></pre>
|
||||
</figure>
|
||||
|
||||
Reference in New Issue
Block a user