rdf
This commit is contained in:
80
API/loc.py
Normal file
80
API/loc.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""Library of Congress (LoC) API helpers.
|
||||||
|
|
||||||
|
This module currently focuses on the LoC JSON API endpoint for the
|
||||||
|
Chronicling America collection.
|
||||||
|
|
||||||
|
Docs:
|
||||||
|
- https://www.loc.gov/apis/
|
||||||
|
- https://www.loc.gov/apis/json-and-yaml/
|
||||||
|
|
||||||
|
The LoC JSON API does not require an API key.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from API.HTTP import HTTPClient
|
||||||
|
|
||||||
|
|
||||||
|
class LOCError(Exception):
    """Error raised by the Library of Congress (LoC) API helpers."""
|
||||||
|
|
||||||
|
|
||||||
|
class LOCClient:
    """Minimal client for the public LoC JSON API.

    Requests are issued through ``HTTPClient``; any failure while fetching
    or decoding a response is re-raised as ``LOCError``.
    """

    BASE_URL = "https://www.loc.gov"

    def __init__(self, *, timeout: float = 20.0):
        """Create a client.

        Args:
            timeout: Timeout value handed to ``HTTPClient`` for every
                request (presumably seconds -- confirm against HTTPClient).
        """
        self.timeout = float(timeout)

    def _get_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``BASE_URL`` + ``path`` and return the decoded JSON object.

        Args:
            path: URL path; leading slashes are normalised.
            params: Query-string parameters forwarded to the HTTP client.

        Returns:
            The response body parsed as a JSON object.

        Raises:
            LOCError: If the request fails, the status is an error, the body
                is not valid UTF-8 JSON, or the decoded JSON is not an object.
        """
        url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
        try:
            with HTTPClient(timeout=self.timeout) as client:
                resp = client.get(url, params=params)
                resp.raise_for_status()
                # httpx.Response.json() exists but keep decoding consistent
                payload = json.loads(resp.content.decode("utf-8"))
        except Exception as exc:
            raise LOCError(str(exc)) from exc

        # Callers rely on dict access (``payload.get(...)``); fail loudly here
        # rather than handing back a list/str that breaks them later.
        if not isinstance(payload, dict):
            raise LOCError(
                f"unexpected JSON payload type: {type(payload).__name__}"
            )
        return payload

    def search_chronicling_america(
        self,
        query: str,
        *,
        start: int = 1,
        count: int = 25,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search the Chronicling America collection via LoC JSON API.

        Args:
            query: Free-text query. A blank query short-circuits to an empty
                result set without contacting the API.
            start: Value sent as LoC's `sp` parameter. The LoC API documents
                `sp` as a 1-based *page number* (not an item offset).
                Non-positive values fall back to 1.
            count: Results per page (LoC uses `c`). Non-positive values fall
                back to 25.
            extra_params: Additional LoC API params (facets, filters, etc.).
                Entries whose value is ``None`` are dropped. ``fo`` is
                ignored because this client always decodes JSON.

        Returns:
            Parsed JSON response.

        Raises:
            LOCError: If the request or decoding fails.
        """
        q = str(query or "").strip()
        if not q:
            return {"results": []}

        per_page = int(count)
        page = int(start)
        params: Dict[str, Any] = {
            "q": q,
            "fo": "json",
            "c": per_page if per_page > 0 else 25,
            "sp": page if page > 0 else 1,
        }
        if extra_params:
            for k, v in extra_params.items():
                key = str(k)
                # The response is always parsed as JSON, so the output format
                # must not be overridden by pass-through parameters.
                if v is None or key == "fo":
                    continue
                params[key] = v

        return self._get_json("/collections/chronicling-america/", params)
|
||||||
129
Provider/loc.py
Normal file
129
Provider/loc.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from API.loc import LOCClient
|
||||||
|
from ProviderCore.base import Provider, SearchResult
|
||||||
|
from cli_syntax import get_free_text, parse_query
|
||||||
|
from SYS.logger import log
|
||||||
|
|
||||||
|
|
||||||
|
class LOC(Provider):
    """LoC search provider.

    Currently implements Chronicling America collection search via the LoC JSON API.
    """

    URL_DOMAINS = ["www.loc.gov"]

    def validate(self) -> bool:
        """Report the provider as usable; nothing is checked here."""
        return True

    @staticmethod
    def _to_result(it: Dict[str, Any]) -> SearchResult:
        """Convert one LoC result item (a JSON object) into a SearchResult."""
        title = str(it.get("title") or "").strip() or "(untitled)"
        date = str(it.get("date") or "").strip()

        # Prefer the item's own URL; fall back to the first "aka" alias.
        url = str(it.get("url") or "").strip()
        aka = it.get("aka")
        if (not url) and isinstance(aka, list) and aka:
            url = str(aka[0] or "").strip()

        formats = it.get("online_format")
        if isinstance(formats, list):
            fmt_text = ", ".join(str(x) for x in formats if x)
        else:
            fmt_text = str(formats or "").strip()

        # The last "partof" entry is used as the source label when present.
        partof = it.get("partof")
        if isinstance(partof, list) and partof:
            source = str(partof[-1] or "").strip()
        else:
            source = "Chronicling America"

        detail = " — ".join(part for part in (date, source) if part)
        annotations: List[str] = [part for part in (date, fmt_text) if part]

        return SearchResult(
            table="loc",
            title=title,
            path=url or title,
            detail=detail,
            annotations=annotations,
            media_kind="document",
            columns=[
                ("Title", title),
                ("Date", date),
                ("Format", fmt_text),
                ("URL", url),
            ],
            full_metadata=it,
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search Chronicling America and return up to ``limit`` results.

        Args:
            query: Query string run through ``parse_query``. A ``q`` field
                overrides the free text; all other parsed fields are passed
                to the LoC API as extra parameters.
            limit: Maximum number of results. ``None`` is treated as the
                default of 50; a non-positive value yields an empty list.
            filters: Extra LoC API parameters; parsed query fields with the
                same key take precedence.
            **kwargs: Accepted and ignored.

        Returns:
            The collected results, or an empty list if the query is blank or
            any request fails (the failure is logged).
        """
        _ = kwargs
        parsed = parse_query(query or "")
        text = str(get_free_text(parsed) or "").strip()
        fields = parsed.get("fields", {}) if isinstance(parsed, dict) else {}
        if not isinstance(fields, dict):
            fields = {}

        # Allow explicit q: override.
        q = str(fields.get("q") or text or "").strip()
        if not q:
            return []

        # Pass through any extra filters supported by the LoC API.
        extra: Dict[str, Any] = {}
        if isinstance(filters, dict):
            extra.update(filters)
        for k, v in fields.items():
            if k == "q":
                continue
            extra[str(k)] = v

        client = LOCClient()
        results: List[SearchResult] = []
        try:
            wanted = 50 if limit is None else int(limit)
            if wanted <= 0:
                return []
            page_size = min(wanted, 50)

            # LoC pagination: `sp` is a 1-based page number and `c` is the
            # page size, so advance one page at a time (not by item count).
            page = 1
            while len(results) < wanted:
                payload = client.search_chronicling_america(
                    q, start=page, count=page_size, extra_params=extra
                )
                items = payload.get("results")
                if not isinstance(items, list) or not items:
                    break

                for it in items:
                    if not isinstance(it, dict):
                        continue
                    results.append(self._to_result(it))
                    if len(results) >= wanted:
                        break

                # A short page means there is nothing further to fetch.
                if len(items) < page_size:
                    break
                page += 1
        except Exception as exc:
            log(f"[loc] search failed: {exc}")
            return []

        return results
|
||||||
@@ -23,6 +23,7 @@ from Provider.telegram import Telegram
|
|||||||
from Provider.youtube import YouTube
|
from Provider.youtube import YouTube
|
||||||
from Provider.fileio import FileIO
|
from Provider.fileio import FileIO
|
||||||
from Provider.zeroxzero import ZeroXZero
|
from Provider.zeroxzero import ZeroXZero
|
||||||
|
from Provider.loc import LOC
|
||||||
|
|
||||||
|
|
||||||
_PROVIDERS: Dict[str, Type[Provider]] = {
|
_PROVIDERS: Dict[str, Type[Provider]] = {
|
||||||
@@ -34,6 +35,7 @@ _PROVIDERS: Dict[str, Type[Provider]] = {
|
|||||||
"bandcamp": Bandcamp,
|
"bandcamp": Bandcamp,
|
||||||
"youtube": YouTube,
|
"youtube": YouTube,
|
||||||
"telegram": Telegram,
|
"telegram": Telegram,
|
||||||
|
"loc": LOC,
|
||||||
# Upload-capable providers
|
# Upload-capable providers
|
||||||
"0x0": ZeroXZero,
|
"0x0": ZeroXZero,
|
||||||
"file.io": FileIO,
|
"file.io": FileIO,
|
||||||
|
|||||||
@@ -31,10 +31,10 @@ class Search_Provider(Cmdlet):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
name="search-provider",
|
name="search-provider",
|
||||||
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)",
|
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc)",
|
||||||
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
|
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
|
||||||
arg=[
|
arg=[
|
||||||
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid"),
|
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc"),
|
||||||
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
|
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
|
||||||
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
|
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
|
||||||
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
|
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
|
||||||
@@ -48,6 +48,8 @@ class Search_Provider(Cmdlet):
|
|||||||
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
|
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
|
||||||
"- libgen: Search Library Genesis for books",
|
"- libgen: Search Library Genesis for books",
|
||||||
" Example: search-provider -provider libgen \"python programming\"",
|
" Example: search-provider -provider libgen \"python programming\"",
|
||||||
|
"- loc: Search Library of Congress (Chronicling America)",
|
||||||
|
" Example: search-provider -provider loc \"lincoln\"",
|
||||||
"- soulseek: Search P2P network for music",
|
"- soulseek: Search P2P network for music",
|
||||||
" Example: search-provider -provider soulseek \"pink floyd\"",
|
" Example: search-provider -provider soulseek \"pink floyd\"",
|
||||||
"- youtube: Search YouTube for videos",
|
"- youtube: Search YouTube for videos",
|
||||||
@@ -182,6 +184,8 @@ class Search_Provider(Cmdlet):
|
|||||||
provider_label = "Youtube"
|
provider_label = "Youtube"
|
||||||
elif provider_lower == "openlibrary":
|
elif provider_lower == "openlibrary":
|
||||||
provider_label = "OpenLibrary"
|
provider_label = "OpenLibrary"
|
||||||
|
elif provider_lower == "loc":
|
||||||
|
provider_label = "LoC"
|
||||||
else:
|
else:
|
||||||
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
|
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
|
||||||
|
|
||||||
@@ -189,7 +193,7 @@ class Search_Provider(Cmdlet):
|
|||||||
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
|
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
|
||||||
else:
|
else:
|
||||||
table_title = f"{provider_label}: {query}".strip().rstrip(":")
|
table_title = f"{provider_label}: {query}".strip().rstrip(":")
|
||||||
preserve_order = provider_name.lower() in ("youtube", "openlibrary")
|
preserve_order = provider_name.lower() in ("youtube", "openlibrary", "loc")
|
||||||
table = ResultTable(table_title).set_preserve_order(preserve_order)
|
table = ResultTable(table_title).set_preserve_order(preserve_order)
|
||||||
table.set_table(provider_name)
|
table.set_table(provider_name)
|
||||||
table.set_source_command("search-provider", list(args))
|
table.set_source_command("search-provider", list(args))
|
||||||
|
|||||||
@@ -31,7 +31,11 @@ click apply
|
|||||||
--------------------------------------------
|
--------------------------------------------
|
||||||
edit the below and place in your config.conf
|
edit the below and place in your config.conf
|
||||||
|
|
||||||
[store=hydrusnetwork]
|
<figure>
|
||||||
|
<figcaption>config.conf</figcaption>
|
||||||
|
<pre><code class="language-powershell">[store=hydrusnetwork]
|
||||||
NAME="shortnamenospacesorsymbols"
|
NAME="shortnamenospacesorsymbols"
|
||||||
API="apiaccesskeygoeshere"
|
API="apiaccesskeygoeshere"
|
||||||
URL="apibaseurlgoeshere"
|
URL="apibaseurlgoeshere"
|
||||||
|
</code></pre>
|
||||||
|
</figure>
|
||||||
|
|||||||
Reference in New Issue
Block a user