This commit is contained in:
goyim nose
2025-12-25 16:02:46 -08:00
parent 6a592f873c
commit 1ec2b313df
5 changed files with 224 additions and 5 deletions

80
API/loc.py Normal file
View File

@@ -0,0 +1,80 @@
"""Library of Congress (LoC) API helpers.
This module currently focuses on the LoC JSON API endpoint for the
Chronicling America collection.
Docs:
- https://www.loc.gov/apis/
- https://www.loc.gov/apis/json-and-yaml/
The LoC JSON API does not require an API key.
"""
from __future__ import annotations
import json
from typing import Any, Dict, Optional
from API.HTTP import HTTPClient
class LOCError(Exception):
    """Error raised when a Library of Congress API request cannot be completed."""
class LOCClient:
    """Minimal client for the public LoC JSON API.

    Every request is a GET against ``BASE_URL``; any failure while fetching
    or decoding is re-raised as ``LOCError``.
    """

    BASE_URL = "https://www.loc.gov"

    def __init__(self, *, timeout: float = 20.0):
        """Store the per-request timeout handed to ``HTTPClient``.

        Args:
            timeout: Timeout value, coerced to ``float``.
        """
        self.timeout = float(timeout)

    @staticmethod
    def _positive_int(value: Any, default: int) -> int:
        """Coerce *value* to a positive int, or return *default*.

        ``None``, non-numeric values, zero and negatives all yield *default*
        so a bad paging argument never escapes as a raw ``TypeError``.
        """
        try:
            number = int(value)
        except (TypeError, ValueError):
            return default
        return number if number > 0 else default

    def _get_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``BASE_URL/path`` and return the decoded JSON object.

        Args:
            path: URL path relative to ``BASE_URL``; leading slashes are optional.
            params: Query-string parameters passed to the HTTP client.

        Returns:
            The decoded top-level JSON object.

        Raises:
            LOCError: If the request fails, the status is an error, the body
                is not valid UTF-8 JSON, or the top-level value is not an object.
        """
        url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
        try:
            with HTTPClient(timeout=self.timeout) as client:
                resp = client.get(url, params=params)
                resp.raise_for_status()
                # httpx.Response.json() exists but keep decoding consistent
                payload = json.loads(resp.content.decode("utf-8"))
        except Exception as exc:
            raise LOCError(str(exc)) from exc
        if not isinstance(payload, dict):
            # Callers rely on the declared Dict return type (e.g. ``.get``).
            raise LOCError(
                f"unexpected JSON payload type: {type(payload).__name__}"
            )
        return payload

    def search_chronicling_america(
        self,
        query: str,
        *,
        start: int = 1,
        count: int = 25,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search the Chronicling America collection via LoC JSON API.

        Args:
            query: Free-text query. Blank queries short-circuit without a
                network call.
            start: 1-based page number, sent as LoC's ``sp``. The parameter
                keeps the name ``start`` for backward compatibility; it is a
                page index, not an item offset. Invalid or non-positive
                values fall back to 1.
            count: Results per page, sent as LoC's ``c``. Invalid or
                non-positive values fall back to 25.
            extra_params: Additional LoC API params (facets, filters, etc.).
                Entries whose value is ``None`` are dropped; the rest are
                applied last and may override the defaults above.

        Returns:
            Parsed JSON response, or ``{"results": []}`` for a blank query.

        Raises:
            LOCError: If the underlying request fails.
        """
        q = str(query or "").strip()
        if not q:
            return {"results": []}

        params: Dict[str, Any] = {
            "q": q,
            "fo": "json",
            "c": self._positive_int(count, 25),
            "sp": self._positive_int(start, 1),
        }
        if extra_params:
            params.update(
                {str(k): v for k, v in extra_params.items() if v is not None}
            )
        return self._get_json("/collections/chronicling-america/", params)

129
Provider/loc.py Normal file
View File

@@ -0,0 +1,129 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from API.loc import LOCClient
from ProviderCore.base import Provider, SearchResult
from cli_syntax import get_free_text, parse_query
from SYS.logger import log
class LOC(Provider):
    """LoC search provider.

    Currently implements Chronicling America collection search via the LoC JSON API.
    """

    URL_DOMAINS = ["www.loc.gov"]

    # Fallback when ``limit`` is missing or not numeric; mirrors the signature default.
    _DEFAULT_LIMIT = 50
    # Largest per-page count (LoC ``c``) this provider asks for in one request.
    _MAX_PAGE_SIZE = 50
    # Separator between the date and source parts of ``SearchResult.detail``.
    _DETAIL_SEPARATOR = " | "

    def validate(self) -> bool:
        """Always report the provider as usable; nothing is checked here."""
        return True

    @staticmethod
    def _item_to_result(item: Dict[str, Any]) -> SearchResult:
        """Convert one LoC result dict into a ``SearchResult`` row."""
        title = str(item.get("title") or "").strip() or "(untitled)"
        date = str(item.get("date") or "").strip()

        # Fall back to the first ``aka`` entry when ``url`` is missing.
        url = str(item.get("url") or "").strip()
        aka = item.get("aka")
        if not url and isinstance(aka, list) and aka:
            url = str(aka[0] or "").strip()

        formats = item.get("online_format")
        if isinstance(formats, list):
            fmt_text = ", ".join(str(x) for x in formats if x)
        else:
            fmt_text = str(formats or "").strip()

        # The last ``partof`` entry is used as the source label.
        partof = item.get("partof")
        if isinstance(partof, list) and partof:
            source = str(partof[-1] or "").strip()
        else:
            source = "Chronicling America"

        # Join with a visible separator so date and source do not run together.
        detail = LOC._DETAIL_SEPARATOR.join(part for part in (date, source) if part)
        annotations: List[str] = [part for part in (date, fmt_text) if part]

        return SearchResult(
            table="loc",
            title=title,
            path=url or title,
            detail=detail,
            annotations=annotations,
            media_kind="document",
            columns=[
                ("Title", title),
                ("Date", date),
                ("Format", fmt_text),
                ("URL", url),
            ],
            full_metadata=item,
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search Chronicling America and return up to *limit* results.

        Args:
            query: Provider query string. It is run through ``parse_query``;
                an explicit ``q:`` field overrides the free text, and every
                other parsed field is forwarded as a LoC API parameter.
            limit: Maximum number of results. ``None`` or a non-numeric value
                falls back to 50; zero or a negative value yields no results.
            filters: Extra LoC API parameters. Parsed query fields with the
                same key take precedence.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            The collected results, or an empty list when the query is blank
            or any request fails (the failure is logged).
        """
        _ = kwargs

        parsed = parse_query(query or "")
        text = str(get_free_text(parsed) or "").strip()
        fields = parsed.get("fields", {}) if isinstance(parsed, dict) else {}
        if not isinstance(fields, dict):
            fields = {}

        # Allow explicit q: override.
        q = str(fields.get("q") or text or "").strip()
        if not q:
            return []

        try:
            wanted = int(limit)
        except (TypeError, ValueError):
            wanted = self._DEFAULT_LIMIT
        if wanted <= 0:
            return []

        # Pass through any extra filters supported by the LoC API.
        extra: Dict[str, Any] = {}
        if isinstance(filters, dict):
            extra.update(filters)
        extra.update({str(k): v for k, v in fields.items() if k != "q"})

        page_size = max(1, min(wanted, self._MAX_PAGE_SIZE))
        client = LOCClient()
        results: List[SearchResult] = []
        page = 1
        try:
            while len(results) < wanted:
                payload = client.search_chronicling_america(
                    q, start=page, count=page_size, extra_params=extra
                )
                items = payload.get("results")
                if not isinstance(items, list) or not items:
                    break
                for it in items:
                    if not isinstance(it, dict):
                        continue
                    results.append(self._item_to_result(it))
                    if len(results) >= wanted:
                        break
                # A short page means there is nothing further to fetch.
                if len(items) < page_size:
                    break
                # LoC's ``sp`` is a 1-based page number, not an item offset,
                # so advance one page at a time.
                page += 1
        except Exception as exc:
            log(f"[loc] search failed: {exc}")
            return []
        return results

View File

@@ -23,6 +23,7 @@ from Provider.telegram import Telegram
from Provider.youtube import YouTube from Provider.youtube import YouTube
from Provider.fileio import FileIO from Provider.fileio import FileIO
from Provider.zeroxzero import ZeroXZero from Provider.zeroxzero import ZeroXZero
from Provider.loc import LOC
_PROVIDERS: Dict[str, Type[Provider]] = { _PROVIDERS: Dict[str, Type[Provider]] = {
@@ -34,6 +35,7 @@ _PROVIDERS: Dict[str, Type[Provider]] = {
"bandcamp": Bandcamp, "bandcamp": Bandcamp,
"youtube": YouTube, "youtube": YouTube,
"telegram": Telegram, "telegram": Telegram,
"loc": LOC,
# Upload-capable providers # Upload-capable providers
"0x0": ZeroXZero, "0x0": ZeroXZero,
"file.io": FileIO, "file.io": FileIO,

View File

@@ -31,10 +31,10 @@ class Search_Provider(Cmdlet):
def __init__(self): def __init__(self):
super().__init__( super().__init__(
name="search-provider", name="search-provider",
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)", summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]", usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[ arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid"), CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"), CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"), CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"), CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
@@ -48,6 +48,8 @@ class Search_Provider(Cmdlet):
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"", " Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
"- libgen: Search Library Genesis for books", "- libgen: Search Library Genesis for books",
" Example: search-provider -provider libgen \"python programming\"", " Example: search-provider -provider libgen \"python programming\"",
"- loc: Search Library of Congress (Chronicling America)",
" Example: search-provider -provider loc \"lincoln\"",
"- soulseek: Search P2P network for music", "- soulseek: Search P2P network for music",
" Example: search-provider -provider soulseek \"pink floyd\"", " Example: search-provider -provider soulseek \"pink floyd\"",
"- youtube: Search YouTube for videos", "- youtube: Search YouTube for videos",
@@ -182,6 +184,8 @@ class Search_Provider(Cmdlet):
provider_label = "Youtube" provider_label = "Youtube"
elif provider_lower == "openlibrary": elif provider_lower == "openlibrary":
provider_label = "OpenLibrary" provider_label = "OpenLibrary"
elif provider_lower == "loc":
provider_label = "LoC"
else: else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider" provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
@@ -189,7 +193,7 @@ class Search_Provider(Cmdlet):
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":") table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
else: else:
table_title = f"{provider_label}: {query}".strip().rstrip(":") table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_name.lower() in ("youtube", "openlibrary") preserve_order = provider_name.lower() in ("youtube", "openlibrary", "loc")
table = ResultTable(table_title).set_preserve_order(preserve_order) table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name) table.set_table(provider_name)
table.set_source_command("search-provider", list(args)) table.set_source_command("search-provider", list(args))

View File

@@ -31,7 +31,11 @@ click apply
-------------------------------------------- --------------------------------------------
edit the below and place in your config.conf edit the below and place in your config.conf
[store=hydrusnetwork] <figure>
<figcaption>config.conf</figcaption>
<pre><code class="language-powershell">[store=hydrusnetwork]
NAME="shortnamenospacesorsymbols" NAME="shortnamenospacesorsymbols"
API="apiaccesskeygoeshere" API="apiaccesskeygoeshere"
URL="apibaseurlgoeshere" URL="apibaseurlgoeshere"
</code></pre>
</figure>