This commit is contained in:
goyim nose
2025-12-25 16:02:46 -08:00
parent 6a592f873c
commit 1ec2b313df
5 changed files with 224 additions and 5 deletions

80
API/loc.py Normal file
View File

@@ -0,0 +1,80 @@
"""Library of Congress (LoC) API helpers.
This module currently focuses on the LoC JSON API endpoint for the
Chronicling America collection.
Docs:
- https://www.loc.gov/apis/
- https://www.loc.gov/apis/json-and-yaml/
The LoC JSON API does not require an API key.
"""
from __future__ import annotations
import json
from typing import Any, Dict, Optional
from API.HTTP import HTTPClient
class LOCError(Exception):
    """Raised when a request to the LoC JSON API fails or cannot be decoded."""
class LOCClient:
    """Minimal client for the public LoC JSON API.

    Requests are plain GETs against ``BASE_URL``; no API key is sent.
    """

    BASE_URL = "https://www.loc.gov"

    def __init__(self, *, timeout: float = 20.0):
        """Remember the timeout (seconds) passed to ``HTTPClient`` per request."""
        self.timeout = float(timeout)

    @staticmethod
    def _positive_int(value: Any, default: int) -> int:
        """Return *value* as a positive int, or *default* if it is not one.

        ``None``, non-numeric values, zero and negatives all yield *default*,
        so a bad paging argument degrades to the default instead of raising.
        """
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return number if number > 0 else default

    def _get_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """GET ``BASE_URL`` + *path* with *params* and return the decoded JSON.

        Raises:
            LOCError: On any failure (transport error, non-success status,
                undecodable body); the original exception is chained.
        """
        url = self.BASE_URL.rstrip("/") + "/" + str(path or "").lstrip("/")
        try:
            with HTTPClient(timeout=self.timeout) as client:
                resp = client.get(url, params=params)
                resp.raise_for_status()
                # httpx.Response.json() exists but keep decoding consistent
                return json.loads(resp.content.decode("utf-8"))
        except Exception as exc:
            # Single boundary: callers only ever need to catch LOCError.
            raise LOCError(str(exc)) from exc

    def search_chronicling_america(
        self,
        query: str,
        *,
        start: int = 1,
        count: int = 25,
        extra_params: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Search the Chronicling America collection via LoC JSON API.

        Args:
            query: Free-text query. Blank queries short-circuit to an empty
                result set without issuing a request.
            start: Value sent as LoC's ``sp`` parameter. LoC treats ``sp`` as
                a 1-based results *page* number, not an item offset. Invalid
                or non-positive values fall back to 1.
            count: Results per page (LoC uses ``c``). Invalid or non-positive
                values fall back to 25.
            extra_params: Additional LoC API params (facets, filters, etc.).
                Entries whose value is ``None`` are dropped; the rest are
                merged last and may override the defaults built here.

        Returns:
            Parsed JSON response.

        Raises:
            LOCError: If the request or decoding fails.
        """
        text = str(query or "").strip()
        if not text:
            return {"results": []}

        params: Dict[str, Any] = {
            "q": text,
            "fo": "json",
            "c": self._positive_int(count, 25),
            "sp": self._positive_int(start, 1),
        }
        if extra_params:
            params.update(
                {str(key): value for key, value in extra_params.items() if value is not None}
            )
        return self._get_json("/collections/chronicling-america/", params)

129
Provider/loc.py Normal file
View File

@@ -0,0 +1,129 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from API.loc import LOCClient
from ProviderCore.base import Provider, SearchResult
from cli_syntax import get_free_text, parse_query
from SYS.logger import log
class LOC(Provider):
    """LoC search provider.

    Currently implements Chronicling America collection search via the LoC JSON API.
    """

    URL_DOMAINS = ["www.loc.gov"]

    # Upper bound on the per-request page size asked of the API.
    _MAX_PAGE_SIZE = 50

    def validate(self) -> bool:
        """Always report the provider as usable."""
        return True

    @staticmethod
    def _coerce_limit(limit: Any, default: int = 50) -> int:
        """Return *limit* as an int, or *default* when it is missing/invalid."""
        try:
            return int(limit)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _to_result(item: Dict[str, Any]) -> SearchResult:
        """Map one LoC result dict onto a ``SearchResult`` row."""
        title = str(item.get("title") or "").strip() or "(untitled)"
        date = str(item.get("date") or "").strip()

        # Prefer the canonical URL; fall back to the first alias if absent.
        url = str(item.get("url") or "").strip()
        aka = item.get("aka")
        if not url and isinstance(aka, list) and aka:
            url = str(aka[0] or "").strip()

        formats = item.get("online_format")
        if isinstance(formats, list):
            fmt_text = ", ".join(str(x) for x in formats if x)
        else:
            fmt_text = str(formats or "").strip()

        partof = item.get("partof")
        if isinstance(partof, list) and partof:
            source = str(partof[-1] or "").strip()
        else:
            source = "Chronicling America"

        # Join with a visible separator so date and source do not run together.
        detail = ", ".join(part for part in (date, source) if part)
        annotations: List[str] = [part for part in (date, fmt_text) if part]

        return SearchResult(
            table="loc",
            title=title,
            path=url or title,
            detail=detail,
            annotations=annotations,
            media_kind="document",
            columns=[
                ("Title", title),
                ("Date", date),
                ("Format", fmt_text),
                ("URL", url),
            ],
            full_metadata=item,
        )

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """Search Chronicling America and return up to *limit* results.

        Args:
            query: Provider query string; parsed with ``parse_query``. A
                ``q`` field, when present, overrides the free text.
            limit: Maximum number of results. ``None`` or a non-numeric
                value falls back to 50; zero or negative returns no results.
            filters: Extra LoC API parameters, passed through as-is. Parsed
                query fields other than ``q`` are merged on top of these.
            **kwargs: Accepted and ignored.

        Returns:
            The collected results. If a request fails, the failure is logged
            and whatever was gathered before it is returned (possibly empty).
        """
        _ = kwargs
        parsed = parse_query(query or "")
        text = get_free_text(parsed).strip()
        fields = parsed.get("fields") if isinstance(parsed, dict) else None
        if not isinstance(fields, dict):
            fields = {}

        # Allow explicit q: override.
        q = str(fields.get("q") or text or "").strip()
        if not q:
            return []

        max_results = self._coerce_limit(limit)
        if max_results <= 0:
            return []

        # Pass through any extra filters supported by the LoC API.
        extra: Dict[str, Any] = {}
        if isinstance(filters, dict):
            extra.update(filters)
        extra.update({str(k): v for k, v in fields.items() if k != "q"})

        client = LOCClient()
        results: List[SearchResult] = []
        page_size = min(max_results, self._MAX_PAGE_SIZE)
        # LoC's `sp` parameter selects a 1-based results page, so paging
        # advances by one page per request, not by the number of items seen.
        page = 1

        try:
            while len(results) < max_results:
                payload = client.search_chronicling_america(
                    q, start=page, count=page_size, extra_params=extra
                )
                items = payload.get("results")
                if not isinstance(items, list) or not items:
                    break

                for it in items:
                    if not isinstance(it, dict):
                        continue
                    results.append(self._to_result(it))
                    if len(results) >= max_results:
                        break

                # A short page means the result set is exhausted.
                if len(items) < page_size:
                    break
                page += 1
        except Exception as exc:
            # Best-effort provider: report the failure but keep earlier pages.
            log(f"[loc] search failed: {exc}")

        return results

View File

@@ -23,6 +23,7 @@ from Provider.telegram import Telegram
from Provider.youtube import YouTube
from Provider.fileio import FileIO
from Provider.zeroxzero import ZeroXZero
from Provider.loc import LOC
_PROVIDERS: Dict[str, Type[Provider]] = {
@@ -34,6 +35,7 @@ _PROVIDERS: Dict[str, Type[Provider]] = {
"bandcamp": Bandcamp,
"youtube": YouTube,
"telegram": Telegram,
"loc": LOC,
# Upload-capable providers
"0x0": ZeroXZero,
"file.io": FileIO,

View File

@@ -31,10 +31,10 @@ class Search_Provider(Cmdlet):
def __init__(self):
super().__init__(
name="search-provider",
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)",
summary="Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid"),
CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc"),
CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
CmdletArg("open", type="int", description="(alldebrid) Open folder/magnet by ID and list its files"),
@@ -48,6 +48,8 @@ class Search_Provider(Cmdlet):
" Example: search-provider -provider bandcamp \"artist:altrusian grace\"",
"- libgen: Search Library Genesis for books",
" Example: search-provider -provider libgen \"python programming\"",
"- loc: Search Library of Congress (Chronicling America)",
" Example: search-provider -provider loc \"lincoln\"",
"- soulseek: Search P2P network for music",
" Example: search-provider -provider soulseek \"pink floyd\"",
"- youtube: Search YouTube for videos",
@@ -182,6 +184,8 @@ class Search_Provider(Cmdlet):
provider_label = "Youtube"
elif provider_lower == "openlibrary":
provider_label = "OpenLibrary"
elif provider_lower == "loc":
provider_label = "LoC"
else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
@@ -189,7 +193,7 @@ class Search_Provider(Cmdlet):
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
else:
table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_name.lower() in ("youtube", "openlibrary")
preserve_order = provider_name.lower() in ("youtube", "openlibrary", "loc")
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name)
table.set_source_command("search-provider", list(args))

View File

@@ -31,7 +31,11 @@ click apply
--------------------------------------------
edit the below and place in your config.conf
[store=hydrusnetwork]
<figure>
<figcaption>config.conf</figcaption>
<pre><code class="language-powershell">[store=hydrusnetwork]
NAME="shortnamenospacesorsymbols"
API="apiaccesskeygoeshere"
URL="apibaseurlgoeshere"
URL="apibaseurlgoeshere"
</code></pre>
</figure>