Add YAPF style + ignore, and format tracked Python files

2025-12-29 18:42:02 -08:00
parent c019c00aed
commit 507946a3e4
108 changed files with 11664 additions and 6494 deletions
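The `.style.yapf` and `.yapfignore` named in the commit title are not among the hunks shown below. For reference, the wrapping visible throughout these diffs (comma-separated arguments, dict entries, and set elements split one per line once a line overflows) matches YAPF's split-all behaviour; a minimal sketch of reproducing it with YAPF's Python API follows. The option names are real YAPF knobs, but the values the repository actually pins are an assumption here.

from yapf.yapflib.yapf_api import FormatCode

# A deliberately overlong call so the style has something to split.
SRC = "result = build(title=title, path=path, detail=detail, annotations=annotations)\n"

# split_all_comma_separated_values yields the one-element-per-line wrapping
# seen in the hunks below; based_on_style and column_limit are guesses.
formatted, changed = FormatCode(
    SRC,
    style_config="{based_on_style: pep8, column_limit: 79, "
    "split_all_comma_separated_values: true}",
)
print(formatted, end="")

If the ignore file follows YAPF's convention, it is a `.yapfignore` at the repository root with one glob pattern per line for paths the formatter should skip.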

View File

@@ -37,7 +37,7 @@ def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]:
# 2) store.debrid block (canonical for debrid store configuration)
try:
from config import get_debrid_api_key
from SYS.config import get_debrid_api_key
key = get_debrid_api_key(config, service="All-debrid")
return key.strip() if key else None
@@ -97,23 +97,27 @@ class AllDebrid(Provider):
# Quiet mode when download-file is mid-pipeline.
quiet = (
bool(self.config.get("_quiet_background_output"))
if isinstance(self.config, dict)
else False
if isinstance(self.config,
dict) else False
)
unlocked_url = target
try:
unlocked = client.unlock_link(target)
if isinstance(unlocked, str) and unlocked.strip().startswith(
("http://", "https://")
):
if isinstance(unlocked,
str) and unlocked.strip().startswith(("http://",
"https://")):
unlocked_url = unlocked.strip()
except Exception as exc:
# Fall back to the raw link, but warn.
log(f"[alldebrid] Failed to unlock link: {exc}", file=sys.stderr)
# Prefer provider title as the output filename.
suggested = sanitize_filename(str(getattr(result, "title", "") or "").strip())
suggested = sanitize_filename(
str(getattr(result,
"title",
"") or "").strip()
)
suggested_name = suggested if suggested else None
try:
@@ -142,11 +146,9 @@ class AllDebrid(Provider):
try:
if downloaded_path.exists():
size = downloaded_path.stat().st_size
if (
size > 0
and size <= 250_000
and downloaded_path.suffix.lower() not in (".html", ".htm")
):
if (size > 0 and size <= 250_000
and downloaded_path.suffix.lower() not in (".html",
".htm")):
head = downloaded_path.read_bytes()[:512]
try:
text = head.decode("utf-8", errors="ignore").lower()
@@ -173,9 +175,10 @@ class AllDebrid(Provider):
return None
@staticmethod
def _flatten_files(
items: Any, *, _prefix: Optional[List[str]] = None
) -> Iterable[Dict[str, Any]]:
def _flatten_files(items: Any,
*,
_prefix: Optional[List[str]] = None) -> Iterable[Dict[str,
Any]]:
"""Flatten AllDebrid magnet file tree into file dicts, preserving relative paths.
API commonly returns:
@@ -211,7 +214,9 @@ class AllDebrid(Provider):
name = node.get("n") or node.get("name")
link = node.get("l") or node.get("link")
if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip():
if isinstance(name,
str) and name.strip() and isinstance(link,
str) and link.strip():
rel_parts = prefix + [name.strip()]
relpath = "/".join([p for p in rel_parts if p])
enriched = dict(node)
@@ -222,7 +227,8 @@ class AllDebrid(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
q = (query or "").strip()
@@ -247,7 +253,9 @@ class AllDebrid(Provider):
return []
q_lower = q.lower()
needle = "" if q_lower in {"*", "all", "list"} else q_lower
needle = "" if q_lower in {"*",
"all",
"list"} else q_lower
# Second-stage: list files for a specific magnet id.
if view == "files":
@@ -262,17 +270,16 @@ class AllDebrid(Provider):
except Exception:
return []
magnet_status: Dict[str, Any] = {}
magnet_status: Dict[str,
Any] = {}
try:
magnet_status = client.magnet_status(magnet_id)
except Exception:
magnet_status = {}
magnet_name = str(
magnet_status.get("filename")
or magnet_status.get("name")
or magnet_status.get("hash")
or f"magnet-{magnet_id}"
magnet_status.get("filename") or magnet_status.get("name")
or magnet_status.get("hash") or f"magnet-{magnet_id}"
)
status_code = magnet_status.get("statusCode")
status_text = str(magnet_status.get("status") or "").strip() or "unknown"
@@ -285,25 +292,40 @@ class AllDebrid(Provider):
title=magnet_name,
path=f"alldebrid:magnet:{magnet_id}",
detail=status_text,
annotations=["folder", "not-ready"],
annotations=["folder",
"not-ready"],
media_kind="folder",
tag={"alldebrid", "folder", str(magnet_id), "not-ready"},
tag={"alldebrid",
"folder",
str(magnet_id),
"not-ready"},
columns=[
("Folder", magnet_name),
("ID", str(magnet_id)),
("Status", status_text),
("Ready", "no"),
("Folder",
magnet_name),
("ID",
str(magnet_id)),
("Status",
status_text),
("Ready",
"no"),
],
full_metadata={"magnet": magnet_status, "magnet_id": magnet_id},
full_metadata={
"magnet": magnet_status,
"magnet_id": magnet_id
},
)
]
try:
files_result = client.magnet_links([magnet_id])
magnet_files = (
files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
files_result.get(str(magnet_id),
{}) if isinstance(files_result,
dict) else {}
)
file_tree = magnet_files.get("files", []) if isinstance(magnet_files, dict) else []
file_tree = magnet_files.get("files",
[]) if isinstance(magnet_files,
dict) else []
except Exception as exc:
log(
f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}",
@@ -313,8 +335,10 @@ class AllDebrid(Provider):
results: List[SearchResult] = []
for file_node in self._flatten_files(file_tree):
file_name = str(file_node.get("n") or file_node.get("name") or "").strip()
file_url = str(file_node.get("l") or file_node.get("link") or "").strip()
file_name = str(file_node.get("n") or file_node.get("name")
or "").strip()
file_url = str(file_node.get("l") or file_node.get("link")
or "").strip()
relpath = str(file_node.get("_relpath") or file_name or "").strip()
file_size = file_node.get("s") or file_node.get("size")
if not file_name or not file_url:
@@ -341,11 +365,16 @@ class AllDebrid(Provider):
annotations=["file"],
media_kind="file",
size_bytes=size_bytes,
tag={"alldebrid", "file", str(magnet_id)},
tag={"alldebrid",
"file",
str(magnet_id)},
columns=[
("File", file_name),
("Folder", magnet_name),
("ID", str(magnet_id)),
("File",
file_name),
("Folder",
magnet_name),
("ID",
str(magnet_id)),
],
full_metadata={
"magnet": magnet_status,
@@ -386,9 +415,7 @@ class AllDebrid(Provider):
continue
magnet_name = str(
magnet.get("filename")
or magnet.get("name")
or magnet.get("hash")
magnet.get("filename") or magnet.get("name") or magnet.get("hash")
or f"magnet-{magnet_id}"
)
magnet_name_lower = magnet_name.lower()
@@ -422,15 +449,24 @@ class AllDebrid(Provider):
annotations=["folder"],
media_kind="folder",
size_bytes=size_bytes,
tag={"alldebrid", "folder", str(magnet_id)}
tag={"alldebrid",
"folder",
str(magnet_id)}
| ({"ready"} if ready else {"not-ready"}),
columns=[
("Folder", magnet_name),
("ID", str(magnet_id)),
("Status", status_text),
("Ready", "yes" if ready else "no"),
("Folder",
magnet_name),
("ID",
str(magnet_id)),
("Status",
status_text),
("Ready",
"yes" if ready else "no"),
],
full_metadata={"magnet": magnet, "magnet_id": magnet_id},
full_metadata={
"magnet": magnet,
"magnet_id": magnet_id
},
)
)
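The `_flatten_files` docstring above is cut off at the hunk boundary, but the surrounding hunks show the shape it consumes: file nodes keyed `n`/`name` and `l`/`link` (plus `s`/`size`), each enriched with a `_relpath`. A minimal sketch of that walk, assuming AllDebrid's folder nodes carry their children under an `e` key (that key name is an assumption; it is not visible in this diff):

from typing import Any, Dict, Iterable, List, Optional

def flatten_files(items: Any, prefix: Optional[List[str]] = None) -> Iterable[Dict[str, Any]]:
    prefix = prefix or []
    nodes = items if isinstance(items, list) else [items]
    for node in nodes:
        if not isinstance(node, dict):
            continue
        name = str(node.get("n") or node.get("name") or "").strip()
        children = node.get("e")
        if isinstance(children, list):
            # Folder: recurse with this folder's name added to the prefix.
            yield from flatten_files(children, prefix + [name])
            continue
        if node.get("l") or node.get("link"):
            # File: keep the original keys and add a slash-joined relative path.
            enriched = dict(node)
            enriched["_relpath"] = "/".join(p for p in prefix + [name] if p)
            yield enriched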

View File

@@ -38,9 +38,10 @@ class Bandcamp(Provider):
# Bandcamp discography lives under /music.
return base.rstrip("/") + "/music"
def _scrape_artist_page(
self, page: Any, artist_url: str, limit: int = 50
) -> List[SearchResult]:
def _scrape_artist_page(self,
page: Any,
artist_url: str,
limit: int = 50) -> List[SearchResult]:
"""Scrape an artist page for albums/tracks (discography)."""
base = self._base_url(artist_url)
discography_url = self._discography_url(artist_url)
@@ -75,7 +76,8 @@ class Bandcamp(Provider):
else:
target = base.rstrip("/") + "/" + href
title_node = item.query_selector("p.title") or item.query_selector(".title")
title_node = item.query_selector("p.title"
) or item.query_selector(".title")
title = title_node.inner_text().strip() if title_node else ""
if title:
title = " ".join(title.split())
@@ -83,7 +85,8 @@ class Bandcamp(Provider):
title = target.rsplit("/", 1)[-1]
kind = (
"album" if "/album/" in target else ("track" if "/track/" in target else "item")
"album" if "/album/" in target else
("track" if "/track/" in target else "item")
)
results.append(
@@ -95,9 +98,12 @@ class Bandcamp(Provider):
annotations=[kind],
media_kind="audio",
columns=[
("Title", title),
("Type", kind),
("Url", target),
("Title",
title),
("Type",
kind),
("Url",
target),
],
full_metadata={
"type": kind,
@@ -112,7 +118,12 @@ class Bandcamp(Provider):
return results
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any
) -> bool:
"""Handle Bandcamp `@N` selection.
@@ -128,7 +139,8 @@ class Bandcamp(Provider):
# Only handle artist selections.
chosen: List[Dict[str, Any]] = []
for item in selected_items or []:
payload: Dict[str, Any] = {}
payload: Dict[str,
Any] = {}
if isinstance(item, dict):
payload = item
else:
@@ -140,11 +152,21 @@ class Bandcamp(Provider):
if not payload:
try:
payload = {
"title": getattr(item, "title", None),
"url": getattr(item, "url", None),
"path": getattr(item, "path", None),
"metadata": getattr(item, "metadata", None),
"extra": getattr(item, "extra", None),
"title": getattr(item,
"title",
None),
"url": getattr(item,
"url",
None),
"path": getattr(item,
"path",
None),
"metadata": getattr(item,
"metadata",
None),
"extra": getattr(item,
"extra",
None),
}
except Exception:
payload = {}
@@ -154,7 +176,10 @@ class Bandcamp(Provider):
meta = {}
extra = payload.get("extra")
if isinstance(extra, dict):
meta = {**meta, **extra}
meta = {
**meta,
**extra
}
type_val = str(meta.get("type") or "").strip().lower()
if type_val != "artist":
@@ -169,7 +194,11 @@ class Bandcamp(Provider):
continue
chosen.append(
{"title": title, "url": base, "location": str(meta.get("artist") or "").strip()}
{
"title": title,
"url": base,
"location": str(meta.get("artist") or "").strip()
}
)
if not chosen:
@@ -211,8 +240,12 @@ class Bandcamp(Provider):
results_payload.append(
{
"table": "bandcamp",
"title": getattr(r, "title", ""),
"path": getattr(r, "path", ""),
"title": getattr(r,
"title",
""),
"path": getattr(r,
"path",
""),
}
)
@@ -234,7 +267,8 @@ class Bandcamp(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
if sync_playwright is None:
@@ -305,10 +339,14 @@ class Bandcamp(Provider):
annotations=[media_type],
media_kind="audio",
columns=[
("Title", title),
("Location", artist),
("Type", media_type),
("Url", base_url or str(target_url or "")),
("Title",
title),
("Location",
artist),
("Type",
media_type),
("Url",
base_url or str(target_url or "")),
],
full_metadata={
"artist": artist,

View File

@@ -54,7 +54,8 @@ class FileIO(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
conf = _pick_provider_config(self.config)
self._base_url = str(conf.get("base_url") or "https://file.io").strip().rstrip("/")
self._base_url = str(conf.get("base_url")
or "https://file.io").strip().rstrip("/")
self._api_key = conf.get("api_key")
self._default_expires = conf.get("expires")
self._default_max_downloads = conf.get("maxDownloads")
@@ -74,12 +75,19 @@ class FileIO(Provider):
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
data: Dict[str, Any] = {}
data: Dict[str,
Any] = {}
expires = kwargs.get("expires", self._default_expires)
max_downloads = kwargs.get(
"maxDownloads", kwargs.get("max_downloads", self._default_max_downloads)
"maxDownloads",
kwargs.get("max_downloads",
self._default_max_downloads)
)
auto_delete = kwargs.get("autoDelete", kwargs.get("auto_delete", self._default_auto_delete))
auto_delete = kwargs.get(
"autoDelete",
kwargs.get("auto_delete",
self._default_auto_delete)
)
if expires not in (None, ""):
data["expires"] = expires
@@ -88,7 +96,11 @@ class FileIO(Provider):
if auto_delete not in (None, ""):
data["autoDelete"] = auto_delete
headers: Dict[str, str] = {"User-Agent": "Medeia-Macina/1.0", "Accept": "application/json"}
headers: Dict[str,
str] = {
"User-Agent": "Medeia-Macina/1.0",
"Accept": "application/json"
}
if isinstance(self._api_key, str) and self._api_key.strip():
# Some file.io plans use bearer tokens; keep optional.
headers["Authorization"] = f"Bearer {self._api_key.strip()}"
@@ -101,19 +113,28 @@ class FileIO(Provider):
total = os.path.getsize(file_path)
except Exception:
total = None
wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
wrapped = ProgressFileReader(
handle,
total_bytes=total,
label="upload"
)
response = client.request(
"POST",
f"{self._base_url}/upload",
data=data or None,
files={"file": (filename, wrapped)},
files={
"file": (filename,
wrapped)
},
follow_redirects=True,
raise_for_status=False,
)
if response.status_code >= 400:
location = response.headers.get("location") or response.headers.get("Location")
ct = response.headers.get("content-type") or response.headers.get("Content-Type")
location = response.headers.get("location"
) or response.headers.get("Location")
ct = response.headers.get("content-type"
) or response.headers.get("Content-Type")
raise Exception(
f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
)
@@ -127,7 +148,8 @@ class FileIO(Provider):
# If the server ignored our Accept header and returned HTML, this is almost
# certainly the wrong endpoint or an upstream block.
ct = (
response.headers.get("content-type") or response.headers.get("Content-Type") or ""
response.headers.get("content-type")
or response.headers.get("Content-Type") or ""
).lower()
if (payload is None) and ("text/html" in ct):
raise Exception(
@@ -135,7 +157,8 @@ class FileIO(Provider):
)
if isinstance(payload, dict) and payload.get("success") is False:
reason = payload.get("message") or payload.get("error") or payload.get("status")
reason = payload.get("message"
) or payload.get("error") or payload.get("status")
raise Exception(str(reason or "Upload failed"))
uploaded_url = _extract_link(payload)
@@ -166,9 +189,11 @@ class FileIO(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(pipe_obj,
uploaded_url)
except Exception:
pass
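A minimal sketch of the file.io flow above with plain requests: multipart upload to `{base}/upload`, optional bearer auth, then the same `success`/`message`/`link` handling the provider applies. Endpoint and field names are taken from this diff, not from file.io's docs, so treat them as assumptions.

from typing import Optional

import requests

def upload(path: str, api_key: Optional[str] = None, expires: str = "14d") -> str:
    headers = {"User-Agent": "Medeia-Macina/1.0", "Accept": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    with open(path, "rb") as handle:
        resp = requests.post(
            "https://file.io/upload",
            headers=headers,
            data={"expires": expires},
            files={"file": handle},
            timeout=60,
        )
    resp.raise_for_status()
    payload = resp.json()
    if isinstance(payload, dict) and payload.get("success") is False:
        raise RuntimeError(str(payload.get("message") or "Upload failed"))
    return str(payload.get("link") or "")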

View File

@@ -35,7 +35,9 @@ def _pick_provider_config(config: Any) -> Dict[str, Any]:
def _looks_fielded_query(q: str) -> bool:
low = (q or "").lower()
return (":" in low) or (" and " in low) or (" or " in low) or (" not " in low) or ("(" in low)
return (":" in low) or (" and " in low) or (" or "
in low) or (" not "
in low) or ("(" in low)
def _extract_identifier_from_any(value: str) -> str:
@@ -111,9 +113,7 @@ def is_download_file_url(url: str) -> bool:
return False
# /download/<identifier>/<filename>
return (
len(parts) >= 3
and parts[0].lower() == "download"
and bool(parts[1].strip())
len(parts) >= 3 and parts[0].lower() == "download" and bool(parts[1].strip())
and bool(parts[2].strip())
)
@@ -158,9 +158,15 @@ def list_download_files(identifier: str) -> List[Dict[str, Any]]:
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
"size": getattr(f,
"size",
None),
"format": getattr(f,
"format",
None),
"source": getattr(f,
"source",
None),
}
)
except Exception:
@@ -179,13 +185,16 @@ def list_download_files(identifier: str) -> List[Dict[str, Any]]:
if source == "metadata":
return True
if fmt in {"metadata", "archive bittorrent"}:
if fmt in {"metadata",
"archive bittorrent"}:
return True
if fmt.startswith("thumbnail"):
return True
return False
candidates = [f for f in files if isinstance(f, dict) and not _is_ia_metadata_file(f)]
candidates = [
f for f in files if isinstance(f, dict) and not _is_ia_metadata_file(f)
]
if not candidates:
candidates = [f for f in files if isinstance(f, dict)]
@@ -266,7 +275,8 @@ def _best_file_candidate(files: List[Dict[str, Any]]) -> Optional[Dict[str, Any]
fmt = str(f.get("format") or "").strip().lower()
if source == "metadata":
return True
if fmt in {"metadata", "archive bittorrent"}:
if fmt in {"metadata",
"archive bittorrent"}:
return True
if fmt.startswith("thumbnail"):
return True
@@ -283,7 +293,10 @@ def _best_file_candidate(files: List[Dict[str, Any]]) -> Optional[Dict[str, Any]
candidates = list(files)
# Prefer originals.
originals = [f for f in candidates if str(f.get("source") or "").strip().lower() == "original"]
originals = [
f for f in candidates
if str(f.get("source") or "").strip().lower() == "original"
]
pool = originals if originals else candidates
pool = [f for f in pool if str(f.get("name") or "").strip()]
@@ -330,7 +343,8 @@ class InternetArchive(Provider):
mt = str(mediatype or "").strip().lower()
if mt in {"texts"}:
return "book"
if mt in {"audio", "etree"}:
if mt in {"audio",
"etree"}:
return "audio"
if mt in {"movies"}:
return "video"
@@ -342,7 +356,8 @@ class InternetArchive(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**_kwargs: Any,
) -> List[SearchResult]:
ia = _ia()
@@ -355,7 +370,8 @@ class InternetArchive(Provider):
return []
# If the user supplied a plain string, default to title search.
if not _looks_fielded_query(q) and q not in {"*", "*.*"}:
if not _looks_fielded_query(q) and q not in {"*",
"*.*"}:
q = f'title:("{q}")'
fields = [
@@ -419,10 +435,14 @@ class InternetArchive(Provider):
size_bytes=None,
tag=set(),
columns=[
("title", title),
("mediatype", mediatype),
("date", date),
("creator", creator),
("title",
title),
("mediatype",
mediatype),
("date",
date),
("creator",
creator),
],
full_metadata=dict(row),
)
@@ -437,7 +457,12 @@ class InternetArchive(Provider):
- https://archive.org/details/<identifier>
- https://archive.org/download/<identifier>/<filename>
"""
sr = SearchResult(table="internetarchive", title=str(url), path=str(url), full_metadata={})
sr = SearchResult(
table="internetarchive",
title=str(url),
path=str(url),
full_metadata={}
)
return self.download(sr, output_dir)
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
@@ -449,7 +474,11 @@ class InternetArchive(Provider):
if not callable(download_fn):
raise Exception("internetarchive.download is not available")
identifier = _extract_identifier_from_any(str(getattr(result, "path", "") or ""))
identifier = _extract_identifier_from_any(
str(getattr(result,
"path",
"") or "")
)
if not identifier:
return None
@@ -490,9 +519,15 @@ class InternetArchive(Provider):
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
"size": getattr(f,
"size",
None),
"format": getattr(f,
"format",
None),
"source": getattr(f,
"source",
None),
}
)
except Exception:
@@ -616,7 +651,8 @@ class InternetArchive(Provider):
if not identifier:
raise Exception("Could not determine Internet Archive identifier")
meta: Dict[str, Any] = {}
meta: Dict[str,
Any] = {}
if title:
meta["title"] = title
else:
@@ -628,7 +664,10 @@ class InternetArchive(Provider):
meta["mediatype"] = self._mediatype.strip()
# Build upload options; credentials are optional if the user has internetarchive configured globally.
upload_kwargs: Dict[str, Any] = {"metadata": meta}
upload_kwargs: Dict[str,
Any] = {
"metadata": meta
}
ak = os.getenv("IA_ACCESS_KEY") or self._access_key
sk = os.getenv("IA_SECRET_KEY") or self._secret_key
if isinstance(ak, str) and ak.strip():
@@ -638,7 +677,9 @@ class InternetArchive(Provider):
# Use a friendly uploaded filename.
upload_name = sanitize_filename(p.name)
files = {upload_name: str(p)}
files = {
upload_name: str(p)
}
try:
resp: Any = upload_fn(identifier, files=files, **upload_kwargs)
@@ -664,9 +705,11 @@ class InternetArchive(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, item_url
)
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(pipe_obj,
item_url)
except Exception:
pass
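The InternetArchive provider wraps three calls from the real `internetarchive` package; a minimal sketch of them in isolation (the identifier, metadata, and file names here are invented examples):

from internetarchive import download, search_items, upload

# Fielded search; plain strings get defaulted to title:(...) as above.
rows = list(search_items('title:("example")', fields=["identifier", "mediatype"]))

# Download an item's files into out/<identifier>/.
download("example-identifier", destdir="out")

# Upload with metadata; IA_ACCESS_KEY/IA_SECRET_KEY may instead come from
# a globally configured internetarchive session, as the diff notes.
upload(
    "example-identifier",
    files={"example.pdf": "local/example.pdf"},
    metadata={"title": "Example", "mediatype": "texts"},
)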

View File

@@ -15,7 +15,6 @@ from ProviderCore.download import sanitize_filename
from SYS.logger import log
from models import ProgressBar
# Optional dependency for HTML scraping fallbacks
try:
from lxml import html as lxml_html
@@ -111,9 +110,7 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
score = 0
for ln in lines:
lo = ln.lower()
if ":" in ln and any(
k in lo
for k in (
if ":" in ln and any(k in lo for k in (
"title",
"author",
"publisher",
@@ -121,9 +118,7 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
"isbn",
"language",
"series",
"tags",
)
):
"tags", )):
score += 1
if score > best_score:
best_score = score
@@ -133,15 +128,20 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
if not best_lines:
best_lines = _strip_html_to_lines(s)
raw_fields: Dict[str, str] = {}
raw_fields: Dict[str,
str] = {}
pending_key: Optional[str] = None
def _norm_key(k: str) -> str:
kk = str(k or "").strip().lower()
kk = re.sub(r"\s+", " ", kk)
if kk in {"authors", "author(s)", "author(s).", "author(s):"}:
if kk in {"authors",
"author(s)",
"author(s).",
"author(s):"}:
return "author"
if kk in {"tag", "tags"}:
if kk in {"tag",
"tags"}:
return "tags"
return kk
@@ -166,7 +166,10 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
raw_fields[pending_key] = line
pending_key = None
out: Dict[str, Any] = {"_raw_fields": dict(raw_fields)}
out: Dict[str,
Any] = {
"_raw_fields": dict(raw_fields)
}
title = str(raw_fields.get("title") or "").strip()
if title:
@@ -272,9 +275,11 @@ def _prefer_isbn(isbns: List[str]) -> str:
return vals[0] if vals else ""
def _enrich_book_tags_from_isbn(
isbn: str, *, config: Optional[Dict[str, Any]] = None
) -> Tuple[List[str], str]:
def _enrich_book_tags_from_isbn(isbn: str,
*,
config: Optional[Dict[str,
Any]] = None) -> Tuple[List[str],
str]:
"""Return (tags, source_name) for the given ISBN.
Priority:
@@ -378,7 +383,8 @@ def _enrich_book_tags_from_isbn(
try:
from Provider.metadata_provider import get_metadata_provider
provider = get_metadata_provider("isbnsearch", config or {})
provider = get_metadata_provider("isbnsearch",
config or {})
if provider is None:
return [], ""
items = provider.search(isbn_clean, limit=1)
@@ -393,7 +399,10 @@ def _enrich_book_tags_from_isbn(
def _fetch_libgen_details_html(
url: str, *, timeout: Optional[Tuple[float, float]] = None
url: str,
*,
timeout: Optional[Tuple[float,
float]] = None
) -> Optional[str]:
try:
if timeout is None:
@@ -401,7 +410,8 @@ def _fetch_libgen_details_html(
session = requests.Session()
session.headers.update(
{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
}
)
with session.get(str(url), stream=True, timeout=timeout) as resp:
@@ -420,14 +430,16 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
Best-effort and intentionally tolerant of mirror variations.
"""
out: Dict[str, Any] = {}
raw_fields: Dict[str, str] = {}
out: Dict[str,
Any] = {}
raw_fields: Dict[str,
str] = {}
s = str(html or "")
# Fast path: try to pull simple Label/Value table rows.
for m in re.finditer(
r"(?is)<tr\b[^>]*>\s*<t[dh]\b[^>]*>\s*([^<]{1,80}?)\s*:??\s*</t[dh]>\s*<t[dh]\b[^>]*>(.*?)</t[dh]>\s*</tr>",
s,
r"(?is)<tr\b[^>]*>\s*<t[dh]\b[^>]*>\s*([^<]{1,80}?)\s*:??\s*</t[dh]>\s*<t[dh]\b[^>]*>(.*?)</t[dh]>\s*</tr>",
s,
):
label = _strip_html_to_text(m.group(1))
raw_val_html = str(m.group(2) or "")
@@ -467,7 +479,8 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
chunk_start = m.end()
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
strong_matches[idx + 1].start() if
(idx + 1) < len(strong_matches) else len(s)
)
raw_val_html = s[chunk_start:chunk_end]
@@ -619,17 +632,17 @@ def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]:
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {
"title",
"author(s)",
"authors",
"author",
"publisher",
"year",
"isbn",
"language",
"oclc/worldcat",
"tags",
"edition id",
"title",
"author(s)",
"authors",
"author",
"publisher",
"year",
"isbn",
"language",
"oclc/worldcat",
"tags",
"edition id",
}:
continue
vv = str(v or "").strip()
@@ -658,13 +671,14 @@ class Libgen(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
try:
from cli_syntax import get_field, get_free_text, parse_query
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import is_debug_enabled
parsed = parse_query(query)
@@ -701,10 +715,14 @@ class Libgen(Provider):
mirror_url = book.get("mirror_url", "")
columns = [
("Title", title),
("Author", author),
("Pages", str(pages)),
("Ext", str(extension)),
("Title",
title),
("Author",
author),
("Pages",
str(pages)),
("Ext",
str(extension)),
]
detail = f"By: {author}"
@@ -732,8 +750,10 @@ class Libgen(Provider):
"filesize": filesize,
"pages": pages,
"extension": extension,
"book_id": book.get("book_id", ""),
"md5": book.get("md5", ""),
"book_id": book.get("book_id",
""),
"md5": book.get("md5",
""),
},
)
)
@@ -786,12 +806,9 @@ class Libgen(Provider):
title = ""
base_name = sanitize_filename(
title
or md5
or (
title or md5 or (
f"libgen_{_libgen_id_from_url(target)}"
if _libgen_id_from_url(target)
else "libgen"
if _libgen_id_from_url(target) else "libgen"
)
)
out_path = output_dir / base_name
@@ -819,9 +836,12 @@ class Libgen(Provider):
if now - last_progress_time[0] < 0.5:
return
total = int(content_length) if content_length and content_length > 0 else None
total = int(
content_length
) if content_length and content_length > 0 else None
downloaded = (
int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
int(bytes_downloaded)
if bytes_downloaded and bytes_downloaded > 0 else 0
)
elapsed = max(0.001, now - start_time)
speed = downloaded / elapsed
@@ -850,15 +870,19 @@ class Libgen(Provider):
# enrichment (OpenLibrary/isbnsearch) unless the user later chooses to.
if ("/ads.php" in low) or ("/get.php" in low):
ads_url = (
target if "/ads.php" in low else _libgen_ads_url_for_target(target)
target if "/ads.php" in low else
_libgen_ads_url_for_target(target)
)
if ads_url:
html = _fetch_libgen_details_html(
ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0)
ads_url,
timeout=(DEFAULT_CONNECT_TIMEOUT,
4.0)
)
if html:
meta = _parse_libgen_ads_tags_html(html)
extracted_title = str(meta.get("title") or "").strip()
extracted_title = str(meta.get("title")
or "").strip()
if extracted_title:
md["title"] = extracted_title
result.tag.add(f"title:{extracted_title}")
@@ -867,8 +891,8 @@ class Libgen(Provider):
authors = (
meta.get("authors")
if isinstance(meta.get("authors"), list)
else []
if isinstance(meta.get("authors"),
list) else []
)
for a in authors or []:
aa = str(a or "").strip()
@@ -892,11 +916,12 @@ class Libgen(Provider):
isbns = (
meta.get("isbn")
if isinstance(meta.get("isbn"), list)
else []
if isinstance(meta.get("isbn"),
list) else []
)
isbns = [
str(x).strip() for x in (isbns or []) if str(x).strip()
str(x).strip() for x in (isbns or [])
if str(x).strip()
]
if isbns:
md["isbn"] = isbns
@@ -905,8 +930,8 @@ class Libgen(Provider):
free_tags = (
meta.get("tags")
if isinstance(meta.get("tags"), list)
else []
if isinstance(meta.get("tags"),
list) else []
)
for t in free_tags or []:
tt = str(t or "").strip()
@@ -919,29 +944,28 @@ class Libgen(Provider):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {
"title",
"author",
"authors",
"publisher",
"year",
"isbn",
"language",
"tags",
"title",
"author",
"authors",
"publisher",
"year",
"isbn",
"language",
"tags",
}:
continue
vv = str(v or "").strip()
if not vv:
continue
ns = re.sub(r"[^a-z0-9]+", "_", lk).strip("_")
ns = re.sub(r"[^a-z0-9]+",
"_",
lk).strip("_")
if ns:
result.tag.add(f"libgen_{ns}:{vv}")
# Legacy: edition/file/series details pages (title + ISBN) + external enrichment.
if (
("/edition.php" in low)
or ("/file.php" in low)
or ("/series.php" in low)
):
if (("/edition.php" in low) or ("/file.php" in low)
or ("/series.php" in low)):
html = _fetch_libgen_details_html(target)
if html:
meta = _parse_libgen_details_html(html)
@@ -953,11 +977,12 @@ class Libgen(Provider):
extracted_title = str(meta.get("title") or "").strip()
extracted_isbns = (
meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
meta.get("isbn")
if isinstance(meta.get("isbn"),
list) else []
)
extracted_isbns = [
str(x).strip()
for x in (extracted_isbns or [])
str(x).strip() for x in (extracted_isbns or [])
if str(x).strip()
]
@@ -967,7 +992,9 @@ class Libgen(Provider):
if extracted_isbns:
md["isbn"] = extracted_isbns
for isbn_val in extracted_isbns:
isbn_norm = str(isbn_val).strip().replace("-", "")
isbn_norm = str(isbn_val
).strip().replace("-",
"")
if isbn_norm:
result.tag.add(f"isbn:{isbn_norm}")
if meta.get("edition_id"):
@@ -987,7 +1014,8 @@ class Libgen(Provider):
if enriched_source:
md["metadata_enriched_from"] = enriched_source
if extracted_title and ((not title) or title.startswith("http")):
if extracted_title and ((not title)
or title.startswith("http")):
title = extracted_title
except Exception:
pass
@@ -1041,7 +1069,8 @@ class LibgenSearch:
self.session = session or requests.Session()
self.session.headers.update(
{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
)
@@ -1052,7 +1081,8 @@ class LibgenSearch:
limit: int,
*,
timeout: Any = DEFAULT_TIMEOUT,
) -> List[Dict[str, Any]]:
) -> List[Dict[str,
Any]]:
"""Search libgen.rs/is/st JSON API when available.
Many LibGen mirrors expose /json.php which is less brittle than scraping.
@@ -1060,7 +1090,9 @@ class LibgenSearch:
url = f"{mirror}/json.php"
params = {
"req": query,
"res": max(1, min(100, int(limit) if limit else 50)),
"res": max(1,
min(100,
int(limit) if limit else 50)),
"column": "def",
"phrase": 1,
}
@@ -1086,7 +1118,8 @@ class LibgenSearch:
pages = item.get("Pages") or item.get("pages") or ""
language = item.get("Language") or item.get("language") or ""
size = item.get("Size") or item.get("size") or item.get("filesize") or ""
extension = item.get("Extension") or item.get("extension") or item.get("ext") or ""
extension = item.get("Extension") or item.get("extension"
) or item.get("ext") or ""
md5 = item.get("MD5") or item.get("md5") or ""
download_link = f"http://library.lol/main/{md5}" if md5 else ""
@@ -1121,7 +1154,8 @@ class LibgenSearch:
total_timeout: float = DEFAULT_SEARCH_TOTAL_TIMEOUT,
log_info: LogFn = None,
log_error: ErrorFn = None,
) -> List[Dict[str, Any]]:
) -> List[Dict[str,
Any]]:
"""Search LibGen mirrors.
Uses a total time budget across mirrors to avoid long hangs.
@@ -1135,7 +1169,10 @@ class LibgenSearch:
elapsed = time.monotonic() - started
remaining = total_timeout - elapsed
if remaining <= 0:
_call(log_error, f"[libgen] Search timed out after {total_timeout:.0f}s")
_call(
log_error,
f"[libgen] Search timed out after {total_timeout:.0f}s"
)
break
# Bound each request so we can try multiple mirrors within the budget.
@@ -1152,7 +1189,10 @@ class LibgenSearch:
results: List[Dict[str, Any]] = []
try:
results = self._search_libgen_json(
mirror, query, limit, timeout=request_timeout
mirror,
query,
limit,
timeout=request_timeout
)
except Exception:
results = []
@@ -1163,11 +1203,17 @@ class LibgenSearch:
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(
mirror, query, limit, timeout=request_timeout
mirror,
query,
limit,
timeout=request_timeout
)
else:
results = self._search_libgen_rs(
mirror, query, limit, timeout=request_timeout
mirror,
query,
limit,
timeout=request_timeout
)
if results:
@@ -1192,7 +1238,8 @@ class LibgenSearch:
limit: int,
*,
timeout: Any = DEFAULT_TIMEOUT,
) -> List[Dict[str, Any]]:
) -> List[Dict[str,
Any]]:
"""Search libgen.rs/is/st style mirrors."""
url = f"{mirror}/search.php"
params = {
@@ -1211,7 +1258,8 @@ class LibgenSearch:
return []
def _text(el: Any) -> str:
return " ".join([t.strip() for t in el.itertext() if t and str(t).strip()]).strip()
return " ".join([t.strip() for t in el.itertext()
if t and str(t).strip()]).strip()
try:
doc = lxml_html.fromstring(resp.content)
@@ -1314,13 +1362,16 @@ class LibgenSearch:
limit: int,
*,
timeout: Any = DEFAULT_TIMEOUT,
) -> List[Dict[str, Any]]:
) -> List[Dict[str,
Any]]:
"""Search libgen.li/gl style mirrors."""
url = f"{mirror}/index.php"
params = {
"req": query,
# Keep the request lightweight; covers slow the HTML response.
"res": max(1, min(100, int(limit) if limit else 50)),
"res": max(1,
min(100,
int(limit) if limit else 50)),
"covers": "off",
"filesuns": "all",
}
@@ -1332,7 +1383,8 @@ class LibgenSearch:
return []
def _text(el: Any) -> str:
return " ".join([t.strip() for t in el.itertext() if t and str(t).strip()]).strip()
return " ".join([t.strip() for t in el.itertext()
if t and str(t).strip()]).strip()
try:
doc = lxml_html.fromstring(resp.content)
@@ -1414,7 +1466,10 @@ class LibgenSearch:
# Extract ISBNs from meta cell (avoid using them as title)
# Matches 10 or 13-digit ISBN with optional leading 978/979.
isbn_candidates = re.findall(r"\b(?:97[89])?\d{9}[\dXx]\b", meta_text)
isbn_candidates = re.findall(
r"\b(?:97[89])?\d{9}[\dXx]\b",
meta_text
)
if isbn_candidates:
seen: List[str] = []
for s in isbn_candidates:
@@ -1453,7 +1508,8 @@ class LibgenSearch:
best_score: Optional[tuple] = None
for cand in deduped:
low = cand.lower().strip()
if low in {"cover", "edition"}:
if low in {"cover",
"edition"}:
continue
if _looks_like_isbn_blob(cand):
continue
@@ -1527,7 +1583,8 @@ def search_libgen(
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
) -> List[Dict[str,
Any]]:
"""Search Libgen using the robust scraper."""
searcher = LibgenSearch(session=session)
try:
@@ -1572,7 +1629,9 @@ def _resolve_download_url(
# Handle edition -> file links.
m = re.search(
r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']',
html,
flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
@@ -1581,7 +1640,9 @@ def _resolve_download_url(
# Handle series -> edition links.
m = re.search(
r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']',
html,
flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
@@ -1611,7 +1672,11 @@ def _resolve_download_url(
return urljoin(base_url, href)
# Next: library.lol main links.
m = re.search(r'href=["\']([^"\']*library\.lol[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*library\.lol[^"\']*)["\']',
html,
flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
@@ -1632,7 +1697,8 @@ def _resolve_download_url(
def _find_href_by_text(doc: Any, pattern: str) -> Optional[str]:
for a in doc.xpath("//a[@href]"):
t = " ".join([s.strip() for s in a.itertext() if s and str(s).strip()]).strip()
t = " ".join([s.strip() for s in a.itertext()
if s and str(s).strip()]).strip()
if t and re.search(pattern, t, re.IGNORECASE):
href = str(a.get("href") or "").strip()
if href and not href.lower().startswith("javascript:"):
@@ -1646,9 +1712,13 @@ def _resolve_download_url(
_call(log_info, f"[resolve] Checking: {current_url}")
if current_url.lower().endswith(
(".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")
):
if current_url.lower().endswith((".pdf",
".epub",
".mobi",
".djvu",
".azw3",
".cbz",
".cbr")):
return current_url
try:
@@ -1676,7 +1746,10 @@ def _resolve_download_url(
if next_url:
current_url = next_url
continue
_call(log_info, "[resolve] lxml not available and regex resolver found no links")
_call(
log_info,
"[resolve] lxml not available and regex resolver found no links"
)
return None
get_href = _find_href_by_text(doc, r"^GET$")
@@ -1722,12 +1795,16 @@ def _resolve_download_url(
return None
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
def _guess_filename_extension(download_url: str,
headers: Dict[str,
str]) -> Optional[str]:
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(
r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE
r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)",
content_disposition,
flags=re.IGNORECASE
)
if match:
filename = unquote(match.group(1).strip('"'))
@@ -1787,8 +1864,11 @@ def download_from_mirror(
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Tuple[bool, Optional[Path]]:
progress_callback: Optional[Callable[[int,
int],
None]] = None,
) -> Tuple[bool,
Optional[Path]]:
"""Download file from a LibGen mirror URL with optional progress tracking."""
session = session or requests.Session()
output_path = Path(output_path)
@@ -1807,7 +1887,8 @@ def download_from_mirror(
downloaded = 0
total_size = 0
headers: Dict[str, str] = {}
headers: Dict[str,
str] = {}
with session.get(download_url, stream=True, timeout=60) as r:
r.raise_for_status()
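The `LibgenSearch.search` docstring above promises a total time budget across mirrors, but the hunks split the loop across several chunks. A minimal self-contained sketch of that logic (`search_one` is a hypothetical per-mirror search callable standing in for `_search_libgen_json`/`_search_libgen_rs`/`_search_libgen_li`):

import time
from typing import Any, Callable, Dict, List

def search_with_budget(
    mirrors: List[str],
    search_one: Callable[[str, float], List[Dict[str, Any]]],
    total_timeout: float = 30.0,
) -> List[Dict[str, Any]]:
    started = time.monotonic()
    for mirror in mirrors:
        remaining = total_timeout - (time.monotonic() - started)
        if remaining <= 0:
            break  # budget exhausted: stop instead of hanging on slow mirrors
        per_request = min(10.0, remaining)  # bound each request within the budget
        try:
            results = search_one(mirror, per_request)
        except Exception:
            results = []
        if results:
            return results
    return []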

View File

@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional
from API.loc import LOCClient
from ProviderCore.base import Provider, SearchResult
from cli_syntax import get_free_text, parse_query
from SYS.cli_syntax import get_free_text, parse_query
from SYS.logger import log
@@ -23,13 +23,16 @@ class LOC(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
_ = kwargs
parsed = parse_query(query or "")
text = get_free_text(parsed).strip()
fields = parsed.get("fields", {}) if isinstance(parsed, dict) else {}
fields = parsed.get("fields",
{}) if isinstance(parsed,
dict) else {}
# Allow explicit q: override.
q = str(fields.get("q") or text or "").strip()
@@ -37,7 +40,8 @@ class LOC(Provider):
return []
# Pass through any extra filters supported by the LoC API.
extra: Dict[str, Any] = {}
extra: Dict[str,
Any] = {}
if isinstance(filters, dict):
extra.update(filters)
if isinstance(fields, dict):
@@ -57,7 +61,10 @@ class LOC(Provider):
while len(results) < max(0, int(limit)):
payload = client.search_chronicling_america(
q, start=start, count=page_size, extra_params=extra
q,
start=start,
count=page_size,
extra_params=extra
)
items = payload.get("results")
if not isinstance(items, list) or not items:
@@ -108,10 +115,14 @@ class LOC(Provider):
annotations=annotations,
media_kind="document",
columns=[
("Title", title),
("Date", date),
("Format", fmt_text),
("URL", url),
("Title",
title),
("Date",
date),
("Format",
fmt_text),
("URL",
url),
],
full_metadata=it,
)
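The LOC hunks page through `client.search_chronicling_america(q, start=..., count=..., extra_params=...)`, but the client itself is not shown. A minimal sketch against LoC's public Chronicling America endpoint, which is presumably what that client wraps (the public API returns an `items` list; the provider reads a normalized `results` list from its own client):

import requests

def search_pages(q: str, page: int = 1, rows: int = 20) -> list:
    resp = requests.get(
        "https://chroniclingamerica.loc.gov/search/pages/results/",
        params={"andtext": q, "format": "json", "page": page, "rows": rows},
        timeout=10,
    )
    resp.raise_for_status()
    payload = resp.json() or {}
    items = payload.get("items")
    return items if isinstance(items, list) else []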

View File

@@ -11,8 +11,9 @@ import requests
from ProviderCore.base import Provider
_MATRIX_INIT_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}
_MATRIX_INIT_CHECK_CACHE: Dict[str,
Tuple[bool,
Optional[str]]] = {}
def _sniff_mime_from_header(path: Path) -> Optional[str]:
@@ -79,9 +80,10 @@ def _sniff_mime_from_header(path: Path) -> Optional[str]:
return None
def _classify_matrix_upload(
path: Path, *, explicit_mime_type: Optional[str] = None
) -> Tuple[str, str]:
def _classify_matrix_upload(path: Path,
*,
explicit_mime_type: Optional[str] = None) -> Tuple[str,
str]:
"""Return (mime_type, msgtype) for Matrix uploads."""
mime_type = str(explicit_mime_type or "").strip() or None
@@ -94,9 +96,11 @@ def _classify_matrix_upload(
# Refinements based on extension for ambiguous containers.
ext = path.suffix.lower()
if ext in {".m4a", ".aac"}:
if ext in {".m4a",
".aac"}:
mime_type = mime_type or "audio/mp4"
if ext in {".mkv", ".webm"}:
if ext in {".mkv",
".webm"}:
mime_type = mime_type or "video/x-matroska"
if ext in {".ogv"}:
mime_type = mime_type or "video/ogg"
@@ -142,7 +146,13 @@ def _classify_matrix_upload(
".3gp",
".ogv",
}
image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
image_exts = {".jpg",
".jpeg",
".png",
".gif",
".webp",
".bmp",
".tiff"}
if ext in audio_exts:
msgtype = "m.audio"
elif ext in video_exts:
@@ -162,9 +172,10 @@ def _normalize_homeserver(value: str) -> str:
return text.rstrip("/")
def _matrix_health_check(
*, homeserver: str, access_token: Optional[str]
) -> Tuple[bool, Optional[str]]:
def _matrix_health_check(*,
homeserver: str,
access_token: Optional[str]) -> Tuple[bool,
Optional[str]]:
"""Lightweight Matrix reachability/auth validation.
- Always checks `/versions` (no auth).
@@ -180,9 +191,13 @@ def _matrix_health_check(
return False, f"Homeserver returned {resp.status_code}"
if access_token:
headers = {"Authorization": f"Bearer {access_token}"}
headers = {
"Authorization": f"Bearer {access_token}"
}
resp = requests.get(
f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5
f"{base}/_matrix/client/v3/account/whoami",
headers=headers,
timeout=5
)
if resp.status_code != 200:
return False, f"Authentication failed: {resp.status_code}"
@@ -201,9 +216,10 @@ class Matrix(Provider):
self._init_reason: Optional[str] = None
matrix_conf = (
self.config.get("provider", {}).get("matrix", {})
if isinstance(self.config, dict)
else {}
self.config.get("provider",
{}).get("matrix",
{}) if isinstance(self.config,
dict) else {}
)
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
@@ -237,14 +253,18 @@ class Matrix(Provider):
return False
if self._init_ok is False:
return False
matrix_conf = self.config.get("provider", {}).get("matrix", {})
matrix_conf = self.config.get("provider",
{}).get("matrix",
{})
return bool(
matrix_conf.get("homeserver")
and (matrix_conf.get("access_token") or matrix_conf.get("password"))
)
def _get_homeserver_and_token(self) -> Tuple[str, str]:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
matrix_conf = self.config.get("provider",
{}).get("matrix",
{})
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
if not homeserver:
@@ -262,8 +282,14 @@ class Matrix(Provider):
Uses `GET /_matrix/client/v3/joined_rooms`.
"""
base, token = self._get_homeserver_and_token()
headers = {"Authorization": f"Bearer {token}"}
resp = requests.get(f"{base}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
headers = {
"Authorization": f"Bearer {token}"
}
resp = requests.get(
f"{base}/_matrix/client/v3/joined_rooms",
headers=headers,
timeout=10
)
if resp.status_code != 200:
raise Exception(f"Matrix joined_rooms failed: {resp.text}")
data = resp.json() or {}
@@ -275,18 +301,24 @@ class Matrix(Provider):
out.append(rid.strip())
return out
def list_rooms(self, *, room_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
def list_rooms(self,
*,
room_ids: Optional[List[str]] = None) -> List[Dict[str,
Any]]:
"""Return joined rooms, optionally limited to a subset.
Performance note: room names require additional per-room HTTP requests.
If `room_ids` is provided, only those rooms will have name lookups.
"""
base, token = self._get_homeserver_and_token()
headers = {"Authorization": f"Bearer {token}"}
headers = {
"Authorization": f"Bearer {token}"
}
joined = self.list_joined_room_ids()
if room_ids:
allowed = {str(v).strip().casefold() for v in room_ids if str(v).strip()}
allowed = {str(v).strip().casefold()
for v in room_ids if str(v).strip()}
if allowed:
# Accept either full IDs (!id:hs) or short IDs (!id).
def _is_allowed(rid: str) -> bool:
@@ -319,7 +351,10 @@ class Matrix(Provider):
name = maybe
except Exception:
pass
out.append({"room_id": room_id, "name": name})
out.append({
"room_id": room_id,
"name": name
})
return out
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
@@ -349,10 +384,17 @@ class Matrix(Provider):
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(
handle, total_bytes=int(path.stat().st_size), label="upload"
handle,
total_bytes=int(path.stat().st_size),
label="upload"
)
resp = requests.post(
upload_url, headers=headers, data=wrapped, params={"filename": filename}
upload_url,
headers=headers,
data=wrapped,
params={
"filename": filename
}
)
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
@@ -366,7 +408,7 @@ class Matrix(Provider):
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://") :]
rest = curi[len("mxc://"):]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
@@ -376,14 +418,24 @@ class Matrix(Provider):
except Exception:
download_url_for_store = ""
info = {"mimetype": mime_type, "size": path.stat().st_size}
payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
info = {
"mimetype": mime_type,
"size": path.stat().st_size
}
payload = {
"msgtype": msgtype,
"body": filename,
"url": content_uri,
"info": info
}
# The correct Matrix client API send endpoint requires a transaction ID.
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
encoded_room = quote(str(room_id), safe="")
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
send_headers = {
"Authorization": f"Bearer {token}"
}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send message failed: {send_resp.text}")
@@ -391,8 +443,7 @@ class Matrix(Provider):
event_id = (send_resp.json() or {}).get("event_id")
link = (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
if event_id else f"https://matrix.to/#/{room_id}"
)
# Optional: if a PipeObject is provided and it already has store+hash,
@@ -403,7 +454,10 @@ class Matrix(Provider):
from Store import Store
# Prefer the direct media download URL for storage backends.
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(
pipe_obj,
download_url_for_store or link,
)
@@ -424,8 +478,13 @@ class Matrix(Provider):
encoded_room = quote(str(room_id), safe="")
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
payload = {"msgtype": "m.text", "body": message}
send_headers = {
"Authorization": f"Bearer {token}"
}
payload = {
"msgtype": "m.text",
"body": message
}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send text failed: {send_resp.text}")
@@ -433,19 +492,25 @@ class Matrix(Provider):
event_id = (send_resp.json() or {}).get("event_id")
return (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
if event_id else f"https://matrix.to/#/{room_id}"
)
def upload(self, file_path: str, **kwargs: Any) -> str:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
matrix_conf = self.config.get("provider",
{}).get("matrix",
{})
room_id = matrix_conf.get("room_id")
if not room_id:
raise Exception("Matrix room_id missing")
return self.upload_to_room(file_path, str(room_id))
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
self,
selected_items: List[Any],
*,
ctx: Any,
stage_is_last: bool = True,
**_kwargs: Any
) -> bool:
"""Handle Matrix room selection via `@N`.
@@ -501,7 +566,11 @@ class Matrix(Provider):
print(f"Matrix upload file missing: {file_path}")
continue
link = self.upload_to_room(str(media_path), str(room_id), pipe_obj=pipe_obj)
link = self.upload_to_room(
str(media_path),
str(room_id),
pipe_obj=pipe_obj
)
if link:
print(link)
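The Matrix hunks above spell out the two-step send: POST the bytes to `/_matrix/media/v3/upload` to get an `mxc://` URI, then PUT an `m.room.message` event with a fresh transaction ID. A minimal sketch of just those two requests, with endpoints and payload shape taken from the diff itself:

import time
import uuid
from urllib.parse import quote

import requests

def send_file(base: str, token: str, room_id: str, path: str, mime: str) -> str:
    filename = path.rsplit("/", 1)[-1]
    with open(path, "rb") as handle:
        resp = requests.post(
            f"{base}/_matrix/media/v3/upload",
            headers={"Authorization": f"Bearer {token}", "Content-Type": mime},
            data=handle,
            params={"filename": filename},
        )
    resp.raise_for_status()
    content_uri = resp.json()["content_uri"]  # mxc://<server>/<media-id>
    # The send endpoint requires a per-request transaction ID.
    txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
    send_url = (
        f"{base}/_matrix/client/v3/rooms/{quote(room_id, safe='')}"
        f"/send/m.room.message/{txn_id}"
    )
    payload = {"msgtype": "m.file", "body": filename, "url": content_uri}
    send_resp = requests.put(
        send_url, headers={"Authorization": f"Bearer {token}"}, json=payload
    )
    send_resp.raise_for_status()
    return content_uri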

View File

@@ -16,7 +16,6 @@ try: # Optional dependency
except ImportError: # pragma: no cover - optional
musicbrainzngs = None
try: # Optional dependency
import yt_dlp # type: ignore
except ImportError: # pragma: no cover - optional
@@ -62,9 +61,18 @@ class ITunesProvider(MetadataProvider):
"""Metadata provider using the iTunes Search API."""
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
params = {"term": query, "media": "music", "entity": "song", "limit": limit}
params = {
"term": query,
"media": "music",
"entity": "song",
"limit": limit
}
try:
resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
resp = requests.get(
"https://itunes.apple.com/search",
params=params,
timeout=10
)
resp.raise_for_status()
results = resp.json().get("results", [])
except Exception as exc:
@@ -77,7 +85,8 @@ class ITunesProvider(MetadataProvider):
"title": r.get("trackName"),
"artist": r.get("artistName"),
"album": r.get("collectionName"),
"year": str(r.get("releaseDate", ""))[:4],
"year": str(r.get("releaseDate",
""))[:4],
"provider": self.name,
"raw": r,
}
@@ -100,17 +109,22 @@ class OpenLibraryMetadataProvider(MetadataProvider):
try:
# Prefer ISBN-specific search when the query looks like one
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (
10,
13,
):
if query_clean.replace("-",
"").isdigit() and len(query_clean.replace("-",
"")) in (
10,
13,
):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
resp = requests.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": limit},
params={
"q": q,
"limit": limit
},
timeout=10,
)
resp.raise_for_status()
@@ -202,7 +216,10 @@ class GoogleBooksMetadataProvider(MetadataProvider):
return []
# Prefer ISBN queries when possible
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
if query_clean.replace("-",
"").isdigit() and len(query_clean.replace("-",
"")) in (10,
13):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
@@ -210,7 +227,10 @@ class GoogleBooksMetadataProvider(MetadataProvider):
try:
resp = requests.get(
"https://www.googleapis.com/books/v1/volumes",
params={"q": q, "maxResults": limit},
params={
"q": q,
"maxResults": limit
},
timeout=10,
)
resp.raise_for_status()
@@ -228,7 +248,10 @@ class GoogleBooksMetadataProvider(MetadataProvider):
year = str(published_date)[:4] if published_date else ""
identifiers_raw = info.get("industryIdentifiers") or []
identifiers: Dict[str, Optional[str]] = {"googlebooks": volume.get("id")}
identifiers: Dict[str,
Optional[str]] = {
"googlebooks": volume.get("id")
}
for ident in identifiers_raw:
if not isinstance(ident, dict):
continue
@@ -253,7 +276,8 @@ class GoogleBooksMetadataProvider(MetadataProvider):
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
"description": info.get("description",
""),
}
)
@@ -341,7 +365,8 @@ class ISBNsearchMetadataProvider(MetadataProvider):
if m_title:
title = self._strip_html_to_text(m_title.group(1))
raw_fields: Dict[str, str] = {}
raw_fields: Dict[str,
str] = {}
strong_matches = list(re.finditer(r"(?is)<strong\b[^>]*>(.*?)</strong>", html))
for idx, m in enumerate(strong_matches):
label_raw = self._strip_html_to_text(m.group(1))
@@ -354,13 +379,14 @@ class ISBNsearchMetadataProvider(MetadataProvider):
chunk_start = m.end()
# Stop at next <strong> or end of document.
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
strong_matches[idx + 1].start() if
(idx + 1) < len(strong_matches) else len(html)
)
chunk = html[chunk_start:chunk_end]
# Prefer stopping within the same paragraph when possible.
m_end = re.search(r"(?is)(</p>|<br\s*/?>)", chunk)
if m_end:
chunk = chunk[: m_end.start()]
chunk = chunk[:m_end.start()]
val_text = self._strip_html_to_text(chunk)
if not val_text:
@@ -391,7 +417,9 @@ class ISBNsearchMetadataProvider(MetadataProvider):
authors: List[str] = []
if author_text:
# Split on common separators; keep multi-part names intact.
for part in re.split(r"\s*(?:,|;|\band\b|\&|\|)\s*", author_text, flags=re.IGNORECASE):
for part in re.split(r"\s*(?:,|;|\band\b|\&|\|)\s*",
author_text,
flags=re.IGNORECASE):
p = str(part or "").strip()
if p:
authors.append(p)
@@ -412,23 +440,28 @@ class ISBNsearchMetadataProvider(MetadataProvider):
if t and t not in isbn_tokens:
isbn_tokens.append(t)
item: Dict[str, Any] = {
"title": title or "",
# Keep UI columns compatible with the generic metadata table.
"artist": ", ".join(authors) if authors else "",
"album": publisher or "",
"year": year or "",
"provider": self.name,
"authors": authors,
"publisher": publisher or "",
"language": language or "",
"pages": pages or "",
"identifiers": {
"isbn_13": next((t for t in isbn_tokens if len(t) == 13), None),
"isbn_10": next((t for t in isbn_tokens if len(t) == 10), None),
},
"raw_fields": raw_fields,
}
item: Dict[str,
Any] = {
"title": title or "",
# Keep UI columns compatible with the generic metadata table.
"artist": ", ".join(authors) if authors else "",
"album": publisher or "",
"year": year or "",
"provider": self.name,
"authors": authors,
"publisher": publisher or "",
"language": language or "",
"pages": pages or "",
"identifiers": {
"isbn_13":
next((t for t in isbn_tokens if len(t) == 13),
None),
"isbn_10":
next((t for t in isbn_tokens if len(t) == 10),
None),
},
"raw_fields": raw_fields,
}
# Only return usable items.
if not item.get("title") and not any(item["identifiers"].values()):
@@ -495,7 +528,10 @@ class MusicBrainzMetadataProvider(MetadataProvider):
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
if not musicbrainzngs:
log("musicbrainzngs is not installed; skipping MusicBrainz scrape", file=sys.stderr)
log(
"musicbrainzngs is not installed; skipping MusicBrainz scrape",
file=sys.stderr
)
return []
q = (query or "").strip()
@@ -526,12 +562,15 @@ class MusicBrainzMetadataProvider(MetadataProvider):
if isinstance(artist_credit, list) and artist_credit:
first = artist_credit[0]
if isinstance(first, dict):
artist = first.get("name") or first.get("artist", {}).get("name", "")
artist = first.get("name") or first.get("artist",
{}).get("name",
"")
elif isinstance(first, str):
artist = first
album = ""
release_list = rec.get("release-list") or rec.get("releases") or rec.get("release")
release_list = rec.get("release-list") or rec.get("releases"
) or rec.get("release")
if isinstance(release_list, list) and release_list:
first_rel = release_list[0]
if isinstance(first_rel, dict):
@@ -634,7 +673,8 @@ class YtdlpMetadataProvider(MetadataProvider):
upload_date = str(info.get("upload_date") or "")
release_date = str(info.get("release_date") or "")
year = (release_date or upload_date)[:4] if (release_date or upload_date) else ""
year = (release_date
or upload_date)[:4] if (release_date or upload_date) else ""
# Provide basic columns for the standard metadata selection table.
# NOTE: This is best-effort; many extractors don't provide artist/album.
@@ -716,15 +756,16 @@ class YtdlpMetadataProvider(MetadataProvider):
# Registry ---------------------------------------------------------------
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"itunes": ITunesProvider,
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
"isbnsearch": ISBNsearchMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
}
_METADATA_PROVIDERS: Dict[str,
Type[MetadataProvider]] = {
"itunes": ITunesProvider,
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
"isbnsearch": ISBNsearchMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
}
def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
@@ -732,7 +773,8 @@ def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
availability: Dict[str,
bool] = {}
for name, cls in _METADATA_PROVIDERS.items():
try:
_ = cls(config)
@@ -743,9 +785,10 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
return availability
def get_metadata_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[MetadataProvider]:
def get_metadata_provider(name: str,
config: Optional[Dict[str,
Any]] = None
) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None
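The registry above gives every metadata source a uniform `search()` interface; a minimal usage sketch, mirroring how the Libgen hunks consume it for ISBN enrichment (the ISBN is just an example value):

from Provider.metadata_provider import get_metadata_provider

provider = get_metadata_provider("isbnsearch", {})
items = provider.search("9780140328721", limit=1) if provider else []
for item in items:
    print(item.get("title"), item.get("identifiers"))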

View File

@@ -19,7 +19,7 @@ import requests
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import download_file, sanitize_filename
from cli_syntax import get_field, get_free_text, parse_query
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log
from SYS.utils import unique_path
@@ -52,7 +52,9 @@ def _image_paths_to_pdf_bytes(images: List[str]) -> Optional[bytes]:
continue
with Image.open(img_path) as im: # type: ignore[attr-defined]
# Ensure PDF-compatible mode.
if im.mode in {"RGBA", "LA", "P"}:
if im.mode in {"RGBA",
"LA",
"P"}:
im = im.convert("RGB")
else:
im = im.convert("RGB")
@@ -125,7 +127,8 @@ def _resolve_edition_id(doc: Dict[str, Any]) -> str:
def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]:
"""Return (lendable, status_text) using OpenLibrary volumes API."""
try:
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith("M"):
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith(
"M"):
return False, "not-an-edition"
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
@@ -155,7 +158,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
def _resolve_archive_id(
session: requests.Session, edition_id: str, ia_candidates: List[str]
session: requests.Session,
edition_id: str,
ia_candidates: List[str]
) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
@@ -165,7 +170,10 @@ def _resolve_archive_id(
# Otherwise query the edition JSON.
try:
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=6)
resp = session.get(
f"https://openlibrary.org/books/{edition_id}.json",
timeout=6
)
resp.raise_for_status()
data = resp.json() or {}
@@ -206,13 +214,19 @@ def _archive_id_from_url(url: str) -> str:
# - /details/<id>/...
# - /borrow/<id>
# - /download/<id>/...
if len(parts) >= 2 and parts[0].lower() in {"details", "borrow", "download", "stream"}:
if len(parts) >= 2 and parts[0].lower() in {"details",
"borrow",
"download",
"stream"}:
return str(parts[1]).strip()
# Sometimes the identifier is the first segment.
if len(parts) >= 1:
first = str(parts[0]).strip()
if first and first.lower() not in {"account", "services", "search", "advancedsearch.php"}:
if first and first.lower() not in {"account",
"services",
"search",
"advancedsearch.php"}:
return first
return ""
@@ -249,14 +263,17 @@ def _coerce_archive_field_list(value: Any) -> List[str]:
return [s] if s else []
def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any]) -> List[str]:
def _archive_item_metadata_to_tags(archive_id: str,
item_metadata: Dict[str,
Any]) -> List[str]:
"""Map Archive.org metadata JSON (the `metadata` object) to tag strings.
This is intentionally best-effort and conservative: it focuses on stable,
useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
"""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata, dict) else {}
meta = item_metadata if isinstance(item_metadata,
dict) else {}
tags: List[str] = []
seen: set[str] = set()
@@ -374,7 +391,10 @@ def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any
return tags
def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[str, Any]:
def _fetch_archive_item_metadata(archive_id: str,
*,
timeout: int = 8) -> Dict[str,
Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
@@ -384,7 +404,8 @@ def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[s
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta, dict) else {}
return meta if isinstance(meta,
dict) else {}
class OpenLibrary(Provider):
@@ -404,7 +425,9 @@ class OpenLibrary(Provider):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
@staticmethod
def _credential_archive(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
def _credential_archive(config: Dict[str,
Any]) -> Tuple[Optional[str],
Optional[str]]:
"""Get Archive.org email/password from config.
Supports:
@@ -415,9 +438,11 @@ class OpenLibrary(Provider):
if not isinstance(config, dict):
return None, None
provider_config = config.get("provider", {})
provider_config = config.get("provider",
{})
if isinstance(provider_config, dict):
openlibrary_config = provider_config.get("openlibrary", {})
openlibrary_config = provider_config.get("openlibrary",
{})
if isinstance(openlibrary_config, dict):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
@@ -456,7 +481,10 @@ class OpenLibrary(Provider):
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
session = requests.Session()
token_resp = session.get("https://archive.org/services/account/login/", timeout=30)
token_resp = session.get(
"https://archive.org/services/account/login/",
timeout=30
)
try:
token_json = token_resp.json()
except Exception as exc:
@@ -473,8 +501,14 @@ class OpenLibrary(Provider):
if not token:
raise RuntimeError("Archive login token missing")
headers = {"Content-Type": "application/x-www-form-urlencoded"}
payload = {"username": email, "password": password, "t": token}
headers = {
"Content-Type": "application/x-www-form-urlencoded"
}
payload = {
"username": email,
"password": password,
"t": token
}
login_resp = session.post(
"https://archive.org/services/account/login/",
@@ -499,22 +533,34 @@ class OpenLibrary(Provider):
@classmethod
def _archive_loan(
cls, session: requests.Session, book_id: str, *, verbose: bool = True
cls,
session: requests.Session,
book_id: str,
*,
verbose: bool = True
) -> requests.Session:
data = {"action": "grant_access", "identifier": book_id}
data = {
"action": "grant_access",
"identifier": book_id
}
session.post(
"https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
"https://archive.org/services/loans/loan/searchInside.php",
data=data,
timeout=30
)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
response = session.post(
"https://archive.org/services/loans/loan/",
data=data,
timeout=30
)
if response.status_code == 400:
try:
err = (response.json() or {}).get("error")
if (
err
== "This book is not available to borrow at this time. Please try again later."
):
if (err ==
"This book is not available to borrow at this time. Please try again later."
):
raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
except cls.BookNotAvailableError:
@@ -523,15 +569,26 @@ class OpenLibrary(Provider):
raise RuntimeError("The book cannot be borrowed")
data["action"] = "create_token"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
response = session.post(
"https://archive.org/services/loans/loan/",
data=data,
timeout=30
)
if "token" in (response.text or ""):
return session
raise RuntimeError("Something went wrong when trying to borrow the book")
@staticmethod
def _archive_return_loan(session: requests.Session, book_id: str) -> None:
data = {"action": "return_loan", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
data = {
"action": "return_loan",
"identifier": book_id
}
response = session.post(
"https://archive.org/services/loans/loan/",
data=data,
timeout=30
)
if response.status_code == 200:
try:
if (response.json() or {}).get("success"):
@@ -551,8 +608,8 @@ class OpenLibrary(Provider):
if session is None:
return
for url in (
"https://archive.org/account/logout",
"https://archive.org/account/logout.php",
"https://archive.org/account/logout",
"https://archive.org/account/logout.php",
):
try:
resp = session.get(url, timeout=15, allow_redirects=True)
@@ -579,7 +636,9 @@ class OpenLibrary(Provider):
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
resp.raise_for_status()
data = resp.json() if resp is not None else {}
meta = data.get("metadata", {}) if isinstance(data, dict) else {}
meta = data.get("metadata",
{}) if isinstance(data,
dict) else {}
collection = meta.get("collection") if isinstance(meta, dict) else None
values: List[str] = []
@@ -588,16 +647,20 @@ class OpenLibrary(Provider):
elif isinstance(collection, str):
values = [collection.strip().lower()]
if any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values):
if any(v in {"inlibrary",
"printdisabled",
"lendinglibrary"} for v in values):
return True, "archive-collection"
return False, "archive-not-lendable"
except Exception:
return False, "archive-metadata-error"
@staticmethod
def _archive_get_book_infos(
session: requests.Session, url: str
) -> Tuple[str, List[str], Dict[str, Any]]:
def _archive_get_book_infos(session: requests.Session,
url: str) -> Tuple[str,
List[str],
Dict[str,
Any]]:
"""Extract page links from Archive.org book reader."""
r = session.get(url, timeout=30).text
@@ -620,7 +683,8 @@ class OpenLibrary(Provider):
metadata = data.get("metadata") or {}
links: List[str] = []
br_data = (data.get("brOptions") or {}).get("data", [])
br_data = (data.get("brOptions") or {}).get("data",
[])
if isinstance(br_data, list):
for item in br_data:
if isinstance(item, list):
@@ -639,7 +703,11 @@ class OpenLibrary(Provider):
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
@staticmethod
def _archive_deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
def _archive_deobfuscate_image(
image_data: bytes,
link: str,
obf_header: str
) -> bytes:
if not AES or not Counter:
raise RuntimeError("Crypto library not available")
@@ -657,11 +725,18 @@ class OpenLibrary(Provider):
counter_bytes = base64.b64decode(counter_b64)
if len(counter_bytes) != 16:
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
raise ValueError(
f"Expected counter to be 16 bytes, got {len(counter_bytes)}"
)
prefix = counter_bytes[:8]
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
ctr = Counter.new(
64,
prefix=prefix,
initial_value=initial_value,
little_endian=False
) # type: ignore
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
decrypted_part = cipher.decrypt(image_data[:1024])
@@ -699,7 +774,11 @@ class OpenLibrary(Provider):
image = cls._archive_image_name(pages, i, directory)
obf_header = response.headers.get("X-Obfuscate")
if obf_header:
image_content = cls._archive_deobfuscate_image(response.content, link, obf_header)
image_content = cls._archive_deobfuscate_image(
response.content,
link,
obf_header
)
else:
image_content = response.content
@@ -715,7 +794,9 @@ class OpenLibrary(Provider):
links: List[str],
scale: int,
book_id: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
progress_callback: Optional[Callable[[int,
int],
None]] = None,
) -> List[str]:
links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
pages = len(links_scaled)
@@ -748,7 +829,8 @@ class OpenLibrary(Provider):
except Exception:
pass
elif tqdm:
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
for _ in tqdm(futures.as_completed(tasks),
total=len(tasks)): # type: ignore
pass
else:
for _ in futures.as_completed(tasks):
@@ -770,11 +852,16 @@ class OpenLibrary(Provider):
if not isinstance(file_info, dict):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
if filename.endswith(".pdf") and file_info.get("source"
) == "original":
pdf_url = (
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
)
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
check_response = requests.head(
pdf_url,
timeout=4,
allow_redirects=True
)
if check_response.status_code == 200:
return True, pdf_url
return False, ""
@@ -927,7 +1014,8 @@ class OpenLibrary(Provider):
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author", {}).get("key")
author_key = author.get("author",
{}).get("key")
if not author_key:
author_key = author.get("key")
@@ -937,7 +1025,9 @@ class OpenLibrary(Provider):
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json_module.loads(author_resp.content.decode("utf-8"))
author_data = json_module.loads(
author_resp.content.decode("utf-8")
)
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
@@ -1011,7 +1101,8 @@ class OpenLibrary(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
@@ -1032,7 +1123,10 @@ class OpenLibrary(Provider):
try:
resp = self._session.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": int(limit)},
params={
"q": q,
"limit": int(limit)
},
timeout=10,
)
resp.raise_for_status()
@@ -1048,9 +1142,13 @@ class OpenLibrary(Provider):
# Availability enrichment can be slow if done sequentially (it may require multiple
# network calls per row). Do it concurrently to keep the pipeline responsive.
docs = docs[: int(limit)]
docs = docs[:int(limit)]
def _compute_availability(doc_dict: Dict[str, Any]) -> Tuple[str, str, str, str]:
def _compute_availability(doc_dict: Dict[str,
Any]) -> Tuple[str,
str,
str,
str]:
edition_id_local = _resolve_edition_id(doc_dict)
if not edition_id_local:
return "no-olid", "", "", ""
@@ -1066,7 +1164,9 @@ class OpenLibrary(Provider):
try:
archive_id_local = _resolve_archive_id(
session_local, edition_id_local, ia_ids_local
session_local,
edition_id_local,
ia_ids_local
)
except Exception:
archive_id_local = ""
@@ -1089,17 +1189,23 @@ class OpenLibrary(Provider):
return "unavailable", reason_local, archive_id_local, ""
availability_rows: List[Tuple[str, str, str, str]] = [
("unknown", "", "", "") for _ in range(len(docs))
]
availability_rows: List[Tuple[str,
str,
str,
str]] = [
("unknown",
"",
"",
"") for _ in range(len(docs))
]
if docs:
max_workers = min(8, max(1, len(docs)))
done = 0
with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_index = {
executor.submit(_compute_availability, doc_dict): i
for i, doc_dict in enumerate(docs)
if isinstance(doc_dict, dict)
executor.submit(_compute_availability,
doc_dict): i
for i, doc_dict in enumerate(docs) if isinstance(doc_dict, dict)
}
for fut in futures.as_completed(list(future_to_index.keys())):
i = future_to_index[fut]
@@ -1145,11 +1251,16 @@ class OpenLibrary(Provider):
isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "")
columns = [
("Title", book_title),
("Author", ", ".join(authors_list)),
("Year", year),
("Avail", ""),
("OLID", edition_id),
("Title",
book_title),
("Author",
", ".join(authors_list)),
("Year",
year),
("Avail",
""),
("OLID",
edition_id),
]
# Determine availability using the concurrently computed enrichment.
@@ -1170,7 +1281,8 @@ class OpenLibrary(Provider):
annotations.append(f"isbn_10:{isbn_10}")
if ia_ids:
annotations.append("archive")
if availability in {"download", "borrow"}:
if availability in {"download",
"borrow"}:
annotations.append(availability)
results.append(
@@ -1178,17 +1290,17 @@ class OpenLibrary(Provider):
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}"
if edition_id
else (
f"https://openlibrary.org/books/{edition_id}" if edition_id else
(
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/")
else "https://openlibrary.org"
if isinstance(work_key,
str) and work_key.startswith("/") else
"https://openlibrary.org"
)
),
detail=(
(f"By: {', '.join(authors_list)}" if authors_list else "")
+ (f" ({year})" if year else "")
(f"By: {', '.join(authors_list)}" if authors_list else "") +
(f" ({year})" if year else "")
).strip(),
annotations=annotations,
media_kind="book",
@@ -1216,7 +1328,11 @@ class OpenLibrary(Provider):
self,
result: SearchResult,
output_dir: Path,
progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
progress_callback: Optional[Callable[[str,
int,
Optional[int],
str],
None]] = None,
) -> Optional[Path]:
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
@@ -1245,7 +1361,10 @@ class OpenLibrary(Provider):
archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))
if not archive_id:
log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
log(
"[openlibrary] No archive identifier available; cannot download",
file=sys.stderr
)
return None
# Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
@@ -1290,12 +1409,9 @@ class OpenLibrary(Provider):
session=self._session,
progress_callback=(
(
lambda downloaded, total, label: progress_callback(
"bytes", downloaded, total, label
)
)
if progress_callback is not None
else None
lambda downloaded, total, label:
progress_callback("bytes", downloaded, total, label)
) if progress_callback is not None else None
),
)
if ok:
@@ -1307,7 +1423,10 @@ class OpenLibrary(Provider):
try:
email, password = self._credential_archive(self.config or {})
if not email or not password:
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
log(
"[openlibrary] Archive credentials missing; cannot borrow",
file=sys.stderr
)
return None
lendable = True
@@ -1369,7 +1488,10 @@ class OpenLibrary(Provider):
continue
if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
log(
f"[openlibrary] Failed to extract pages: {last_exc}",
file=sys.stderr
)
return None
try:
@@ -1388,9 +1510,10 @@ class OpenLibrary(Provider):
scale=3,
book_id=archive_id,
progress_callback=(
(lambda done, total: progress_callback("pages", done, total, "pages"))
if progress_callback is not None
else None
(
lambda done, total:
progress_callback("pages", done, total, "pages")
) if progress_callback is not None else None
),
)
@@ -1436,7 +1559,10 @@ class OpenLibrary(Provider):
try:
self._archive_return_loan(session, archive_id)
except Exception as exc:
log(f"[openlibrary] Warning: failed to return loan: {exc}", file=sys.stderr)
log(
f"[openlibrary] Warning: failed to return loan: {exc}",
file=sys.stderr
)
try:
self._archive_logout(session)
except Exception:
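
The deobfuscation hunks above split a 16-byte counter into an 8-byte CTR prefix plus a big-endian initial value and decrypt only the first 1 KiB. A condensed pycryptodome sketch; the key derivation is outside the shown hunks, so `key` is assumed given:

import base64
from Crypto.Cipher import AES
from Crypto.Util import Counter

def deobfuscate_image(image_data: bytes, counter_b64: str, key: bytes) -> bytes:
    counter_bytes = base64.b64decode(counter_b64)
    if len(counter_bytes) != 16:
        raise ValueError(f"Expected 16-byte counter, got {len(counter_bytes)}")
    prefix = counter_bytes[:8]
    initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
    ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False)
    cipher = AES.new(key, AES.MODE_CTR, counter=ctr)
    # Only the first 1024 bytes are obfuscated; the remainder passes through untouched.
    return cipher.decrypt(image_data[:1024]) + image_data[1024:]
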

View File

@@ -15,7 +15,6 @@ from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
from models import ProgressBar
_SOULSEEK_NOISE_SUBSTRINGS = (
"search reply ticket does not match any search request",
"failed to receive transfer ticket on file connection",
@@ -82,11 +81,11 @@ def _configure_aioslsk_logging() -> None:
aioslsk to ERROR and stop propagation so it doesn't spam the CLI.
"""
for name in (
"aioslsk",
"aioslsk.network",
"aioslsk.search",
"aioslsk.transfer",
"aioslsk.transfer.manager",
"aioslsk",
"aioslsk.network",
"aioslsk.search",
"aioslsk.transfer",
"aioslsk.transfer.manager",
):
logger = logging.getLogger(name)
logger.setLevel(logging.ERROR)
@@ -237,7 +236,7 @@ class Soulseek(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
try:
from config import get_soulseek_username, get_soulseek_password
from SYS.config import get_soulseek_username, get_soulseek_password
user = get_soulseek_username(self.config)
pwd = get_soulseek_password(self.config)
@@ -257,7 +256,10 @@ class Soulseek(Provider):
filename = full_metadata.get("filename") or result.path
if not username or not filename:
log(f"[soulseek] Missing metadata for download: {result.title}", file=sys.stderr)
log(
f"[soulseek] Missing metadata for download: {result.title}",
file=sys.stderr
)
return None
# This cmdlet stack is synchronous; use asyncio.run for clarity.
@@ -294,9 +296,11 @@ class Soulseek(Provider):
log(f"[soulseek] Download error: {exc}", file=sys.stderr)
return None
async def perform_search(
self, query: str, timeout: float = 9.0, limit: int = 50
) -> List[Dict[str, Any]]:
async def perform_search(self,
query: str,
timeout: float = 9.0,
limit: int = 50) -> List[Dict[str,
Any]]:
"""Perform async Soulseek search."""
from aioslsk.client import SoulSeekClient
@@ -305,7 +309,10 @@ class Soulseek(Provider):
os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
settings = Settings(
credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)
credentials=CredentialsSettings(
username=self.USERNAME,
password=self.PASSWORD
)
)
client = SoulSeekClient(settings)
@@ -315,7 +322,10 @@ class Soulseek(Provider):
await client.start()
await client.login()
except Exception as exc:
log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr)
log(
f"[soulseek] Login failed: {type(exc).__name__}: {exc}",
file=sys.stderr
)
return []
try:
@@ -323,7 +333,10 @@ class Soulseek(Provider):
await self._collect_results(search_request, timeout=timeout)
return self._flatten_results(search_request)[:limit]
except Exception as exc:
log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr)
log(
f"[soulseek] Search error: {type(exc).__name__}: {exc}",
file=sys.stderr
)
return []
finally:
# Best-effort: try to cancel/close the search request before stopping
@@ -356,8 +369,12 @@ class Soulseek(Provider):
{
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0),
"filename": getattr(file_data,
"filename",
"?"),
"size": getattr(file_data,
"filesize",
0),
}
)
@@ -366,14 +383,22 @@ class Soulseek(Provider):
{
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0),
"filename": getattr(file_data,
"filename",
"?"),
"size": getattr(file_data,
"filesize",
0),
}
)
return flat
async def _collect_results(self, search_request: Any, timeout: float = 75.0) -> None:
async def _collect_results(
self,
search_request: Any,
timeout: float = 75.0
) -> None:
end = time.time() + timeout
last_count = 0
while time.time() < end:
@@ -387,20 +412,28 @@ class Soulseek(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
try:
flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit))
flat_results = asyncio.run(
self.perform_search(query,
timeout=9.0,
limit=limit)
)
if not flat_results:
return []
music_results: List[dict] = []
for item in flat_results:
filename = item["filename"]
ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else ""
ext = (
"." + filename.rsplit(".",
1)[-1].lower()
) if "." in filename else ""
if ext in self.MUSIC_EXTENSIONS:
music_results.append(item)
@@ -410,18 +443,23 @@ class Soulseek(Provider):
enriched_results: List[dict] = []
for item in music_results:
filename = item["filename"]
ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else ""
ext = (
"." + filename.rsplit(".",
1)[-1].lower()
) if "." in filename else ""
display_name = filename.replace("\\", "/").split("/")[-1]
path_parts = filename.replace("\\", "/").split("/")
artist = path_parts[-3] if len(path_parts) >= 3 else ""
album = (
path_parts[-2]
if len(path_parts) >= 3
else (path_parts[-2] if len(path_parts) == 2 else "")
path_parts[-2] if len(path_parts) >= 3 else
(path_parts[-2] if len(path_parts) == 2 else "")
)
base_name = display_name.rsplit(".", 1)[0] if "." in display_name else display_name
base_name = display_name.rsplit(
".",
1
)[0] if "." in display_name else display_name
track_num = ""
title = base_name
filename_artist = ""
@@ -457,7 +495,8 @@ class Soulseek(Provider):
if artist_filter or album_filter or track_filter:
filtered: List[dict] = []
for item in enriched_results:
if artist_filter and artist_filter not in item["artist"].lower():
if artist_filter and artist_filter not in item["artist"].lower(
):
continue
if album_filter and album_filter not in item["album"].lower():
continue
@@ -466,7 +505,9 @@ class Soulseek(Provider):
filtered.append(item)
enriched_results = filtered
enriched_results.sort(key=lambda item: (item["ext"].lower() != ".flac", -item["size"]))
enriched_results.sort(
key=lambda item: (item["ext"].lower() != ".flac", -item["size"])
)
results: List[SearchResult] = []
for item in enriched_results:
@@ -475,11 +516,16 @@ class Soulseek(Provider):
size_mb = int(item["size"] / 1024 / 1024)
columns = [
("Track", item["track_num"] or "?"),
("Title", item["title"][:40]),
("Artist", artist_display[:32]),
("Album", album_display[:32]),
("Size", f"{size_mb} MB"),
("Track",
item["track_num"] or "?"),
("Title",
item["title"][:40]),
("Artist",
artist_display[:32]),
("Album",
album_display[:32]),
("Size",
f"{size_mb} MB"),
]
results.append(
@@ -488,7 +534,8 @@ class Soulseek(Provider):
title=item["title"],
path=item["filename"],
detail=f"{artist_display} - {album_display}",
annotations=[f"{size_mb} MB", item["ext"].lstrip(".").upper()],
annotations=[f"{size_mb} MB",
item["ext"].lstrip(".").upper()],
media_kind="audio",
size_bytes=item["size"],
columns=columns,
@@ -515,7 +562,7 @@ class Soulseek(Provider):
# Require configured credentials.
try:
from config import get_soulseek_username, get_soulseek_password
from SYS.config import get_soulseek_username, get_soulseek_password
user = get_soulseek_username(self.config)
pwd = get_soulseek_password(self.config)
@@ -570,10 +617,16 @@ async def download_soulseek_file(
)
settings = Settings(
credentials=CredentialsSettings(username=login_user, password=login_pass)
credentials=CredentialsSettings(username=login_user,
password=login_pass)
)
async def _attempt_once(attempt_num: int) -> tuple[Optional[Path], Any, int, float]:
async def _attempt_once(
attempt_num: int
) -> tuple[Optional[Path],
Any,
int,
float]:
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
async with _suppress_aioslsk_asyncio_task_noise():
@@ -586,10 +639,14 @@ async def download_soulseek_file(
f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}",
file=sys.stderr,
)
debug(f"[soulseek] Requesting download from {username}: {filename}")
debug(
f"[soulseek] Requesting download from {username}: {filename}"
)
transfer = await client.transfers.add(
Transfer(username, filename, TransferDirection.DOWNLOAD)
Transfer(username,
filename,
TransferDirection.DOWNLOAD)
)
transfer.local_path = str(output_path)
await client.transfers.queue(transfer)
@@ -602,14 +659,29 @@ async def download_soulseek_file(
elapsed = time.time() - start_time
if elapsed > timeout:
log(
f"[soulseek] Download timeout after {timeout}s", file=sys.stderr
f"[soulseek] Download timeout after {timeout}s",
file=sys.stderr
)
bytes_done = int(
getattr(transfer,
"bytes_transfered",
0) or 0
)
state_val = getattr(
getattr(transfer,
"state",
None),
"VALUE",
None
)
bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
state_val = getattr(getattr(transfer, "state", None), "VALUE", None)
progress_bar.finish()
return None, state_val, bytes_done, elapsed
bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
bytes_done = int(
getattr(transfer,
"bytes_transfered",
0) or 0
)
total_bytes = int(getattr(transfer, "filesize", 0) or 0)
now = time.time()
if now - last_progress_time >= 0.5:
@@ -623,11 +695,18 @@ async def download_soulseek_file(
await asyncio.sleep(1)
final_state = getattr(getattr(transfer, "state", None), "VALUE", None)
final_state = getattr(
getattr(transfer,
"state",
None),
"VALUE",
None
)
downloaded_path = (
Path(transfer.local_path)
if getattr(transfer, "local_path", None)
else output_path
if getattr(transfer,
"local_path",
None) else output_path
)
final_elapsed = time.time() - start_time
@@ -636,7 +715,8 @@ async def download_soulseek_file(
# If a file was written, treat it as success even if state is odd.
try:
if downloaded_path.exists() and downloaded_path.stat().st_size > 0:
if downloaded_path.exists() and downloaded_path.stat(
).st_size > 0:
if final_state != TransferState.COMPLETE:
log(
f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.",
@@ -651,7 +731,8 @@ async def download_soulseek_file(
except Exception:
pass
if final_state == TransferState.COMPLETE and downloaded_path.exists():
if final_state == TransferState.COMPLETE and downloaded_path.exists(
):
debug(f"[soulseek] Download complete: {downloaded_path}")
return (
downloaded_path,
@@ -670,7 +751,8 @@ async def download_soulseek_file(
# Clean up 0-byte placeholder.
try:
if downloaded_path.exists() and downloaded_path.stat().st_size == 0:
if downloaded_path.exists() and downloaded_path.stat(
).st_size == 0:
downloaded_path.unlink(missing_ok=True)
except Exception:
pass
@@ -696,7 +778,8 @@ async def download_soulseek_file(
should_retry = (bytes_done == 0) and (elapsed < 15.0)
if attempt < max_attempts and should_retry:
log(
f"[soulseek] Retrying after fast failure (state={final_state})", file=sys.stderr
f"[soulseek] Retrying after fast failure (state={final_state})",
file=sys.stderr
)
await asyncio.sleep(2)
continue
@@ -704,7 +787,10 @@ async def download_soulseek_file(
return None
except ImportError:
log("[soulseek] aioslsk not installed. Install with: pip install aioslsk", file=sys.stderr)
log(
"[soulseek] aioslsk not installed. Install with: pip install aioslsk",
file=sys.stderr
)
return None
except Exception as exc:
log(f"[soulseek] Download failed: {type(exc).__name__}: {exc}", file=sys.stderr)

View File

@@ -12,7 +12,6 @@ from urllib.parse import urlparse
from ProviderCore.base import Provider, SearchResult
_TELEGRAM_DEFAULT_TIMESTAMP_STEM_RE = re.compile(
r"^(?P<prefix>photo|video|document|audio|voice|animation)_(?P<date>\d{4}-\d{2}-\d{2})_(?P<time>\d{2}-\d{2}-\d{2})(?: \(\d+\))?$",
flags=re.IGNORECASE,
@@ -39,7 +38,10 @@ def _unique_path(path: Path) -> Path:
return parent / f"{stem} (copy){suffix}"
def _maybe_strip_telegram_timestamped_default_filename(*, downloaded_path: Path) -> Path:
def _maybe_strip_telegram_timestamped_default_filename(
*,
downloaded_path: Path
) -> Path:
"""Normalize Telethon's default timestamped names.
Examples:
@@ -87,7 +89,8 @@ def _looks_like_telegram_message_url(url: str) -> bool:
except Exception:
return False
host = (parsed.hostname or "").lower().strip()
if host in {"t.me", "telegram.me"}:
if host in {"t.me",
"telegram.me"}:
return True
if host.endswith(".t.me"):
return True
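
The host check above accepts t.me, telegram.me, and *.t.me subdomains. A standalone sketch, with message-ID extraction added purely as an illustrative assumption (real URL layouts vary, e.g. t.me/c/<internal-id>/<msg>):

from urllib.parse import urlparse

def parse_message_url(url: str):
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower().strip()
    if host not in {"t.me", "telegram.me"} and not host.endswith(".t.me"):
        return None
    parts = [p for p in parsed.path.split("/") if p]
    # Hypothetical extraction: treat a trailing numeric segment as the message id.
    if len(parts) >= 2 and parts[-1].isdigit():
        return "/".join(parts[:-1]), int(parts[-1])
    return None

# parse_message_url("https://t.me/some_channel/123") -> ("some_channel", 123)
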
@@ -147,9 +150,10 @@ class Telegram(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
telegram_conf = (
self.config.get("provider", {}).get("telegram", {})
if isinstance(self.config, dict)
else {}
self.config.get("provider",
{}).get("telegram",
{}) if isinstance(self.config,
dict) else {}
)
self._app_id = telegram_conf.get("app_id")
self._api_hash = telegram_conf.get("api_hash")
@@ -178,8 +182,10 @@ class Telegram(Provider):
If an event loop is already running in this thread (common in REPL/TUI),
runs the coroutine in a worker thread with its own loop.
"""
result: Dict[str, Any] = {}
err: Dict[str, Any] = {}
result: Dict[str,
Any] = {}
err: Dict[str,
Any] = {}
def _runner() -> None:
loop = asyncio.new_event_loop()
@@ -201,7 +207,10 @@ class Telegram(Provider):
except Exception:
pass
if pending:
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
loop.run_until_complete(
asyncio.gather(*pending,
return_exceptions=True)
)
try:
loop.run_until_complete(loop.shutdown_asyncgens())
except Exception:
@@ -261,7 +270,10 @@ class Telegram(Provider):
if not legacy_session.is_file():
return
for suffix in (".session", ".session-journal", ".session-wal", ".session-shm"):
for suffix in (".session",
".session-journal",
".session-wal",
".session-shm"):
src = Path(str(legacy_base) + suffix)
dst = Path(str(new_base) + suffix)
try:
@@ -355,7 +367,10 @@ class Telegram(Provider):
except EOFError:
choice = ""
use_bot = choice in {"2", "b", "bot", "token"}
use_bot = choice in {"2",
"b",
"bot",
"token"}
bot_token = ""
if use_bot:
sys.stderr.write("[telegram] Bot token: ")
@@ -544,7 +559,9 @@ class Telegram(Provider):
app_id = int(self._app_id) if self._app_id not in (None, "") else None
except Exception:
app_id = None
api_hash = str(self._api_hash).strip() if self._api_hash not in (None, "") else ""
api_hash = str(self._api_hash
).strip() if self._api_hash not in (None,
"") else ""
if not bool(app_id and api_hash):
return False
@@ -565,10 +582,13 @@ class Telegram(Provider):
bot_token = str(self._bot_token or "").strip()
if bot_token:
return bool(
self._ensure_session_with_bot_token(bot_token) and self._session_is_authorized()
self._ensure_session_with_bot_token(bot_token)
and self._session_is_authorized()
)
if prompt:
return bool(self._ensure_session_interactive() and self._session_is_authorized())
return bool(
self._ensure_session_interactive() and self._session_is_authorized()
)
return False
def list_chats(self, *, limit: int = 200) -> list[Dict[str, Any]]:
@@ -652,12 +672,20 @@ class Telegram(Provider):
kind = "user"
else:
kind = (
type(entity).__name__.lower() if entity is not None else "unknown"
type(entity).__name__.lower()
if entity is not None else "unknown"
)
except Exception:
kind = "unknown"
rows.append({"id": chat_id, "title": title, "username": username, "type": kind})
rows.append(
{
"id": chat_id,
"title": title,
"username": username,
"type": kind
}
)
return rows
finally:
try:
@@ -672,7 +700,9 @@ class Telegram(Provider):
# Sort for stable display.
try:
rows.sort(key=lambda r: (str(r.get("type") or ""), str(r.get("title") or "")))
rows.sort(
key=lambda r: (str(r.get("type") or ""), str(r.get("title") or ""))
)
except Exception:
pass
return rows
@@ -682,7 +712,8 @@ class Telegram(Provider):
*,
chat_ids: Sequence[int],
usernames: Sequence[str],
files: Optional[Sequence[Dict[str, Any]]] = None,
files: Optional[Sequence[Dict[str,
Any]]] = None,
file_paths: Optional[Sequence[str]] = None,
) -> None:
"""Send local file(s) to one or more chats.
@@ -713,7 +744,10 @@ class Telegram(Provider):
# Back-compat: allow callers to pass `file_paths=`.
if files is None:
files = [{"path": str(p), "title": ""} for p in (file_paths or [])]
files = [{
"path": str(p),
"title": ""
} for p in (file_paths or [])]
def _sanitize_filename(text: str) -> str:
# Windows-safe plus generally safe for Telegram.
@@ -754,7 +788,10 @@ class Telegram(Provider):
title_text = str((f or {}).get("title") or "").strip()
except Exception:
title_text = ""
jobs.append({"path": str(path_obj), "title": title_text})
jobs.append({
"path": str(path_obj),
"title": title_text
})
if not jobs:
raise Exception("No files to send")
@@ -781,7 +818,9 @@ class Telegram(Provider):
try:
await client.connect()
if not bool(await client.is_user_authorized()):
raise Exception("Telegram session is not authorized. Run: .telegram -login")
raise Exception(
"Telegram session is not authorized. Run: .telegram -login"
)
# Resolve entities: prefer IDs. Only fall back to usernames when IDs are absent.
entities: list[Any] = []
@@ -826,8 +865,7 @@ class Telegram(Provider):
fallback = path_obj.stem
base = (
_sanitize_filename(title_raw)
if title_raw
else _sanitize_filename(fallback)
if title_raw else _sanitize_filename(fallback)
)
ext = path_obj.suffix
send_name = f"{base}{ext}" if ext else base
@@ -838,7 +876,11 @@ class Telegram(Provider):
if print_progress is None:
return
try:
print_progress(send_name, int(sent or 0), int(total or 0))
print_progress(
send_name,
int(sent or 0),
int(total or 0)
)
except Exception:
return
@@ -860,7 +902,11 @@ class Telegram(Provider):
finally:
if print_final_progress is not None:
try:
print_final_progress(send_name, int(file_size or 0), 0.0)
print_final_progress(
send_name,
int(file_size or 0),
0.0
)
except Exception:
pass
except Exception as exc:
@@ -900,9 +946,12 @@ class Telegram(Provider):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
def _download_message_media_sync(
self, *, url: str, output_dir: Path
) -> Tuple[Path, Dict[str, Any]]:
def _download_message_media_sync(self,
*,
url: str,
output_dir: Path) -> Tuple[Path,
Dict[str,
Any]]:
# Ensure we have an authorized session before attempting API calls.
# Never prompt during downloads.
if not self.ensure_session(prompt=False):
@@ -928,14 +977,17 @@ class Telegram(Provider):
try:
await client.connect()
if not bool(await client.is_user_authorized()):
raise Exception("Telegram session is not authorized. Run: .telegram -login")
raise Exception(
"Telegram session is not authorized. Run: .telegram -login"
)
if chat.startswith("c:"):
channel_id = int(chat.split(":", 1)[1])
entity = PeerChannel(channel_id)
else:
entity = chat
if isinstance(entity, str) and entity and not entity.startswith("@"):
if isinstance(entity,
str) and entity and not entity.startswith("@"):
entity = "@" + entity
messages = await client.get_messages(entity, ids=[message_id])
@@ -1009,7 +1061,9 @@ class Telegram(Provider):
from models import ProgressBar
progress_bar = ProgressBar()
last_print = {"t": 0.0}
last_print = {
"t": 0.0
}
def _progress(current: int, total: int) -> None:
now = time.monotonic()
@@ -1017,7 +1071,10 @@ class Telegram(Provider):
return
last_print["t"] = now
progress_bar.update(
downloaded=int(current), total=int(total), label="telegram", file=sys.stderr
downloaded=int(current),
total=int(total),
label="telegram",
file=sys.stderr
)
part_kb = self._resolve_part_size_kb(file_size)
@@ -1030,7 +1087,9 @@ class Telegram(Provider):
)
except TypeError:
downloaded = await client.download_media(
message, file=str(output_dir), progress_callback=_progress
message,
file=str(output_dir),
progress_callback=_progress
)
progress_bar.finish()
if not downloaded:
@@ -1051,27 +1110,28 @@ class Telegram(Provider):
except Exception:
date_iso = None
info: Dict[str, Any] = {
"provider": "telegram",
"source_url": url,
"chat": {
"key": chat,
"title": chat_title,
"username": chat_username,
"id": chat_id,
},
"message": {
"id": msg_id,
"date": date_iso,
"caption": caption,
},
"file": {
"name": file_name,
"mime_type": file_mime,
"size": file_size,
"downloaded_path": str(downloaded_path),
},
}
info: Dict[str,
Any] = {
"provider": "telegram",
"source_url": url,
"chat": {
"key": chat,
"title": chat_title,
"username": chat_username,
"id": chat_id,
},
"message": {
"id": msg_id,
"date": date_iso,
"caption": caption,
},
"file": {
"name": file_name,
"mime_type": file_mime,
"size": file_size,
"downloaded_path": str(downloaded_path),
},
}
return downloaded_path, info
except errors.RPCError as exc:
raise Exception(f"Telegram RPC error: {exc}")

View File

@@ -14,14 +14,20 @@ class YouTube(Provider):
self,
query: str,
limit: int = 10,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
# Use the yt_dlp Python module (installed via requirements.txt).
try:
import yt_dlp # type: ignore
ydl_opts: Dict[str, Any] = {"quiet": True, "skip_download": True, "extract_flat": True}
ydl_opts: Dict[str,
Any] = {
"quiet": True,
"skip_download": True,
"extract_flat": True
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
search_query = f"ytsearch{limit}:{query}"
info = ydl.extract_info(search_query, download=False)
@@ -30,13 +36,16 @@ class YouTube(Provider):
for video_data in entries[:limit]:
title = video_data.get("title", "Unknown")
video_id = video_data.get("id", "")
url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}"
url = video_data.get(
"url"
) or f"https://youtube.com/watch?v={video_id}"
uploader = video_data.get("uploader", "Unknown")
duration = video_data.get("duration", 0)
view_count = video_data.get("view_count", 0)
duration_str = (
f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
f"{int(duration // 60)}:{int(duration % 60):02d}"
if duration else ""
)
views_str = f"{view_count:,}" if view_count else ""
@@ -46,13 +55,18 @@ class YouTube(Provider):
title=title,
path=url,
detail=f"By: {uploader}",
annotations=[duration_str, f"{views_str} views"],
annotations=[duration_str,
f"{views_str} views"],
media_kind="video",
columns=[
("Title", title),
("Uploader", uploader),
("Duration", duration_str),
("Views", views_str),
("Title",
title),
("Uploader",
uploader),
("Duration",
duration_str),
("Views",
views_str),
],
full_metadata={
"video_id": video_id,

View File

@@ -19,15 +19,26 @@ class ZeroXZero(Provider):
raise FileNotFoundError(f"File not found: {file_path}")
try:
headers = {"User-Agent": "Medeia-Macina/1.0"}
headers = {
"User-Agent": "Medeia-Macina/1.0"
}
with HTTPClient(headers=headers) as client:
with open(file_path, "rb") as handle:
try:
total = os.path.getsize(file_path)
except Exception:
total = None
wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
response = client.post("https://0x0.st", files={"file": wrapped})
wrapped = ProgressFileReader(
handle,
total_bytes=total,
label="upload"
)
response = client.post(
"https://0x0.st",
files={
"file": wrapped
}
)
if response.status_code == 200:
uploaded_url = response.text.strip()
@@ -36,15 +47,19 @@ class ZeroXZero(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
Store(
self.config,
suppress_debug=True
).try_add_url_for_pipe_object(pipe_obj,
uploaded_url)
except Exception:
pass
return uploaded_url
raise Exception(f"Upload failed: {response.status_code} - {response.text}")
raise Exception(
f"Upload failed: {response.status_code} - {response.text}"
)
except Exception as exc:
log(f"[0x0] Upload error: {exc}", file=sys.stderr)