2025-12-29 17:05:03 -08:00
parent 226de9316a
commit c019c00aed
104 changed files with 19669 additions and 12954 deletions

View File

@@ -95,12 +95,18 @@ class AllDebrid(Provider):
return None
# Quiet mode when download-file is mid-pipeline.
quiet = bool(self.config.get("_quiet_background_output")) if isinstance(self.config, dict) else False
quiet = (
bool(self.config.get("_quiet_background_output"))
if isinstance(self.config, dict)
else False
)
unlocked_url = target
try:
unlocked = client.unlock_link(target)
if isinstance(unlocked, str) and unlocked.strip().startswith(("http://", "https://")):
if isinstance(unlocked, str) and unlocked.strip().startswith(
("http://", "https://")
):
unlocked_url = unlocked.strip()
except Exception as exc:
# Fall back to the raw link, but warn.
@@ -136,7 +142,11 @@ class AllDebrid(Provider):
try:
if downloaded_path.exists():
size = downloaded_path.stat().st_size
if size > 0 and size <= 250_000 and downloaded_path.suffix.lower() not in (".html", ".htm"):
if (
size > 0
and size <= 250_000
and downloaded_path.suffix.lower() not in (".html", ".htm")
):
head = downloaded_path.read_bytes()[:512]
try:
text = head.decode("utf-8", errors="ignore").lower()
@@ -147,7 +157,10 @@ class AllDebrid(Provider):
downloaded_path.unlink()
except Exception:
pass
log("[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.", file=sys.stderr)
log(
"[alldebrid] Download returned HTML page (not file bytes). Try again or check AllDebrid link status.",
file=sys.stderr,
)
return None
except Exception:
pass
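
The hunk above guards against a debrid mirror returning an HTML error page instead of file bytes. A minimal standalone sketch of the same heuristic (the size cap, extension allowlist, and byte-prefix sniff mirror the diff; the helper name is hypothetical):

```python
from pathlib import Path

def looks_like_html_error_page(path: Path, max_bytes: int = 250_000) -> bool:
    """Heuristic: a small download, not named .html, whose first bytes read as HTML."""
    size = path.stat().st_size
    if size == 0 or size > max_bytes or path.suffix.lower() in (".html", ".htm"):
        return False
    head = path.read_bytes()[:512]
    text = head.decode("utf-8", errors="ignore").lower()
    return text.lstrip().startswith(("<!doctype html", "<html")) or "<title>" in text
```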
@@ -160,7 +173,9 @@ class AllDebrid(Provider):
return None
@staticmethod
def _flatten_files(items: Any, *, _prefix: Optional[List[str]] = None) -> Iterable[Dict[str, Any]]:
def _flatten_files(
items: Any, *, _prefix: Optional[List[str]] = None
) -> Iterable[Dict[str, Any]]:
"""Flatten AllDebrid magnet file tree into file dicts, preserving relative paths.
API commonly returns:
@@ -185,17 +200,17 @@ class AllDebrid(Provider):
if not isinstance(node, dict):
continue
children = node.get('e') or node.get('children')
children = node.get("e") or node.get("children")
if isinstance(children, list):
folder_name = node.get('n') or node.get('name')
folder_name = node.get("n") or node.get("name")
next_prefix = prefix
if isinstance(folder_name, str) and folder_name.strip():
next_prefix = prefix + [folder_name.strip()]
yield from AllDebrid._flatten_files(children, _prefix=next_prefix)
continue
name = node.get('n') or node.get('name')
link = node.get('l') or node.get('link')
name = node.get("n") or node.get("name")
link = node.get("l") or node.get("link")
if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip():
rel_parts = prefix + [name.strip()]
relpath = "/".join([p for p in rel_parts if p])
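
For reference, the recursive flattening above can be exercised against a synthetic AllDebrid-style tree. The short keys (`n`, `e`, `l`) match the diff; the sample data itself is invented:

```python
from typing import Any, Dict, Iterable, List, Optional

def flatten_files(items: Any, _prefix: Optional[List[str]] = None) -> Iterable[Dict[str, Any]]:
    prefix = _prefix or []
    for node in items or []:
        if not isinstance(node, dict):
            continue
        children = node.get("e") or node.get("children")
        if isinstance(children, list):
            # Folder node: extend the prefix and recurse.
            folder = node.get("n") or node.get("name")
            next_prefix = prefix + [folder.strip()] if isinstance(folder, str) and folder.strip() else prefix
            yield from flatten_files(children, next_prefix)
            continue
        name = node.get("n") or node.get("name")
        link = node.get("l") or node.get("link")
        if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip():
            yield {"n": name.strip(), "l": link.strip(), "_relpath": "/".join(prefix + [name.strip()])}

tree = [{"n": "Season 1", "e": [{"n": "e01.mkv", "l": "https://example.org/1"}]}]
print([f["_relpath"] for f in flatten_files(tree)])  # ['Season 1/e01.mkv']
```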
@@ -253,10 +268,15 @@ class AllDebrid(Provider):
except Exception:
magnet_status = {}
magnet_name = str(magnet_status.get('filename') or magnet_status.get('name') or magnet_status.get('hash') or f"magnet-{magnet_id}")
status_code = magnet_status.get('statusCode')
status_text = str(magnet_status.get('status') or "").strip() or "unknown"
ready = status_code == 4 or bool(magnet_status.get('ready'))
magnet_name = str(
magnet_status.get("filename")
or magnet_status.get("name")
or magnet_status.get("hash")
or f"magnet-{magnet_id}"
)
status_code = magnet_status.get("statusCode")
status_text = str(magnet_status.get("status") or "").strip() or "unknown"
ready = status_code == 4 or bool(magnet_status.get("ready"))
if not ready:
return [
@@ -280,18 +300,23 @@ class AllDebrid(Provider):
try:
files_result = client.magnet_links([magnet_id])
magnet_files = files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
file_tree = magnet_files.get('files', []) if isinstance(magnet_files, dict) else []
magnet_files = (
files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
)
file_tree = magnet_files.get("files", []) if isinstance(magnet_files, dict) else []
except Exception as exc:
log(f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}", file=sys.stderr)
log(
f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}",
file=sys.stderr,
)
file_tree = []
results: List[SearchResult] = []
for file_node in self._flatten_files(file_tree):
file_name = str(file_node.get('n') or file_node.get('name') or '').strip()
file_url = str(file_node.get('l') or file_node.get('link') or '').strip()
relpath = str(file_node.get('_relpath') or file_name or '').strip()
file_size = file_node.get('s') or file_node.get('size')
file_name = str(file_node.get("n") or file_node.get("name") or "").strip()
file_url = str(file_node.get("l") or file_node.get("link") or "").strip()
relpath = str(file_node.get("_relpath") or file_name or "").strip()
file_size = file_node.get("s") or file_node.get("size")
if not file_name or not file_url:
continue
@@ -356,16 +381,21 @@ class AllDebrid(Provider):
continue
try:
magnet_id = int(magnet.get('id'))
magnet_id = int(magnet.get("id"))
except Exception:
continue
magnet_name = str(magnet.get('filename') or magnet.get('name') or magnet.get('hash') or f"magnet-{magnet_id}")
magnet_name = str(
magnet.get("filename")
or magnet.get("name")
or magnet.get("hash")
or f"magnet-{magnet_id}"
)
magnet_name_lower = magnet_name.lower()
status_text = str(magnet.get('status') or "").strip() or "unknown"
status_code = magnet.get('statusCode')
ready = status_code == 4 or bool(magnet.get('ready'))
status_text = str(magnet.get("status") or "").strip() or "unknown"
status_code = magnet.get("statusCode")
ready = status_code == 4 or bool(magnet.get("ready"))
if wanted_id is not None:
if magnet_id != wanted_id:
@@ -375,7 +405,7 @@ class AllDebrid(Provider):
size_bytes: Optional[int] = None
try:
size_val = magnet.get('size')
size_val = magnet.get("size")
if isinstance(size_val, (int, float)):
size_bytes = int(size_val)
elif isinstance(size_val, str) and size_val.isdigit():
@@ -392,7 +422,8 @@ class AllDebrid(Provider):
annotations=["folder"],
media_kind="folder",
size_bytes=size_bytes,
tag={"alldebrid", "folder", str(magnet_id)} | ({"ready"} if ready else {"not-ready"}),
tag={"alldebrid", "folder", str(magnet_id)}
| ({"ready"} if ready else {"not-ready"}),
columns=[
("Folder", magnet_name),
("ID", str(magnet_id)),

View File

@@ -38,7 +38,9 @@ class Bandcamp(Provider):
# Bandcamp discography lives under /music.
return base.rstrip("/") + "/music"
def _scrape_artist_page(self, page: Any, artist_url: str, limit: int = 50) -> List[SearchResult]:
def _scrape_artist_page(
self, page: Any, artist_url: str, limit: int = 50
) -> List[SearchResult]:
"""Scrape an artist page for albums/tracks (discography)."""
base = self._base_url(artist_url)
discography_url = self._discography_url(artist_url)
@@ -74,13 +76,15 @@ class Bandcamp(Provider):
target = base.rstrip("/") + "/" + href
title_node = item.query_selector("p.title") or item.query_selector(".title")
title = (title_node.inner_text().strip() if title_node else "")
title = title_node.inner_text().strip() if title_node else ""
if title:
title = " ".join(title.split())
if not title:
title = target.rsplit("/", 1)[-1]
kind = "album" if "/album/" in target else ("track" if "/track/" in target else "item")
kind = (
"album" if "/album/" in target else ("track" if "/track/" in target else "item")
)
results.append(
SearchResult(
@@ -107,7 +111,9 @@ class Bandcamp(Provider):
return results
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
def selector(
self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
) -> bool:
"""Handle Bandcamp `@N` selection.
If the selected item is an ARTIST result, selecting it auto-expands into
@@ -155,12 +161,16 @@ class Bandcamp(Provider):
continue
title = str(payload.get("title") or "").strip()
url_val = str(payload.get("url") or payload.get("path") or meta.get("url") or "").strip()
url_val = str(
payload.get("url") or payload.get("path") or meta.get("url") or ""
).strip()
base = self._base_url(url_val)
if not base:
continue
chosen.append({"title": title, "url": base, "location": str(meta.get("artist") or "").strip()})
chosen.append(
{"title": title, "url": base, "location": str(meta.get("artist") or "").strip()}
)
if not chosen:
return False
@@ -198,7 +208,13 @@ class Bandcamp(Provider):
try:
results_payload.append(r.to_dict())
except Exception:
results_payload.append({"table": "bandcamp", "title": getattr(r, "title", ""), "path": getattr(r, "path", "")})
results_payload.append(
{
"table": "bandcamp",
"title": getattr(r, "title", ""),
"path": getattr(r, "path", ""),
}
)
try:
ctx.set_last_result_table(table, results_payload)
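
The hunk above serializes each result via `to_dict()` but falls back to a minimal dict when serialization raises. The pattern in isolation, with a hypothetical result object:

```python
def serialize_results(results) -> list:
    payload = []
    for r in results:
        try:
            payload.append(r.to_dict())
        except Exception:
            # Fall back to the few fields the table renderer needs.
            payload.append({
                "table": "bandcamp",
                "title": getattr(r, "title", ""),
                "path": getattr(r, "path", ""),
            })
    return payload
```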

View File

@@ -9,159 +9,171 @@ from SYS.logger import log
def _pick_provider_config(config: Any) -> Dict[str, Any]:
if not isinstance(config, dict):
return {}
provider = config.get("provider")
if not isinstance(provider, dict):
return {}
entry = provider.get("file.io")
if isinstance(entry, dict):
return entry
return {}
if not isinstance(config, dict):
return {}
provider = config.get("provider")
if not isinstance(provider, dict):
return {}
entry = provider.get("file.io")
if isinstance(entry, dict):
return entry
return {}
def _extract_link(payload: Any) -> Optional[str]:
if isinstance(payload, dict):
for key in ("link", "url", "downloadLink", "download_url"):
val = payload.get(key)
if isinstance(val, str) and val.strip().startswith(("http://", "https://")):
return val.strip()
for nested_key in ("data", "file", "result"):
nested = payload.get(nested_key)
found = _extract_link(nested)
if found:
return found
return None
if isinstance(payload, dict):
for key in ("link", "url", "downloadLink", "download_url"):
val = payload.get(key)
if isinstance(val, str) and val.strip().startswith(("http://", "https://")):
return val.strip()
for nested_key in ("data", "file", "result"):
nested = payload.get(nested_key)
found = _extract_link(nested)
if found:
return found
return None
def _extract_key(payload: Any) -> Optional[str]:
if isinstance(payload, dict):
for key in ("key", "id", "uuid"):
val = payload.get(key)
if isinstance(val, str) and val.strip():
return val.strip()
for nested_key in ("data", "file", "result"):
nested = payload.get(nested_key)
found = _extract_key(nested)
if found:
return found
return None
if isinstance(payload, dict):
for key in ("key", "id", "uuid"):
val = payload.get(key)
if isinstance(val, str) and val.strip():
return val.strip()
for nested_key in ("data", "file", "result"):
nested = payload.get(nested_key)
found = _extract_key(nested)
if found:
return found
return None
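
`_extract_link` and `_extract_key` probe a few known keys, then recurse one wrapper level at a time through `data`/`file`/`result`. A compact restatement with a made-up file.io-style payload:

```python
from typing import Any, Optional

def extract_link(payload: Any) -> Optional[str]:
    # Mirror of _extract_link above: check known keys, then recurse into wrappers.
    if isinstance(payload, dict):
        for key in ("link", "url", "downloadLink", "download_url"):
            val = payload.get(key)
            if isinstance(val, str) and val.strip().startswith(("http://", "https://")):
                return val.strip()
        for nested_key in ("data", "file", "result"):
            found = extract_link(payload.get(nested_key))
            if found:
                return found
    return None

print(extract_link({"success": True, "data": {"link": "https://file.io/abc123"}}))
```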
class FileIO(Provider):
"""File provider for file.io."""
"""File provider for file.io."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
conf = _pick_provider_config(self.config)
self._base_url = str(conf.get("base_url") or "https://file.io").strip().rstrip("/")
self._api_key = conf.get("api_key")
self._default_expires = conf.get("expires")
self._default_max_downloads = conf.get("maxDownloads")
if self._default_max_downloads is None:
self._default_max_downloads = conf.get("max_downloads")
self._default_auto_delete = conf.get("autoDelete")
if self._default_auto_delete is None:
self._default_auto_delete = conf.get("auto_delete")
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
conf = _pick_provider_config(self.config)
self._base_url = str(conf.get("base_url") or "https://file.io").strip().rstrip("/")
self._api_key = conf.get("api_key")
self._default_expires = conf.get("expires")
self._default_max_downloads = conf.get("maxDownloads")
if self._default_max_downloads is None:
self._default_max_downloads = conf.get("max_downloads")
self._default_auto_delete = conf.get("autoDelete")
if self._default_auto_delete is None:
self._default_auto_delete = conf.get("auto_delete")
def validate(self) -> bool:
return True
def validate(self) -> bool:
return True
def upload(self, file_path: str, **kwargs: Any) -> str:
from API.HTTP import HTTPClient
from models import ProgressFileReader
def upload(self, file_path: str, **kwargs: Any) -> str:
from API.HTTP import HTTPClient
from models import ProgressFileReader
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
data: Dict[str, Any] = {}
expires = kwargs.get("expires", self._default_expires)
max_downloads = kwargs.get("maxDownloads", kwargs.get("max_downloads", self._default_max_downloads))
auto_delete = kwargs.get("autoDelete", kwargs.get("auto_delete", self._default_auto_delete))
data: Dict[str, Any] = {}
expires = kwargs.get("expires", self._default_expires)
max_downloads = kwargs.get(
"maxDownloads", kwargs.get("max_downloads", self._default_max_downloads)
)
auto_delete = kwargs.get("autoDelete", kwargs.get("auto_delete", self._default_auto_delete))
if expires not in (None, ""):
data["expires"] = expires
if max_downloads not in (None, ""):
data["maxDownloads"] = max_downloads
if auto_delete not in (None, ""):
data["autoDelete"] = auto_delete
if expires not in (None, ""):
data["expires"] = expires
if max_downloads not in (None, ""):
data["maxDownloads"] = max_downloads
if auto_delete not in (None, ""):
data["autoDelete"] = auto_delete
headers: Dict[str, str] = {"User-Agent": "Medeia-Macina/1.0", "Accept": "application/json"}
if isinstance(self._api_key, str) and self._api_key.strip():
# Some file.io plans use bearer tokens; keep optional.
headers["Authorization"] = f"Bearer {self._api_key.strip()}"
headers: Dict[str, str] = {"User-Agent": "Medeia-Macina/1.0", "Accept": "application/json"}
if isinstance(self._api_key, str) and self._api_key.strip():
# Some file.io plans use bearer tokens; keep optional.
headers["Authorization"] = f"Bearer {self._api_key.strip()}"
try:
with HTTPClient(headers=headers) as client:
with open(file_path, "rb") as handle:
filename = os.path.basename(file_path)
try:
total = os.path.getsize(file_path)
except Exception:
total = None
wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
response = client.request(
"POST",
f"{self._base_url}/upload",
data=data or None,
files={"file": (filename, wrapped)},
follow_redirects=True,
raise_for_status=False,
)
try:
with HTTPClient(headers=headers) as client:
with open(file_path, "rb") as handle:
filename = os.path.basename(file_path)
try:
total = os.path.getsize(file_path)
except Exception:
total = None
wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
response = client.request(
"POST",
f"{self._base_url}/upload",
data=data or None,
files={"file": (filename, wrapped)},
follow_redirects=True,
raise_for_status=False,
)
if response.status_code >= 400:
location = response.headers.get("location") or response.headers.get("Location")
ct = response.headers.get("content-type") or response.headers.get("Content-Type")
raise Exception(f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}")
if response.status_code >= 400:
location = response.headers.get("location") or response.headers.get("Location")
ct = response.headers.get("content-type") or response.headers.get("Content-Type")
raise Exception(
f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
)
payload: Any
try:
payload = response.json()
except Exception:
payload = None
payload: Any
try:
payload = response.json()
except Exception:
payload = None
# If the server ignored our Accept header and returned HTML, this is almost
# certainly the wrong endpoint or an upstream block.
ct = (response.headers.get("content-type") or response.headers.get("Content-Type") or "").lower()
if (payload is None) and ("text/html" in ct):
raise Exception("file.io returned HTML instead of JSON; expected API response from /upload")
# If the server ignored our Accept header and returned HTML, this is almost
# certainly the wrong endpoint or an upstream block.
ct = (
response.headers.get("content-type") or response.headers.get("Content-Type") or ""
).lower()
if (payload is None) and ("text/html" in ct):
raise Exception(
"file.io returned HTML instead of JSON; expected API response from /upload"
)
if isinstance(payload, dict) and payload.get("success") is False:
reason = payload.get("message") or payload.get("error") or payload.get("status")
raise Exception(str(reason or "Upload failed"))
if isinstance(payload, dict) and payload.get("success") is False:
reason = payload.get("message") or payload.get("error") or payload.get("status")
raise Exception(str(reason or "Upload failed"))
uploaded_url = _extract_link(payload)
if not uploaded_url:
# Some APIs may return the link as plain text.
text = str(response.text or "").strip()
if text.startswith(("http://", "https://")):
uploaded_url = text
uploaded_url = _extract_link(payload)
if not uploaded_url:
# Some APIs may return the link as plain text.
text = str(response.text or "").strip()
if text.startswith(("http://", "https://")):
uploaded_url = text
if not uploaded_url:
key = _extract_key(payload)
if key:
uploaded_url = f"{self._base_url}/{key.lstrip('/')}"
if not uploaded_url:
key = _extract_key(payload)
if key:
uploaded_url = f"{self._base_url}/{key.lstrip('/')}"
if not uploaded_url:
try:
snippet = (response.text or "").strip()
if len(snippet) > 300:
snippet = snippet[:300] + "..."
except Exception:
snippet = "<unreadable response>"
raise Exception(f"Upload succeeded but response did not include a link (response: {snippet})")
if not uploaded_url:
try:
snippet = (response.text or "").strip()
if len(snippet) > 300:
snippet = snippet[:300] + "..."
except Exception:
snippet = "<unreadable response>"
raise Exception(
f"Upload succeeded but response did not include a link (response: {snippet})"
)
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, uploaded_url)
except Exception:
pass
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
except Exception:
pass
return uploaded_url
return uploaded_url
except Exception as exc:
log(f"[file.io] Upload error: {exc}", file=sys.stderr)
raise
except Exception as exc:
log(f"[file.io] Upload error: {exc}", file=sys.stderr)
raise

View File

@@ -110,7 +110,12 @@ def is_download_file_url(url: str) -> bool:
if not host.endswith("archive.org"):
return False
# /download/<identifier>/<filename>
return len(parts) >= 3 and parts[0].lower() == "download" and bool(parts[1].strip()) and bool(parts[2].strip())
return (
len(parts) >= 3
and parts[0].lower() == "download"
and bool(parts[1].strip())
and bool(parts[2].strip())
)
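
The predicate above recognizes `https://archive.org/download/<identifier>/<filename>` URLs. A self-contained sketch (the urllib-based host/path split is an assumption about the surrounding code):

```python
from urllib.parse import urlparse

def is_ia_download_url(url: str) -> bool:
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    if not host.endswith("archive.org"):
        return False
    parts = [p for p in parsed.path.split("/") if p]
    # /download/<identifier>/<filename>
    return (
        len(parts) >= 3
        and parts[0].lower() == "download"
        and bool(parts[1].strip())
        and bool(parts[2].strip())
    )

print(is_ia_download_url("https://archive.org/download/some-item/file.pdf"))  # True
```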
def list_download_files(identifier: str) -> List[Dict[str, Any]]:
@@ -482,12 +487,14 @@ class InternetArchive(Provider):
name = f.get("name")
if not name:
continue
files.append({
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
})
files.append(
{
"name": str(name),
"size": getattr(f, "size", None),
"format": getattr(f, "format", None),
"source": getattr(f, "source", None),
}
)
except Exception:
files = []
@@ -657,7 +664,9 @@ class InternetArchive(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, item_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, item_url
)
except Exception:
pass

View File

@@ -111,7 +111,19 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
score = 0
for ln in lines:
lo = ln.lower()
if ":" in ln and any(k in lo for k in ("title", "author", "publisher", "year", "isbn", "language", "series", "tags")):
if ":" in ln and any(
k in lo
for k in (
"title",
"author",
"publisher",
"year",
"isbn",
"language",
"series",
"tags",
)
):
score += 1
if score > best_score:
best_score = score
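
The loop above scores candidate text blocks by counting `label:` lines that mention known metadata fields, then keeps the best-scoring block. In isolation (how the HTML is split into blocks is assumed):

```python
KNOWN = ("title", "author", "publisher", "year", "isbn", "language", "series", "tags")

def score_block(lines) -> int:
    score = 0
    for ln in lines:
        lo = ln.lower()
        if ":" in ln and any(k in lo for k in KNOWN):
            score += 1
    return score

def best_block(blocks):
    return max(blocks, key=score_block, default=[])

blocks = [["random text"], ["Title: Foo", "Author: Bar", "Year: 2001"]]
print(best_block(blocks))  # ['Title: Foo', 'Author: Bar', 'Year: 2001']
```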
@@ -260,7 +272,9 @@ def _prefer_isbn(isbns: List[str]) -> str:
return vals[0] if vals else ""
def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] = None) -> Tuple[List[str], str]:
def _enrich_book_tags_from_isbn(
isbn: str, *, config: Optional[Dict[str, Any]] = None
) -> Tuple[List[str], str]:
"""Return (tags, source_name) for the given ISBN.
Priority:
@@ -378,7 +392,9 @@ def _enrich_book_tags_from_isbn(isbn: str, *, config: Optional[Dict[str, Any]] =
return [], ""
def _fetch_libgen_details_html(url: str, *, timeout: Optional[Tuple[float, float]] = None) -> Optional[str]:
def _fetch_libgen_details_html(
url: str, *, timeout: Optional[Tuple[float, float]] = None
) -> Optional[str]:
try:
if timeout is None:
timeout = (DEFAULT_CONNECT_TIMEOUT, DEFAULT_READ_TIMEOUT)
@@ -450,7 +466,9 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
label = label[:-1].strip()
chunk_start = m.end()
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
)
raw_val_html = s[chunk_start:chunk_end]
# If we already have a value for this label from a table row, keep it.
@@ -600,7 +618,19 @@ def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]:
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author(s)", "authors", "author", "publisher", "year", "isbn", "language", "oclc/worldcat", "tags", "edition id"}:
if lk in {
"title",
"author(s)",
"authors",
"author",
"publisher",
"year",
"isbn",
"language",
"oclc/worldcat",
"tags",
"edition id",
}:
continue
vv = str(v or "").strip()
if not vv:
@@ -755,7 +785,15 @@ class Libgen(Provider):
if title and title.startswith("http"):
title = ""
base_name = sanitize_filename(title or md5 or (f"libgen_{_libgen_id_from_url(target)}" if _libgen_id_from_url(target) else "libgen"))
base_name = sanitize_filename(
title
or md5
or (
f"libgen_{_libgen_id_from_url(target)}"
if _libgen_id_from_url(target)
else "libgen"
)
)
out_path = output_dir / base_name
if extension:
out_path = out_path.with_suffix(f".{extension}")
@@ -782,14 +820,23 @@ class Libgen(Provider):
return
total = int(content_length) if content_length and content_length > 0 else None
downloaded = int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
downloaded = (
int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
)
elapsed = max(0.001, now - start_time)
speed = downloaded / elapsed
progress_bar.update(downloaded=downloaded, total=total, label=str(label or "download"), file=sys.stderr)
progress_bar.update(
downloaded=downloaded,
total=total,
label=str(label or "download"),
file=sys.stderr,
)
last_progress_time[0] = now
ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback)
ok, final_path = download_from_mirror(
target, out_path, progress_callback=progress_callback
)
progress_bar.finish()
if ok and final_path:
# After the download completes, best-effort fetch details metadata (title + ISBN)
@@ -802,9 +849,13 @@ class Libgen(Provider):
# Parse it post-download (best-effort) and do NOT perform external
# enrichment (OpenLibrary/isbnsearch) unless the user later chooses to.
if ("/ads.php" in low) or ("/get.php" in low):
ads_url = target if "/ads.php" in low else _libgen_ads_url_for_target(target)
ads_url = (
target if "/ads.php" in low else _libgen_ads_url_for_target(target)
)
if ads_url:
html = _fetch_libgen_details_html(ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0))
html = _fetch_libgen_details_html(
ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0)
)
if html:
meta = _parse_libgen_ads_tags_html(html)
extracted_title = str(meta.get("title") or "").strip()
@@ -814,8 +865,12 @@ class Libgen(Provider):
if (not title) or title.startswith("http"):
title = extracted_title
authors = meta.get("authors") if isinstance(meta.get("authors"), list) else []
for a in (authors or []):
authors = (
meta.get("authors")
if isinstance(meta.get("authors"), list)
else []
)
for a in authors or []:
aa = str(a or "").strip()
if aa:
result.tag.add(f"author:{aa}")
@@ -835,15 +890,25 @@ class Libgen(Provider):
md["language"] = language
result.tag.add(f"language:{language}")
isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
isbns = [str(x).strip() for x in (isbns or []) if str(x).strip()]
isbns = (
meta.get("isbn")
if isinstance(meta.get("isbn"), list)
else []
)
isbns = [
str(x).strip() for x in (isbns or []) if str(x).strip()
]
if isbns:
md["isbn"] = isbns
for isbn_val in isbns:
result.tag.add(f"isbn:{isbn_val}")
free_tags = meta.get("tags") if isinstance(meta.get("tags"), list) else []
for t in (free_tags or []):
free_tags = (
meta.get("tags")
if isinstance(meta.get("tags"), list)
else []
)
for t in free_tags or []:
tt = str(t or "").strip()
if tt:
result.tag.add(tt)
@@ -853,7 +918,16 @@ class Libgen(Provider):
if isinstance(raw_fields, dict):
for k, v in raw_fields.items():
lk = str(k or "").strip().lower()
if lk in {"title", "author", "authors", "publisher", "year", "isbn", "language", "tags"}:
if lk in {
"title",
"author",
"authors",
"publisher",
"year",
"isbn",
"language",
"tags",
}:
continue
vv = str(v or "").strip()
if not vv:
@@ -863,7 +937,11 @@ class Libgen(Provider):
result.tag.add(f"libgen_{ns}:{vv}")
# Legacy: edition/file/series details pages (title + ISBN) + external enrichment.
if ("/edition.php" in low) or ("/file.php" in low) or ("/series.php" in low):
if (
("/edition.php" in low)
or ("/file.php" in low)
or ("/series.php" in low)
):
html = _fetch_libgen_details_html(target)
if html:
meta = _parse_libgen_details_html(html)
@@ -874,8 +952,14 @@ class Libgen(Provider):
meta["edition_id"] = eid
extracted_title = str(meta.get("title") or "").strip()
extracted_isbns = meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
extracted_isbns = [str(x).strip() for x in (extracted_isbns or []) if str(x).strip()]
extracted_isbns = (
meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
)
extracted_isbns = [
str(x).strip()
for x in (extracted_isbns or [])
if str(x).strip()
]
if extracted_title:
md["title"] = extracted_title
@@ -955,9 +1039,11 @@ class LibgenSearch:
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
self.session.headers.update(
{
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
)
def _search_libgen_json(
self,
@@ -1005,20 +1091,22 @@ class LibgenSearch:
download_link = f"http://library.lol/main/{md5}" if md5 else ""
results.append({
"id": str(raw_id),
"title": str(title),
"author": str(author),
"publisher": str(publisher),
"year": str(year),
"pages": str(pages),
"language": str(language),
"filesize_str": str(size),
"extension": str(extension),
"md5": str(md5),
"mirror_url": download_link,
"cover": "",
})
results.append(
{
"id": str(raw_id),
"title": str(title),
"author": str(author),
"publisher": str(publisher),
"year": str(year),
"pages": str(pages),
"language": str(language),
"filesize_str": str(size),
"extension": str(extension),
"md5": str(md5),
"mirror_url": download_link,
"cover": "",
}
)
if len(results) >= limit:
break
@@ -1063,7 +1151,9 @@ class LibgenSearch:
# Try JSON first on *all* mirrors (including .gl/.li), then fall back to HTML scraping.
results: List[Dict[str, Any]] = []
try:
results = self._search_libgen_json(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_json(
mirror, query, limit, timeout=request_timeout
)
except Exception:
results = []
@@ -1072,9 +1162,13 @@ class LibgenSearch:
continue
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_li(
mirror, query, limit, timeout=request_timeout
)
else:
results = self._search_libgen_rs(mirror, query, limit, timeout=request_timeout)
results = self._search_libgen_rs(
mirror, query, limit, timeout=request_timeout
)
if results:
_call(log_info, f"[libgen] Using mirror: {mirror}")
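
As the hunks above show, `LibgenSearch` tries the JSON API on every mirror first and only falls back to mirror-specific HTML scraping when that yields nothing. The control flow, reduced to a sketch (the callables stand in for the private `_search_libgen_*` methods):

```python
def search_with_fallback(mirrors, query, limit, *, json_search, li_search, rs_search):
    for mirror in mirrors:
        try:
            results = json_search(mirror, query, limit)
        except Exception:
            results = []
        if not results:
            # HTML scraping differs between the .li/.gl family and .rs-style mirrors.
            if "libgen.li" in mirror or "libgen.gl" in mirror:
                results = li_search(mirror, query, limit)
            else:
                results = rs_search(mirror, query, limit)
        if results:
            return mirror, results
    return None, []
```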
@@ -1477,28 +1571,40 @@ def _resolve_download_url(
# get.php?md5=... -> file response
# Handle edition -> file links.
m = re.search(r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle series -> edition links.
m = re.search(r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Handle file -> ads/get links (sometimes present as the "Libgen" mirror).
m = re.search(r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*ads\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
return urljoin(base_url, href)
# Prefer explicit get.php md5 links (most common successful chain).
m = re.search(r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']', html, flags=re.IGNORECASE)
m = re.search(
r'href=["\']([^"\']*get\.php\?md5=[a-fA-F0-9]{32}[^"\']*)["\']',
html,
flags=re.IGNORECASE,
)
if m:
href = str(m.group(1) or "").strip()
if href and not href.lower().startswith("javascript:"):
@@ -1540,7 +1646,9 @@ def _resolve_download_url(
_call(log_info, f"[resolve] Checking: {current_url}")
if current_url.lower().endswith((".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")):
if current_url.lower().endswith(
(".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")
):
return current_url
try:
@@ -1618,7 +1726,9 @@ def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Opt
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE)
match = re.search(
r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE
)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
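
The helper above prefers a `filename=` (or RFC 5987 `filename*=`) value from Content-Disposition before falling back to the URL path. Standalone, using the same regex as the diff:

```python
import re
from pathlib import Path
from typing import Optional
from urllib.parse import unquote, urlparse

def guess_extension(download_url: str, headers: dict) -> Optional[str]:
    cd = headers.get("content-disposition", "")
    if cd:
        match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", cd, flags=re.IGNORECASE)
        if match:
            suffix = Path(unquote(match.group(1).strip('"'))).suffix
            if suffix:
                return suffix
    # Fall back to whatever extension the URL path carries.
    suffix = Path(urlparse(download_url).path).suffix
    return suffix or None

print(guess_extension("https://example.org/book.epub", {}))  # .epub
```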

View File

@@ -56,7 +56,9 @@ class LOC(Provider):
page_size = max(1, min(int(limit), 50))
while len(results) < max(0, int(limit)):
payload = client.search_chronicling_america(q, start=start, count=page_size, extra_params=extra)
payload = client.search_chronicling_america(
q, start=start, count=page_size, extra_params=extra
)
items = payload.get("results")
if not isinstance(items, list) or not items:
break

View File

@@ -16,457 +16,513 @@ _MATRIX_INIT_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}
def _sniff_mime_from_header(path: Path) -> Optional[str]:
"""Best-effort MIME sniffing from file headers.
"""Best-effort MIME sniffing from file headers.
Used when the file has no/unknown extension (common for exported/temp files).
Keeps dependencies to stdlib only.
"""
try:
if not path.exists() or not path.is_file():
return None
with open(path, "rb") as handle:
header = handle.read(512)
if not header:
return None
Used when the file has no/unknown extension (common for exported/temp files).
Keeps dependencies to stdlib only.
"""
try:
if not path.exists() or not path.is_file():
return None
with open(path, "rb") as handle:
header = handle.read(512)
if not header:
return None
# Images
if header.startswith(b"\xFF\xD8\xFF"):
return "image/jpeg"
if header.startswith(b"\x89PNG\r\n\x1a\n"):
return "image/png"
if header.startswith(b"GIF87a") or header.startswith(b"GIF89a"):
return "image/gif"
if header.startswith(b"BM"):
return "image/bmp"
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WEBP":
return "image/webp"
# Images
if header.startswith(b"\xff\xd8\xff"):
return "image/jpeg"
if header.startswith(b"\x89PNG\r\n\x1a\n"):
return "image/png"
if header.startswith(b"GIF87a") or header.startswith(b"GIF89a"):
return "image/gif"
if header.startswith(b"BM"):
return "image/bmp"
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WEBP":
return "image/webp"
# Audio
if header.startswith(b"fLaC"):
return "audio/flac"
if header.startswith(b"OggS"):
# Could be audio or video; treat as audio unless extension suggests video.
return "audio/ogg"
if header.startswith(b"ID3"):
return "audio/mpeg"
if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
return "audio/mpeg"
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WAVE":
return "audio/wav"
# Audio
if header.startswith(b"fLaC"):
return "audio/flac"
if header.startswith(b"OggS"):
# Could be audio or video; treat as audio unless extension suggests video.
return "audio/ogg"
if header.startswith(b"ID3"):
return "audio/mpeg"
if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
return "audio/mpeg"
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WAVE":
return "audio/wav"
# Video
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
return "video/x-msvideo"
if header.startswith(b"\x1A\x45\xDF\xA3"):
# EBML container: Matroska/WebM.
return "video/x-matroska"
if len(header) >= 12 and header[4:8] == b"ftyp":
# ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
return "video/mp4"
# MPEG-TS / M2TS (sync byte every 188 bytes)
try:
if path.stat().st_size >= 188 * 2 and header[0] == 0x47:
with open(path, "rb") as handle:
handle.seek(188)
b = handle.read(1)
if b == b"\x47":
return "video/mp2t"
except Exception:
pass
# Video
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
return "video/x-msvideo"
if header.startswith(b"\x1a\x45\xdf\xa3"):
# EBML container: Matroska/WebM.
return "video/x-matroska"
if len(header) >= 12 and header[4:8] == b"ftyp":
# ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
return "video/mp4"
# MPEG-TS / M2TS (sync byte every 188 bytes)
try:
if path.stat().st_size >= 188 * 2 and header[0] == 0x47:
with open(path, "rb") as handle:
handle.seek(188)
b = handle.read(1)
if b == b"\x47":
return "video/mp2t"
except Exception:
pass
return None
except Exception:
return None
return None
except Exception:
return None
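
The signature checks above condense to a small magic-byte table. This sketch covers a subset of what the diff handles, shown as data rather than an if-chain:

```python
from typing import Optional

MAGIC = [
    (b"\xff\xd8\xff", "image/jpeg"),
    (b"\x89PNG\r\n\x1a\n", "image/png"),
    (b"GIF87a", "image/gif"),
    (b"GIF89a", "image/gif"),
    (b"fLaC", "audio/flac"),
    (b"OggS", "audio/ogg"),
    (b"ID3", "audio/mpeg"),
    (b"\x1a\x45\xdf\xa3", "video/x-matroska"),  # EBML container: Matroska/WebM
]

def sniff_mime(header: bytes) -> Optional[str]:
    for sig, mime in MAGIC:
        if header.startswith(sig):
            return mime
    # RIFF containers disambiguate on bytes 8..12 (WEBP / WAVE / "AVI ").
    if header.startswith(b"RIFF") and len(header) >= 12:
        return {b"WEBP": "image/webp", b"WAVE": "audio/wav", b"AVI ": "video/x-msvideo"}.get(header[8:12])
    # ISO BMFF (mp4/mov/m4a) carries "ftyp" at offset 4.
    if len(header) >= 12 and header[4:8] == b"ftyp":
        return "video/mp4"
    return None

print(sniff_mime(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8))  # image/png
```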
def _classify_matrix_upload(path: Path, *, explicit_mime_type: Optional[str] = None) -> Tuple[str, str]:
"""Return (mime_type, msgtype) for Matrix uploads."""
mime_type = str(explicit_mime_type or "").strip() or None
def _classify_matrix_upload(
path: Path, *, explicit_mime_type: Optional[str] = None
) -> Tuple[str, str]:
"""Return (mime_type, msgtype) for Matrix uploads."""
mime_type = str(explicit_mime_type or "").strip() or None
if not mime_type:
# `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
mime_type, _ = mimetypes.guess_type(str(path))
if not mime_type:
# `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
mime_type, _ = mimetypes.guess_type(str(path))
if not mime_type:
mime_type = _sniff_mime_from_header(path)
if not mime_type:
mime_type = _sniff_mime_from_header(path)
# Refinements based on extension for ambiguous containers.
ext = path.suffix.lower()
if ext in {".m4a", ".aac"}:
mime_type = mime_type or "audio/mp4"
if ext in {".mkv", ".webm"}:
mime_type = mime_type or "video/x-matroska"
if ext in {".ogv"}:
mime_type = mime_type or "video/ogg"
# Refinements based on extension for ambiguous containers.
ext = path.suffix.lower()
if ext in {".m4a", ".aac"}:
mime_type = mime_type or "audio/mp4"
if ext in {".mkv", ".webm"}:
mime_type = mime_type or "video/x-matroska"
if ext in {".ogv"}:
mime_type = mime_type or "video/ogg"
msgtype = "m.file"
if mime_type:
mt = mime_type.casefold()
if mt.startswith("image/"):
msgtype = "m.image"
elif mt.startswith("audio/"):
msgtype = "m.audio"
elif mt.startswith("video/"):
msgtype = "m.video"
msgtype = "m.file"
if mime_type:
mt = mime_type.casefold()
if mt.startswith("image/"):
msgtype = "m.image"
elif mt.startswith("audio/"):
msgtype = "m.audio"
elif mt.startswith("video/"):
msgtype = "m.video"
# Final fallback for unknown MIME types.
if msgtype == "m.file":
audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv", ".m2ts", ".mts", ".3gp", ".ogv"}
image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
if ext in audio_exts:
msgtype = "m.audio"
elif ext in video_exts:
msgtype = "m.video"
elif ext in image_exts:
msgtype = "m.image"
# Final fallback for unknown MIME types.
if msgtype == "m.file":
audio_exts = {
".mp3",
".flac",
".wav",
".m4a",
".aac",
".ogg",
".opus",
".wma",
".mka",
".alac",
}
video_exts = {
".mp4",
".mkv",
".webm",
".mov",
".avi",
".flv",
".mpg",
".mpeg",
".ts",
".m4v",
".wmv",
".m2ts",
".mts",
".3gp",
".ogv",
}
image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
if ext in audio_exts:
msgtype = "m.audio"
elif ext in video_exts:
msgtype = "m.video"
elif ext in image_exts:
msgtype = "m.image"
return (mime_type or "application/octet-stream"), msgtype
return (mime_type or "application/octet-stream"), msgtype
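
The MIME-to-Matrix `msgtype` mapping above reduces to a prefix check with an `m.file` fallback:

```python
def msgtype_for(mime_type: str) -> str:
    mt = (mime_type or "").casefold()
    for prefix, msgtype in (("image/", "m.image"), ("audio/", "m.audio"), ("video/", "m.video")):
        if mt.startswith(prefix):
            return msgtype
    return "m.file"

print(msgtype_for("audio/flac"))  # m.audio
```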
def _normalize_homeserver(value: str) -> str:
text = str(value or "").strip()
if not text:
return ""
if not text.startswith("http"):
text = f"https://{text}"
return text.rstrip("/")
text = str(value or "").strip()
if not text:
return ""
if not text.startswith("http"):
text = f"https://{text}"
return text.rstrip("/")
def _matrix_health_check(*, homeserver: str, access_token: Optional[str]) -> Tuple[bool, Optional[str]]:
"""Lightweight Matrix reachability/auth validation.
def _matrix_health_check(
*, homeserver: str, access_token: Optional[str]
) -> Tuple[bool, Optional[str]]:
"""Lightweight Matrix reachability/auth validation.
- Always checks `/versions` (no auth).
- If `access_token` is present, also checks `/whoami`.
"""
try:
base = _normalize_homeserver(homeserver)
if not base:
return False, "Matrix homeserver missing"
- Always checks `/versions` (no auth).
- If `access_token` is present, also checks `/whoami`.
"""
try:
base = _normalize_homeserver(homeserver)
if not base:
return False, "Matrix homeserver missing"
resp = requests.get(f"{base}/_matrix/client/versions", timeout=5)
if resp.status_code != 200:
return False, f"Homeserver returned {resp.status_code}"
resp = requests.get(f"{base}/_matrix/client/versions", timeout=5)
if resp.status_code != 200:
return False, f"Homeserver returned {resp.status_code}"
if access_token:
headers = {"Authorization": f"Bearer {access_token}"}
resp = requests.get(f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5)
if resp.status_code != 200:
return False, f"Authentication failed: {resp.status_code}"
if access_token:
headers = {"Authorization": f"Bearer {access_token}"}
resp = requests.get(
f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5
)
if resp.status_code != 200:
return False, f"Authentication failed: {resp.status_code}"
return True, None
except Exception as exc:
return False, str(exc)
return True, None
except Exception as exc:
return False, str(exc)
class Matrix(Provider):
"""File provider for Matrix (Element) chat rooms."""
"""File provider for Matrix (Element) chat rooms."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
self._init_ok: Optional[bool] = None
self._init_reason: Optional[str] = None
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
self._init_ok: Optional[bool] = None
self._init_reason: Optional[str] = None
matrix_conf = self.config.get("provider", {}).get("matrix", {}) if isinstance(self.config, dict) else {}
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
password = matrix_conf.get("password")
matrix_conf = (
self.config.get("provider", {}).get("matrix", {})
if isinstance(self.config, dict)
else {}
)
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
password = matrix_conf.get("password")
# Not configured: keep instance but mark invalid via validate().
# Note: `room_id` is intentionally NOT required, since the CLI can prompt
# the user to select a room dynamically.
if not (homeserver and (access_token or password)):
self._init_ok = None
self._init_reason = None
return
# Not configured: keep instance but mark invalid via validate().
# Note: `room_id` is intentionally NOT required, since the CLI can prompt
# the user to select a room dynamically.
if not (homeserver and (access_token or password)):
self._init_ok = None
self._init_reason = None
return
cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}"
cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key)
if cached is None:
ok, reason = _matrix_health_check(homeserver=str(homeserver), access_token=str(access_token) if access_token else None)
_MATRIX_INIT_CHECK_CACHE[cache_key] = (ok, reason)
else:
ok, reason = cached
cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}"
cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key)
if cached is None:
ok, reason = _matrix_health_check(
homeserver=str(homeserver), access_token=str(access_token) if access_token else None
)
_MATRIX_INIT_CHECK_CACHE[cache_key] = (ok, reason)
else:
ok, reason = cached
self._init_ok = ok
self._init_reason = reason
if not ok:
raise Exception(reason or "Matrix unavailable")
self._init_ok = ok
self._init_reason = reason
if not ok:
raise Exception(reason or "Matrix unavailable")
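
`__init__` memoizes the health check per `homeserver|has_token` pair in the module-level `_MATRIX_INIT_CHECK_CACHE`, so repeated provider constructions in one process don't re-hit `/versions`. The pattern in miniature (the check itself is stubbed with a lambda):

```python
from typing import Callable, Dict, Optional, Tuple

_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}

def cached_health_check(
    homeserver: str, has_token: bool, check: Callable[[], Tuple[bool, Optional[str]]]
) -> Tuple[bool, Optional[str]]:
    key = f"{homeserver.rstrip('/')}|has_token:{has_token}"
    if key not in _CHECK_CACHE:
        _CHECK_CACHE[key] = check()  # only runs once per (homeserver, token presence)
    return _CHECK_CACHE[key]

ok, reason = cached_health_check("https://matrix.example.org", True, lambda: (True, None))
```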
def validate(self) -> bool:
if not self.config:
return False
if self._init_ok is False:
return False
matrix_conf = self.config.get("provider", {}).get("matrix", {})
return bool(
matrix_conf.get("homeserver")
and (matrix_conf.get("access_token") or matrix_conf.get("password"))
)
def validate(self) -> bool:
if not self.config:
return False
if self._init_ok is False:
return False
matrix_conf = self.config.get("provider", {}).get("matrix", {})
return bool(
matrix_conf.get("homeserver")
and (matrix_conf.get("access_token") or matrix_conf.get("password"))
)
def _get_homeserver_and_token(self) -> Tuple[str, str]:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
if not homeserver:
raise Exception("Matrix homeserver missing")
if not access_token:
raise Exception("Matrix access_token missing")
base = _normalize_homeserver(str(homeserver))
if not base:
raise Exception("Matrix homeserver missing")
return base, str(access_token)
def _get_homeserver_and_token(self) -> Tuple[str, str]:
matrix_conf = self.config.get("provider", {}).get("matrix", {})
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
if not homeserver:
raise Exception("Matrix homeserver missing")
if not access_token:
raise Exception("Matrix access_token missing")
base = _normalize_homeserver(str(homeserver))
if not base:
raise Exception("Matrix homeserver missing")
return base, str(access_token)
def list_joined_room_ids(self) -> List[str]:
"""Return joined room IDs for the current user.
def list_joined_room_ids(self) -> List[str]:
"""Return joined room IDs for the current user.
Uses `GET /_matrix/client/v3/joined_rooms`.
"""
base, token = self._get_homeserver_and_token()
headers = {"Authorization": f"Bearer {token}"}
resp = requests.get(f"{base}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
if resp.status_code != 200:
raise Exception(f"Matrix joined_rooms failed: {resp.text}")
data = resp.json() or {}
rooms = data.get("joined_rooms") or []
out: List[str] = []
for rid in rooms:
if not isinstance(rid, str) or not rid.strip():
continue
out.append(rid.strip())
return out
Uses `GET /_matrix/client/v3/joined_rooms`.
"""
base, token = self._get_homeserver_and_token()
headers = {"Authorization": f"Bearer {token}"}
resp = requests.get(f"{base}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
if resp.status_code != 200:
raise Exception(f"Matrix joined_rooms failed: {resp.text}")
data = resp.json() or {}
rooms = data.get("joined_rooms") or []
out: List[str] = []
for rid in rooms:
if not isinstance(rid, str) or not rid.strip():
continue
out.append(rid.strip())
return out
def list_rooms(self, *, room_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
"""Return joined rooms, optionally limited to a subset.
def list_rooms(self, *, room_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
"""Return joined rooms, optionally limited to a subset.
Performance note: room names require additional per-room HTTP requests.
If `room_ids` is provided, only those rooms will have name lookups.
"""
base, token = self._get_homeserver_and_token()
headers = {"Authorization": f"Bearer {token}"}
Performance note: room names require additional per-room HTTP requests.
If `room_ids` is provided, only those rooms will have name lookups.
"""
base, token = self._get_homeserver_and_token()
headers = {"Authorization": f"Bearer {token}"}
joined = self.list_joined_room_ids()
if room_ids:
allowed = {str(v).strip().casefold() for v in room_ids if str(v).strip()}
if allowed:
# Accept either full IDs (!id:hs) or short IDs (!id).
def _is_allowed(rid: str) -> bool:
r = str(rid or "").strip()
if not r:
return False
rc = r.casefold()
if rc in allowed:
return True
short = r.split(":", 1)[0].strip().casefold()
return bool(short) and short in allowed
joined = self.list_joined_room_ids()
if room_ids:
allowed = {str(v).strip().casefold() for v in room_ids if str(v).strip()}
if allowed:
# Accept either full IDs (!id:hs) or short IDs (!id).
def _is_allowed(rid: str) -> bool:
r = str(rid or "").strip()
if not r:
return False
rc = r.casefold()
if rc in allowed:
return True
short = r.split(":", 1)[0].strip().casefold()
return bool(short) and short in allowed
joined = [rid for rid in joined if _is_allowed(rid)]
joined = [rid for rid in joined if _is_allowed(rid)]
out: List[Dict[str, Any]] = []
for room_id in joined:
name = ""
# Best-effort room name lookup (safe to fail).
try:
encoded = quote(room_id, safe="")
name_resp = requests.get(
f"{base}/_matrix/client/v3/rooms/{encoded}/state/m.room.name",
headers=headers,
timeout=5,
)
if name_resp.status_code == 200:
payload = name_resp.json() or {}
maybe = payload.get("name")
if isinstance(maybe, str):
name = maybe
except Exception:
pass
out.append({"room_id": room_id, "name": name})
return out
out: List[Dict[str, Any]] = []
for room_id in joined:
name = ""
# Best-effort room name lookup (safe to fail).
try:
encoded = quote(room_id, safe="")
name_resp = requests.get(
f"{base}/_matrix/client/v3/rooms/{encoded}/state/m.room.name",
headers=headers,
timeout=5,
)
if name_resp.status_code == 200:
payload = name_resp.json() or {}
maybe = payload.get("name")
if isinstance(maybe, str):
name = maybe
except Exception:
pass
out.append({"room_id": room_id, "name": name})
return out
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
"""Upload a file and send it to a specific room."""
from models import ProgressFileReader
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
if not room_id:
raise Exception("Matrix room_id missing")
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
"""Upload a file and send it to a specific room."""
from models import ProgressFileReader
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/octet-stream",
}
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
if not room_id:
raise Exception("Matrix room_id missing")
mime_type, msgtype = _classify_matrix_upload(path, explicit_mime_type=kwargs.get("mime_type"))
headers["Content-Type"] = mime_type
base, token = self._get_homeserver_and_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/octet-stream",
}
filename = path.name
mime_type, msgtype = _classify_matrix_upload(
path, explicit_mime_type=kwargs.get("mime_type")
)
headers["Content-Type"] = mime_type
# Upload media
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(handle, total_bytes=int(path.stat().st_size), label="upload")
resp = requests.post(upload_url, headers=headers, data=wrapped, params={"filename": filename})
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = (resp.json() or {}).get("content_uri")
if not content_uri:
raise Exception("No content_uri returned")
filename = path.name
# Build a fragment-free URL suitable for storage backends.
# `matrix.to` links use fragments (`#/...`) which some backends normalize away.
download_url_for_store = ""
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://"):]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
media_id = str(media_id).strip()
if server_name and media_id:
download_url_for_store = f"{base}/_matrix/media/v3/download/{quote(server_name, safe='')}/{quote(media_id, safe='')}"
except Exception:
download_url_for_store = ""
# Upload media
upload_url = f"{base}/_matrix/media/v3/upload"
with open(path, "rb") as handle:
wrapped = ProgressFileReader(
handle, total_bytes=int(path.stat().st_size), label="upload"
)
resp = requests.post(
upload_url, headers=headers, data=wrapped, params={"filename": filename}
)
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = (resp.json() or {}).get("content_uri")
if not content_uri:
raise Exception("No content_uri returned")
info = {"mimetype": mime_type, "size": path.stat().st_size}
payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
# Build a fragment-free URL suitable for storage backends.
# `matrix.to` links use fragments (`#/...`) which some backends normalize away.
download_url_for_store = ""
try:
curi = str(content_uri or "").strip()
if curi.startswith("mxc://"):
rest = curi[len("mxc://") :]
if "/" in rest:
server_name, media_id = rest.split("/", 1)
server_name = str(server_name).strip()
media_id = str(media_id).strip()
if server_name and media_id:
download_url_for_store = f"{base}/_matrix/media/v3/download/{quote(server_name, safe='')}/{quote(media_id, safe='')}"
except Exception:
download_url_for_store = ""
# Correct Matrix client API send endpoint requires a transaction ID.
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
encoded_room = quote(str(room_id), safe="")
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send message failed: {send_resp.text}")
info = {"mimetype": mime_type, "size": path.stat().st_size}
payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
event_id = (send_resp.json() or {}).get("event_id")
link = f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}"
# Correct Matrix client API send endpoint requires a transaction ID.
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
encoded_room = quote(str(room_id), safe="")
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
send_headers = {"Authorization": f"Bearer {token}"}
send_resp = requests.put(send_url, headers=send_headers, json=payload)
if send_resp.status_code != 200:
raise Exception(f"Matrix send message failed: {send_resp.text}")
# Optional: if a PipeObject is provided and it already has store+hash,
# attach the uploaded URL back to the stored file.
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
event_id = (send_resp.json() or {}).get("event_id")
link = (
f"https://matrix.to/#/{room_id}/{event_id}"
if event_id
else f"https://matrix.to/#/{room_id}"
)
# Prefer the direct media download URL for storage backends.
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj,
download_url_for_store or link,
)
except Exception:
pass
# Optional: if a PipeObject is provided and it already has store+hash,
# attach the uploaded URL back to the stored file.
try:
pipe_obj = kwargs.get("pipe_obj")
if pipe_obj is not None:
from Store import Store
return link
            # Prefer the direct media download URL for storage backends.
            Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
                pipe_obj,
                download_url_for_store or link,
            )
        except Exception:
            pass

        return link

    def send_text_to_room(self, text: str, room_id: str) -> str:
        """Send a plain text message to a specific room."""
        message = str(text or "").strip()
        if not message:
            return ""
        if not room_id:
            raise Exception("Matrix room_id missing")

        base, token = self._get_homeserver_and_token()
        encoded_room = quote(str(room_id), safe="")
        txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
        send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
        send_headers = {"Authorization": f"Bearer {token}"}
        payload = {"msgtype": "m.text", "body": message}
        send_resp = requests.put(send_url, headers=send_headers, json=payload)
        if send_resp.status_code != 200:
            raise Exception(f"Matrix send text failed: {send_resp.text}")
        event_id = (send_resp.json() or {}).get("event_id")
        return f"https://matrix.to/#/{room_id}/{event_id}" if event_id else f"https://matrix.to/#/{room_id}"
        return (
            f"https://matrix.to/#/{room_id}/{event_id}"
            if event_id
            else f"https://matrix.to/#/{room_id}"
        )

    def upload(self, file_path: str, **kwargs: Any) -> str:
        matrix_conf = self.config.get("provider", {}).get("matrix", {})
        room_id = matrix_conf.get("room_id")
        if not room_id:
            raise Exception("Matrix room_id missing")
        return self.upload_to_room(file_path, str(room_id))

    def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
    def selector(
        self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
    ) -> bool:
        """Handle Matrix room selection via `@N`.

        If the CLI has a pending upload stash, selecting a room triggers an upload.
        """
        if not stage_is_last:
            return False

        pending = None
        try:
            pending = ctx.load_value('matrix_pending_uploads', default=None)
            pending = ctx.load_value("matrix_pending_uploads", default=None)
        except Exception:
            pending = None

        pending_list = list(pending) if isinstance(pending, list) else []
        if not pending_list:
            return False

        room_ids: List[str] = []
        for item in selected_items or []:
            rid = None
            if isinstance(item, dict):
                rid = item.get('room_id') or item.get('id')
            else:
                rid = getattr(item, 'room_id', None) or getattr(item, 'id', None)
                rid = item.get("room_id") or item.get("id")
            else:
                rid = getattr(item, "room_id", None) or getattr(item, "id", None)
            if rid and str(rid).strip():
                room_ids.append(str(rid).strip())

        if not room_ids:
            print("No Matrix room selected\n")
            return True

        any_failed = False
        for room_id in room_ids:
            for payload in pending_list:
                try:
                    file_path = ''
                    delete_after = False
                    pipe_obj = None
                    if isinstance(payload, dict):
                        file_path = str(payload.get('path') or '')
                        delete_after = bool(payload.get('delete_after', False))
                        pipe_obj = payload.get('pipe_obj')
                    else:
                        file_path = str(getattr(payload, 'path', '') or '')
                    file_path = ""
                    delete_after = False
                    pipe_obj = None
                    if isinstance(payload, dict):
                        file_path = str(payload.get("path") or "")
                        delete_after = bool(payload.get("delete_after", False))
                        pipe_obj = payload.get("pipe_obj")
                    else:
                        file_path = str(getattr(payload, "path", "") or "")
                    if not file_path:
                        any_failed = True
                        continue

                    media_path = Path(file_path)
                    if not media_path.exists():
                        any_failed = True
                        print(f"Matrix upload file missing: {file_path}")
                        continue

                    link = self.upload_to_room(str(media_path), str(room_id), pipe_obj=pipe_obj)
                    if link:
                        print(link)

                    if delete_after:
                        try:
                            media_path.unlink(missing_ok=True)  # type: ignore[arg-type]
                        except TypeError:
                            try:
                                if media_path.exists():
                                    media_path.unlink()
                            except Exception:
                                pass
                except Exception as exc:
                    any_failed = True
                    print(f"Matrix upload failed: {exc}")

        try:
            ctx.store_value('matrix_pending_uploads', [])
            ctx.store_value("matrix_pending_uploads", [])
        except Exception:
            pass

        if any_failed:
            print("\nOne or more Matrix uploads failed\n")
        return True
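
For orientation, a sketch of how a CLI stage might populate the stash that selector() consumes. The ctx store_value/load_value methods are the same ones used above; the payload values are invented for illustration:

# Hypothetical producer side of the matrix_pending_uploads stash.
pending = [
    {"path": "/tmp/clip.mp4", "delete_after": True, "pipe_obj": None},
]
ctx.store_value("matrix_pending_uploads", pending)
# A later `@N` room selection invokes selector(), which uploads each stashed
# file to every chosen room, prints the returned links, and clears the stash.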

View File

@@ -100,7 +100,10 @@ class OpenLibraryMetadataProvider(MetadataProvider):
try:
# Prefer ISBN-specific search when the query looks like one
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (
10,
13,
):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
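
Read in isolation, the heuristic above is just a digit/length test; a minimal sketch (the helper name is hypothetical, not part of the diff):

def _looks_like_isbn(query: str) -> bool:
    # Mirrors the check above: 10 or 13 digits once hyphens are stripped.
    # Note that ISBN-10s ending in an "X" check digit will not match this test.
    digits = query.replace("-", "")
    return digits.isdigit() and len(digits) in (10, 13)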
@@ -135,23 +138,25 @@ class OpenLibraryMetadataProvider(MetadataProvider):
if isinstance(key, str) and key:
olid = key.split("/")[-1]
items.append({
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": str(doc.get("first_publish_year") or ""),
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": {
"isbn_13": isbn_13,
"isbn_10": isbn_10,
"openlibrary": olid,
"oclc": (doc.get("oclc_numbers") or [None])[0],
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
})
items.append(
{
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": str(doc.get("first_publish_year") or ""),
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": {
"isbn_13": isbn_13,
"isbn_10": isbn_10,
"openlibrary": olid,
"oclc": (doc.get("oclc_numbers") or [None])[0],
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
}
)
return items
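
For reference, each appended item follows the lightweight schema shared by the metadata providers in this file; the values below are invented purely to illustrate the shape:

# Illustrative only: example of one dict in the items list returned above.
example_item = {
    "title": "Example Title",
    "artist": "Jane Author",        # comma-joined author names
    "album": "Example Publisher",   # publisher is mapped onto "album"
    "year": "1999",
    "provider": "openlibrary",
    "authors": ["Jane Author"],
    "publisher": "Example Publisher",
    "identifiers": {
        "isbn_13": "9780000000000",
        "isbn_10": "0000000000",
        "openlibrary": "OL0000000M",
        "oclc": None,
        "lccn": None,
    },
    "description": None,
}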
@@ -238,17 +243,19 @@ class GoogleBooksMetadataProvider(MetadataProvider):
else:
identifiers.setdefault(ident_type, ident_value)
items.append({
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": year,
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
})
items.append(
{
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": year,
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
}
)
return items
@@ -346,7 +353,9 @@ class ISBNsearchMetadataProvider(MetadataProvider):
chunk_start = m.end()
# Stop at next <strong> or end of document.
chunk_end = strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
)
chunk = html[chunk_start:chunk_end]
# Prefer stopping within the same paragraph when possible.
m_end = re.search(r"(?is)(</p>|<br\s*/?>)", chunk)
@@ -389,7 +398,10 @@ class ISBNsearchMetadataProvider(MetadataProvider):
# Prefer parsed title, but fall back to og:title if needed.
if not title:
m_og = re.search(r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>", html)
m_og = re.search(
r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>",
html,
)
if m_og:
title = self._strip_html_to_text(m_og.group(1))
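
A quick illustration of the og:title fallback regex above against a trimmed page head (the HTML sample is invented):

import re

html = '<meta property="og:title" content="Example Book Title" />'
m_og = re.search(
    r"(?is)<meta\b[^>]*property=['\"]og:title['\"][^>]*content=['\"](.*?)['\"][^>]*>",
    html,
)
print(m_og.group(1) if m_og else None)  # -> Example Book Title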
@@ -534,15 +546,17 @@ class MusicBrainzMetadataProvider(MetadataProvider):
year = str(release_date)[:4] if release_date else ""
mbid = rec.get("id") or ""
items.append({
"title": title,
"artist": artist,
"album": album,
"year": year,
"provider": self.name,
"mbid": mbid,
"raw": rec,
})
items.append(
{
"title": title,
"artist": artist,
"album": album,
"year": year,
"provider": self.name,
"mbid": mbid,
"raw": rec,
}
)
return items
@@ -624,12 +638,7 @@ class YtdlpMetadataProvider(MetadataProvider):
# Provide basic columns for the standard metadata selection table.
# NOTE: This is best-effort; many extractors don't provide artist/album.
artist = (
info.get("artist")
or info.get("uploader")
or info.get("channel")
or ""
)
artist = info.get("artist") or info.get("uploader") or info.get("channel") or ""
album = info.get("album") or info.get("playlist_title") or ""
title = info.get("title") or ""
@@ -734,7 +743,9 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
return availability
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
def get_metadata_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None
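
A minimal usage sketch of the registry lookup above. The provider name and query are illustrative, and the search call assumes the free-text query interface the hunks above suggest:

provider = get_metadata_provider("musicbrainz", config={})
if provider is not None:
    # Assumed: MetadataProvider subclasses expose a text-query search.
    results = provider.search("The Freewheelin' Bob Dylan")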

View File

@@ -154,7 +154,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
return False, "api-error"
def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str:
def _resolve_archive_id(
session: requests.Session, edition_id: str, ia_candidates: List[str]
) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
first = ia_candidates[0].strip()
@@ -420,18 +422,24 @@ class OpenLibrary(Provider):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
@staticmethod
def _archive_error_body(response: requests.Response) -> str:
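
The credential resolution above walks three config locations in priority order; a condensed sketch (helper name hypothetical) of the same precedence:

from typing import Any, Dict, Optional, Tuple

def _first_credentials(
    *sections: Optional[Dict[str, Any]],
) -> Tuple[Optional[str], Optional[str]]:
    # Hypothetical helper mirroring the precedence above: the first section
    # with either an email or a password set wins; later sections are ignored.
    for section in sections:
        if not isinstance(section, dict):
            continue
        email, password = section.get("email"), section.get("password")
        if email or password:
            return (
                str(email) if email is not None else None,
                str(password) if password is not None else None,
            )
    return None, None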
@@ -452,10 +460,14 @@ class OpenLibrary(Provider):
try:
token_json = token_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}"
)
if not token_json.get("success"):
raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}"
)
token = (token_json.get("value") or {}).get("token")
if not token:
@@ -474,7 +486,9 @@ class OpenLibrary(Provider):
try:
login_json = login_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")
raise RuntimeError(
f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}"
)
if login_json.get("success") is False:
if login_json.get("value") == "bad_login":
@@ -484,16 +498,23 @@ class OpenLibrary(Provider):
return session
@classmethod
def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
def _archive_loan(
cls, session: requests.Session, book_id: str, *, verbose: bool = True
) -> requests.Session:
data = {"action": "grant_access", "identifier": book_id}
session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
session.post(
"https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
err = (response.json() or {}).get("error")
if err == "This book is not available to borrow at this time. Please try again later.":
if (
err
== "This book is not available to borrow at this time. Please try again later."
):
raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
except cls.BookNotAvailableError:
@@ -574,7 +595,9 @@ class OpenLibrary(Provider):
return False, "archive-metadata-error"
@staticmethod
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
def _archive_get_book_infos(
session: requests.Session, url: str
) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract page links from Archive.org book reader."""
r = session.get(url, timeout=30).text
@@ -748,7 +771,9 @@ class OpenLibrary(Provider):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
pdf_url = (
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
)
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
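
The availability probe above can be read in isolation; a minimal sketch (function name illustrative) of the same HEAD-request check:

import requests

def _direct_pdf_available(url: str) -> bool:
    # HEAD with redirects confirms the original PDF is directly downloadable
    # without fetching any file bytes; the short timeout keeps scans responsive.
    try:
        return requests.head(url, timeout=4, allow_redirects=True).status_code == 200
    except requests.RequestException:
        return False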
@@ -1040,7 +1065,9 @@ class OpenLibrary(Provider):
session_local = requests.Session()
try:
archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
archive_id_local = _resolve_archive_id(
session_local, edition_id_local, ia_ids_local
)
except Exception:
archive_id_local = ""
@@ -1062,7 +1089,9 @@ class OpenLibrary(Provider):
return "unavailable", reason_local, archive_id_local, ""
availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
availability_rows: List[Tuple[str, str, str, str]] = [
("unknown", "", "", "") for _ in range(len(docs))
]
if docs:
max_workers = min(8, max(1, len(docs)))
done = 0
@@ -1080,7 +1109,6 @@ class OpenLibrary(Provider):
availability_rows[i] = ("unknown", "", "", "")
done += 1
for idx, doc in enumerate(docs):
if not isinstance(doc, dict):
continue
@@ -1150,8 +1178,12 @@ class OpenLibrary(Provider):
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else (
f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
f"https://openlibrary.org/books/{edition_id}"
if edition_id
else (
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/")
else "https://openlibrary.org"
)
),
detail=(
@@ -1257,7 +1289,11 @@ class OpenLibrary(Provider):
out_path,
session=self._session,
progress_callback=(
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
(
lambda downloaded, total, label: progress_callback(
"bytes", downloaded, total, label
)
)
if progress_callback is not None
else None
),
@@ -1315,7 +1351,10 @@ class OpenLibrary(Provider):
except Exception:
pass
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
urls = [
f"https://archive.org/borrow/{archive_id}",
f"https://archive.org/details/{archive_id}",
]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
@@ -1358,7 +1397,10 @@ class OpenLibrary(Provider):
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
log(
"[openlibrary] PDF conversion failed; keeping images folder",
file=sys.stderr,
)
return Path(temp_dir)
try:

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -20,6 +20,7 @@ class YouTube(Provider):
# Use the yt_dlp Python module (installed via requirements.txt).
try:
import yt_dlp # type: ignore
ydl_opts: Dict[str, Any] = {"quiet": True, "skip_download": True, "extract_flat": True}
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
search_query = f"ytsearch{limit}:{query}"
@@ -34,7 +35,9 @@ class YouTube(Provider):
duration = video_data.get("duration", 0)
view_count = video_data.get("view_count", 0)
duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
duration_str = (
f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
)
views_str = f"{view_count:,}" if view_count else ""
results.append(
@@ -67,6 +70,7 @@ class YouTube(Provider):
def validate(self) -> bool:
try:
import yt_dlp # type: ignore
return True
except Exception:
return False
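
For context, a standalone sketch of the flat search the provider performs; the query and result limit are illustrative:

import yt_dlp

opts = {"quiet": True, "skip_download": True, "extract_flat": True}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info("ytsearch5:public domain films", download=False)
for entry in (info or {}).get("entries") or []:
    # extract_flat returns lightweight entries without resolving formats.
    print(entry.get("title"), entry.get("url"))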

View File

@@ -36,7 +36,9 @@ class ZeroXZero(Provider):
if pipe_obj is not None:
from Store import Store
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(pipe_obj, uploaded_url)
Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
pipe_obj, uploaded_url
)
except Exception:
pass