Add YAPF style + ignore, and format tracked Python files
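Note: the `.style.yapf` and ignore file this commit adds are not part of the excerpt below; only the reformatted provider modules are. As a hedged sketch, a style file consistent with the formatting in this diff (every value here is an assumption inferred from the output, not the committed config) would look like:

    # .style.yapf -- hypothetical values inferred from the diff output below
    [style]
    based_on_style = pep8
    column_limit = 79
    # This knob would explain the one-argument-per-line splits seen throughout:
    split_all_comma_separated_values = true

With a `.yapfignore` listing any paths to skip, the tracked files would then have been reformatted in place with something like `yapf --in-place --recursive .` (yapf picks up `.style.yapf` and `.yapfignore` from the project root).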
@@ -37,7 +37,7 @@ def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]:

     # 2) store.debrid block (canonical for debrid store configuration)
     try:
-        from config import get_debrid_api_key
+        from SYS.config import get_debrid_api_key

        key = get_debrid_api_key(config, service="All-debrid")
        return key.strip() if key else None
@@ -97,23 +97,27 @@ class AllDebrid(Provider):
        # Quiet mode when download-file is mid-pipeline.
        quiet = (
            bool(self.config.get("_quiet_background_output"))
-            if isinstance(self.config, dict)
-            else False
+            if isinstance(self.config,
+                          dict) else False
        )

        unlocked_url = target
        try:
            unlocked = client.unlock_link(target)
-            if isinstance(unlocked, str) and unlocked.strip().startswith(
-                ("http://", "https://")
-            ):
+            if isinstance(unlocked,
+                          str) and unlocked.strip().startswith(("http://",
+                                                                "https://")):
                unlocked_url = unlocked.strip()
        except Exception as exc:
            # Fall back to the raw link, but warn.
            log(f"[alldebrid] Failed to unlock link: {exc}", file=sys.stderr)

        # Prefer provider title as the output filename.
-        suggested = sanitize_filename(str(getattr(result, "title", "") or "").strip())
+        suggested = sanitize_filename(
+            str(getattr(result,
+                        "title",
+                        "") or "").strip()
+        )
        suggested_name = suggested if suggested else None

        try:
@@ -142,11 +146,9 @@ class AllDebrid(Provider):
        try:
            if downloaded_path.exists():
                size = downloaded_path.stat().st_size
-                if (
-                    size > 0
-                    and size <= 250_000
-                    and downloaded_path.suffix.lower() not in (".html", ".htm")
-                ):
+                if (size > 0 and size <= 250_000
+                        and downloaded_path.suffix.lower() not in (".html",
+                                                                   ".htm")):
                    head = downloaded_path.read_bytes()[:512]
                    try:
                        text = head.decode("utf-8", errors="ignore").lower()
@@ -173,9 +175,10 @@ class AllDebrid(Provider):
        return None

    @staticmethod
-    def _flatten_files(
-        items: Any, *, _prefix: Optional[List[str]] = None
-    ) -> Iterable[Dict[str, Any]]:
+    def _flatten_files(items: Any,
+                       *,
+                       _prefix: Optional[List[str]] = None) -> Iterable[Dict[str,
+                                                                             Any]]:
        """Flatten AllDebrid magnet file tree into file dicts, preserving relative paths.

        API commonly returns:
@@ -211,7 +214,9 @@ class AllDebrid(Provider):

            name = node.get("n") or node.get("name")
            link = node.get("l") or node.get("link")
-            if isinstance(name, str) and name.strip() and isinstance(link, str) and link.strip():
+            if isinstance(name,
+                          str) and name.strip() and isinstance(link,
+                                                               str) and link.strip():
                rel_parts = prefix + [name.strip()]
                relpath = "/".join([p for p in rel_parts if p])
                enriched = dict(node)
@@ -222,7 +227,8 @@ class AllDebrid(Provider):
        self,
        query: str,
        limit: int = 50,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Dict[str,
+                               Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        q = (query or "").strip()
@@ -247,7 +253,9 @@ class AllDebrid(Provider):
            return []

        q_lower = q.lower()
-        needle = "" if q_lower in {"*", "all", "list"} else q_lower
+        needle = "" if q_lower in {"*",
+                                   "all",
+                                   "list"} else q_lower

        # Second-stage: list files for a specific magnet id.
        if view == "files":
@@ -262,17 +270,16 @@ class AllDebrid(Provider):
            except Exception:
                return []

-            magnet_status: Dict[str, Any] = {}
+            magnet_status: Dict[str,
+                                Any] = {}
            try:
                magnet_status = client.magnet_status(magnet_id)
            except Exception:
                magnet_status = {}

            magnet_name = str(
-                magnet_status.get("filename")
-                or magnet_status.get("name")
-                or magnet_status.get("hash")
-                or f"magnet-{magnet_id}"
+                magnet_status.get("filename") or magnet_status.get("name")
+                or magnet_status.get("hash") or f"magnet-{magnet_id}"
            )
            status_code = magnet_status.get("statusCode")
            status_text = str(magnet_status.get("status") or "").strip() or "unknown"
@@ -285,25 +292,40 @@ class AllDebrid(Provider):
                    title=magnet_name,
                    path=f"alldebrid:magnet:{magnet_id}",
                    detail=status_text,
-                    annotations=["folder", "not-ready"],
+                    annotations=["folder",
+                                 "not-ready"],
                    media_kind="folder",
-                    tag={"alldebrid", "folder", str(magnet_id), "not-ready"},
+                    tag={"alldebrid",
+                         "folder",
+                         str(magnet_id),
+                         "not-ready"},
                    columns=[
-                        ("Folder", magnet_name),
-                        ("ID", str(magnet_id)),
-                        ("Status", status_text),
-                        ("Ready", "no"),
+                        ("Folder",
+                         magnet_name),
+                        ("ID",
+                         str(magnet_id)),
+                        ("Status",
+                         status_text),
+                        ("Ready",
+                         "no"),
                    ],
-                    full_metadata={"magnet": magnet_status, "magnet_id": magnet_id},
+                    full_metadata={
+                        "magnet": magnet_status,
+                        "magnet_id": magnet_id
+                    },
                )
            ]

        try:
            files_result = client.magnet_links([magnet_id])
            magnet_files = (
-                files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {}
+                files_result.get(str(magnet_id),
+                                 {}) if isinstance(files_result,
+                                                   dict) else {}
            )
-            file_tree = magnet_files.get("files", []) if isinstance(magnet_files, dict) else []
+            file_tree = magnet_files.get("files",
+                                         []) if isinstance(magnet_files,
+                                                           dict) else []
        except Exception as exc:
            log(
                f"[alldebrid] Failed to list files for magnet {magnet_id}: {exc}",
@@ -313,8 +335,10 @@ class AllDebrid(Provider):

        results: List[SearchResult] = []
        for file_node in self._flatten_files(file_tree):
-            file_name = str(file_node.get("n") or file_node.get("name") or "").strip()
-            file_url = str(file_node.get("l") or file_node.get("link") or "").strip()
+            file_name = str(file_node.get("n") or file_node.get("name")
+                            or "").strip()
+            file_url = str(file_node.get("l") or file_node.get("link")
+                           or "").strip()
            relpath = str(file_node.get("_relpath") or file_name or "").strip()
            file_size = file_node.get("s") or file_node.get("size")
            if not file_name or not file_url:
@@ -341,11 +365,16 @@ class AllDebrid(Provider):
                    annotations=["file"],
                    media_kind="file",
                    size_bytes=size_bytes,
-                    tag={"alldebrid", "file", str(magnet_id)},
+                    tag={"alldebrid",
+                         "file",
+                         str(magnet_id)},
                    columns=[
-                        ("File", file_name),
-                        ("Folder", magnet_name),
-                        ("ID", str(magnet_id)),
+                        ("File",
+                         file_name),
+                        ("Folder",
+                         magnet_name),
+                        ("ID",
+                         str(magnet_id)),
                    ],
                    full_metadata={
                        "magnet": magnet_status,
@@ -386,9 +415,7 @@ class AllDebrid(Provider):
                continue

            magnet_name = str(
-                magnet.get("filename")
-                or magnet.get("name")
-                or magnet.get("hash")
+                magnet.get("filename") or magnet.get("name") or magnet.get("hash")
                or f"magnet-{magnet_id}"
            )
            magnet_name_lower = magnet_name.lower()
@@ -422,15 +449,24 @@ class AllDebrid(Provider):
                    annotations=["folder"],
                    media_kind="folder",
                    size_bytes=size_bytes,
-                    tag={"alldebrid", "folder", str(magnet_id)}
+                    tag={"alldebrid",
+                         "folder",
+                         str(magnet_id)}
                    | ({"ready"} if ready else {"not-ready"}),
                    columns=[
-                        ("Folder", magnet_name),
-                        ("ID", str(magnet_id)),
-                        ("Status", status_text),
-                        ("Ready", "yes" if ready else "no"),
+                        ("Folder",
+                         magnet_name),
+                        ("ID",
+                         str(magnet_id)),
+                        ("Status",
+                         status_text),
+                        ("Ready",
+                         "yes" if ready else "no"),
                    ],
-                    full_metadata={"magnet": magnet, "magnet_id": magnet_id},
+                    full_metadata={
+                        "magnet": magnet,
+                        "magnet_id": magnet_id
+                    },
                )
            )

@@ -38,9 +38,10 @@ class Bandcamp(Provider):
        # Bandcamp discography lives under /music.
        return base.rstrip("/") + "/music"

-    def _scrape_artist_page(
-        self, page: Any, artist_url: str, limit: int = 50
-    ) -> List[SearchResult]:
+    def _scrape_artist_page(self,
+                            page: Any,
+                            artist_url: str,
+                            limit: int = 50) -> List[SearchResult]:
        """Scrape an artist page for albums/tracks (discography)."""
        base = self._base_url(artist_url)
        discography_url = self._discography_url(artist_url)
@@ -75,7 +76,8 @@ class Bandcamp(Provider):
            else:
                target = base.rstrip("/") + "/" + href

-            title_node = item.query_selector("p.title") or item.query_selector(".title")
+            title_node = item.query_selector("p.title"
+                                             ) or item.query_selector(".title")
            title = title_node.inner_text().strip() if title_node else ""
            if title:
                title = " ".join(title.split())
@@ -83,7 +85,8 @@ class Bandcamp(Provider):
                title = target.rsplit("/", 1)[-1]

            kind = (
-                "album" if "/album/" in target else ("track" if "/track/" in target else "item")
+                "album" if "/album/" in target else
+                ("track" if "/track/" in target else "item")
            )

            results.append(
@@ -95,9 +98,12 @@ class Bandcamp(Provider):
                    annotations=[kind],
                    media_kind="audio",
                    columns=[
-                        ("Title", title),
-                        ("Type", kind),
-                        ("Url", target),
+                        ("Title",
+                         title),
+                        ("Type",
+                         kind),
+                        ("Url",
+                         target),
                    ],
                    full_metadata={
                        "type": kind,
@@ -112,7 +118,12 @@ class Bandcamp(Provider):
        return results

    def selector(
-        self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
+        self,
+        selected_items: List[Any],
+        *,
+        ctx: Any,
+        stage_is_last: bool = True,
+        **_kwargs: Any
    ) -> bool:
        """Handle Bandcamp `@N` selection.

@@ -128,7 +139,8 @@ class Bandcamp(Provider):
        # Only handle artist selections.
        chosen: List[Dict[str, Any]] = []
        for item in selected_items or []:
-            payload: Dict[str, Any] = {}
+            payload: Dict[str,
+                          Any] = {}
            if isinstance(item, dict):
                payload = item
            else:
@@ -140,11 +152,21 @@ class Bandcamp(Provider):
            if not payload:
                try:
                    payload = {
-                        "title": getattr(item, "title", None),
-                        "url": getattr(item, "url", None),
-                        "path": getattr(item, "path", None),
-                        "metadata": getattr(item, "metadata", None),
-                        "extra": getattr(item, "extra", None),
+                        "title": getattr(item,
+                                         "title",
+                                         None),
+                        "url": getattr(item,
+                                       "url",
+                                       None),
+                        "path": getattr(item,
+                                        "path",
+                                        None),
+                        "metadata": getattr(item,
+                                            "metadata",
+                                            None),
+                        "extra": getattr(item,
+                                         "extra",
+                                         None),
                    }
                except Exception:
                    payload = {}
@@ -154,7 +176,10 @@ class Bandcamp(Provider):
            meta = {}
            extra = payload.get("extra")
            if isinstance(extra, dict):
-                meta = {**meta, **extra}
+                meta = {
+                    **meta,
+                    **extra
+                }

            type_val = str(meta.get("type") or "").strip().lower()
            if type_val != "artist":
@@ -169,7 +194,11 @@ class Bandcamp(Provider):
                continue

            chosen.append(
-                {"title": title, "url": base, "location": str(meta.get("artist") or "").strip()}
+                {
+                    "title": title,
+                    "url": base,
+                    "location": str(meta.get("artist") or "").strip()
+                }
            )

        if not chosen:
@@ -211,8 +240,12 @@ class Bandcamp(Provider):
                results_payload.append(
                    {
                        "table": "bandcamp",
-                        "title": getattr(r, "title", ""),
-                        "path": getattr(r, "path", ""),
+                        "title": getattr(r,
+                                         "title",
+                                         ""),
+                        "path": getattr(r,
+                                        "path",
+                                        ""),
                    }
                )

@@ -234,7 +267,8 @@ class Bandcamp(Provider):
        self,
        query: str,
        limit: int = 50,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Dict[str,
+                               Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        if sync_playwright is None:
@@ -305,10 +339,14 @@ class Bandcamp(Provider):
                    annotations=[media_type],
                    media_kind="audio",
                    columns=[
-                        ("Title", title),
-                        ("Location", artist),
-                        ("Type", media_type),
-                        ("Url", base_url or str(target_url or "")),
+                        ("Title",
+                         title),
+                        ("Location",
+                         artist),
+                        ("Type",
+                         media_type),
+                        ("Url",
+                         base_url or str(target_url or "")),
                    ],
                    full_metadata={
                        "artist": artist,

@@ -54,7 +54,8 @@ class FileIO(Provider):
    def __init__(self, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)
        conf = _pick_provider_config(self.config)
-        self._base_url = str(conf.get("base_url") or "https://file.io").strip().rstrip("/")
+        self._base_url = str(conf.get("base_url")
+                             or "https://file.io").strip().rstrip("/")
        self._api_key = conf.get("api_key")
        self._default_expires = conf.get("expires")
        self._default_max_downloads = conf.get("maxDownloads")
@@ -74,12 +75,19 @@ class FileIO(Provider):
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

-        data: Dict[str, Any] = {}
+        data: Dict[str,
+                   Any] = {}
        expires = kwargs.get("expires", self._default_expires)
        max_downloads = kwargs.get(
-            "maxDownloads", kwargs.get("max_downloads", self._default_max_downloads)
+            "maxDownloads",
+            kwargs.get("max_downloads",
+                       self._default_max_downloads)
        )
-        auto_delete = kwargs.get("autoDelete", kwargs.get("auto_delete", self._default_auto_delete))
+        auto_delete = kwargs.get(
+            "autoDelete",
+            kwargs.get("auto_delete",
+                       self._default_auto_delete)
+        )

        if expires not in (None, ""):
            data["expires"] = expires
@@ -88,7 +96,11 @@ class FileIO(Provider):
        if auto_delete not in (None, ""):
            data["autoDelete"] = auto_delete

-        headers: Dict[str, str] = {"User-Agent": "Medeia-Macina/1.0", "Accept": "application/json"}
+        headers: Dict[str,
+                      str] = {
+            "User-Agent": "Medeia-Macina/1.0",
+            "Accept": "application/json"
+        }
        if isinstance(self._api_key, str) and self._api_key.strip():
            # Some file.io plans use bearer tokens; keep optional.
            headers["Authorization"] = f"Bearer {self._api_key.strip()}"
@@ -101,19 +113,28 @@ class FileIO(Provider):
                total = os.path.getsize(file_path)
            except Exception:
                total = None
-            wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
+            wrapped = ProgressFileReader(
+                handle,
+                total_bytes=total,
+                label="upload"
+            )
            response = client.request(
                "POST",
                f"{self._base_url}/upload",
                data=data or None,
-                files={"file": (filename, wrapped)},
+                files={
+                    "file": (filename,
+                             wrapped)
+                },
                follow_redirects=True,
                raise_for_status=False,
            )

            if response.status_code >= 400:
-                location = response.headers.get("location") or response.headers.get("Location")
-                ct = response.headers.get("content-type") or response.headers.get("Content-Type")
+                location = response.headers.get("location"
+                                                ) or response.headers.get("Location")
+                ct = response.headers.get("content-type"
+                                          ) or response.headers.get("Content-Type")
                raise Exception(
                    f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
                )
@@ -127,7 +148,8 @@ class FileIO(Provider):
        # If the server ignored our Accept header and returned HTML, this is almost
        # certainly the wrong endpoint or an upstream block.
        ct = (
-            response.headers.get("content-type") or response.headers.get("Content-Type") or ""
+            response.headers.get("content-type")
+            or response.headers.get("Content-Type") or ""
        ).lower()
        if (payload is None) and ("text/html" in ct):
            raise Exception(
@@ -135,7 +157,8 @@ class FileIO(Provider):
        )

        if isinstance(payload, dict) and payload.get("success") is False:
-            reason = payload.get("message") or payload.get("error") or payload.get("status")
+            reason = payload.get("message"
+                                 ) or payload.get("error") or payload.get("status")
            raise Exception(str(reason or "Upload failed"))

        uploaded_url = _extract_link(payload)
@@ -166,9 +189,11 @@ class FileIO(Provider):
            if pipe_obj is not None:
                from Store import Store

-                Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
-                    pipe_obj, uploaded_url
-                )
+                Store(
+                    self.config,
+                    suppress_debug=True
+                ).try_add_url_for_pipe_object(pipe_obj,
+                                              uploaded_url)
        except Exception:
            pass

@@ -35,7 +35,9 @@ def _pick_provider_config(config: Any) -> Dict[str, Any]:

def _looks_fielded_query(q: str) -> bool:
    low = (q or "").lower()
-    return (":" in low) or (" and " in low) or (" or " in low) or (" not " in low) or ("(" in low)
+    return (":" in low) or (" and " in low) or (" or "
+                                                in low) or (" not "
+                                                            in low) or ("(" in low)


def _extract_identifier_from_any(value: str) -> str:
@@ -111,9 +113,7 @@ def is_download_file_url(url: str) -> bool:
        return False
    # /download/<identifier>/<filename>
    return (
-        len(parts) >= 3
-        and parts[0].lower() == "download"
-        and bool(parts[1].strip())
+        len(parts) >= 3 and parts[0].lower() == "download" and bool(parts[1].strip())
        and bool(parts[2].strip())
    )

@@ -158,9 +158,15 @@ def list_download_files(identifier: str) -> List[Dict[str, Any]]:
            files.append(
                {
                    "name": str(name),
-                    "size": getattr(f, "size", None),
-                    "format": getattr(f, "format", None),
-                    "source": getattr(f, "source", None),
+                    "size": getattr(f,
+                                    "size",
+                                    None),
+                    "format": getattr(f,
+                                      "format",
+                                      None),
+                    "source": getattr(f,
+                                      "source",
+                                      None),
                }
            )
    except Exception:
@@ -179,13 +185,16 @@ def list_download_files(identifier: str) -> List[Dict[str, Any]]:

        if source == "metadata":
            return True
-        if fmt in {"metadata", "archive bittorrent"}:
+        if fmt in {"metadata",
+                   "archive bittorrent"}:
            return True
        if fmt.startswith("thumbnail"):
            return True
        return False

-    candidates = [f for f in files if isinstance(f, dict) and not _is_ia_metadata_file(f)]
+    candidates = [
+        f for f in files if isinstance(f, dict) and not _is_ia_metadata_file(f)
+    ]
    if not candidates:
        candidates = [f for f in files if isinstance(f, dict)]

@@ -266,7 +275,8 @@ def _best_file_candidate(files: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        fmt = str(f.get("format") or "").strip().lower()
        if source == "metadata":
            return True
-        if fmt in {"metadata", "archive bittorrent"}:
+        if fmt in {"metadata",
+                   "archive bittorrent"}:
            return True
        if fmt.startswith("thumbnail"):
            return True
@@ -283,7 +293,10 @@ def _best_file_candidate(files: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    candidates = list(files)

    # Prefer originals.
-    originals = [f for f in candidates if str(f.get("source") or "").strip().lower() == "original"]
+    originals = [
+        f for f in candidates
+        if str(f.get("source") or "").strip().lower() == "original"
+    ]
    pool = originals if originals else candidates

    pool = [f for f in pool if str(f.get("name") or "").strip()]
@@ -330,7 +343,8 @@ class InternetArchive(Provider):
        mt = str(mediatype or "").strip().lower()
        if mt in {"texts"}:
            return "book"
-        if mt in {"audio", "etree"}:
+        if mt in {"audio",
+                  "etree"}:
            return "audio"
        if mt in {"movies"}:
            return "video"
@@ -342,7 +356,8 @@ class InternetArchive(Provider):
        self,
        query: str,
        limit: int = 50,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Dict[str,
+                               Any]] = None,
        **_kwargs: Any,
    ) -> List[SearchResult]:
        ia = _ia()
@@ -355,7 +370,8 @@ class InternetArchive(Provider):
            return []

        # If the user supplied a plain string, default to title search.
-        if not _looks_fielded_query(q) and q not in {"*", "*.*"}:
+        if not _looks_fielded_query(q) and q not in {"*",
+                                                     "*.*"}:
            q = f'title:("{q}")'

        fields = [
@@ -419,10 +435,14 @@ class InternetArchive(Provider):
                    size_bytes=None,
                    tag=set(),
                    columns=[
-                        ("title", title),
-                        ("mediatype", mediatype),
-                        ("date", date),
-                        ("creator", creator),
+                        ("title",
+                         title),
+                        ("mediatype",
+                         mediatype),
+                        ("date",
+                         date),
+                        ("creator",
+                         creator),
                    ],
                    full_metadata=dict(row),
                )
@@ -437,7 +457,12 @@ class InternetArchive(Provider):
        - https://archive.org/details/<identifier>
        - https://archive.org/download/<identifier>/<filename>
        """
-        sr = SearchResult(table="internetarchive", title=str(url), path=str(url), full_metadata={})
+        sr = SearchResult(
+            table="internetarchive",
+            title=str(url),
+            path=str(url),
+            full_metadata={}
+        )
        return self.download(sr, output_dir)

    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
@@ -449,7 +474,11 @@ class InternetArchive(Provider):
        if not callable(download_fn):
            raise Exception("internetarchive.download is not available")

-        identifier = _extract_identifier_from_any(str(getattr(result, "path", "") or ""))
+        identifier = _extract_identifier_from_any(
+            str(getattr(result,
+                        "path",
+                        "") or "")
+        )
        if not identifier:
            return None

@@ -490,9 +519,15 @@ class InternetArchive(Provider):
            files.append(
                {
                    "name": str(name),
-                    "size": getattr(f, "size", None),
-                    "format": getattr(f, "format", None),
-                    "source": getattr(f, "source", None),
+                    "size": getattr(f,
+                                    "size",
+                                    None),
+                    "format": getattr(f,
+                                      "format",
+                                      None),
+                    "source": getattr(f,
+                                      "source",
+                                      None),
                }
            )
    except Exception:
@@ -616,7 +651,8 @@ class InternetArchive(Provider):
        if not identifier:
            raise Exception("Could not determine Internet Archive identifier")

-        meta: Dict[str, Any] = {}
+        meta: Dict[str,
+                   Any] = {}
        if title:
            meta["title"] = title
        else:
@@ -628,7 +664,10 @@ class InternetArchive(Provider):
            meta["mediatype"] = self._mediatype.strip()

        # Build upload options; credentials are optional if the user has internetarchive configured globally.
-        upload_kwargs: Dict[str, Any] = {"metadata": meta}
+        upload_kwargs: Dict[str,
+                            Any] = {
+            "metadata": meta
+        }
        ak = os.getenv("IA_ACCESS_KEY") or self._access_key
        sk = os.getenv("IA_SECRET_KEY") or self._secret_key
        if isinstance(ak, str) and ak.strip():
@@ -638,7 +677,9 @@ class InternetArchive(Provider):

        # Use a friendly uploaded filename.
        upload_name = sanitize_filename(p.name)
-        files = {upload_name: str(p)}
+        files = {
+            upload_name: str(p)
+        }

        try:
            resp: Any = upload_fn(identifier, files=files, **upload_kwargs)
@@ -664,9 +705,11 @@ class InternetArchive(Provider):
            if pipe_obj is not None:
                from Store import Store

-                Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
-                    pipe_obj, item_url
-                )
+                Store(
+                    self.config,
+                    suppress_debug=True
+                ).try_add_url_for_pipe_object(pipe_obj,
+                                              item_url)
        except Exception:
            pass

@@ -15,7 +15,6 @@ from ProviderCore.download import sanitize_filename
from SYS.logger import log
from models import ProgressBar

-
# Optional dependency for HTML scraping fallbacks
try:
    from lxml import html as lxml_html
@@ -111,9 +110,7 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
        score = 0
        for ln in lines:
            lo = ln.lower()
-            if ":" in ln and any(
-                k in lo
-                for k in (
+            if ":" in ln and any(k in lo for k in (
                    "title",
                    "author",
                    "publisher",
@@ -121,9 +118,7 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
                    "isbn",
                    "language",
                    "series",
-                    "tags",
-                )
-            ):
+                    "tags", )):
                score += 1
            if score > best_score:
                best_score = score
@@ -133,15 +128,20 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
    if not best_lines:
        best_lines = _strip_html_to_lines(s)

-    raw_fields: Dict[str, str] = {}
+    raw_fields: Dict[str,
+                     str] = {}
    pending_key: Optional[str] = None

    def _norm_key(k: str) -> str:
        kk = str(k or "").strip().lower()
        kk = re.sub(r"\s+", " ", kk)
-        if kk in {"authors", "author(s)", "author(s).", "author(s):"}:
+        if kk in {"authors",
+                  "author(s)",
+                  "author(s).",
+                  "author(s):"}:
            return "author"
-        if kk in {"tag", "tags"}:
+        if kk in {"tag",
+                  "tags"}:
            return "tags"
        return kk

@@ -166,7 +166,10 @@ def _parse_libgen_ads_tags_html(html: str) -> Dict[str, Any]:
            raw_fields[pending_key] = line
            pending_key = None

-    out: Dict[str, Any] = {"_raw_fields": dict(raw_fields)}
+    out: Dict[str,
+              Any] = {
+        "_raw_fields": dict(raw_fields)
+    }

    title = str(raw_fields.get("title") or "").strip()
    if title:
@@ -272,9 +275,11 @@ def _prefer_isbn(isbns: List[str]) -> str:
    return vals[0] if vals else ""


-def _enrich_book_tags_from_isbn(
-    isbn: str, *, config: Optional[Dict[str, Any]] = None
-) -> Tuple[List[str], str]:
+def _enrich_book_tags_from_isbn(isbn: str,
+                                *,
+                                config: Optional[Dict[str,
+                                                      Any]] = None) -> Tuple[List[str],
+                                                                             str]:
    """Return (tags, source_name) for the given ISBN.

    Priority:
@@ -378,7 +383,8 @@ def _enrich_book_tags_from_isbn(
    try:
        from Provider.metadata_provider import get_metadata_provider

-        provider = get_metadata_provider("isbnsearch", config or {})
+        provider = get_metadata_provider("isbnsearch",
+                                         config or {})
        if provider is None:
            return [], ""
        items = provider.search(isbn_clean, limit=1)
@@ -393,7 +399,10 @@ def _enrich_book_tags_from_isbn(


def _fetch_libgen_details_html(
-    url: str, *, timeout: Optional[Tuple[float, float]] = None
+    url: str,
+    *,
+    timeout: Optional[Tuple[float,
+                            float]] = None
) -> Optional[str]:
    try:
        if timeout is None:
@@ -401,7 +410,8 @@ def _fetch_libgen_details_html(
        session = requests.Session()
        session.headers.update(
            {
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
+                "User-Agent":
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
            }
        )
        with session.get(str(url), stream=True, timeout=timeout) as resp:
@@ -420,14 +430,16 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:
    Best-effort and intentionally tolerant of mirror variations.
    """

-    out: Dict[str, Any] = {}
-    raw_fields: Dict[str, str] = {}
+    out: Dict[str,
+              Any] = {}
+    raw_fields: Dict[str,
+                     str] = {}
    s = str(html or "")

    # Fast path: try to pull simple Label/Value table rows.
    for m in re.finditer(
-        r"(?is)<tr\b[^>]*>\s*<t[dh]\b[^>]*>\s*([^<]{1,80}?)\s*:??\s*</t[dh]>\s*<t[dh]\b[^>]*>(.*?)</t[dh]>\s*</tr>",
-        s,
+        r"(?is)<tr\b[^>]*>\s*<t[dh]\b[^>]*>\s*([^<]{1,80}?)\s*:??\s*</t[dh]>\s*<t[dh]\b[^>]*>(.*?)</t[dh]>\s*</tr>",
+        s,
    ):
        label = _strip_html_to_text(m.group(1))
        raw_val_html = str(m.group(2) or "")
@@ -467,7 +479,8 @@ def _parse_libgen_details_html(html: str) -> Dict[str, Any]:

        chunk_start = m.end()
        chunk_end = (
-            strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(s)
+            strong_matches[idx + 1].start() if
+            (idx + 1) < len(strong_matches) else len(s)
        )
        raw_val_html = s[chunk_start:chunk_end]

@@ -619,17 +632,17 @@ def _libgen_metadata_to_tags(meta: Dict[str, Any]) -> List[str]:
    for k, v in raw_fields.items():
        lk = str(k or "").strip().lower()
        if lk in {
-            "title",
-            "author(s)",
-            "authors",
-            "author",
-            "publisher",
-            "year",
-            "isbn",
-            "language",
-            "oclc/worldcat",
-            "tags",
-            "edition id",
+                "title",
+                "author(s)",
+                "authors",
+                "author",
+                "publisher",
+                "year",
+                "isbn",
+                "language",
+                "oclc/worldcat",
+                "tags",
+                "edition id",
        }:
            continue
        vv = str(v or "").strip()
@@ -658,13 +671,14 @@ class Libgen(Provider):
        self,
        query: str,
        limit: int = 50,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Dict[str,
+                               Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        filters = filters or {}

        try:
-            from cli_syntax import get_field, get_free_text, parse_query
+            from SYS.cli_syntax import get_field, get_free_text, parse_query
            from SYS.logger import is_debug_enabled

            parsed = parse_query(query)
@@ -701,10 +715,14 @@ class Libgen(Provider):
            mirror_url = book.get("mirror_url", "")

            columns = [
-                ("Title", title),
-                ("Author", author),
-                ("Pages", str(pages)),
-                ("Ext", str(extension)),
+                ("Title",
+                 title),
+                ("Author",
+                 author),
+                ("Pages",
+                 str(pages)),
+                ("Ext",
+                 str(extension)),
            ]

            detail = f"By: {author}"
@@ -732,8 +750,10 @@ class Libgen(Provider):
                        "filesize": filesize,
                        "pages": pages,
                        "extension": extension,
-                        "book_id": book.get("book_id", ""),
-                        "md5": book.get("md5", ""),
+                        "book_id": book.get("book_id",
+                                            ""),
+                        "md5": book.get("md5",
+                                        ""),
                    },
                )
            )
@@ -786,12 +806,9 @@ class Libgen(Provider):
            title = ""

        base_name = sanitize_filename(
-            title
-            or md5
-            or (
+            title or md5 or (
                f"libgen_{_libgen_id_from_url(target)}"
-                if _libgen_id_from_url(target)
-                else "libgen"
+                if _libgen_id_from_url(target) else "libgen"
            )
        )
        out_path = output_dir / base_name
@@ -819,9 +836,12 @@ class Libgen(Provider):
            if now - last_progress_time[0] < 0.5:
                return

-            total = int(content_length) if content_length and content_length > 0 else None
+            total = int(
+                content_length
+            ) if content_length and content_length > 0 else None
            downloaded = (
-                int(bytes_downloaded) if bytes_downloaded and bytes_downloaded > 0 else 0
+                int(bytes_downloaded)
+                if bytes_downloaded and bytes_downloaded > 0 else 0
            )
            elapsed = max(0.001, now - start_time)
            speed = downloaded / elapsed
@@ -850,15 +870,19 @@ class Libgen(Provider):
            # enrichment (OpenLibrary/isbnsearch) unless the user later chooses to.
            if ("/ads.php" in low) or ("/get.php" in low):
                ads_url = (
-                    target if "/ads.php" in low else _libgen_ads_url_for_target(target)
+                    target if "/ads.php" in low else
+                    _libgen_ads_url_for_target(target)
                )
                if ads_url:
                    html = _fetch_libgen_details_html(
-                        ads_url, timeout=(DEFAULT_CONNECT_TIMEOUT, 4.0)
+                        ads_url,
+                        timeout=(DEFAULT_CONNECT_TIMEOUT,
+                                 4.0)
                    )
                    if html:
                        meta = _parse_libgen_ads_tags_html(html)
-                        extracted_title = str(meta.get("title") or "").strip()
+                        extracted_title = str(meta.get("title")
+                                              or "").strip()
                        if extracted_title:
                            md["title"] = extracted_title
                            result.tag.add(f"title:{extracted_title}")
@@ -867,8 +891,8 @@ class Libgen(Provider):

                        authors = (
                            meta.get("authors")
-                            if isinstance(meta.get("authors"), list)
-                            else []
+                            if isinstance(meta.get("authors"),
+                                          list) else []
                        )
                        for a in authors or []:
                            aa = str(a or "").strip()
@@ -892,11 +916,12 @@ class Libgen(Provider):

                        isbns = (
                            meta.get("isbn")
-                            if isinstance(meta.get("isbn"), list)
-                            else []
+                            if isinstance(meta.get("isbn"),
+                                          list) else []
                        )
                        isbns = [
-                            str(x).strip() for x in (isbns or []) if str(x).strip()
+                            str(x).strip() for x in (isbns or [])
+                            if str(x).strip()
                        ]
                        if isbns:
                            md["isbn"] = isbns
@@ -905,8 +930,8 @@ class Libgen(Provider):

                        free_tags = (
                            meta.get("tags")
-                            if isinstance(meta.get("tags"), list)
-                            else []
+                            if isinstance(meta.get("tags"),
+                                          list) else []
                        )
                        for t in free_tags or []:
                            tt = str(t or "").strip()
@@ -919,29 +944,28 @@ class Libgen(Provider):
                        for k, v in raw_fields.items():
                            lk = str(k or "").strip().lower()
                            if lk in {
-                                "title",
-                                "author",
-                                "authors",
-                                "publisher",
-                                "year",
-                                "isbn",
-                                "language",
-                                "tags",
+                                    "title",
+                                    "author",
+                                    "authors",
+                                    "publisher",
+                                    "year",
+                                    "isbn",
+                                    "language",
+                                    "tags",
                            }:
                                continue
                            vv = str(v or "").strip()
                            if not vv:
                                continue
-                            ns = re.sub(r"[^a-z0-9]+", "_", lk).strip("_")
+                            ns = re.sub(r"[^a-z0-9]+",
+                                        "_",
+                                        lk).strip("_")
                            if ns:
                                result.tag.add(f"libgen_{ns}:{vv}")

            # Legacy: edition/file/series details pages (title + ISBN) + external enrichment.
-            if (
-                ("/edition.php" in low)
-                or ("/file.php" in low)
-                or ("/series.php" in low)
-            ):
+            if (("/edition.php" in low) or ("/file.php" in low)
+                    or ("/series.php" in low)):
                html = _fetch_libgen_details_html(target)
                if html:
                    meta = _parse_libgen_details_html(html)
@@ -953,11 +977,12 @@ class Libgen(Provider):

                    extracted_title = str(meta.get("title") or "").strip()
                    extracted_isbns = (
-                        meta.get("isbn") if isinstance(meta.get("isbn"), list) else []
+                        meta.get("isbn")
+                        if isinstance(meta.get("isbn"),
+                                      list) else []
                    )
                    extracted_isbns = [
-                        str(x).strip()
-                        for x in (extracted_isbns or [])
+                        str(x).strip() for x in (extracted_isbns or [])
                        if str(x).strip()
                    ]

@@ -967,7 +992,9 @@ class Libgen(Provider):
                    if extracted_isbns:
                        md["isbn"] = extracted_isbns
                        for isbn_val in extracted_isbns:
-                            isbn_norm = str(isbn_val).strip().replace("-", "")
+                            isbn_norm = str(isbn_val
+                                            ).strip().replace("-",
+                                                              "")
                            if isbn_norm:
                                result.tag.add(f"isbn:{isbn_norm}")
                    if meta.get("edition_id"):
@@ -987,7 +1014,8 @@ class Libgen(Provider):
                    if enriched_source:
                        md["metadata_enriched_from"] = enriched_source

-                    if extracted_title and ((not title) or title.startswith("http")):
+                    if extracted_title and ((not title)
+                                            or title.startswith("http")):
                        title = extracted_title
        except Exception:
            pass
@@ -1041,7 +1069,8 @@ class LibgenSearch:
        self.session = session or requests.Session()
        self.session.headers.update(
            {
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+                "User-Agent":
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
        )

@@ -1052,7 +1081,8 @@ class LibgenSearch:
        limit: int,
        *,
        timeout: Any = DEFAULT_TIMEOUT,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Dict[str,
+                   Any]]:
        """Search libgen.rs/is/st JSON API when available.

        Many LibGen mirrors expose /json.php which is less brittle than scraping.
@@ -1060,7 +1090,9 @@ class LibgenSearch:
        url = f"{mirror}/json.php"
        params = {
            "req": query,
-            "res": max(1, min(100, int(limit) if limit else 50)),
+            "res": max(1,
+                       min(100,
+                           int(limit) if limit else 50)),
            "column": "def",
            "phrase": 1,
        }
@@ -1086,7 +1118,8 @@ class LibgenSearch:
            pages = item.get("Pages") or item.get("pages") or ""
            language = item.get("Language") or item.get("language") or ""
            size = item.get("Size") or item.get("size") or item.get("filesize") or ""
-            extension = item.get("Extension") or item.get("extension") or item.get("ext") or ""
+            extension = item.get("Extension") or item.get("extension"
+                                                          ) or item.get("ext") or ""
            md5 = item.get("MD5") or item.get("md5") or ""

            download_link = f"http://library.lol/main/{md5}" if md5 else ""
@@ -1121,7 +1154,8 @@ class LibgenSearch:
        total_timeout: float = DEFAULT_SEARCH_TOTAL_TIMEOUT,
        log_info: LogFn = None,
        log_error: ErrorFn = None,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Dict[str,
+                   Any]]:
        """Search LibGen mirrors.

        Uses a total time budget across mirrors to avoid long hangs.
@@ -1135,7 +1169,10 @@ class LibgenSearch:
            elapsed = time.monotonic() - started
            remaining = total_timeout - elapsed
            if remaining <= 0:
-                _call(log_error, f"[libgen] Search timed out after {total_timeout:.0f}s")
+                _call(
+                    log_error,
+                    f"[libgen] Search timed out after {total_timeout:.0f}s"
+                )
                break

            # Bound each request so we can try multiple mirrors within the budget.
@@ -1152,7 +1189,10 @@ class LibgenSearch:
            results: List[Dict[str, Any]] = []
            try:
                results = self._search_libgen_json(
-                    mirror, query, limit, timeout=request_timeout
+                    mirror,
+                    query,
+                    limit,
+                    timeout=request_timeout
                )
            except Exception:
                results = []
@@ -1163,11 +1203,17 @@ class LibgenSearch:

            if "libgen.li" in mirror or "libgen.gl" in mirror:
                results = self._search_libgen_li(
-                    mirror, query, limit, timeout=request_timeout
+                    mirror,
+                    query,
+                    limit,
+                    timeout=request_timeout
                )
            else:
                results = self._search_libgen_rs(
-                    mirror, query, limit, timeout=request_timeout
+                    mirror,
+                    query,
+                    limit,
+                    timeout=request_timeout
                )

            if results:
@@ -1192,7 +1238,8 @@ class LibgenSearch:
        limit: int,
        *,
        timeout: Any = DEFAULT_TIMEOUT,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Dict[str,
+                   Any]]:
        """Search libgen.rs/is/st style mirrors."""
        url = f"{mirror}/search.php"
        params = {
@@ -1211,7 +1258,8 @@ class LibgenSearch:
            return []

        def _text(el: Any) -> str:
-            return " ".join([t.strip() for t in el.itertext() if t and str(t).strip()]).strip()
+            return " ".join([t.strip() for t in el.itertext()
+                             if t and str(t).strip()]).strip()

        try:
            doc = lxml_html.fromstring(resp.content)
@@ -1314,13 +1362,16 @@ class LibgenSearch:
        limit: int,
        *,
        timeout: Any = DEFAULT_TIMEOUT,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Dict[str,
+                   Any]]:
        """Search libgen.li/gl style mirrors."""
        url = f"{mirror}/index.php"
        params = {
            "req": query,
            # Keep the request lightweight; covers slow the HTML response.
-            "res": max(1, min(100, int(limit) if limit else 50)),
+            "res": max(1,
+                       min(100,
+                           int(limit) if limit else 50)),
            "covers": "off",
            "filesuns": "all",
        }
@@ -1332,7 +1383,8 @@ class LibgenSearch:
            return []

        def _text(el: Any) -> str:
-            return " ".join([t.strip() for t in el.itertext() if t and str(t).strip()]).strip()
+            return " ".join([t.strip() for t in el.itertext()
+                             if t and str(t).strip()]).strip()

        try:
            doc = lxml_html.fromstring(resp.content)
@@ -1414,7 +1466,10 @@ class LibgenSearch:

        # Extract ISBNs from meta cell (avoid using them as title)
        # Matches 10 or 13-digit ISBN with optional leading 978/979.
-        isbn_candidates = re.findall(r"\b(?:97[89])?\d{9}[\dXx]\b", meta_text)
+        isbn_candidates = re.findall(
+            r"\b(?:97[89])?\d{9}[\dXx]\b",
+            meta_text
+        )
        if isbn_candidates:
            seen: List[str] = []
            for s in isbn_candidates:
@@ -1453,7 +1508,8 @@ class LibgenSearch:
        best_score: Optional[tuple] = None
        for cand in deduped:
            low = cand.lower().strip()
-            if low in {"cover", "edition"}:
+            if low in {"cover",
+                       "edition"}:
                continue
            if _looks_like_isbn_blob(cand):
                continue
@@ -1527,7 +1583,8 @@ def search_libgen(
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
-) -> List[Dict[str, Any]]:
+) -> List[Dict[str,
+               Any]]:
    """Search Libgen using the robust scraper."""
    searcher = LibgenSearch(session=session)
    try:
@@ -1572,7 +1629,9 @@ def _resolve_download_url(

    # Handle edition -> file links.
    m = re.search(
-        r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
+        r'href=["\']([^"\']*file\.php\?id=\d+[^"\']*)["\']',
+        html,
+        flags=re.IGNORECASE
    )
    if m:
        href = str(m.group(1) or "").strip()
@@ -1581,7 +1640,9 @@ def _resolve_download_url(

    # Handle series -> edition links.
    m = re.search(
-        r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']', html, flags=re.IGNORECASE
+        r'href=["\']([^"\']*edition\.php\?id=\d+[^"\']*)["\']',
+        html,
+        flags=re.IGNORECASE
    )
    if m:
        href = str(m.group(1) or "").strip()
@@ -1611,7 +1672,11 @@ def _resolve_download_url(
            return urljoin(base_url, href)

    # Next: library.lol main links.
-    m = re.search(r'href=["\']([^"\']*library\.lol[^"\']*)["\']', html, flags=re.IGNORECASE)
+    m = re.search(
+        r'href=["\']([^"\']*library\.lol[^"\']*)["\']',
+        html,
+        flags=re.IGNORECASE
+    )
    if m:
        href = str(m.group(1) or "").strip()
        if href and not href.lower().startswith("javascript:"):
@@ -1632,7 +1697,8 @@ def _resolve_download_url(

    def _find_href_by_text(doc: Any, pattern: str) -> Optional[str]:
        for a in doc.xpath("//a[@href]"):
-            t = " ".join([s.strip() for s in a.itertext() if s and str(s).strip()]).strip()
+            t = " ".join([s.strip() for s in a.itertext()
+                          if s and str(s).strip()]).strip()
            if t and re.search(pattern, t, re.IGNORECASE):
                href = str(a.get("href") or "").strip()
                if href and not href.lower().startswith("javascript:"):
@@ -1646,9 +1712,13 @@ def _resolve_download_url(

        _call(log_info, f"[resolve] Checking: {current_url}")

-        if current_url.lower().endswith(
-            (".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")
-        ):
+        if current_url.lower().endswith((".pdf",
+                                         ".epub",
+                                         ".mobi",
+                                         ".djvu",
+                                         ".azw3",
+                                         ".cbz",
+                                         ".cbr")):
            return current_url

        try:
@@ -1676,7 +1746,10 @@ def _resolve_download_url(
            if next_url:
                current_url = next_url
                continue
-            _call(log_info, "[resolve] lxml not available and regex resolver found no links")
+            _call(
+                log_info,
+                "[resolve] lxml not available and regex resolver found no links"
+            )
            return None

        get_href = _find_href_by_text(doc, r"^GET$")
@@ -1722,12 +1795,16 @@ def _resolve_download_url(
    return None


-def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
+def _guess_filename_extension(download_url: str,
+                              headers: Dict[str,
+                                            str]) -> Optional[str]:
    """Guess the file extension from headers or the download URL."""
    content_disposition = headers.get("content-disposition", "")
    if content_disposition:
        match = re.search(
-            r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE
+            r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)",
+            content_disposition,
+            flags=re.IGNORECASE
        )
        if match:
            filename = unquote(match.group(1).strip('"'))
@@ -1787,8 +1864,11 @@ def download_from_mirror(
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
-    progress_callback: Optional[Callable[[int, int], None]] = None,
-) -> Tuple[bool, Optional[Path]]:
+    progress_callback: Optional[Callable[[int,
+                                          int],
+                                         None]] = None,
+) -> Tuple[bool,
+           Optional[Path]]:
    """Download file from a LibGen mirror URL with optional progress tracking."""
    session = session or requests.Session()
    output_path = Path(output_path)
@@ -1807,7 +1887,8 @@ def download_from_mirror(

    downloaded = 0
    total_size = 0
-    headers: Dict[str, str] = {}
+    headers: Dict[str,
+                  str] = {}

    with session.get(download_url, stream=True, timeout=60) as r:
        r.raise_for_status()

@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional

from API.loc import LOCClient
from ProviderCore.base import Provider, SearchResult
-from cli_syntax import get_free_text, parse_query
+from SYS.cli_syntax import get_free_text, parse_query
from SYS.logger import log


@@ -23,13 +23,16 @@ class LOC(Provider):
        self,
        query: str,
        limit: int = 50,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Dict[str,
+                               Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        _ = kwargs
        parsed = parse_query(query or "")
        text = get_free_text(parsed).strip()
-        fields = parsed.get("fields", {}) if isinstance(parsed, dict) else {}
+        fields = parsed.get("fields",
+                            {}) if isinstance(parsed,
+                                              dict) else {}

        # Allow explicit q: override.
        q = str(fields.get("q") or text or "").strip()
@@ -37,7 +40,8 @@ class LOC(Provider):
            return []

        # Pass through any extra filters supported by the LoC API.
-        extra: Dict[str, Any] = {}
+        extra: Dict[str,
+                    Any] = {}
        if isinstance(filters, dict):
            extra.update(filters)
        if isinstance(fields, dict):
@@ -57,7 +61,10 @@ class LOC(Provider):

        while len(results) < max(0, int(limit)):
            payload = client.search_chronicling_america(
-                q, start=start, count=page_size, extra_params=extra
+                q,
+                start=start,
+                count=page_size,
+                extra_params=extra
            )
            items = payload.get("results")
            if not isinstance(items, list) or not items:
@@ -108,10 +115,14 @@ class LOC(Provider):
                    annotations=annotations,
                    media_kind="document",
                    columns=[
-                        ("Title", title),
-                        ("Date", date),
-                        ("Format", fmt_text),
-                        ("URL", url),
+                        ("Title",
+                         title),
+                        ("Date",
+                         date),
+                        ("Format",
+                         fmt_text),
+                        ("URL",
+                         url),
                    ],
                    full_metadata=it,
                )

@@ -11,8 +11,9 @@ import requests

from ProviderCore.base import Provider


-_MATRIX_INIT_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}
+_MATRIX_INIT_CHECK_CACHE: Dict[str,
+                               Tuple[bool,
+                                     Optional[str]]] = {}


def _sniff_mime_from_header(path: Path) -> Optional[str]:
@@ -79,9 +80,10 @@ def _sniff_mime_from_header(path: Path) -> Optional[str]:
    return None


-def _classify_matrix_upload(
-    path: Path, *, explicit_mime_type: Optional[str] = None
-) -> Tuple[str, str]:
+def _classify_matrix_upload(path: Path,
+                            *,
+                            explicit_mime_type: Optional[str] = None) -> Tuple[str,
+                                                                               str]:
    """Return (mime_type, msgtype) for Matrix uploads."""
    mime_type = str(explicit_mime_type or "").strip() or None

@@ -94,9 +96,11 @@ def _classify_matrix_upload(

    # Refinements based on extension for ambiguous containers.
    ext = path.suffix.lower()
-    if ext in {".m4a", ".aac"}:
+    if ext in {".m4a",
+               ".aac"}:
        mime_type = mime_type or "audio/mp4"
-    if ext in {".mkv", ".webm"}:
+    if ext in {".mkv",
+               ".webm"}:
        mime_type = mime_type or "video/x-matroska"
    if ext in {".ogv"}:
        mime_type = mime_type or "video/ogg"
@@ -142,7 +146,13 @@ def _classify_matrix_upload(
        ".3gp",
        ".ogv",
    }
-    image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
+    image_exts = {".jpg",
+                  ".jpeg",
+                  ".png",
+                  ".gif",
+                  ".webp",
+                  ".bmp",
+                  ".tiff"}
    if ext in audio_exts:
        msgtype = "m.audio"
    elif ext in video_exts:
@@ -162,9 +172,10 @@ def _normalize_homeserver(value: str) -> str:
    return text.rstrip("/")


-def _matrix_health_check(
-    *, homeserver: str, access_token: Optional[str]
-) -> Tuple[bool, Optional[str]]:
+def _matrix_health_check(*,
+                         homeserver: str,
+                         access_token: Optional[str]) -> Tuple[bool,
+                                                               Optional[str]]:
    """Lightweight Matrix reachability/auth validation.

    - Always checks `/versions` (no auth).
@@ -180,9 +191,13 @@ def _matrix_health_check(
            return False, f"Homeserver returned {resp.status_code}"

        if access_token:
-            headers = {"Authorization": f"Bearer {access_token}"}
+            headers = {
+                "Authorization": f"Bearer {access_token}"
+            }
            resp = requests.get(
-                f"{base}/_matrix/client/v3/account/whoami", headers=headers, timeout=5
+                f"{base}/_matrix/client/v3/account/whoami",
+                headers=headers,
+                timeout=5
            )
            if resp.status_code != 200:
                return False, f"Authentication failed: {resp.status_code}"
@@ -201,9 +216,10 @@ class Matrix(Provider):
        self._init_reason: Optional[str] = None

        matrix_conf = (
-            self.config.get("provider", {}).get("matrix", {})
-            if isinstance(self.config, dict)
-            else {}
+            self.config.get("provider",
+                            {}).get("matrix",
+                                    {}) if isinstance(self.config,
+                                                      dict) else {}
        )
        homeserver = matrix_conf.get("homeserver")
        access_token = matrix_conf.get("access_token")
@@ -237,14 +253,18 @@ class Matrix(Provider):
            return False
        if self._init_ok is False:
            return False
-        matrix_conf = self.config.get("provider", {}).get("matrix", {})
+        matrix_conf = self.config.get("provider",
+                                      {}).get("matrix",
+                                              {})
        return bool(
            matrix_conf.get("homeserver")
            and (matrix_conf.get("access_token") or matrix_conf.get("password"))
        )

    def _get_homeserver_and_token(self) -> Tuple[str, str]:
-        matrix_conf = self.config.get("provider", {}).get("matrix", {})
+        matrix_conf = self.config.get("provider",
+                                      {}).get("matrix",
+                                              {})
        homeserver = matrix_conf.get("homeserver")
        access_token = matrix_conf.get("access_token")
        if not homeserver:
@@ -262,8 +282,14 @@ class Matrix(Provider):
        Uses `GET /_matrix/client/v3/joined_rooms`.
        """
        base, token = self._get_homeserver_and_token()
-        headers = {"Authorization": f"Bearer {token}"}
-        resp = requests.get(f"{base}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
+        headers = {
+            "Authorization": f"Bearer {token}"
+        }
+        resp = requests.get(
+            f"{base}/_matrix/client/v3/joined_rooms",
+            headers=headers,
+            timeout=10
+        )
        if resp.status_code != 200:
            raise Exception(f"Matrix joined_rooms failed: {resp.text}")
        data = resp.json() or {}
@@ -275,18 +301,24 @@ class Matrix(Provider):
                out.append(rid.strip())
        return out

-    def list_rooms(self, *, room_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+    def list_rooms(self,
+                   *,
+                   room_ids: Optional[List[str]] = None) -> List[Dict[str,
+                                                                      Any]]:
        """Return joined rooms, optionally limited to a subset.

        Performance note: room names require additional per-room HTTP requests.
        If `room_ids` is provided, only those rooms will have name lookups.
        """
        base, token = self._get_homeserver_and_token()
-        headers = {"Authorization": f"Bearer {token}"}
+        headers = {
+            "Authorization": f"Bearer {token}"
+        }

        joined = self.list_joined_room_ids()
        if room_ids:
-            allowed = {str(v).strip().casefold() for v in room_ids if str(v).strip()}
+            allowed = {str(v).strip().casefold()
+                       for v in room_ids if str(v).strip()}
            if allowed:
                # Accept either full IDs (!id:hs) or short IDs (!id).
                def _is_allowed(rid: str) -> bool:
@@ -319,7 +351,10 @@ class Matrix(Provider):
                    name = maybe
            except Exception:
                pass
-            out.append({"room_id": room_id, "name": name})
+            out.append({
+                "room_id": room_id,
+                "name": name
+            })
        return out

    def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
@@ -349,10 +384,17 @@ class Matrix(Provider):
        upload_url = f"{base}/_matrix/media/v3/upload"
        with open(path, "rb") as handle:
            wrapped = ProgressFileReader(
-                handle, total_bytes=int(path.stat().st_size), label="upload"
+                handle,
+                total_bytes=int(path.stat().st_size),
+                label="upload"
            )
            resp = requests.post(
-                upload_url, headers=headers, data=wrapped, params={"filename": filename}
+                upload_url,
+                headers=headers,
+                data=wrapped,
+                params={
+                    "filename": filename
+                }
            )
        if resp.status_code != 200:
            raise Exception(f"Matrix upload failed: {resp.text}")
@@ -366,7 +408,7 @@ class Matrix(Provider):
        try:
            curi = str(content_uri or "").strip()
            if curi.startswith("mxc://"):
-                rest = curi[len("mxc://") :]
+                rest = curi[len("mxc://"):]
                if "/" in rest:
                    server_name, media_id = rest.split("/", 1)
                    server_name = str(server_name).strip()
@@ -376,14 +418,24 @@ class Matrix(Provider):
        except Exception:
            download_url_for_store = ""

-        info = {"mimetype": mime_type, "size": path.stat().st_size}
-        payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
+        info = {
+            "mimetype": mime_type,
+            "size": path.stat().st_size
+        }
+        payload = {
+            "msgtype": msgtype,
+            "body": filename,
+            "url": content_uri,
+            "info": info
+        }

        # Correct Matrix client API send endpoint requires a transaction ID.
        txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
        encoded_room = quote(str(room_id), safe="")
        send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
-        send_headers = {"Authorization": f"Bearer {token}"}
+        send_headers = {
+            "Authorization": f"Bearer {token}"
+        }
        send_resp = requests.put(send_url, headers=send_headers, json=payload)
        if send_resp.status_code != 200:
            raise Exception(f"Matrix send message failed: {send_resp.text}")
@@ -391,8 +443,7 @@ class Matrix(Provider):
        event_id = (send_resp.json() or {}).get("event_id")
        link = (
            f"https://matrix.to/#/{room_id}/{event_id}"
-            if event_id
-            else f"https://matrix.to/#/{room_id}"
+            if event_id else f"https://matrix.to/#/{room_id}"
        )

        # Optional: if a PipeObject is provided and it already has store+hash,
@@ -403,7 +454,10 @@ class Matrix(Provider):
                from Store import Store

                # Prefer the direct media download URL for storage backends.
-                Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
+                Store(
+                    self.config,
+                    suppress_debug=True
+                ).try_add_url_for_pipe_object(
                    pipe_obj,
                    download_url_for_store or link,
                )
@@ -424,8 +478,13 @@ class Matrix(Provider):
        encoded_room = quote(str(room_id), safe="")
        txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
        send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
-        send_headers = {"Authorization": f"Bearer {token}"}
-        payload = {"msgtype": "m.text", "body": message}
+        send_headers = {
+            "Authorization": f"Bearer {token}"
+        }
+        payload = {
+            "msgtype": "m.text",
+            "body": message
+        }
        send_resp = requests.put(send_url, headers=send_headers, json=payload)
        if send_resp.status_code != 200:
            raise Exception(f"Matrix send text failed: {send_resp.text}")
@@ -433,19 +492,25 @@ class Matrix(Provider):
        event_id = (send_resp.json() or {}).get("event_id")
        return (
            f"https://matrix.to/#/{room_id}/{event_id}"
-            if event_id
-            else f"https://matrix.to/#/{room_id}"
+            if event_id else f"https://matrix.to/#/{room_id}"
        )

    def upload(self, file_path: str, **kwargs: Any) -> str:
-        matrix_conf = self.config.get("provider", {}).get("matrix", {})
+        matrix_conf = self.config.get("provider",
+                                      {}).get("matrix",
+                                              {})
        room_id = matrix_conf.get("room_id")
        if not room_id:
            raise Exception("Matrix room_id missing")
        return self.upload_to_room(file_path, str(room_id))

    def selector(
-        self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any
+        self,
+        selected_items: List[Any],
+        *,
+        ctx: Any,
+        stage_is_last: bool = True,
+        **_kwargs: Any
    ) -> bool:
        """Handle Matrix room selection via `@N`.

@@ -501,7 +566,11 @@ class Matrix(Provider):
            print(f"Matrix upload file missing: {file_path}")
|
||||
continue
|
||||
|
||||
link = self.upload_to_room(str(media_path), str(room_id), pipe_obj=pipe_obj)
|
||||
link = self.upload_to_room(
|
||||
str(media_path),
|
||||
str(room_id),
|
||||
pipe_obj=pipe_obj
|
||||
)
|
||||
if link:
|
||||
print(link)
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ try: # Optional dependency
except ImportError: # pragma: no cover - optional
musicbrainzngs = None


try: # Optional dependency
import yt_dlp # type: ignore
except ImportError: # pragma: no cover - optional
@@ -62,9 +61,18 @@ class ITunesProvider(MetadataProvider):
"""Metadata provider using the iTunes Search API."""

def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
params = {"term": query, "media": "music", "entity": "song", "limit": limit}
params = {
"term": query,
"media": "music",
"entity": "song",
"limit": limit
}
try:
resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
resp = requests.get(
"https://itunes.apple.com/search",
params=params,
timeout=10
)
resp.raise_for_status()
results = resp.json().get("results", [])
except Exception as exc:
@@ -77,7 +85,8 @@ class ITunesProvider(MetadataProvider):
"title": r.get("trackName"),
"artist": r.get("artistName"),
"album": r.get("collectionName"),
"year": str(r.get("releaseDate", ""))[:4],
"year": str(r.get("releaseDate",
""))[:4],
"provider": self.name,
"raw": r,
}
@@ -100,17 +109,22 @@ class OpenLibraryMetadataProvider(MetadataProvider):

try:
# Prefer ISBN-specific search when the query looks like one
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (
10,
13,
):
if query_clean.replace("-",
"").isdigit() and len(query_clean.replace("-",
"")) in (
10,
13,
):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean

resp = requests.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": limit},
params={
"q": q,
"limit": limit
},
timeout=10,
)
resp.raise_for_status()
@@ -202,7 +216,10 @@ class GoogleBooksMetadataProvider(MetadataProvider):
return []

# Prefer ISBN queries when possible
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
if query_clean.replace("-",
"").isdigit() and len(query_clean.replace("-",
"")) in (10,
13):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
@@ -210,7 +227,10 @@ class GoogleBooksMetadataProvider(MetadataProvider):
try:
resp = requests.get(
"https://www.googleapis.com/books/v1/volumes",
params={"q": q, "maxResults": limit},
params={
"q": q,
"maxResults": limit
},
timeout=10,
)
resp.raise_for_status()
@@ -228,7 +248,10 @@ class GoogleBooksMetadataProvider(MetadataProvider):
year = str(published_date)[:4] if published_date else ""

identifiers_raw = info.get("industryIdentifiers") or []
identifiers: Dict[str, Optional[str]] = {"googlebooks": volume.get("id")}
identifiers: Dict[str,
Optional[str]] = {
"googlebooks": volume.get("id")
}
for ident in identifiers_raw:
if not isinstance(ident, dict):
continue
@@ -253,7 +276,8 @@ class GoogleBooksMetadataProvider(MetadataProvider):
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
"description": info.get("description",
""),
}
)

@@ -341,7 +365,8 @@ class ISBNsearchMetadataProvider(MetadataProvider):
if m_title:
title = self._strip_html_to_text(m_title.group(1))

raw_fields: Dict[str, str] = {}
raw_fields: Dict[str,
str] = {}
strong_matches = list(re.finditer(r"(?is)<strong\b[^>]*>(.*?)</strong>", html))
for idx, m in enumerate(strong_matches):
label_raw = self._strip_html_to_text(m.group(1))
@@ -354,13 +379,14 @@ class ISBNsearchMetadataProvider(MetadataProvider):
chunk_start = m.end()
# Stop at next <strong> or end of document.
chunk_end = (
strong_matches[idx + 1].start() if (idx + 1) < len(strong_matches) else len(html)
strong_matches[idx + 1].start() if
(idx + 1) < len(strong_matches) else len(html)
)
chunk = html[chunk_start:chunk_end]
# Prefer stopping within the same paragraph when possible.
m_end = re.search(r"(?is)(</p>|<br\s*/?>)", chunk)
if m_end:
chunk = chunk[: m_end.start()]
chunk = chunk[:m_end.start()]

val_text = self._strip_html_to_text(chunk)
if not val_text:
@@ -391,7 +417,9 @@ class ISBNsearchMetadataProvider(MetadataProvider):
authors: List[str] = []
if author_text:
# Split on common separators; keep multi-part names intact.
for part in re.split(r"\s*(?:,|;|\band\b|\&|\|)\s*", author_text, flags=re.IGNORECASE):
for part in re.split(r"\s*(?:,|;|\band\b|\&|\|)\s*",
author_text,
flags=re.IGNORECASE):
p = str(part or "").strip()
if p:
authors.append(p)
@@ -412,23 +440,28 @@ class ISBNsearchMetadataProvider(MetadataProvider):
if t and t not in isbn_tokens:
isbn_tokens.append(t)

item: Dict[str, Any] = {
"title": title or "",
# Keep UI columns compatible with the generic metadata table.
"artist": ", ".join(authors) if authors else "",
"album": publisher or "",
"year": year or "",
"provider": self.name,
"authors": authors,
"publisher": publisher or "",
"language": language or "",
"pages": pages or "",
"identifiers": {
"isbn_13": next((t for t in isbn_tokens if len(t) == 13), None),
"isbn_10": next((t for t in isbn_tokens if len(t) == 10), None),
},
"raw_fields": raw_fields,
}
item: Dict[str,
Any] = {
"title": title or "",
# Keep UI columns compatible with the generic metadata table.
"artist": ", ".join(authors) if authors else "",
"album": publisher or "",
"year": year or "",
"provider": self.name,
"authors": authors,
"publisher": publisher or "",
"language": language or "",
"pages": pages or "",
"identifiers": {
"isbn_13":
next((t for t in isbn_tokens if len(t) == 13),
None),
"isbn_10":
next((t for t in isbn_tokens if len(t) == 10),
None),
},
"raw_fields": raw_fields,
}

# Only return usable items.
if not item.get("title") and not any(item["identifiers"].values()):
@@ -495,7 +528,10 @@ class MusicBrainzMetadataProvider(MetadataProvider):

def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
if not musicbrainzngs:
log("musicbrainzngs is not installed; skipping MusicBrainz scrape", file=sys.stderr)
log(
"musicbrainzngs is not installed; skipping MusicBrainz scrape",
file=sys.stderr
)
return []

q = (query or "").strip()
@@ -526,12 +562,15 @@ class MusicBrainzMetadataProvider(MetadataProvider):
if isinstance(artist_credit, list) and artist_credit:
first = artist_credit[0]
if isinstance(first, dict):
artist = first.get("name") or first.get("artist", {}).get("name", "")
artist = first.get("name") or first.get("artist",
{}).get("name",
"")
elif isinstance(first, str):
artist = first

album = ""
release_list = rec.get("release-list") or rec.get("releases") or rec.get("release")
release_list = rec.get("release-list") or rec.get("releases"
) or rec.get("release")
if isinstance(release_list, list) and release_list:
first_rel = release_list[0]
if isinstance(first_rel, dict):
@@ -634,7 +673,8 @@ class YtdlpMetadataProvider(MetadataProvider):

upload_date = str(info.get("upload_date") or "")
release_date = str(info.get("release_date") or "")
year = (release_date or upload_date)[:4] if (release_date or upload_date) else ""
year = (release_date
or upload_date)[:4] if (release_date or upload_date) else ""

# Provide basic columns for the standard metadata selection table.
# NOTE: This is best-effort; many extractors don't provide artist/album.
@@ -716,15 +756,16 @@ class YtdlpMetadataProvider(MetadataProvider):

# Registry ---------------------------------------------------------------

_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"itunes": ITunesProvider,
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
"isbnsearch": ISBNsearchMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
}
_METADATA_PROVIDERS: Dict[str,
Type[MetadataProvider]] = {
"itunes": ITunesProvider,
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
"isbnsearch": ISBNsearchMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
}


def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
@@ -732,7 +773,8 @@ def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:


def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
availability: Dict[str,
bool] = {}
for name, cls in _METADATA_PROVIDERS.items():
try:
_ = cls(config)
@@ -743,9 +785,10 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
return availability


def get_metadata_provider(
name: str, config: Optional[Dict[str, Any]] = None
) -> Optional[MetadataProvider]:
def get_metadata_provider(name: str,
config: Optional[Dict[str,
Any]] = None
) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None

@@ -19,7 +19,7 @@ import requests
from API.HTTP import HTTPClient
from ProviderCore.base import Provider, SearchResult
from ProviderCore.download import download_file, sanitize_filename
from cli_syntax import get_field, get_free_text, parse_query
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log
from SYS.utils import unique_path

@@ -52,7 +52,9 @@ def _image_paths_to_pdf_bytes(images: List[str]) -> Optional[bytes]:
continue
with Image.open(img_path) as im: # type: ignore[attr-defined]
# Ensure PDF-compatible mode.
if im.mode in {"RGBA", "LA", "P"}:
if im.mode in {"RGBA",
"LA",
"P"}:
im = im.convert("RGB")
else:
im = im.convert("RGB")
@@ -125,7 +127,8 @@ def _resolve_edition_id(doc: Dict[str, Any]) -> str:
def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]:
"""Return (lendable, status_text) using OpenLibrary volumes API."""
try:
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith("M"):
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith(
"M"):
return False, "not-an-edition"

url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
@@ -155,7 +158,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s


def _resolve_archive_id(
session: requests.Session, edition_id: str, ia_candidates: List[str]
session: requests.Session,
edition_id: str,
ia_candidates: List[str]
) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
@@ -165,7 +170,10 @@ def _resolve_archive_id(

# Otherwise query the edition JSON.
try:
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=6)
resp = session.get(
f"https://openlibrary.org/books/{edition_id}.json",
timeout=6
)
resp.raise_for_status()
data = resp.json() or {}

@@ -206,13 +214,19 @@ def _archive_id_from_url(url: str) -> str:
# - /details/<id>/...
# - /borrow/<id>
# - /download/<id>/...
if len(parts) >= 2 and parts[0].lower() in {"details", "borrow", "download", "stream"}:
if len(parts) >= 2 and parts[0].lower() in {"details",
"borrow",
"download",
"stream"}:
return str(parts[1]).strip()

# Sometimes the identifier is the first segment.
if len(parts) >= 1:
first = str(parts[0]).strip()
if first and first.lower() not in {"account", "services", "search", "advancedsearch.php"}:
if first and first.lower() not in {"account",
"services",
"search",
"advancedsearch.php"}:
return first

return ""
@@ -249,14 +263,17 @@ def _coerce_archive_field_list(value: Any) -> List[str]:
return [s] if s else []


def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any]) -> List[str]:
def _archive_item_metadata_to_tags(archive_id: str,
item_metadata: Dict[str,
Any]) -> List[str]:
"""Map Archive.org metadata JSON (the `metadata` object) to tag strings.

This is intentionally best-effort and conservative: it focuses on stable,
useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
"""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata, dict) else {}
meta = item_metadata if isinstance(item_metadata,
dict) else {}

tags: List[str] = []
seen: set[str] = set()
@@ -374,7 +391,10 @@ def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any
return tags


def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[str, Any]:
def _fetch_archive_item_metadata(archive_id: str,
*,
timeout: int = 8) -> Dict[str,
Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
@@ -384,7 +404,8 @@ def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[s
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta, dict) else {}
return meta if isinstance(meta,
dict) else {}


class OpenLibrary(Provider):
@@ -404,7 +425,9 @@ class OpenLibrary(Provider):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""

@staticmethod
def _credential_archive(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
def _credential_archive(config: Dict[str,
Any]) -> Tuple[Optional[str],
Optional[str]]:
"""Get Archive.org email/password from config.

Supports:
@@ -415,9 +438,11 @@ class OpenLibrary(Provider):
if not isinstance(config, dict):
return None, None

provider_config = config.get("provider", {})
provider_config = config.get("provider",
{})
if isinstance(provider_config, dict):
openlibrary_config = provider_config.get("openlibrary", {})
openlibrary_config = provider_config.get("openlibrary",
{})
if isinstance(openlibrary_config, dict):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
@@ -456,7 +481,10 @@ class OpenLibrary(Provider):
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
session = requests.Session()

token_resp = session.get("https://archive.org/services/account/login/", timeout=30)
token_resp = session.get(
"https://archive.org/services/account/login/",
timeout=30
)
try:
token_json = token_resp.json()
except Exception as exc:
@@ -473,8 +501,14 @@ class OpenLibrary(Provider):
if not token:
raise RuntimeError("Archive login token missing")

headers = {"Content-Type": "application/x-www-form-urlencoded"}
payload = {"username": email, "password": password, "t": token}
headers = {
"Content-Type": "application/x-www-form-urlencoded"
}
payload = {
"username": email,
"password": password,
"t": token
}

login_resp = session.post(
"https://archive.org/services/account/login/",
@@ -499,22 +533,34 @@ class OpenLibrary(Provider):

@classmethod
def _archive_loan(
cls, session: requests.Session, book_id: str, *, verbose: bool = True
cls,
session: requests.Session,
book_id: str,
*,
verbose: bool = True
) -> requests.Session:
data = {"action": "grant_access", "identifier": book_id}
data = {
"action": "grant_access",
"identifier": book_id
}
session.post(
"https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
"https://archive.org/services/loans/loan/searchInside.php",
data=data,
timeout=30
)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
response = session.post(
"https://archive.org/services/loans/loan/",
data=data,
timeout=30
)

if response.status_code == 400:
try:
err = (response.json() or {}).get("error")
if (
err
== "This book is not available to borrow at this time. Please try again later."
):
if (err ==
"This book is not available to borrow at this time. Please try again later."
):
raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
except cls.BookNotAvailableError:
@@ -523,15 +569,26 @@ class OpenLibrary(Provider):
raise RuntimeError("The book cannot be borrowed")

data["action"] = "create_token"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
response = session.post(
"https://archive.org/services/loans/loan/",
data=data,
timeout=30
)
if "token" in (response.text or ""):
return session
raise RuntimeError("Something went wrong when trying to borrow the book")

@staticmethod
def _archive_return_loan(session: requests.Session, book_id: str) -> None:
data = {"action": "return_loan", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
data = {
"action": "return_loan",
"identifier": book_id
}
response = session.post(
"https://archive.org/services/loans/loan/",
data=data,
timeout=30
)
if response.status_code == 200:
try:
if (response.json() or {}).get("success"):
@@ -551,8 +608,8 @@ class OpenLibrary(Provider):
if session is None:
return
for url in (
"https://archive.org/account/logout",
"https://archive.org/account/logout.php",
"https://archive.org/account/logout",
"https://archive.org/account/logout.php",
):
try:
resp = session.get(url, timeout=15, allow_redirects=True)
@@ -579,7 +636,9 @@ class OpenLibrary(Provider):
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
resp.raise_for_status()
data = resp.json() if resp is not None else {}
meta = data.get("metadata", {}) if isinstance(data, dict) else {}
meta = data.get("metadata",
{}) if isinstance(data,
dict) else {}
collection = meta.get("collection") if isinstance(meta, dict) else None

values: List[str] = []
@@ -588,16 +647,20 @@ class OpenLibrary(Provider):
elif isinstance(collection, str):
values = [collection.strip().lower()]

if any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values):
if any(v in {"inlibrary",
"printdisabled",
"lendinglibrary"} for v in values):
return True, "archive-collection"
return False, "archive-not-lendable"
except Exception:
return False, "archive-metadata-error"

@staticmethod
def _archive_get_book_infos(
session: requests.Session, url: str
) -> Tuple[str, List[str], Dict[str, Any]]:
def _archive_get_book_infos(session: requests.Session,
url: str) -> Tuple[str,
List[str],
Dict[str,
Any]]:
"""Extract page links from Archive.org book reader."""
r = session.get(url, timeout=30).text

@@ -620,7 +683,8 @@ class OpenLibrary(Provider):

metadata = data.get("metadata") or {}
links: List[str] = []
br_data = (data.get("brOptions") or {}).get("data", [])
br_data = (data.get("brOptions") or {}).get("data",
[])
if isinstance(br_data, list):
for item in br_data:
if isinstance(item, list):
@@ -639,7 +703,11 @@ class OpenLibrary(Provider):
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"

@staticmethod
def _archive_deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
def _archive_deobfuscate_image(
image_data: bytes,
link: str,
obf_header: str
) -> bytes:
if not AES or not Counter:
raise RuntimeError("Crypto library not available")

@@ -657,11 +725,18 @@ class OpenLibrary(Provider):

counter_bytes = base64.b64decode(counter_b64)
if len(counter_bytes) != 16:
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
raise ValueError(
f"Expected counter to be 16 bytes, got {len(counter_bytes)}"
)

prefix = counter_bytes[:8]
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
ctr = Counter.new(
64,
prefix=prefix,
initial_value=initial_value,
little_endian=False
) # type: ignore
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore

decrypted_part = cipher.decrypt(image_data[:1024])
@@ -699,7 +774,11 @@ class OpenLibrary(Provider):
image = cls._archive_image_name(pages, i, directory)
obf_header = response.headers.get("X-Obfuscate")
if obf_header:
image_content = cls._archive_deobfuscate_image(response.content, link, obf_header)
image_content = cls._archive_deobfuscate_image(
response.content,
link,
obf_header
)
else:
image_content = response.content

@@ -715,7 +794,9 @@ class OpenLibrary(Provider):
links: List[str],
scale: int,
book_id: str,
progress_callback: Optional[Callable[[int, int], None]] = None,
progress_callback: Optional[Callable[[int,
int],
None]] = None,
) -> List[str]:
links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
pages = len(links_scaled)
@@ -748,7 +829,8 @@ class OpenLibrary(Provider):
except Exception:
pass
elif tqdm:
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
for _ in tqdm(futures.as_completed(tasks),
total=len(tasks)): # type: ignore
pass
else:
for _ in futures.as_completed(tasks):
@@ -770,11 +852,16 @@ class OpenLibrary(Provider):
if not isinstance(file_info, dict):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
if filename.endswith(".pdf") and file_info.get("source"
) == "original":
pdf_url = (
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
)
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
check_response = requests.head(
pdf_url,
timeout=4,
allow_redirects=True
)
if check_response.status_code == 200:
return True, pdf_url
return False, ""
@@ -927,7 +1014,8 @@ class OpenLibrary(Provider):
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author", {}).get("key")
author_key = author.get("author",
{}).get("key")
if not author_key:
author_key = author.get("key")

@@ -937,7 +1025,9 @@ class OpenLibrary(Provider):
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json_module.loads(author_resp.content.decode("utf-8"))
author_data = json_module.loads(
author_resp.content.decode("utf-8")
)
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
@@ -1011,7 +1101,8 @@ class OpenLibrary(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
@@ -1032,7 +1123,10 @@ class OpenLibrary(Provider):
try:
resp = self._session.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": int(limit)},
params={
"q": q,
"limit": int(limit)
},
timeout=10,
)
resp.raise_for_status()
@@ -1048,9 +1142,13 @@ class OpenLibrary(Provider):

# Availability enrichment can be slow if done sequentially (it may require multiple
# network calls per row). Do it concurrently to keep the pipeline responsive.
docs = docs[: int(limit)]
docs = docs[:int(limit)]

def _compute_availability(doc_dict: Dict[str, Any]) -> Tuple[str, str, str, str]:
def _compute_availability(doc_dict: Dict[str,
Any]) -> Tuple[str,
str,
str,
str]:
edition_id_local = _resolve_edition_id(doc_dict)
if not edition_id_local:
return "no-olid", "", "", ""
@@ -1066,7 +1164,9 @@ class OpenLibrary(Provider):

try:
archive_id_local = _resolve_archive_id(
session_local, edition_id_local, ia_ids_local
session_local,
edition_id_local,
ia_ids_local
)
except Exception:
archive_id_local = ""
@@ -1089,17 +1189,23 @@ class OpenLibrary(Provider):

return "unavailable", reason_local, archive_id_local, ""

availability_rows: List[Tuple[str, str, str, str]] = [
("unknown", "", "", "") for _ in range(len(docs))
]
availability_rows: List[Tuple[str,
str,
str,
str]] = [
("unknown",
"",
"",
"") for _ in range(len(docs))
]
if docs:
max_workers = min(8, max(1, len(docs)))
done = 0
with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_index = {
executor.submit(_compute_availability, doc_dict): i
for i, doc_dict in enumerate(docs)
if isinstance(doc_dict, dict)
executor.submit(_compute_availability,
doc_dict): i
for i, doc_dict in enumerate(docs) if isinstance(doc_dict, dict)
}
for fut in futures.as_completed(list(future_to_index.keys())):
i = future_to_index[fut]
@@ -1145,11 +1251,16 @@ class OpenLibrary(Provider):
isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "")

columns = [
("Title", book_title),
("Author", ", ".join(authors_list)),
("Year", year),
("Avail", ""),
("OLID", edition_id),
("Title",
book_title),
("Author",
", ".join(authors_list)),
("Year",
year),
("Avail",
""),
("OLID",
edition_id),
]

# Determine availability using the concurrently computed enrichment.
@@ -1170,7 +1281,8 @@ class OpenLibrary(Provider):
annotations.append(f"isbn_10:{isbn_10}")
if ia_ids:
annotations.append("archive")
if availability in {"download", "borrow"}:
if availability in {"download",
"borrow"}:
annotations.append(availability)

results.append(
@@ -1178,17 +1290,17 @@ class OpenLibrary(Provider):
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}"
if edition_id
else (
f"https://openlibrary.org/books/{edition_id}" if edition_id else
(
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/")
else "https://openlibrary.org"
if isinstance(work_key,
str) and work_key.startswith("/") else
"https://openlibrary.org"
)
),
detail=(
(f"By: {', '.join(authors_list)}" if authors_list else "")
+ (f" ({year})" if year else "")
(f"By: {', '.join(authors_list)}" if authors_list else "") +
(f" ({year})" if year else "")
).strip(),
annotations=annotations,
media_kind="book",
@@ -1216,7 +1328,11 @@ class OpenLibrary(Provider):
self,
result: SearchResult,
output_dir: Path,
progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
progress_callback: Optional[Callable[[str,
int,
Optional[int],
str],
None]] = None,
) -> Optional[Path]:
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
@@ -1245,7 +1361,10 @@ class OpenLibrary(Provider):
archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))

if not archive_id:
log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
log(
"[openlibrary] No archive identifier available; cannot download",
file=sys.stderr
)
return None

# Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
@@ -1290,12 +1409,9 @@ class OpenLibrary(Provider):
session=self._session,
progress_callback=(
(
lambda downloaded, total, label: progress_callback(
"bytes", downloaded, total, label
)
)
if progress_callback is not None
else None
lambda downloaded, total, label:
progress_callback("bytes", downloaded, total, label)
) if progress_callback is not None else None
),
)
if ok:
@@ -1307,7 +1423,10 @@ class OpenLibrary(Provider):
try:
email, password = self._credential_archive(self.config or {})
if not email or not password:
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
log(
"[openlibrary] Archive credentials missing; cannot borrow",
file=sys.stderr
)
return None

lendable = True
@@ -1369,7 +1488,10 @@ class OpenLibrary(Provider):
continue

if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
log(
f"[openlibrary] Failed to extract pages: {last_exc}",
file=sys.stderr
)
return None

try:
@@ -1388,9 +1510,10 @@ class OpenLibrary(Provider):
scale=3,
book_id=archive_id,
progress_callback=(
(lambda done, total: progress_callback("pages", done, total, "pages"))
if progress_callback is not None
else None
(
lambda done, total:
progress_callback("pages", done, total, "pages")
) if progress_callback is not None else None
),
)

@@ -1436,7 +1559,10 @@ class OpenLibrary(Provider):
try:
self._archive_return_loan(session, archive_id)
except Exception as exc:
log(f"[openlibrary] Warning: failed to return loan: {exc}", file=sys.stderr)
log(
f"[openlibrary] Warning: failed to return loan: {exc}",
file=sys.stderr
)
try:
self._archive_logout(session)
except Exception:

@@ -15,7 +15,6 @@ from ProviderCore.base import Provider, SearchResult
from SYS.logger import log, debug
from models import ProgressBar


_SOULSEEK_NOISE_SUBSTRINGS = (
"search reply ticket does not match any search request",
"failed to receive transfer ticket on file connection",
@@ -82,11 +81,11 @@ def _configure_aioslsk_logging() -> None:
aioslsk to ERROR and stop propagation so it doesn't spam the CLI.
"""
for name in (
"aioslsk",
"aioslsk.network",
"aioslsk.search",
"aioslsk.transfer",
"aioslsk.transfer.manager",
"aioslsk",
"aioslsk.network",
"aioslsk.search",
"aioslsk.transfer",
"aioslsk.transfer.manager",
):
logger = logging.getLogger(name)
logger.setLevel(logging.ERROR)
@@ -237,7 +236,7 @@ class Soulseek(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
try:
from config import get_soulseek_username, get_soulseek_password
from SYS.config import get_soulseek_username, get_soulseek_password

user = get_soulseek_username(self.config)
pwd = get_soulseek_password(self.config)
@@ -257,7 +256,10 @@ class Soulseek(Provider):
filename = full_metadata.get("filename") or result.path

if not username or not filename:
log(f"[soulseek] Missing metadata for download: {result.title}", file=sys.stderr)
log(
f"[soulseek] Missing metadata for download: {result.title}",
file=sys.stderr
)
return None

# This cmdlet stack is synchronous; use asyncio.run for clarity.
@@ -294,9 +296,11 @@ class Soulseek(Provider):
log(f"[soulseek] Download error: {exc}", file=sys.stderr)
return None

async def perform_search(
self, query: str, timeout: float = 9.0, limit: int = 50
) -> List[Dict[str, Any]]:
async def perform_search(self,
query: str,
timeout: float = 9.0,
limit: int = 50) -> List[Dict[str,
Any]]:
"""Perform async Soulseek search."""

from aioslsk.client import SoulSeekClient
@@ -305,7 +309,10 @@ class Soulseek(Provider):
os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)

settings = Settings(
credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)
credentials=CredentialsSettings(
username=self.USERNAME,
password=self.PASSWORD
)
)
client = SoulSeekClient(settings)

@@ -315,7 +322,10 @@ class Soulseek(Provider):
await client.start()
await client.login()
except Exception as exc:
log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr)
log(
f"[soulseek] Login failed: {type(exc).__name__}: {exc}",
file=sys.stderr
)
return []

try:
@@ -323,7 +333,10 @@ class Soulseek(Provider):
await self._collect_results(search_request, timeout=timeout)
return self._flatten_results(search_request)[:limit]
except Exception as exc:
log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr)
log(
f"[soulseek] Search error: {type(exc).__name__}: {exc}",
file=sys.stderr
)
return []
finally:
# Best-effort: try to cancel/close the search request before stopping
@@ -356,8 +369,12 @@ class Soulseek(Provider):
{
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0),
"filename": getattr(file_data,
"filename",
"?"),
"size": getattr(file_data,
"filesize",
0),
}
)

@@ -366,14 +383,22 @@ class Soulseek(Provider):
{
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0),
"filename": getattr(file_data,
"filename",
"?"),
"size": getattr(file_data,
"filesize",
0),
}
)

return flat

async def _collect_results(self, search_request: Any, timeout: float = 75.0) -> None:
async def _collect_results(
self,
search_request: Any,
timeout: float = 75.0
) -> None:
end = time.time() + timeout
last_count = 0
while time.time() < end:
@@ -387,20 +412,28 @@ class Soulseek(Provider):
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
filters: Optional[Dict[str,
Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}

try:
flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit))
flat_results = asyncio.run(
self.perform_search(query,
timeout=9.0,
limit=limit)
)
if not flat_results:
return []

music_results: List[dict] = []
for item in flat_results:
filename = item["filename"]
ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else ""
ext = (
"." + filename.rsplit(".",
1)[-1].lower()
) if "." in filename else ""
if ext in self.MUSIC_EXTENSIONS:
music_results.append(item)

@@ -410,18 +443,23 @@ class Soulseek(Provider):
enriched_results: List[dict] = []
for item in music_results:
filename = item["filename"]
ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else ""
ext = (
"." + filename.rsplit(".",
1)[-1].lower()
) if "." in filename else ""

display_name = filename.replace("\\", "/").split("/")[-1]
path_parts = filename.replace("\\", "/").split("/")
artist = path_parts[-3] if len(path_parts) >= 3 else ""
album = (
path_parts[-2]
if len(path_parts) >= 3
else (path_parts[-2] if len(path_parts) == 2 else "")
path_parts[-2] if len(path_parts) >= 3 else
(path_parts[-2] if len(path_parts) == 2 else "")
)

base_name = display_name.rsplit(".", 1)[0] if "." in display_name else display_name
base_name = display_name.rsplit(
".",
1
)[0] if "." in display_name else display_name
track_num = ""
title = base_name
filename_artist = ""
@@ -457,7 +495,8 @@ class Soulseek(Provider):
if artist_filter or album_filter or track_filter:
filtered: List[dict] = []
for item in enriched_results:
if artist_filter and artist_filter not in item["artist"].lower():
if artist_filter and artist_filter not in item["artist"].lower(
):
continue
if album_filter and album_filter not in item["album"].lower():
continue
@@ -466,7 +505,9 @@ class Soulseek(Provider):
filtered.append(item)
enriched_results = filtered

enriched_results.sort(key=lambda item: (item["ext"].lower() != ".flac", -item["size"]))
enriched_results.sort(
key=lambda item: (item["ext"].lower() != ".flac", -item["size"])
)

results: List[SearchResult] = []
for item in enriched_results:
@@ -475,11 +516,16 @@ class Soulseek(Provider):
size_mb = int(item["size"] / 1024 / 1024)

columns = [
("Track", item["track_num"] or "?"),
("Title", item["title"][:40]),
("Artist", artist_display[:32]),
("Album", album_display[:32]),
("Size", f"{size_mb} MB"),
("Track",
item["track_num"] or "?"),
("Title",
item["title"][:40]),
("Artist",
artist_display[:32]),
("Album",
album_display[:32]),
("Size",
f"{size_mb} MB"),
]

results.append(
@@ -488,7 +534,8 @@ class Soulseek(Provider):
title=item["title"],
path=item["filename"],
detail=f"{artist_display} - {album_display}",
annotations=[f"{size_mb} MB", item["ext"].lstrip(".").upper()],
annotations=[f"{size_mb} MB",
item["ext"].lstrip(".").upper()],
media_kind="audio",
size_bytes=item["size"],
columns=columns,
@@ -515,7 +562,7 @@ class Soulseek(Provider):

# Require configured credentials.
try:
from config import get_soulseek_username, get_soulseek_password
from SYS.config import get_soulseek_username, get_soulseek_password

user = get_soulseek_username(self.config)
pwd = get_soulseek_password(self.config)
@@ -570,10 +617,16 @@ async def download_soulseek_file(
)

settings = Settings(
credentials=CredentialsSettings(username=login_user, password=login_pass)
credentials=CredentialsSettings(username=login_user,
password=login_pass)
)

async def _attempt_once(attempt_num: int) -> tuple[Optional[Path], Any, int, float]:
async def _attempt_once(
attempt_num: int
) -> tuple[Optional[Path],
Any,
int,
float]:
client = SoulSeekClient(settings)
with _suppress_aioslsk_noise():
async with _suppress_aioslsk_asyncio_task_noise():
@@ -586,10 +639,14 @@ async def download_soulseek_file(
f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}",
file=sys.stderr,
)
debug(f"[soulseek] Requesting download from {username}: {filename}")
debug(
f"[soulseek] Requesting download from {username}: {filename}"
)

transfer = await client.transfers.add(
Transfer(username, filename, TransferDirection.DOWNLOAD)
Transfer(username,
filename,
TransferDirection.DOWNLOAD)
)
transfer.local_path = str(output_path)
await client.transfers.queue(transfer)
@@ -602,14 +659,29 @@ async def download_soulseek_file(
elapsed = time.time() - start_time
if elapsed > timeout:
log(
f"[soulseek] Download timeout after {timeout}s", file=sys.stderr
f"[soulseek] Download timeout after {timeout}s",
file=sys.stderr
)
bytes_done = int(
getattr(transfer,
"bytes_transfered",
0) or 0
)
state_val = getattr(
getattr(transfer,
"state",
None),
"VALUE",
None
)
bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
state_val = getattr(getattr(transfer, "state", None), "VALUE", None)
progress_bar.finish()
return None, state_val, bytes_done, elapsed

bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
bytes_done = int(
getattr(transfer,
"bytes_transfered",
0) or 0
)
total_bytes = int(getattr(transfer, "filesize", 0) or 0)
now = time.time()
if now - last_progress_time >= 0.5:
@@ -623,11 +695,18 @@ async def download_soulseek_file(

await asyncio.sleep(1)

final_state = getattr(getattr(transfer, "state", None), "VALUE", None)
final_state = getattr(
getattr(transfer,
"state",
None),
"VALUE",
None
)
downloaded_path = (
Path(transfer.local_path)
if getattr(transfer, "local_path", None)
else output_path
if getattr(transfer,
"local_path",
None) else output_path
)
final_elapsed = time.time() - start_time

@@ -636,7 +715,8 @@ async def download_soulseek_file(

# If a file was written, treat it as success even if state is odd.
try:
if downloaded_path.exists() and downloaded_path.stat().st_size > 0:
if downloaded_path.exists() and downloaded_path.stat(
).st_size > 0:
if final_state != TransferState.COMPLETE:
log(
f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.",
@@ -651,7 +731,8 @@ async def download_soulseek_file(
except Exception:
pass

if final_state == TransferState.COMPLETE and downloaded_path.exists():
if final_state == TransferState.COMPLETE and downloaded_path.exists(
):
debug(f"[soulseek] Download complete: {downloaded_path}")
return (
downloaded_path,
@@ -670,7 +751,8 @@ async def download_soulseek_file(

# Clean up 0-byte placeholder.
try:
if downloaded_path.exists() and downloaded_path.stat().st_size == 0:
if downloaded_path.exists() and downloaded_path.stat(
).st_size == 0:
downloaded_path.unlink(missing_ok=True)
except Exception:
pass
@@ -696,7 +778,8 @@ async def download_soulseek_file(
should_retry = (bytes_done == 0) and (elapsed < 15.0)
if attempt < max_attempts and should_retry:
log(
f"[soulseek] Retrying after fast failure (state={final_state})", file=sys.stderr
f"[soulseek] Retrying after fast failure (state={final_state})",
file=sys.stderr
)
await asyncio.sleep(2)
continue
@@ -704,7 +787,10 @@ async def download_soulseek_file(
return None

except ImportError:
log("[soulseek] aioslsk not installed. Install with: pip install aioslsk", file=sys.stderr)
log(
"[soulseek] aioslsk not installed. Install with: pip install aioslsk",
file=sys.stderr
)
return None
except Exception as exc:
log(f"[soulseek] Download failed: {type(exc).__name__}: {exc}", file=sys.stderr)

@@ -12,7 +12,6 @@ from urllib.parse import urlparse
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
|
||||
|
||||
_TELEGRAM_DEFAULT_TIMESTAMP_STEM_RE = re.compile(
|
||||
r"^(?P<prefix>photo|video|document|audio|voice|animation)_(?P<date>\d{4}-\d{2}-\d{2})_(?P<time>\d{2}-\d{2}-\d{2})(?: \(\d+\))?$",
|
||||
flags=re.IGNORECASE,
|
||||
@@ -39,7 +38,10 @@ def _unique_path(path: Path) -> Path:
|
||||
return parent / f"{stem} (copy){suffix}"
|
||||
|
||||
|
||||
def _maybe_strip_telegram_timestamped_default_filename(*, downloaded_path: Path) -> Path:
|
||||
def _maybe_strip_telegram_timestamped_default_filename(
|
||||
*,
|
||||
downloaded_path: Path
|
||||
) -> Path:
|
||||
"""Normalize Telethon's default timestamped names.
|
||||
|
||||
Examples:
|
||||
@@ -87,7 +89,8 @@ def _looks_like_telegram_message_url(url: str) -> bool:
|
||||
except Exception:
|
||||
return False
|
||||
host = (parsed.hostname or "").lower().strip()
|
||||
if host in {"t.me", "telegram.me"}:
|
||||
if host in {"t.me",
|
||||
"telegram.me"}:
|
||||
return True
|
||||
if host.endswith(".t.me"):
|
||||
return True
|
||||
@@ -147,9 +150,10 @@ class Telegram(Provider):
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
telegram_conf = (
|
||||
self.config.get("provider", {}).get("telegram", {})
|
||||
if isinstance(self.config, dict)
|
||||
else {}
|
||||
self.config.get("provider",
|
||||
{}).get("telegram",
|
||||
{}) if isinstance(self.config,
|
||||
dict) else {}
|
||||
)
|
||||
self._app_id = telegram_conf.get("app_id")
|
||||
self._api_hash = telegram_conf.get("api_hash")
|
||||
@@ -178,8 +182,10 @@ class Telegram(Provider):
|
||||
If an event loop is already running in this thread (common in REPL/TUI),
|
||||
runs the coroutine in a worker thread with its own loop.
|
||||
"""
|
||||
result: Dict[str, Any] = {}
|
||||
err: Dict[str, Any] = {}
|
||||
result: Dict[str,
|
||||
Any] = {}
|
||||
err: Dict[str,
|
||||
Any] = {}
|
||||
|
||||
def _runner() -> None:
|
||||
loop = asyncio.new_event_loop()
|
||||
@@ -201,7 +207,10 @@ class Telegram(Provider):
|
||||
except Exception:
|
||||
pass
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
loop.run_until_complete(
|
||||
asyncio.gather(*pending,
|
||||
return_exceptions=True)
|
||||
)
|
||||
try:
|
||||
loop.run_until_complete(loop.shutdown_asyncgens())
|
||||
except Exception:
|
||||
@@ -261,7 +270,10 @@ class Telegram(Provider):
|
||||
if not legacy_session.is_file():
|
||||
return
|
||||
|
||||
for suffix in (".session", ".session-journal", ".session-wal", ".session-shm"):
|
||||
for suffix in (".session",
|
||||
".session-journal",
|
||||
".session-wal",
|
||||
".session-shm"):
|
||||
src = Path(str(legacy_base) + suffix)
|
||||
dst = Path(str(new_base) + suffix)
|
||||
try:
|
||||
@@ -355,7 +367,10 @@ class Telegram(Provider):
|
||||
except EOFError:
|
||||
choice = ""
|
||||
|
||||
use_bot = choice in {"2", "b", "bot", "token"}
|
||||
use_bot = choice in {"2",
|
||||
"b",
|
||||
"bot",
|
||||
"token"}
|
||||
bot_token = ""
|
||||
if use_bot:
|
||||
sys.stderr.write("[telegram] Bot token: ")
|
||||
@@ -544,7 +559,9 @@ class Telegram(Provider):
|
||||
app_id = int(self._app_id) if self._app_id not in (None, "") else None
|
||||
except Exception:
|
||||
app_id = None
|
||||
api_hash = str(self._api_hash).strip() if self._api_hash not in (None, "") else ""
|
||||
api_hash = str(self._api_hash
|
||||
).strip() if self._api_hash not in (None,
|
||||
"") else ""
|
||||
if not bool(app_id and api_hash):
|
||||
return False
|
||||
|
||||
@@ -565,10 +582,13 @@ class Telegram(Provider):
|
||||
bot_token = str(self._bot_token or "").strip()
|
||||
if bot_token:
|
||||
return bool(
|
||||
self._ensure_session_with_bot_token(bot_token) and self._session_is_authorized()
|
||||
self._ensure_session_with_bot_token(bot_token)
|
||||
and self._session_is_authorized()
|
||||
)
|
||||
if prompt:
|
||||
return bool(self._ensure_session_interactive() and self._session_is_authorized())
|
||||
return bool(
|
||||
self._ensure_session_interactive() and self._session_is_authorized()
|
||||
)
|
||||
return False
|
||||
|
||||
def list_chats(self, *, limit: int = 200) -> list[Dict[str, Any]]:
|
||||
@@ -652,12 +672,20 @@ class Telegram(Provider):
|
||||
kind = "user"
|
||||
else:
|
||||
kind = (
|
||||
type(entity).__name__.lower() if entity is not None else "unknown"
|
||||
type(entity).__name__.lower()
|
||||
if entity is not None else "unknown"
|
||||
)
|
||||
except Exception:
|
||||
kind = "unknown"
|
||||
|
||||
rows.append({"id": chat_id, "title": title, "username": username, "type": kind})
|
||||
rows.append(
|
||||
{
|
||||
"id": chat_id,
|
||||
"title": title,
|
||||
"username": username,
|
||||
"type": kind
|
||||
}
|
||||
)
|
||||
return rows
|
||||
finally:
|
||||
try:
|
||||
@@ -672,7 +700,9 @@ class Telegram(Provider):
|
||||
|
||||
# Sort for stable display.
|
||||
try:
|
||||
rows.sort(key=lambda r: (str(r.get("type") or ""), str(r.get("title") or "")))
|
||||
rows.sort(
|
||||
key=lambda r: (str(r.get("type") or ""), str(r.get("title") or ""))
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return rows
|
||||
@@ -682,7 +712,8 @@ class Telegram(Provider):
        *,
        chat_ids: Sequence[int],
        usernames: Sequence[str],
        files: Optional[Sequence[Dict[str, Any]]] = None,
        files: Optional[Sequence[Dict[str,
                                      Any]]] = None,
        file_paths: Optional[Sequence[str]] = None,
    ) -> None:
        """Send local file(s) to one or more chats.
@@ -713,7 +744,10 @@ class Telegram(Provider):

        # Back-compat: allow callers to pass `file_paths=`.
        if files is None:
            files = [{"path": str(p), "title": ""} for p in (file_paths or [])]
            files = [{
                "path": str(p),
                "title": ""
            } for p in (file_paths or [])]

        def _sanitize_filename(text: str) -> str:
            # Windows-safe plus generally safe for Telegram.
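
One plausible shape for such a sanitizer (a sketch of the stated intent, not the provider's exact rules):

import re

def sanitize_filename(text: str) -> str:
    # Strip characters Windows forbids, collapse runs of whitespace,
    # and trim trailing dots/spaces that Windows also rejects.
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", text)
    cleaned = re.sub(r"\s+", " ", cleaned).strip(" .")
    return cleaned or "file"
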
@@ -754,7 +788,10 @@ class Telegram(Provider):
                title_text = str((f or {}).get("title") or "").strip()
            except Exception:
                title_text = ""
            jobs.append({"path": str(path_obj), "title": title_text})
            jobs.append({
                "path": str(path_obj),
                "title": title_text
            })

        if not jobs:
            raise Exception("No files to send")
@@ -781,7 +818,9 @@ class Telegram(Provider):
            try:
                await client.connect()
                if not bool(await client.is_user_authorized()):
                    raise Exception("Telegram session is not authorized. Run: .telegram -login")
                    raise Exception(
                        "Telegram session is not authorized. Run: .telegram -login"
                    )

                # Resolve entities: prefer IDs. Only fall back to usernames when IDs are absent.
                entities: list[Any] = []
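
A sketch of the ID-first resolution order the comment describes (helper name is illustrative; numeric IDs resolve from the session cache):

from typing import Any, Sequence
from telethon import TelegramClient

async def resolve_entities(client: TelegramClient,
                           chat_ids: Sequence[int],
                           usernames: Sequence[str]) -> list[Any]:
    entities: list[Any] = []
    for chat_id in chat_ids:
        entities.append(await client.get_entity(chat_id))
    if not entities:
        # Usernames are only consulted when no IDs were supplied.
        for name in usernames:
            handle = name if name.startswith("@") else "@" + name
            entities.append(await client.get_entity(handle))
    return entities
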
@@ -826,8 +865,7 @@ class Telegram(Provider):
                    fallback = path_obj.stem
                    base = (
                        _sanitize_filename(title_raw)
                        if title_raw
                        else _sanitize_filename(fallback)
                        if title_raw else _sanitize_filename(fallback)
                    )
                    ext = path_obj.suffix
                    send_name = f"{base}{ext}" if ext else base
@@ -838,7 +876,11 @@ class Telegram(Provider):
                        if print_progress is None:
                            return
                        try:
                            print_progress(send_name, int(sent or 0), int(total or 0))
                            print_progress(
                                send_name,
                                int(sent or 0),
                                int(total or 0)
                            )
                        except Exception:
                            return

@@ -860,7 +902,11 @@ class Telegram(Provider):
                    finally:
                        if print_final_progress is not None:
                            try:
                                print_final_progress(send_name, int(file_size or 0), 0.0)
                                print_final_progress(
                                    send_name,
                                    int(file_size or 0),
                                    0.0
                                )
                            except Exception:
                                pass
            except Exception as exc:
@@ -900,9 +946,12 @@ class Telegram(Provider):
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    def _download_message_media_sync(
        self, *, url: str, output_dir: Path
    ) -> Tuple[Path, Dict[str, Any]]:
    def _download_message_media_sync(self,
                                     *,
                                     url: str,
                                     output_dir: Path) -> Tuple[Path,
                                                                Dict[str,
                                                                     Any]]:
        # Ensure we have an authorized session before attempting API calls.
        # Never prompt during downloads.
        if not self.ensure_session(prompt=False):
@@ -928,14 +977,17 @@ class Telegram(Provider):
            try:
                await client.connect()
                if not bool(await client.is_user_authorized()):
                    raise Exception("Telegram session is not authorized. Run: .telegram -login")
                    raise Exception(
                        "Telegram session is not authorized. Run: .telegram -login"
                    )

                if chat.startswith("c:"):
                    channel_id = int(chat.split(":", 1)[1])
                    entity = PeerChannel(channel_id)
                else:
                    entity = chat
                    if isinstance(entity, str) and entity and not entity.startswith("@"):
                    if isinstance(entity,
                                  str) and entity and not entity.startswith("@"):
                        entity = "@" + entity

                messages = await client.get_messages(entity, ids=[message_id])
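
The chat key and message id used here are parsed out of the t.me URL; a sketch of one plausible parser, where the "c:" prefix for private-channel links is this provider's own convention:

from urllib.parse import urlparse

def parse_telegram_url(url: str) -> tuple[str, int]:
    # https://t.me/<username>/<id>     -> ("<username>", id)
    # https://t.me/c/<channel_id>/<id> -> ("c:<channel_id>", id)
    parts = [p for p in urlparse(url).path.split("/") if p]
    if parts and parts[0] == "c":
        return f"c:{parts[1]}", int(parts[2])
    return parts[0], int(parts[1])
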
@@ -1009,7 +1061,9 @@ class Telegram(Provider):
            from models import ProgressBar

            progress_bar = ProgressBar()
            last_print = {"t": 0.0}
            last_print = {
                "t": 0.0
            }

            def _progress(current: int, total: int) -> None:
                now = time.monotonic()
@@ -1017,7 +1071,10 @@ class Telegram(Provider):
                    return
                last_print["t"] = now
                progress_bar.update(
                    downloaded=int(current), total=int(total), label="telegram", file=sys.stderr
                    downloaded=int(current),
                    total=int(total),
                    label="telegram",
                    file=sys.stderr
                )

            part_kb = self._resolve_part_size_kb(file_size)
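
The last_print dict plus time.monotonic() is a simple rate limiter for progress output; a self-contained sketch of the same idea (interval value is arbitrary):

import sys
import time

def make_throttled_progress(min_interval: float = 0.25):
    last = {"t": 0.0}  # mutable cell so the closure can update it

    def _progress(current: int, total: int) -> None:
        now = time.monotonic()
        if now - last["t"] < min_interval:
            return  # too soon since the previous update; skip
        last["t"] = now
        sys.stderr.write(f"\r{current}/{total} bytes")
        sys.stderr.flush()

    return _progress
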
@@ -1030,7 +1087,9 @@ class Telegram(Provider):
                )
            except TypeError:
                downloaded = await client.download_media(
                    message, file=str(output_dir), progress_callback=_progress
                    message,
                    file=str(output_dir),
                    progress_callback=_progress
                )
            progress_bar.finish()
            if not downloaded:
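
The except TypeError above is feature detection: the first download_media call (cut off in this hunk) presumably passes an extra keyword such as a part-size hint, and the fallback retries without it on Telethon versions that reject the keyword. Schematically, with the extra kwarg purely hypothetical:

async def download_with_fallback(client, message, output_dir, progress, part_kb):
    try:
        # Newer-style call; part_size_kb here is an assumed example kwarg.
        return await client.download_media(
            message, file=str(output_dir),
            progress_callback=progress, part_size_kb=part_kb)
    except TypeError:
        # Older signature: same call without the unsupported keyword.
        return await client.download_media(
            message, file=str(output_dir), progress_callback=progress)
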
@@ -1051,27 +1110,28 @@ class Telegram(Provider):
            except Exception:
                date_iso = None

            info: Dict[str, Any] = {
                "provider": "telegram",
                "source_url": url,
                "chat": {
                    "key": chat,
                    "title": chat_title,
                    "username": chat_username,
                    "id": chat_id,
                },
                "message": {
                    "id": msg_id,
                    "date": date_iso,
                    "caption": caption,
                },
                "file": {
                    "name": file_name,
                    "mime_type": file_mime,
                    "size": file_size,
                    "downloaded_path": str(downloaded_path),
                },
            }
            info: Dict[str,
                       Any] = {
                           "provider": "telegram",
                           "source_url": url,
                           "chat": {
                               "key": chat,
                               "title": chat_title,
                               "username": chat_username,
                               "id": chat_id,
                           },
                           "message": {
                               "id": msg_id,
                               "date": date_iso,
                               "caption": caption,
                           },
                           "file": {
                               "name": file_name,
                               "mime_type": file_mime,
                               "size": file_size,
                               "downloaded_path": str(downloaded_path),
                           },
                       }
            return downloaded_path, info
        except errors.RPCError as exc:
            raise Exception(f"Telegram RPC error: {exc}")

@@ -14,14 +14,20 @@ class YouTube(Provider):
        self,
        query: str,
        limit: int = 10,
        filters: Optional[Dict[str, Any]] = None,
        filters: Optional[Dict[str,
                               Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        # Use the yt_dlp Python module (installed via requirements.txt).
        try:
            import yt_dlp  # type: ignore

            ydl_opts: Dict[str, Any] = {"quiet": True, "skip_download": True, "extract_flat": True}
            ydl_opts: Dict[str,
                           Any] = {
                               "quiet": True,
                               "skip_download": True,
                               "extract_flat": True
                           }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[arg-type]
                search_query = f"ytsearch{limit}:{query}"
                info = ydl.extract_info(search_query, download=False)
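
The ytsearch<N>: prefix is yt-dlp's built-in search extractor, and extract_flat avoids fetching each result's full page. A minimal standalone usage example:

import yt_dlp

opts = {"quiet": True, "skip_download": True, "extract_flat": True}
with yt_dlp.YoutubeDL(opts) as ydl:
    # Ask the YouTube search extractor for the top five hits.
    info = ydl.extract_info("ytsearch5:lofi hip hop", download=False)
    for entry in info.get("entries", []):
        print(entry.get("title"), entry.get("url"))
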
@@ -30,13 +36,16 @@ class YouTube(Provider):
            for video_data in entries[:limit]:
                title = video_data.get("title", "Unknown")
                video_id = video_data.get("id", "")
                url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}"
                url = video_data.get(
                    "url"
                ) or f"https://youtube.com/watch?v={video_id}"
                uploader = video_data.get("uploader", "Unknown")
                duration = video_data.get("duration", 0)
                view_count = video_data.get("view_count", 0)

                duration_str = (
                    f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
                    f"{int(duration // 60)}:{int(duration % 60):02d}"
                    if duration else ""
                )
                views_str = f"{view_count:,}" if view_count else ""

@@ -46,13 +55,18 @@ class YouTube(Provider):
                        title=title,
                        path=url,
                        detail=f"By: {uploader}",
                        annotations=[duration_str, f"{views_str} views"],
                        annotations=[duration_str,
                                     f"{views_str} views"],
                        media_kind="video",
                        columns=[
                            ("Title", title),
                            ("Uploader", uploader),
                            ("Duration", duration_str),
                            ("Views", views_str),
                            ("Title",
                             title),
                            ("Uploader",
                             uploader),
                            ("Duration",
                             duration_str),
                            ("Views",
                             views_str),
                        ],
                        full_metadata={
                            "video_id": video_id,

@@ -19,15 +19,26 @@ class ZeroXZero(Provider):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            headers = {"User-Agent": "Medeia-Macina/1.0"}
            headers = {
                "User-Agent": "Medeia-Macina/1.0"
            }
            with HTTPClient(headers=headers) as client:
                with open(file_path, "rb") as handle:
                    try:
                        total = os.path.getsize(file_path)
                    except Exception:
                        total = None
                    wrapped = ProgressFileReader(handle, total_bytes=total, label="upload")
                    response = client.post("https://0x0.st", files={"file": wrapped})
                    wrapped = ProgressFileReader(
                        handle,
                        total_bytes=total,
                        label="upload"
                    )
                    response = client.post(
                        "https://0x0.st",
                        files={
                            "file": wrapped
                        }
                    )

                if response.status_code == 200:
                    uploaded_url = response.text.strip()
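
0x0.st takes a plain multipart POST with a "file" field and answers with the uploaded URL as the response body. A minimal sketch using requests instead of the project's HTTPClient wrapper:

import requests

def upload_to_0x0(path: str) -> str:
    with open(path, "rb") as handle:
        resp = requests.post(
            "https://0x0.st",
            files={"file": handle},
            headers={"User-Agent": "Medeia-Macina/1.0"},
            timeout=60,
        )
    resp.raise_for_status()
    # Success responses are the file's URL in plain text.
    return resp.text.strip()
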
@@ -36,15 +47,19 @@ class ZeroXZero(Provider):
                        if pipe_obj is not None:
                            from Store import Store

                            Store(self.config, suppress_debug=True).try_add_url_for_pipe_object(
                                pipe_obj, uploaded_url
                            )
                            Store(
                                self.config,
                                suppress_debug=True
                            ).try_add_url_for_pipe_object(pipe_obj,
                                                          uploaded_url)
                    except Exception:
                        pass

                    return uploaded_url

                raise Exception(f"Upload failed: {response.status_code} - {response.text}")
                raise Exception(
                    f"Upload failed: {response.status_code} - {response.text}"
                )

        except Exception as exc:
            log(f"[0x0] Upload error: {exc}", file=sys.stderr)
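
The splitting style running through all of these hunks (one argument or dict entry per line, continuations aligned under the opening bracket) is what YAPF produces under a pep8-based style. The commit's actual .style.yapf is not shown in these hunks; a hypothetical way to reproduce such a pass from Python:

from yapf.yapflib.yapf_api import FormatCode

source = 'rows.append({"id": 1, "title": "t", "username": "u", "type": "user"})\n'
# style_config values are illustrative, not the repo's real settings.
formatted, _changed = FormatCode(
    source, style_config={"based_on_style": "pep8", "column_limit": 100})
print(formatted)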