This commit is contained in:
nose
2025-12-11 19:04:02 -08:00
parent 6863c6c7ea
commit 16d8a763cd
103 changed files with 4759 additions and 9156 deletions

Provider/__init__.py Normal file

@@ -0,0 +1,5 @@
"""Provider plugin modules.
Concrete provider implementations live in this package.
The public entrypoint/registry is Provider.registry.
"""

Provider/_base.py Normal file

@@ -0,0 +1,84 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@dataclass
class SearchResult:
"""Unified search result format across all search providers."""
table: str # Provider name: "libgen", "soulseek", "bandcamp", "youtube", etc.
title: str # Display title/filename
path: str # Download target (URL, path, magnet, identifier)
detail: str = "" # Additional description
annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"]
media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet"
size_bytes: Optional[int] = None
tags: set[str] = field(default_factory=set) # Searchable tags
columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns
full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for pipeline processing."""
return {
"table": self.table,
"title": self.title,
"path": self.path,
"detail": self.detail,
"annotations": self.annotations,
"media_kind": self.media_kind,
"size_bytes": self.size_bytes,
"tags": list(self.tags),
"columns": list(self.columns),
"full_metadata": self.full_metadata,
}
class SearchProvider(ABC):
"""Base class for search providers."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
self.config = config or {}
self.name = self.__class__.__name__.lower()
@abstractmethod
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
"""Search for items matching the query."""
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
"""Download an item from a search result."""
return None
def validate(self) -> bool:
"""Check if provider is available and properly configured."""
return True
class FileProvider(ABC):
"""Base class for file upload providers."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
self.config = config or {}
self.name = self.__class__.__name__.lower()
@abstractmethod
def upload(self, file_path: str, **kwargs: Any) -> str:
"""Upload a file and return the URL."""
def validate(self) -> bool:
"""Check if provider is available/configured."""
return True
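
A minimal sketch of a provider built on these base classes; EchoProvider and the example URL are hypothetical, only SearchProvider and SearchResult come from Provider._base.

from Provider._base import SearchProvider, SearchResult

class EchoProvider(SearchProvider):
    """Hypothetical provider used only to illustrate the contract."""

    def search(self, query, limit=50, filters=None, **kwargs):
        # A real provider would query a remote service here.
        result = SearchResult(
            table=self.name,  # "echoprovider", derived from the class name
            title=f"Example hit for {query!r}",
            path="https://example.invalid/item/1",
        )
        return [result][:limit]

provider = EchoProvider()
for row in provider.search("test"):
    print(row.to_dict()["title"])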

Provider/bandcamp.py Normal file

@@ -0,0 +1,109 @@
from __future__ import annotations
import sys
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from SYS.logger import log, debug
try:
from playwright.sync_api import sync_playwright
except ImportError: # pragma: no cover
sync_playwright = None
class Bandcamp(SearchProvider):
"""Search provider for Bandcamp."""
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
if sync_playwright is None:
log(
"[bandcamp] Playwright not available. Install with: pip install playwright",
file=sys.stderr,
)
return []
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page()
if query.strip().lower().startswith("artist:"):
artist_name = query[7:].strip().strip('"')
search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b"
else:
search_url = f"https://bandcamp.com/search?q={query}&item_type=a"
results = self._scrape_url(page, search_url, limit)
browser.close()
return results
except Exception as exc:
log(f"[bandcamp] Search error: {exc}", file=sys.stderr)
return []
def _scrape_url(self, page: Any, url: str, limit: int) -> List[SearchResult]:
debug(f"[bandcamp] Scraping: {url}")
page.goto(url)
page.wait_for_load_state("domcontentloaded")
results: List[SearchResult] = []
search_results = page.query_selector_all(".searchresult")
if not search_results:
return results
for item in search_results[:limit]:
try:
heading = item.query_selector(".heading")
if not heading:
continue
link = heading.query_selector("a")
if not link:
continue
title = link.inner_text().strip()
target_url = link.get_attribute("href")
subhead = item.query_selector(".subhead")
artist = subhead.inner_text().strip() if subhead else "Unknown"
itemtype = item.query_selector(".itemtype")
media_type = itemtype.inner_text().strip() if itemtype else "album"
results.append(
SearchResult(
table="bandcamp",
title=title,
path=target_url,
detail=f"By: {artist}",
annotations=[media_type],
media_kind="audio",
columns=[
("Name", title),
("Artist", artist),
("Type", media_type),
],
full_metadata={
"artist": artist,
"type": media_type,
},
)
)
except Exception as exc:
debug(f"[bandcamp] Error parsing result: {exc}")
return results
def validate(self) -> bool:
return sync_playwright is not None
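
A brief usage sketch, assuming Playwright and a Chromium build are installed (pip install playwright, then playwright install chromium). The artist: prefix is the only query syntax the provider recognizes; it switches the scrape from album search (item_type=a) to band search (item_type=b).

from Provider.bandcamp import Bandcamp

bc = Bandcamp()
if bc.validate():  # True only when playwright is importable
    for hit in bc.search('artist:"Boards of Canada"', limit=5):
        print(hit.title, "->", hit.path)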

Provider/libgen.py Normal file

@@ -0,0 +1,98 @@
from __future__ import annotations
import sys
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from SYS.logger import log
class Libgen(SearchProvider):
"""Search provider for Library Genesis books."""
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
try:
from Provider.unified_book_downloader import UnifiedBookDownloader
from Provider.query_parser import parse_query, get_field, get_free_text
parsed = parse_query(query)
isbn = get_field(parsed, "isbn")
author = get_field(parsed, "author")
title = get_field(parsed, "title")
free_text = get_free_text(parsed)
search_query = isbn or title or author or free_text or query
downloader = UnifiedBookDownloader(config=self.config)
books = downloader.search_libgen(search_query, limit=limit)
results: List[SearchResult] = []
for idx, book in enumerate(books, 1):
title = book.get("title", "Unknown")
author = book.get("author", "Unknown")
year = book.get("year", "Unknown")
pages = book.get("pages") or book.get("pages_str") or ""
extension = book.get("extension", "") or book.get("ext", "")
filesize = book.get("filesize_str", "Unknown")
isbn = book.get("isbn", "")
mirror_url = book.get("mirror_url", "")
columns = [
("Title", title),
("Author", author),
("Pages", str(pages)),
("Ext", str(extension)),
]
detail = f"By: {author}"
if year and year != "Unknown":
detail += f" ({year})"
annotations = [f"{filesize}"]
if isbn:
annotations.append(f"ISBN: {isbn}")
results.append(
SearchResult(
table="libgen",
title=title,
path=mirror_url or f"libgen:{book.get('id', '')}",
detail=detail,
annotations=annotations,
media_kind="book",
columns=columns,
full_metadata={
"number": idx,
"author": author,
"year": year,
"isbn": isbn,
"filesize": filesize,
"pages": pages,
"extension": extension,
"book_id": book.get("book_id", ""),
"md5": book.get("md5", ""),
},
)
)
return results
except Exception as exc:
log(f"[libgen] Search error: {exc}", file=sys.stderr)
return []
def validate(self) -> bool:
try:
from Provider.unified_book_downloader import UnifiedBookDownloader # noqa: F401
return True
except Exception:
return False
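
A usage sketch, assuming at least one Libgen mirror is reachable. The isbn:/author:/title: fields are parsed by Provider.query_parser, and a single field is forwarded as the search string (isbn takes precedence over title, then author, then free text).

from Provider.libgen import Libgen

lg = Libgen()
if lg.validate():  # requires Provider.unified_book_downloader to import cleanly
    for book in lg.search('author:"Albert Pike" title:"Morals and Dogma"', limit=3):
        print(book.title, book.annotations, book.path)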

Provider/libgen_service.py Normal file

@@ -0,0 +1,523 @@
"""Shared Library Genesis search and download helpers.
Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
Targets libgen.is/rs/st mirrors and parses the results table directly.
"""
from __future__ import annotations
import logging
import re
import requests
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote, urljoin, urlparse, unquote
# Optional dependencies
try:
from bs4 import BeautifulSoup
except ImportError:
BeautifulSoup = None
LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]
DEFAULT_TIMEOUT = 20.0
DEFAULT_LIMIT = 50
# Mirrors to try in order
MIRRORS = [
"https://libgen.is",
"https://libgen.rs",
"https://libgen.st",
"http://libgen.is",
"http://libgen.rs",
"http://libgen.st",
"https://libgen.li", # Different structure, fallback
"http://libgen.li",
"https://libgen.gl", # Different structure, fallback
"http://libgen.gl",
]
logging.getLogger(__name__).setLevel(logging.INFO)
def _call(logger: LogFn, message: str) -> None:
if logger:
logger(message)
class LibgenSearch:
"""Robust LibGen searcher."""
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
"""Search LibGen mirrors."""
if not BeautifulSoup:
logging.error("BeautifulSoup not installed. Cannot search LibGen.")
return []
for mirror in MIRRORS:
try:
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit)
else:
results = self._search_libgen_rs(mirror, query, limit)
if results:
return results
except Exception as e:
logging.debug(f"Mirror {mirror} failed: {e}")
continue
return []
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.rs/is/st style mirrors."""
# Search URL: /search.php?req=QUERY&res=100&column=def
url = f"{mirror}/search.php"
params = {
"req": query,
"res": 100, # Request more to filter later
"column": "def",
"open": 0,
"view": "simple",
"phrase": 1,
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# Find the results table (usually class "c")
table = soup.find("table", {"class": "c"})
if not table:
# Try finding by structure (table with many rows)
tables = soup.find_all("table")
for t in tables:
if len(t.find_all("tr")) > 5:
table = t
break
if not table:
return []
results = []
# Skip header row
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
# Columns:
# 0: ID
# 1: Author(s)
# 2: Title
# 3: Publisher
# 4: Year
# 5: Pages
# 6: Language
# 7: Size
# 8: Extension
# 9+: Mirrors
try:
libgen_id = cols[0].get_text(strip=True)
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
if not authors:
authors = [cols[1].get_text(strip=True)]
title_tag = cols[2].find("a")
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
# Extract MD5 from title link if possible (often in href)
# href='book/index.php?md5=...'
md5 = ""
if title_tag and title_tag.has_attr("href"):
href = title_tag["href"]
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
if match:
md5 = match.group(1)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
pages = cols[5].get_text(strip=True)
language = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
# Mirrors
# Usually col 9 is http://library.lol/main/MD5
mirror_links = []
for i in range(9, len(cols)):
a = cols[i].find("a")
if a and a.has_attr("href"):
mirror_links.append(a["href"])
# Construct direct download page link (library.lol)
# If we have MD5, we can guess it: http://library.lol/main/{md5}
if md5:
download_link = f"http://library.lol/main/{md5}"
elif mirror_links:
download_link = mirror_links[0]
else:
download_link = ""
results.append({
"id": libgen_id,
"title": title,
"author": ", ".join(authors),
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": md5,
"mirror_url": download_link,
"cover": "", # Could extract from hover if needed
})
if len(results) >= limit:
break
except Exception as e:
logging.debug(f"Error parsing row: {e}")
continue
return results
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.li/gl style mirrors."""
# Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a...
url = f"{mirror}/index.php"
params = {
"req": query,
"res": 100,
"covers": "on",
"filesuns": "all",
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"id": "tablelibgen"})
if not table:
table = soup.find("table", {"class": "table table-striped"})
if not table:
return []
results = []
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
try:
# Structure is different
# 0: Cover
# 1: Title (with link to file.php?id=...)
# 2: Author
# 3: Publisher
# 4: Year
# 5: Language
# 6: Pages
# 7: Size
# 8: Extension
# 9: Mirrors
title_col = cols[1]
title_link = title_col.find("a")
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
# Extract ID from link
libgen_id = ""
if title_link and title_link.has_attr("href"):
href = title_link["href"]
# href is usually "file.php?id=..." or "edition.php?id=..."
match = re.search(r"id=(\d+)", href)
if match:
libgen_id = match.group(1)
authors = cols[2].get_text(strip=True)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
language = cols[5].get_text(strip=True)
pages = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
# Mirror link
# Usually in col 9 or title link
mirror_url = ""
if title_link:
href = title_link["href"]
if href.startswith("/"):
mirror_url = mirror + href
else:
mirror_url = urljoin(mirror, href)
results.append({
"id": libgen_id,
"title": title,
"author": authors,
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": "", # .li doesn't show MD5 easily in table
"mirror_url": mirror_url,
})
if len(results) >= limit:
break
except Exception:
continue
return results
def search_libgen(
query: str,
limit: int = DEFAULT_LIMIT,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
"""Search Libgen using the robust scraper."""
searcher = LibgenSearch(session=session)
try:
results = searcher.search(query, limit=limit)
_call(log_info, f"[libgen] Found {len(results)} results")
return results
except Exception as e:
_call(log_error, f"[libgen] Search failed: {e}")
return []
def _resolve_download_url(
session: requests.Session,
url: str,
log_info: LogFn = None
) -> Optional[str]:
"""Resolve the final download URL by following the LibGen chain."""
current_url = url
visited = set()
# Max hops to prevent infinite loops
for _ in range(6):
if current_url in visited:
break
visited.add(current_url)
_call(log_info, f"[resolve] Checking: {current_url}")
# Simple heuristic: if it looks like a file, return it
if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
return current_url
try:
# Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405
# So we'll just GET with stream=True to peek headers/content without downloading everything
with session.get(current_url, stream=True, timeout=30) as resp:
resp.raise_for_status()
ct = resp.headers.get("Content-Type", "").lower()
if "text/html" not in ct:
# It's a binary file
return current_url
# It's HTML, read content
content = resp.text
except Exception as e:
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
return None
soup = BeautifulSoup(content, "html.parser")
# 1. Check for "GET" link (library.lol / ads.php style)
# Usually <h2>GET</h2> inside <a> or just text "GET"
get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
if not get_link:
# Try finding <a> containing <h2>GET</h2>
h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
if h2_get and h2_get.parent.name == "a":
get_link = h2_get.parent
if get_link and get_link.has_attr("href"):
return urljoin(current_url, get_link["href"])
# 2. Check for "series.php" -> "edition.php"
if "series.php" in current_url:
# Find first edition link
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
if edition_link:
current_url = urljoin(current_url, edition_link["href"])
continue
# 3. Check for "edition.php" -> "file.php"
if "edition.php" in current_url:
file_link = soup.find("a", href=re.compile(r"file\.php"))
if file_link:
current_url = urljoin(current_url, file_link["href"])
continue
# 4. Check for "file.php" -> "ads.php" (Libgen badge)
if "file.php" in current_url:
# Look for link with title="libgen" or text "Libgen"
libgen_link = soup.find("a", title="libgen")
if not libgen_link:
libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
if libgen_link and libgen_link.has_attr("href"):
current_url = urljoin(current_url, libgen_link["href"])
continue
# 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed)
if "ads.php" in current_url:
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
if get_php_link:
return urljoin(current_url, get_php_link["href"])
# 6. Library.lol / generic fallback
for text in ["Cloudflare", "IPFS.io", "Infura"]:
link = soup.find("a", string=re.compile(text, re.IGNORECASE))
if link and link.has_attr("href"):
return urljoin(current_url, link["href"])
# If we found nothing new, stop
break
return None
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
if suffix:
return suffix.lstrip('.')
parsed = urlparse(download_url)
suffix = Path(parsed.path).suffix
if suffix:
return suffix.lstrip('.')
content_type = headers.get('content-type', '').lower()
mime_map = {
'application/pdf': 'pdf',
'application/epub+zip': 'epub',
'application/x-mobipocket-ebook': 'mobi',
'application/x-cbr': 'cbr',
'application/x-cbz': 'cbz',
'application/zip': 'zip',
}
for mime, ext in mime_map.items():
if mime in content_type:
return ext
return None
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
"""Rename the path to match the detected extension, if needed."""
if not extension:
return path
suffix = extension if extension.startswith('.') else f'.{extension}'
if path.suffix.lower() == suffix.lower():
return path
candidate = path.with_suffix(suffix)
base_stem = path.stem
counter = 1
while candidate.exists() and counter < 100:
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
counter += 1
try:
path.replace(candidate)
return candidate
except Exception:
return path
def download_from_mirror(
mirror_url: str,
output_path: Path,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Tuple[bool, Optional[Path]]:
"""Download file from a LibGen mirror URL with optional progress tracking."""
session = session or requests.Session()
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
try:
_call(log_info, f"[download] Resolving download link from: {mirror_url}")
download_url = _resolve_download_url(session, mirror_url, log_info)
if not download_url:
_call(log_error, "[download] Could not find direct download link")
return False, None
_call(log_info, f"[download] Downloading from: {download_url}")
downloaded = 0
total_size = 0
headers: Dict[str, str] = {}
with session.get(download_url, stream=True, timeout=60) as r:
r.raise_for_status()
headers = dict(r.headers)
# Verify it's not HTML (error page)
ct = headers.get("content-type", "").lower()
if "text/html" in ct:
_call(log_error, "[download] Final URL returned HTML, not a file.")
return False, None
total_size = int(headers.get("content-length", 0) or 0)
with open(output_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
downloaded += len(chunk)
if progress_callback:
progress_callback(downloaded, total_size)
final_extension = _guess_filename_extension(download_url, headers)
final_path = _apply_extension(output_path, final_extension)
if progress_callback and total_size > 0:
progress_callback(downloaded, total_size)
_call(log_info, f"[download] Saved to {final_path}")
return True, final_path
except Exception as e:
_call(log_error, f"[download] Download failed: {e}")
return False, None
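
An end-to-end sketch of the two public helpers above. Mirror availability varies, so the output handling is illustrative; the output path and the lambdas are placeholders.

from pathlib import Path
from Provider.libgen_service import search_libgen, download_from_mirror

hits = search_libgen("Morals and Dogma", limit=5, log_info=print, log_error=print)
if hits and hits[0].get("mirror_url"):
    ok, saved_path = download_from_mirror(
        hits[0]["mirror_url"],
        Path("downloads/morals_and_dogma"),  # the detected extension is appended after download
        log_info=print,
        log_error=print,
        progress_callback=lambda done, total: print(f"{done}/{total or '?'} bytes"),
    )
    print("saved to", saved_path if ok else "nothing")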

Provider/matrix.py Normal file

@@ -0,0 +1,94 @@
from __future__ import annotations
import mimetypes
from pathlib import Path
from typing import Any
import requests
from Provider._base import FileProvider
class Matrix(FileProvider):
"""File provider for Matrix (Element) chat rooms."""
def validate(self) -> bool:
if not self.config:
return False
matrix_conf = self.config.get("storage", {}).get("matrix", {})
return bool(
matrix_conf.get("homeserver")
and matrix_conf.get("room_id")
and (matrix_conf.get("access_token") or matrix_conf.get("password"))
)
def upload(self, file_path: str, **kwargs: Any) -> str:
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"File not found: {file_path}")
matrix_conf = self.config.get("storage", {}).get("matrix", {})
homeserver = matrix_conf.get("homeserver")
access_token = matrix_conf.get("access_token")
room_id = matrix_conf.get("room_id")
if not homeserver:
raise Exception("Matrix homeserver missing")
if not access_token:
raise Exception("Matrix access_token missing")
if not room_id:
raise Exception("Matrix room_id missing")
if not homeserver.startswith("http"):
homeserver = f"https://{homeserver}"
# Upload media
upload_url = f"{homeserver}/_matrix/media/v3/upload"
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/octet-stream",
}
mime_type, _ = mimetypes.guess_type(path)
if mime_type:
headers["Content-Type"] = mime_type
filename = path.name
with open(path, "rb") as handle:
resp = requests.post(upload_url, headers=headers, data=handle, params={"filename": filename})
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = resp.json().get("content_uri")
if not content_uri:
raise Exception("No content_uri returned")
# Send message
send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
# Determine message type
msgtype = "m.file"
ext = path.suffix.lower()
audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv"}
image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
if ext in audio_exts:
msgtype = "m.audio"
elif ext in video_exts:
msgtype = "m.video"
elif ext in image_exts:
msgtype = "m.image"
info = {"mimetype": mime_type, "size": path.stat().st_size}
payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
resp = requests.post(send_url, headers=headers, json=payload)
if resp.status_code != 200:
raise Exception(f"Matrix send message failed: {resp.text}")
event_id = resp.json().get("event_id")
return f"https://matrix.to/#/{room_id}/{event_id}"


@@ -0,0 +1,389 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Type
import requests
import sys
from SYS.logger import log, debug
try: # Optional dependency
import musicbrainzngs # type: ignore
except ImportError: # pragma: no cover - optional
musicbrainzngs = None
class MetadataProvider(ABC):
"""Base class for metadata providers (music, movies, books, etc.)."""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
self.config = config or {}
@property
def name(self) -> str:
return self.__class__.__name__.replace("Provider", "").lower()
@abstractmethod
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
"""Return a list of candidate metadata records."""
def to_tags(self, item: Dict[str, Any]) -> List[str]:
"""Convert a result item into a list of tags."""
tags: List[str] = []
title = item.get("title")
artist = item.get("artist")
album = item.get("album")
year = item.get("year")
if title:
tags.append(f"title:{title}")
if artist:
tags.append(f"artist:{artist}")
if album:
tags.append(f"album:{album}")
if year:
tags.append(f"year:{year}")
tags.append(f"source:{self.name}")
return tags
class ITunesProvider(MetadataProvider):
"""Metadata provider using the iTunes Search API."""
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
params = {"term": query, "media": "music", "entity": "song", "limit": limit}
try:
resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
resp.raise_for_status()
results = resp.json().get("results", [])
except Exception as exc:
log(f"iTunes search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for r in results:
item = {
"title": r.get("trackName"),
"artist": r.get("artistName"),
"album": r.get("collectionName"),
"year": str(r.get("releaseDate", ""))[:4],
"provider": self.name,
"raw": r,
}
items.append(item)
debug(f"iTunes returned {len(items)} items for '{query}'")
return items
class OpenLibraryMetadataProvider(MetadataProvider):
"""Metadata provider for OpenLibrary book metadata."""
@property
def name(self) -> str: # type: ignore[override]
return "openlibrary"
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
query_clean = (query or "").strip()
if not query_clean:
return []
try:
# Prefer ISBN-specific search when the query looks like one
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
resp = requests.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": limit},
timeout=10,
)
resp.raise_for_status()
data = resp.json()
except Exception as exc:
log(f"OpenLibrary search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for doc in data.get("docs", [])[:limit]:
authors = doc.get("author_name") or []
publisher = ""
publishers = doc.get("publisher") or []
if isinstance(publishers, list) and publishers:
publisher = publishers[0]
# Prefer 13-digit ISBN when available, otherwise 10-digit
isbn_list = doc.get("isbn") or []
isbn_13 = next((i for i in isbn_list if len(str(i)) == 13), None)
isbn_10 = next((i for i in isbn_list if len(str(i)) == 10), None)
# Derive OLID from key
olid = ""
key = doc.get("key", "")
if isinstance(key, str) and key:
olid = key.split("/")[-1]
items.append({
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": str(doc.get("first_publish_year") or ""),
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": {
"isbn_13": isbn_13,
"isbn_10": isbn_10,
"openlibrary": olid,
"oclc": (doc.get("oclc_numbers") or [None])[0],
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
})
return items
def to_tags(self, item: Dict[str, Any]) -> List[str]:
tags: List[str] = []
title = item.get("title")
authors = item.get("authors") or []
publisher = item.get("publisher")
year = item.get("year")
description = item.get("description") or ""
if title:
tags.append(f"title:{title}")
for author in authors:
if author:
tags.append(f"author:{author}")
if publisher:
tags.append(f"publisher:{publisher}")
if year:
tags.append(f"year:{year}")
if description:
tags.append(f"description:{description[:200]}")
identifiers = item.get("identifiers") or {}
for key, value in identifiers.items():
if value:
tags.append(f"{key}:{value}")
tags.append(f"source:{self.name}")
return tags
class GoogleBooksMetadataProvider(MetadataProvider):
"""Metadata provider for Google Books volumes API."""
@property
def name(self) -> str: # type: ignore[override]
return "googlebooks"
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
query_clean = (query or "").strip()
if not query_clean:
return []
# Prefer ISBN queries when possible
if query_clean.replace("-", "").isdigit() and len(query_clean.replace("-", "")) in (10, 13):
q = f"isbn:{query_clean.replace('-', '')}"
else:
q = query_clean
try:
resp = requests.get(
"https://www.googleapis.com/books/v1/volumes",
params={"q": q, "maxResults": limit},
timeout=10,
)
resp.raise_for_status()
payload = resp.json()
except Exception as exc:
log(f"Google Books search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for volume in payload.get("items", [])[:limit]:
info = volume.get("volumeInfo") or {}
authors = info.get("authors") or []
publisher = info.get("publisher", "")
published_date = info.get("publishedDate", "")
year = str(published_date)[:4] if published_date else ""
identifiers_raw = info.get("industryIdentifiers") or []
identifiers: Dict[str, Optional[str]] = {"googlebooks": volume.get("id")}
for ident in identifiers_raw:
if not isinstance(ident, dict):
continue
ident_type = ident.get("type", "").lower()
ident_value = ident.get("identifier")
if not ident_value:
continue
if ident_type == "isbn_13":
identifiers.setdefault("isbn_13", ident_value)
elif ident_type == "isbn_10":
identifiers.setdefault("isbn_10", ident_value)
else:
identifiers.setdefault(ident_type, ident_value)
items.append({
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": year,
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
})
return items
def to_tags(self, item: Dict[str, Any]) -> List[str]:
tags: List[str] = []
title = item.get("title")
authors = item.get("authors") or []
publisher = item.get("publisher")
year = item.get("year")
description = item.get("description") or ""
if title:
tags.append(f"title:{title}")
for author in authors:
if author:
tags.append(f"author:{author}")
if publisher:
tags.append(f"publisher:{publisher}")
if year:
tags.append(f"year:{year}")
if description:
tags.append(f"description:{description[:200]}")
identifiers = item.get("identifiers") or {}
for key, value in identifiers.items():
if value:
tags.append(f"{key}:{value}")
tags.append(f"source:{self.name}")
return tags
class MusicBrainzMetadataProvider(MetadataProvider):
"""Metadata provider for MusicBrainz recordings."""
@property
def name(self) -> str: # type: ignore[override]
return "musicbrainz"
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
if not musicbrainzngs:
log("musicbrainzngs is not installed; skipping MusicBrainz scrape", file=sys.stderr)
return []
q = (query or "").strip()
if not q:
return []
try:
# Ensure user agent is set (required by MusicBrainz)
musicbrainzngs.set_useragent("Medeia-Macina", "0.1")
except Exception:
pass
try:
resp = musicbrainzngs.search_recordings(query=q, limit=limit)
recordings = resp.get("recording-list") or resp.get("recordings") or []
except Exception as exc:
log(f"MusicBrainz search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for rec in recordings[:limit]:
if not isinstance(rec, dict):
continue
title = rec.get("title") or ""
artist = ""
artist_credit = rec.get("artist-credit") or rec.get("artist_credit")
if isinstance(artist_credit, list) and artist_credit:
first = artist_credit[0]
if isinstance(first, dict):
artist = first.get("name") or first.get("artist", {}).get("name", "")
elif isinstance(first, str):
artist = first
album = ""
release_list = rec.get("release-list") or rec.get("releases") or rec.get("release")
if isinstance(release_list, list) and release_list:
first_rel = release_list[0]
if isinstance(first_rel, dict):
album = first_rel.get("title", "") or ""
release_date = first_rel.get("date") or ""
else:
album = str(first_rel)
release_date = ""
else:
release_date = rec.get("first-release-date") or ""
year = str(release_date)[:4] if release_date else ""
mbid = rec.get("id") or ""
items.append({
"title": title,
"artist": artist,
"album": album,
"year": year,
"provider": self.name,
"mbid": mbid,
"raw": rec,
})
return items
def to_tags(self, item: Dict[str, Any]) -> List[str]:
tags = super().to_tags(item)
mbid = item.get("mbid")
if mbid:
tags.append(f"musicbrainz:{mbid}")
return tags
# Registry ---------------------------------------------------------------
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"itunes": ITunesProvider,
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
}
def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
_METADATA_PROVIDERS[name.lower()] = provider_cls
def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
for name, cls in _METADATA_PROVIDERS.items():
try:
# Constructing the provider successfully is the only lightweight availability check here.
cls(config)
availability[name] = True
except Exception:
availability[name] = False
return availability
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None
try:
return cls(config)
except Exception as exc:
log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
return None
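
A usage sketch of the registry helpers above. The module's import path is not visible in this excerpt, so Provider.metadata below is a placeholder; the query string is likewise illustrative.

from Provider.metadata import get_metadata_provider, list_metadata_providers  # placeholder path

print(list_metadata_providers())  # e.g. {"itunes": True, "openlibrary": True, "musicbrainz": False, ...}

provider = get_metadata_provider("openlibrary")
if provider:
    for item in provider.search("Morals and Dogma", limit=3):
        print(provider.to_tags(item))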

Provider/query_parser.py Normal file

@@ -0,0 +1,159 @@
"""Dynamic query parser for filtering and field extraction.
Supports query syntax like:
- isbn:0557677203
- author:"Albert Pike"
- title:"Morals and Dogma"
- year:2010
- isbn:0557677203 author:"Albert Pike"
- Mixed with free text: "Morals" isbn:0557677203
This allows flexible query strings that can be parsed by any search provider
to extract specific fields for filtering and searching.
"""
from typing import Dict, List, Tuple, Optional, Any
import re
def parse_query(query: str) -> Dict[str, Any]:
"""Parse a query string into field:value pairs and free text.
Args:
query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'
Returns:
Dictionary with:
- 'fields': Dict[field_name, field_value] for structured fields
- 'text': str with remaining free text
- 'raw': str original query
"""
result = {
'fields': {},
'text': '',
'raw': query,
}
if not query or not query.strip():
return result
query = query.strip()
remaining_parts = []
# Pattern to match: field:value or field:"quoted value"
# Matches: word: followed by either quoted string or unquoted word
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
pos = 0
for match in re.finditer(pattern, query):
# Add any text before this match
if match.start() > pos:
before_text = query[pos:match.start()].strip()
if before_text:
remaining_parts.append(before_text)
field_name = match.group(1).lower()
field_value = match.group(2) if match.group(2) is not None else match.group(3)
result['fields'][field_name] = field_value
pos = match.end()
# Add any remaining text after last match
if pos < len(query):
remaining_text = query[pos:].strip()
if remaining_text:
remaining_parts.append(remaining_text)
result['text'] = ' '.join(remaining_parts)
return result
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
"""Get a field value from parsed query, with optional default.
Args:
parsed_query: Result from parse_query()
field_name: Field name to look up (case-insensitive)
default: Default value if field not found
Returns:
Field value or default
"""
return parsed_query.get('fields', {}).get(field_name.lower(), default)
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
"""Check if a field exists in parsed query.
Args:
parsed_query: Result from parse_query()
field_name: Field name to check (case-insensitive)
Returns:
True if field exists
"""
return field_name.lower() in parsed_query.get('fields', {})
def get_free_text(parsed_query: Dict[str, Any]) -> str:
"""Get the free text portion of a parsed query.
Args:
parsed_query: Result from parse_query()
Returns:
Free text or empty string
"""
return parsed_query.get('text', '')
def build_query_for_provider(
parsed_query: Dict[str, Any],
provider: str,
extraction_map: Optional[Dict[str, str]] = None
) -> Tuple[str, Dict[str, str]]:
"""Build a search query and filters dict for a specific provider.
Different providers have different search syntax. This function
extracts the appropriate fields for each provider.
Args:
parsed_query: Result from parse_query()
provider: Provider name ('libgen', 'openlibrary', 'soulseek')
extraction_map: Optional mapping of field names to provider-specific names
e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}
Returns:
Tuple of (search_query: str, extracted_fields: Dict[field, value])
"""
extraction_map = extraction_map or {}
extracted = {}
free_text = get_free_text(parsed_query)
# Extract fields based on map
for field_name, provider_key in extraction_map.items():
if has_field(parsed_query, field_name):
extracted[provider_key] = get_field(parsed_query, field_name)
# If provider-specific extraction needed, providers can implement it
# For now, return the free text as query
return free_text, extracted
if __name__ == '__main__':
# Test cases
test_queries = [
'isbn:0557677203',
'isbn:0557677203 author:"Albert Pike"',
'Morals and Dogma isbn:0557677203',
'title:"Morals and Dogma" author:"Albert Pike" year:2010',
'search term without fields',
'author:"John Smith" title:"A Book"',
]
for query in test_queries:
print(f"\nQuery: {query}")
parsed = parse_query(query)
print(f" Fields: {parsed['fields']}")
print(f" Text: {parsed['text']}")

Provider/registry.py Normal file

@@ -0,0 +1,110 @@
"""Provider registry.
Concrete provider implementations live in the `Provider/` package.
This module is the single source of truth for provider discovery.
"""
from __future__ import annotations
from typing import Any, Dict, Optional, Type
import sys
from SYS.logger import log
from Provider._base import FileProvider, SearchProvider, SearchResult
from Provider.bandcamp import Bandcamp
from Provider.libgen import Libgen
from Provider.matrix import Matrix
from Provider.soulseek import Soulseek, download_soulseek_file
from Provider.youtube import YouTube
from Provider.zeroxzero import ZeroXZero
_SEARCH_PROVIDERS: Dict[str, Type[SearchProvider]] = {
"libgen": Libgen,
"soulseek": Soulseek,
"bandcamp": Bandcamp,
"youtube": YouTube,
}
def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
"""Get a search provider by name."""
provider_class = _SEARCH_PROVIDERS.get((name or "").lower())
if provider_class is None:
log(f"[provider] Unknown search provider: {name}", file=sys.stderr)
return None
try:
provider = provider_class(config)
if not provider.validate():
log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
return None
return provider
except Exception as exc:
log(f"[provider] Error initializing '{name}': {exc}", file=sys.stderr)
return None
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
"""List all search providers and their availability."""
availability: Dict[str, bool] = {}
for name, provider_class in _SEARCH_PROVIDERS.items():
try:
provider = provider_class(config)
availability[name] = provider.validate()
except Exception:
availability[name] = False
return availability
_FILE_PROVIDERS: Dict[str, Type[FileProvider]] = {
"0x0": ZeroXZero,
"matrix": Matrix,
}
def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
"""Get a file provider by name."""
provider_class = _FILE_PROVIDERS.get((name or "").lower())
if provider_class is None:
log(f"[provider] Unknown file provider: {name}", file=sys.stderr)
return None
try:
provider = provider_class(config)
if not provider.validate():
log(f"[provider] File provider '{name}' is not available", file=sys.stderr)
return None
return provider
except Exception as exc:
log(f"[provider] Error initializing file provider '{name}': {exc}", file=sys.stderr)
return None
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
"""List all file providers and their availability."""
availability: Dict[str, bool] = {}
for name, provider_class in _FILE_PROVIDERS.items():
try:
provider = provider_class(config)
availability[name] = provider.validate()
except Exception:
availability[name] = False
return availability
__all__ = [
"SearchResult",
"SearchProvider",
"FileProvider",
"get_search_provider",
"list_search_providers",
"get_file_provider",
"list_file_providers",
"download_soulseek_file",
]
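
A sketch of the registry as the single discovery point. Which providers report as available depends on optional dependencies and configuration; the provider names used below all appear in the tables above.

from Provider.registry import get_search_provider, get_file_provider, list_search_providers

print(list_search_providers())  # e.g. {"libgen": True, "soulseek": False, "bandcamp": True, "youtube": True}

provider = get_search_provider("libgen")
if provider:
    results = provider.search("Morals and Dogma", limit=5)
    print(len(results), "results")

uploader = get_file_provider("0x0")  # returns None if the provider fails validation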

Provider/soulseek.py Normal file

@@ -0,0 +1,380 @@
from __future__ import annotations
import asyncio
import re
import sys
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from SYS.logger import log, debug
class Soulseek(SearchProvider):
"""Search provider for Soulseek P2P network."""
MUSIC_EXTENSIONS = {
".flac",
".mp3",
".m4a",
".aac",
".ogg",
".opus",
".wav",
".alac",
".wma",
".ape",
".aiff",
".dsf",
".dff",
".wv",
".tta",
".tak",
".ac3",
".dts",
}
# NOTE: These defaults preserve existing behavior.
USERNAME = "asjhkjljhkjfdsd334"
PASSWORD = "khhhg"
DOWNLOAD_DIR = "./downloads"
MAX_WAIT_TRANSFER = 1200
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
"""Download file from Soulseek."""
try:
full_metadata = result.full_metadata or {}
username = full_metadata.get("username")
filename = full_metadata.get("filename") or result.path
if not username or not filename:
log(f"[soulseek] Missing metadata for download: {result.title}", file=sys.stderr)
return None
# This cmdlet stack is synchronous; use asyncio.run for clarity.
return asyncio.run(
download_soulseek_file(
username=username,
filename=filename,
output_dir=output_dir,
timeout=self.MAX_WAIT_TRANSFER,
)
)
except RuntimeError:
# If we're already inside an event loop (e.g., TUI), fall back to a
# dedicated loop in this thread.
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
return loop.run_until_complete(
download_soulseek_file(
username=username,
filename=filename,
output_dir=output_dir,
timeout=self.MAX_WAIT_TRANSFER,
)
)
finally:
try:
loop.close()
except Exception:
pass
except Exception as exc:
log(f"[soulseek] Download error: {exc}", file=sys.stderr)
return None
async def perform_search(self, query: str, timeout: float = 9.0, limit: int = 50) -> List[Dict[str, Any]]:
"""Perform async Soulseek search."""
import os
from aioslsk.client import SoulSeekClient
from aioslsk.settings import CredentialsSettings, Settings
os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD))
client = SoulSeekClient(settings)
try:
await client.start()
await client.login()
except Exception as exc:
log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr)
return []
try:
search_request = await client.searches.search(query)
await self._collect_results(search_request, timeout=timeout)
return self._flatten_results(search_request)[:limit]
except Exception as exc:
log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr)
return []
finally:
try:
await client.stop()
except Exception:
pass
def _flatten_results(self, search_request: Any) -> List[dict]:
flat: List[dict] = []
for result in getattr(search_request, "results", []):
username = getattr(result, "username", "?")
for file_data in getattr(result, "shared_items", []):
flat.append(
{
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0),
}
)
for file_data in getattr(result, "locked_results", []):
flat.append(
{
"file": file_data,
"username": username,
"filename": getattr(file_data, "filename", "?"),
"size": getattr(file_data, "filesize", 0),
}
)
return flat
async def _collect_results(self, search_request: Any, timeout: float = 75.0) -> None:
end = time.time() + timeout
last_count = 0
while time.time() < end:
current_count = len(getattr(search_request, "results", []))
if current_count > last_count:
debug(f"[soulseek] Got {current_count} result(s)...")
last_count = current_count
await asyncio.sleep(0.5)
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
try:
flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit))
if not flat_results:
return []
music_results: List[dict] = []
for item in flat_results:
filename = item["filename"]
ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else ""
if ext in self.MUSIC_EXTENSIONS:
music_results.append(item)
if not music_results:
return []
enriched_results: List[dict] = []
for item in music_results:
filename = item["filename"]
ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else ""
display_name = filename.replace("\\", "/").split("/")[-1]
path_parts = filename.replace("\\", "/").split("/")
artist = path_parts[-3] if len(path_parts) >= 3 else ""
album = path_parts[-2] if len(path_parts) >= 2 else ""
base_name = display_name.rsplit(".", 1)[0] if "." in display_name else display_name
track_num = ""
title = base_name
filename_artist = ""
match = re.match(r"^(\d{1,3})\s*[\.\-]?\s+(.+)$", base_name)
if match:
track_num = match.group(1)
rest = match.group(2)
if " - " in rest:
filename_artist, title = rest.split(" - ", 1)
else:
title = rest
if filename_artist:
artist = filename_artist
enriched_results.append(
{
**item,
"artist": artist,
"album": album,
"title": title,
"track_num": track_num,
"ext": ext,
}
)
if filters:
artist_filter = (filters.get("artist", "") or "").lower()
album_filter = (filters.get("album", "") or "").lower()
track_filter = (filters.get("track", "") or "").lower()
if artist_filter or album_filter or track_filter:
filtered: List[dict] = []
for item in enriched_results:
if artist_filter and artist_filter not in item["artist"].lower():
continue
if album_filter and album_filter not in item["album"].lower():
continue
if track_filter and track_filter not in item["title"].lower():
continue
filtered.append(item)
enriched_results = filtered
enriched_results.sort(key=lambda item: (item["ext"].lower() != ".flac", -item["size"]))
results: List[SearchResult] = []
for item in enriched_results:
artist_display = item["artist"] if item["artist"] else "(no artist)"
album_display = item["album"] if item["album"] else "(no album)"
size_mb = int(item["size"] / 1024 / 1024)
columns = [
("Track", item["track_num"] or "?"),
("Title", item["title"][:40]),
("Artist", artist_display[:32]),
("Album", album_display[:32]),
("Size", f"{size_mb} MB"),
]
results.append(
SearchResult(
table="soulseek",
title=item["title"],
path=item["filename"],
detail=f"{artist_display} - {album_display}",
annotations=[f"{size_mb} MB", item["ext"].lstrip(".").upper()],
media_kind="audio",
size_bytes=item["size"],
columns=columns,
full_metadata={
"username": item["username"],
"filename": item["filename"],
"artist": item["artist"],
"album": item["album"],
"track_num": item["track_num"],
"ext": item["ext"],
},
)
)
return results
except Exception as exc:
log(f"[soulseek] Search error: {exc}", file=sys.stderr)
return []
def validate(self) -> bool:
try:
from aioslsk.client import SoulSeekClient # noqa: F401
return True
except ImportError:
return False
async def download_soulseek_file(
username: str,
filename: str,
output_dir: Path = Path("./downloads"),
timeout: int = 1200,
) -> Optional[Path]:
"""Download a file from a Soulseek peer."""
try:
from aioslsk.client import SoulSeekClient
from aioslsk.settings import CredentialsSettings, Settings
from aioslsk.transfer.model import Transfer, TransferDirection
from aioslsk.transfer.state import TransferState
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
local_filename = filename.replace("\\", "/").split("/")[-1]
output_user_dir = output_dir / username
output_user_dir.mkdir(parents=True, exist_ok=True)
output_path = (output_user_dir / local_filename)
if output_path.exists():
base = output_path.stem
ext = output_path.suffix
counter = 1
while output_path.exists():
output_path = output_user_dir / f"{base}_{counter}{ext}"
counter += 1
output_path = output_path.resolve()
settings = Settings(credentials=CredentialsSettings(username=Soulseek.USERNAME, password=Soulseek.PASSWORD))
client = SoulSeekClient(settings)
try:
await client.start()
await client.login()
debug(f"[soulseek] Logged in as {Soulseek.USERNAME}")
debug(f"[soulseek] Requesting download from {username}: {filename}")
transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD))
transfer.local_path = str(output_path)
await client.transfers.queue(transfer)
start_time = time.time()
last_log_time = 0.0
while not transfer.is_finalized():
if time.time() - start_time > timeout:
log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr)
return None
if time.time() - last_log_time >= 5.0 and transfer.bytes_transfered > 0:
progress = (transfer.bytes_transfered / transfer.filesize * 100) if transfer.filesize else 0
debug(
f"[soulseek] Progress: {progress:.1f}% "
f"({transfer.bytes_transfered}/{transfer.filesize})"
)
last_log_time = time.time()
await asyncio.sleep(1)
if transfer.state.VALUE == TransferState.COMPLETE and transfer.local_path:
downloaded_path = Path(transfer.local_path)
if downloaded_path.exists():
debug(f"[soulseek] Download complete: {downloaded_path}")
return downloaded_path
log(f"[soulseek] Transfer completed but file missing: {downloaded_path}", file=sys.stderr)
return None
log(
f"[soulseek] Download failed: state={transfer.state.VALUE} "
f"bytes={transfer.bytes_transfered}/{transfer.filesize}",
file=sys.stderr,
)
return None
finally:
try:
await client.stop()
except Exception:
pass
except ImportError:
log("[soulseek] aioslsk not installed. Install with: pip install aioslsk", file=sys.stderr)
return None
except Exception as exc:
log(f"[soulseek] Download failed: {type(exc).__name__}: {exc}", file=sys.stderr)
return None
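
A usage sketch, assuming aioslsk is installed and the hard-coded default credentials above are acceptable. search() blocks for roughly the 9-second collection window, and download() can wait up to MAX_WAIT_TRANSFER seconds for the peer.

from pathlib import Path
from Provider.soulseek import Soulseek

slsk = Soulseek()
if slsk.validate():  # True only when aioslsk is importable
    results = slsk.search("boards of canada", filters={"album": "geogaddi"})
    if results:
        saved = slsk.download(results[0], Path("./downloads"))
        print("saved:", saved)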

Provider/unified_book_downloader.py Normal file

@@ -0,0 +1,707 @@
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
This module provides a single interface for downloading books from multiple sources:
1. Try Archive.org direct download (if available)
2. Try Archive.org borrowing (if user has credentials)
3. Fallback to Libgen search by ISBN
4. Attempt Libgen download
All sources integrated with proper metadata scraping and error handling.
"""
import logging
import asyncio
import requests
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
from pathlib import Path
from SYS.logger import debug
logger = logging.getLogger(__name__)
class UnifiedBookDownloader:
"""Unified interface for downloading books from multiple sources."""
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
"""Initialize the unified book downloader.
Args:
config: Configuration dict with credentials
output_dir: Default output directory
"""
self.config = config or {}
self.output_dir = output_dir
self.session = requests.Session()
# Import download functions from their modules
self._init_downloaders()
def _init_downloaders(self) -> None:
"""Initialize downloader functions from their modules."""
try:
from API.archive_client import (
check_direct_download,
get_openlibrary_by_isbn,
loan
)
self.check_direct_download = check_direct_download
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
self.loan_func = loan
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
self.check_direct_download = None
self.get_openlibrary_by_isbn = None
self.loan_func = None
try:
from Provider.libgen_service import (
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
download_from_mirror as _libgen_download,
search_libgen as _libgen_search,
)
def _log_info(message: str) -> None:
debug(f"[UnifiedBookDownloader] {message}")
def _log_error(message: str) -> None:
logger.error(f"[UnifiedBookDownloader] {message}")
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
query,
limit=limit,
log_info=_log_info,
log_error=_log_error,
)
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
mirror_url,
output_path,
log_info=_log_info,
log_error=_log_error,
)
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
self.search_libgen = None
self.download_from_mirror = None
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get all available download options for a book.
Checks in priority order:
1. Archive.org direct download (public domain)
2. Archive.org borrowing (if credentials available and book is borrowable)
3. Libgen fallback (by ISBN)
Args:
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
Returns:
Dict with available download methods and metadata
"""
options = {
'book_title': book_data.get('title', 'Unknown'),
'book_author': book_data.get('author', 'Unknown'),
'isbn': book_data.get('isbn', ''),
'openlibrary_id': book_data.get('openlibrary_id', ''),
'methods': [], # Will be sorted by priority
'metadata': {}
}
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
ol_id = book_data.get('openlibrary_id', '')
book_id = None
if ol_id.startswith('OL') and len(ol_id) > 2:
# Remove 'OL' prefix (keep everything after it including the suffix letter)
# The book_id is all digits after 'OL'
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
# PRIORITY 1: Check direct download (fastest, no auth needed)
if self.check_direct_download:
try:
can_download, pdf_url = self.check_direct_download(book_id)
if can_download:
options['methods'].append({
'type': 'archive.org_direct',
'label': 'Archive.org Direct Download',
'requires_auth': False,
'pdf_url': pdf_url,
'book_id': book_id,
'priority': 1 # Highest priority
})
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
# First verify the book is actually lendable via OpenLibrary API
if self._has_archive_credentials():
is_lendable, status = self._check_book_lendable_status(ol_id)
if is_lendable:
options['methods'].append({
'type': 'archive.org_borrow',
'label': 'Archive.org Borrow',
'requires_auth': True,
'book_id': book_id,
'priority': 2 # Second priority
})
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
else:
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
isbn = book_data.get('isbn', '')
title = book_data.get('title', '')
author = book_data.get('author', '')
if self.search_libgen:
# Can use Libgen if we have ISBN OR title (or both)
if isbn or title:
options['methods'].append({
'type': 'libgen',
'label': 'Libgen Search & Download',
'requires_auth': False,
'isbn': isbn,
'title': title,
'author': author,
'priority': 3 # Third priority (fallback)
})
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
# Sort by priority (higher priority first)
options['methods'].sort(key=lambda x: x.get('priority', 999))
return options
def _has_archive_credentials(self) -> bool:
"""Check if Archive.org credentials are available."""
try:
from API.archive_client import credential_openlibrary
email, password = credential_openlibrary(self.config)
return bool(email and password)
except Exception:
return False
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
"""Check if a book is lendable via OpenLibrary API.
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
Args:
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
Returns:
Tuple of (is_lendable: bool, status_reason: Optional[str])
"""
try:
if not ol_id.startswith('OL'):
return False, "Invalid OpenLibrary ID format"
# If this is a Work ID (ends with W), we can't query Volumes API
# Work IDs are abstract umbrella records, not specific editions
if ol_id.endswith('W'):
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
return False, "Work ID not supported by Volumes API (not a specific edition)"
# If it ends with M, it's an Edition ID - proceed with query
if not ol_id.endswith('M'):
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
return False, "Invalid OpenLibrary ID type"
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
response = self.session.get(url, timeout=10)
response.raise_for_status()
data = response.json()
# Empty response means no records found
if not data:
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
return False, "No availability data found"
# The response is wrapped in OLID key
olid_key = f"OLID:{ol_id}"
if olid_key not in data:
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
return False, "No availability data found"
olid_data = data[olid_key]
# Check items array for lendable status
items = olid_data.get('items') or []
if items:
# Check the first item for lending status
first_item = items[0]
# Handle both dict and string representations (PowerShell converts to string)
if isinstance(first_item, dict):
status = first_item.get('status', '')
else:
# String representation - check if 'lendable' is in it
status = str(first_item).lower()
is_lendable = 'lendable' in str(status).lower()
if is_lendable:
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
return True, "LENDABLE"
else:
status_str = str(status) if status else 'NOT_LENDABLE'  # status is already a string at this point
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
return False, status_str
else:
# No items array or empty
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
return False, "Not available for lending"
except requests.exceptions.Timeout:
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
return False, "API timeout"
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
return False, f"API error: {e}"
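# Abridged example of the Volumes API payload the method above expects -- an
# assumption inferred from the parsing logic here, not a verbatim API capture:
#   { "OLID:OL8513721M": { "items": [ { "status": "lendable" } ] } }
# An empty body, a missing "OLID:..." key, or an empty "items" list all map
# to a (False, <reason>) return value.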
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
"""Download a book using the specified method.
Args:
method: Download method dict from get_download_options()
output_dir: Directory to save the book
Returns:
Tuple of (success: bool, message: str)
"""
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
method_type = method.get('type', '')
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
try:
if method_type == 'archive.org_direct':
return await self._download_archive_direct(method, output_dir)
elif method_type == 'archive.org_borrow':
return await self._download_archive_borrow(method, output_dir)
elif method_type == 'libgen':
return await self._download_libgen(method, output_dir)
else:
return False, f"Unknown download method: {method_type}"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
return False, f"Download failed: {str(e)}"
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download directly from Archive.org."""
try:
pdf_url = method.get('pdf_url', '')
book_id = method.get('book_id', '')
if not pdf_url:
return False, "No PDF URL available"
# Determine output filename
filename = f"{book_id}.pdf"
output_path = Path(output_dir) / filename
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
# Download in a thread to avoid blocking
loop = asyncio.get_event_loop()
success = await loop.run_in_executor(
None,
self._download_file,
pdf_url,
str(output_path)
)
if success:
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
return True, f"Downloaded to: {output_path}"
else:
return False, "Failed to download PDF"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
return False, f"Archive download failed: {str(e)}"
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download via Archive.org borrowing (requires credentials).
Process (follows archive_client.py pattern):
1. Login to Archive.org with credentials
2. Call loan endpoint to borrow the book (14-day loan)
3. Get book info (page links, metadata)
4. Download all pages as images
5. Merge images into PDF
The loan function from archive_client.py handles:
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
- Creating borrow token for access
- Handling borrow failures
get_book_infos() extracts page links from the borrowed book viewer
download() downloads all pages using thread pool
img2pdf merges pages into searchable PDF
"""
try:
from API.archive_client import credential_openlibrary
book_id = method.get('book_id', '')
# Get credentials
email, password = credential_openlibrary(self.config)
if not email or not password:
return False, "Archive.org credentials not configured"
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
# Login and borrow (in thread, following download_book.py pattern)
loop = asyncio.get_event_loop()
borrow_result = await loop.run_in_executor(
None,
self._archive_borrow_and_download,
email,
password,
book_id,
output_dir
)
if borrow_result and isinstance(borrow_result, tuple):
success, filepath = borrow_result
if success:
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
return True, filepath
else:
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
return False, filepath
else:
return False, "Failed to borrow book from Archive.org"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
return False, f"Archive borrow failed: {str(e)}"
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download via Libgen search and download with mirror fallback."""
try:
isbn = method.get('isbn', '')
title = method.get('title', '')
if not isbn and not title:
return False, "Need ISBN or title for Libgen search"
if not self.search_libgen:
return False, "Libgen searcher not available"
# Define wrapper functions to safely call the methods
search_func = self.search_libgen
if search_func is None:
return False, "Search function not available"
preloaded_results = method.get('results')
loop = asyncio.get_event_loop()
if preloaded_results:
results = list(preloaded_results)
if not results:
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
else:
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
if not results:
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
return False, f"No Libgen results found for: {isbn or title}"
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
# Determine output filename (use first result for naming)
first_result = results[0]
filename = f"{first_result.get('title', 'book')}"
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
# Try each result's mirror until one succeeds
for idx, result in enumerate(results, 1):
mirror_url = result.get('mirror_url', '')
if not mirror_url:
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
continue
# Use extension from this result if available
extension = result.get('extension', 'pdf')
if extension and not extension.startswith('.'):
extension = f".{extension}"
elif not extension:
extension = '.pdf'
output_path = Path(output_dir) / (filename + extension)
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
download_func = self.download_from_mirror
if download_func is None:
return False, "Download function not available"
download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
def download_wrapper():
return download_callable(mirror_url, str(output_path))
# Download (in thread)
try:
success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
if success:
dest_path = Path(downloaded_path) if downloaded_path else output_path
# Validate downloaded file is not HTML (common Libgen issue)
if dest_path.exists():
try:
with open(dest_path, 'rb') as f:
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
if '<!doctype' in file_start or '<html' in file_start:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
dest_path.unlink() # Delete the HTML file
continue
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
return True, str(dest_path)
else:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
continue
return False, f"All {len(results)} mirrors failed"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
return False, f"Libgen download failed: {str(e)}"
async def download_libgen_selection(
self,
selected: Dict[str, Any],
remaining: Optional[List[Dict[str, Any]]] = None,
output_dir: Optional[str] = None,
) -> Tuple[bool, str]:
"""Download a specific Libgen result with optional fallbacks."""
if not isinstance(selected, dict):
return False, "Selected result must be a dictionary"
ordered_results: List[Dict[str, Any]] = [selected]
if remaining:
for item in remaining:
if isinstance(item, dict) and item is not selected:
ordered_results.append(item)
method: Dict[str, Any] = {
'type': 'libgen',
'isbn': selected.get('isbn', '') or '',
'title': selected.get('title', '') or '',
'author': selected.get('author', '') or '',
'results': ordered_results,
}
return await self.download_book(method, output_dir)
def download_libgen_selection_sync(
self,
selected: Dict[str, Any],
remaining: Optional[List[Dict[str, Any]]] = None,
output_dir: Optional[str] = None,
) -> Tuple[bool, str]:
"""Synchronous helper for downloading a Libgen selection."""
async def _run() -> Tuple[bool, str]:
return await self.download_libgen_selection(selected, remaining, output_dir)
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
return loop.run_until_complete(_run())
finally:
loop.close()
asyncio.set_event_loop(None)
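# Illustrative call of the synchronous helper above (the `downloader` instance
# and `results` list are placeholders supplied by the caller):
#   ok, message = downloader.download_libgen_selection_sync(
#       selected=results[0], remaining=results[1:], output_dir="/tmp/books")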
def _download_file(self, url: str, output_path: str) -> bool:
"""Download a file from URL."""
try:
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
with open(output_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
return True
except Exception as e:
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
return False
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
"""Borrow a book from Archive.org and download pages as PDF.
This follows the exact process from archive_client.py:
1. Login with credentials
2. Call loan() to create 14-day borrow
3. Get book info (extract page URLs)
4. Download all pages as images
5. Merge images into searchable PDF
Returns tuple of (success: bool, filepath/message: str)
"""
try:
from API.archive_client import login, loan, get_book_infos, download
import tempfile
import shutil
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
session = login(email, password)
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
# Call loan to create the 14-day borrow
session = loan(session, book_id, verbose=True)
# If we get here, borrowing succeeded
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
# Now get the book info (page URLs and metadata)
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
# Try both URL formats: with /borrow and without
book_urls = [
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
f"https://archive.org/details/{book_id}" # Fallback to details page
]
title = None
links = None
metadata = None
last_error = None
for book_url in book_urls:
try:
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
response = session.get(book_url, timeout=10)
# Log response status
if response.status_code != 200:
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
# Continue to try next URL
continue
# Try to parse the response
title, links, metadata = get_book_infos(session, book_url)
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
break
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
last_error = e
continue
if links is None:
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}")
# Borrow extraction failed - return False
return False, "Could not extract borrowed book pages"
# Create temporary directory for images
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
try:
# Download all pages (uses thread pool)
images = download(
session=session,
n_threads=10,
directory=temp_dir,
links=links,
scale=3, # Default resolution
book_id=book_id
)
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
# Try to merge pages into PDF
try:
import img2pdf
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
# Prepare PDF metadata
pdfmeta = {}
if metadata:
if "title" in metadata:
pdfmeta["title"] = metadata["title"]
if "creator" in metadata:
pdfmeta["author"] = metadata["creator"]
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
pdfmeta["creationdate"] = None # Avoid timezone issues
# Convert images to PDF
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
if not pdf_content:
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
return False, "Failed to convert pages to PDF"
# Save the PDF
pdf_filename = f"{title}.pdf" if title else "book.pdf"
pdf_path = Path(output_dir) / pdf_filename
# Handle duplicate filenames
i = 1
while pdf_path.exists():
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
i += 1
with open(pdf_path, 'wb') as f:
f.write(pdf_content)
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
return True, str(pdf_path)
except ImportError:
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
# Create JPG collection directory
if not title:
title = f"book_{book_id}"
jpg_dir = Path(output_dir) / title
i = 1
while jpg_dir.exists():
jpg_dir = Path(output_dir) / f"{title}({i})"
i += 1
# Move temporary directory to final location
shutil.move(temp_dir, str(jpg_dir))
temp_dir = None # Mark as already moved
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
return True, str(jpg_dir)
finally:
# Clean up temporary directory if it still exists
if temp_dir and Path(temp_dir).exists():
shutil.rmtree(temp_dir)
except SystemExit:
# loan() function calls sys.exit on failure - catch it
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
return False, "Book could not be borrowed (may not be available for borrowing)"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
return False, f"Borrow failed: {str(e)}"
def close(self) -> None:
"""Close the session."""
self.session.close()
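# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal, hedged example of driving the downloader defined above. The class
# name UnifiedBookDownloader and the no-argument constructor are assumptions
# taken from the log prefixes in this module, not confirmed elsewhere.
if __name__ == "__main__":  # pragma: no cover - manual smoke test only
    async def _demo() -> None:
        downloader = UnifiedBookDownloader()  # assumed constructor
        book = {
            "title": "Example Book",
            "author": "Example Author",
            "isbn": "9780000000000",          # placeholder ISBN
            "openlibrary_id": "OL8513721M",   # Edition ID (OL...M), per the docstrings above
        }
        options = downloader.get_download_options(book)
        if options["methods"]:
            ok, message = await downloader.download_book(options["methods"][0])
            print(ok, message)
        downloader.close()

    asyncio.run(_demo())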

94
Provider/youtube.py Normal file
View File

@@ -0,0 +1,94 @@
from __future__ import annotations
import json
import shutil
import subprocess
import sys
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from SYS.logger import log
class YouTube(SearchProvider):
"""Search provider for YouTube using yt-dlp."""
def search(
self,
query: str,
limit: int = 10,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
ytdlp_path = shutil.which("yt-dlp")
if not ytdlp_path:
log("[youtube] yt-dlp not found in PATH", file=sys.stderr)
return []
search_query = f"ytsearch{limit}:{query}"
cmd = [ytdlp_path, "--dump-json", "--flat-playlist", "--no-warnings", search_query]
try:
process = subprocess.run(
cmd,
capture_output=True,
text=True,
encoding="utf-8",
errors="replace",
)
if process.returncode != 0:
log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr)
return []
results: List[SearchResult] = []
for line in process.stdout.splitlines():
if not line.strip():
continue
try:
video_data = json.loads(line)
except json.JSONDecodeError:
continue
title = video_data.get("title", "Unknown")
video_id = video_data.get("id", "")
url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}"
uploader = video_data.get("uploader", "Unknown")
duration = video_data.get("duration", 0)
view_count = video_data.get("view_count", 0)
duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else ""
views_str = f"{view_count:,}" if view_count else ""
results.append(
SearchResult(
table="youtube",
title=title,
path=url,
detail=f"By: {uploader}",
annotations=[a for a in (duration_str, f"{views_str} views" if views_str else "") if a],
media_kind="video",
columns=[
("Title", title),
("Uploader", uploader),
("Duration", duration_str),
("Views", views_str),
],
full_metadata={
"video_id": video_id,
"uploader": uploader,
"duration": duration,
"view_count": view_count,
},
)
)
return results
except Exception as exc:
log(f"[youtube] Error: {exc}", file=sys.stderr)
return []
def validate(self) -> bool:
return shutil.which("yt-dlp") is not None
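# --- Usage sketch (illustrative only) ---------------------------------------
# Runs a small search through the provider above; requires yt-dlp on PATH.
# The query string is a placeholder.
if __name__ == "__main__":  # pragma: no cover - manual smoke test only
    provider = YouTube()
    if provider.validate():
        for item in provider.search("public domain documentary", limit=3):
            print(item.title, "->", item.path)
    else:
        print("yt-dlp not found; install it to use this provider")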

36
Provider/zeroxzero.py Normal file
View File

@@ -0,0 +1,36 @@
from __future__ import annotations
import os
import sys
from typing import Any
from Provider._base import FileProvider
from SYS.logger import log
class ZeroXZero(FileProvider):
"""File provider for 0x0.st."""
def upload(self, file_path: str, **kwargs: Any) -> str:
from API.HTTP import HTTPClient
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
headers = {"User-Agent": "Medeia-Macina/1.0"}
with HTTPClient(headers=headers) as client:
with open(file_path, "rb") as handle:
response = client.post("https://0x0.st", files={"file": handle})
if response.status_code == 200:
return response.text.strip()
raise Exception(f"Upload failed: {response.status_code} - {response.text}")
except Exception as exc:
log(f"[0x0] Upload error: {exc}", file=sys.stderr)
raise
def validate(self) -> bool:
return True
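# --- Usage sketch (illustrative only) ---------------------------------------
# Uploads a local file to 0x0.st via the provider above and prints the URL it
# returns. The path "example.txt" is a placeholder.
if __name__ == "__main__":  # pragma: no cover - manual smoke test only
    uploader = ZeroXZero()
    try:
        print(uploader.upload("example.txt"))
    except Exception as exc:
        print(f"Upload failed: {exc}")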