"""Provider interfaces for search and file upload functionality. This module defines two distinct provider types: 1. SearchProvider: For searching content (books, music, videos, games) 2. FileProvider: For uploading files to hosting services No legacy code or backwards compatibility - clean, single source of truth. """ from __future__ import annotations from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, Tuple from dataclasses import dataclass, field from pathlib import Path import sys import os import json import re import time import asyncio import subprocess import shutil import mimetypes import traceback import requests from helper.logger import log, debug # Optional dependencies try: from playwright.sync_api import sync_playwright PLAYWRIGHT_AVAILABLE = True except ImportError: PLAYWRIGHT_AVAILABLE = False # ============================================================================ # SEARCH PROVIDERS # ============================================================================ @dataclass class SearchResult: """Unified search result format across all search providers.""" origin: str # Provider name: "libgen", "soulseek", "debrid", "bandcamp", etc. title: str # Display title/filename path: str # Download target (URL, path, magnet, identifier) detail: str = "" # Additional description annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"] media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet" size_bytes: Optional[int] = None tags: set[str] = field(default_factory=set) # Searchable tags columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for pipeline processing.""" return { "origin": self.origin, "title": self.title, "path": self.path, "detail": self.detail, "annotations": self.annotations, "media_kind": self.media_kind, "size_bytes": self.size_bytes, "tags": list(self.tags), "columns": list(self.columns), "full_metadata": self.full_metadata, } class SearchProvider(ABC): """Base class for search providers.""" def __init__(self, config: Dict[str, Any] = None): self.config = config or {} self.name = self.__class__.__name__.lower() @abstractmethod def search( self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs ) -> List[SearchResult]: """Search for items matching the query. Args: query: Search query string limit: Maximum results to return filters: Optional filtering criteria **kwargs: Provider-specific arguments Returns: List of SearchResult objects """ pass def validate(self) -> bool: """Check if provider is available and properly configured.""" return True class Libgen(SearchProvider): """Search provider for Library Genesis books.""" def search( self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs ) -> List[SearchResult]: filters = filters or {} try: from helper.unified_book_downloader import UnifiedBookDownloader from helper.query_parser import parse_query, get_field, get_free_text parsed = parse_query(query) isbn = get_field(parsed, 'isbn') author = get_field(parsed, 'author') title = get_field(parsed, 'title') free_text = get_free_text(parsed) search_query = isbn or title or author or free_text or query downloader = UnifiedBookDownloader(config=self.config) books = downloader.search_libgen(search_query, limit=limit) results = [] for idx, book in enumerate(books, 1): title = book.get("title", "Unknown") author = book.get("author", "Unknown") year = book.get("year", "Unknown") pages = book.get("pages") or book.get("pages_str") or "" extension = book.get("extension", "") or book.get("ext", "") filesize = book.get("filesize_str", "Unknown") isbn = book.get("isbn", "") mirror_url = book.get("mirror_url", "") columns = [ ("Title", title), ("Author", author), ("Pages", str(pages)), ("Ext", str(extension)), ] detail = f"By: {author}" if year and year != "Unknown": detail += f" ({year})" annotations = [f"{filesize}"] if isbn: annotations.append(f"ISBN: {isbn}") results.append(SearchResult( origin="libgen", title=title, path=mirror_url or f"libgen:{book.get('id', '')}", detail=detail, annotations=annotations, media_kind="book", columns=columns, full_metadata={ "number": idx, "author": author, "year": year, "isbn": isbn, "filesize": filesize, "pages": pages, "extension": extension, "book_id": book.get("book_id", ""), "md5": book.get("md5", ""), }, )) return results except Exception as e: log(f"[libgen] Search error: {e}", file=sys.stderr) return [] def validate(self) -> bool: try: from helper.unified_book_downloader import UnifiedBookDownloader return True except Exception: return False class Soulseek(SearchProvider): """Search provider for Soulseek P2P network.""" MUSIC_EXTENSIONS = { '.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus', '.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf', '.dff', '.wv', '.tta', '.tak', '.ac3', '.dts' } USERNAME = "asjhkjljhkjfdsd334" PASSWORD = "khhhg" DOWNLOAD_DIR = "./downloads" MAX_WAIT_TRANSFER = 1200 async def perform_search( self, query: str, timeout: float = 9.0, limit: int = 50 ) -> List[Dict[str, Any]]: """Perform async Soulseek search.""" import os from aioslsk.client import SoulSeekClient from aioslsk.settings import Settings, CredentialsSettings os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) client = SoulSeekClient(settings) try: await client.start() await client.login() except Exception as e: log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr) return [] try: search_request = await client.searches.search(query) await self._collect_results(client, search_request, timeout=timeout) return self._flatten_results(search_request)[:limit] except Exception as e: log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr) return [] finally: try: await client.stop() except Exception: pass def _flatten_results(self, search_request) -> List[dict]: flat = [] for result in search_request.results: username = getattr(result, "username", "?") for file_data in getattr(result, "shared_items", []): flat.append({ "file": file_data, "username": username, "filename": getattr(file_data, "filename", "?"), "size": getattr(file_data, "filesize", 0), }) for file_data in getattr(result, "locked_results", []): flat.append({ "file": file_data, "username": username, "filename": getattr(file_data, "filename", "?"), "size": getattr(file_data, "filesize", 0), }) return flat async def _collect_results(self, client, search_request, timeout: float = 75.0) -> None: end = time.time() + timeout last_count = 0 while time.time() < end: current_count = len(search_request.results) if current_count > last_count: debug(f"[soulseek] Got {current_count} result(s)...") last_count = current_count await asyncio.sleep(0.5) def search( self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs ) -> List[SearchResult]: filters = filters or {} try: flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) if not flat_results: return [] # Filter to music files only music_results = [] for item in flat_results: filename = item['filename'] ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' if ext in self.MUSIC_EXTENSIONS: music_results.append(item) if not music_results: return [] # Extract metadata enriched_results = [] for item in music_results: filename = item['filename'] ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' # Get display filename display_name = filename.split('\\')[-1] if '\\' in filename else filename.split('/')[-1] if '/' in filename else filename # Extract path hierarchy path_parts = filename.replace('\\', '/').split('/') artist = path_parts[-3] if len(path_parts) >= 3 else '' album = path_parts[-2] if len(path_parts) >= 3 else path_parts[-2] if len(path_parts) == 2 else '' # Extract track number and title base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name track_num = '' title = base_name filename_artist = '' match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name) if match: track_num = match.group(1) rest = match.group(2) if ' - ' in rest: filename_artist, title = rest.split(' - ', 1) else: title = rest if filename_artist: artist = filename_artist enriched_results.append({ **item, 'artist': artist, 'album': album, 'title': title, 'track_num': track_num, 'ext': ext }) # Apply filters if filters: artist_filter = filters.get('artist', '').lower() if filters.get('artist') else '' album_filter = filters.get('album', '').lower() if filters.get('album') else '' track_filter = filters.get('track', '').lower() if filters.get('track') else '' if artist_filter or album_filter or track_filter: filtered = [] for item in enriched_results: if artist_filter and artist_filter not in item['artist'].lower(): continue if album_filter and album_filter not in item['album'].lower(): continue if track_filter and track_filter not in item['title'].lower(): continue filtered.append(item) enriched_results = filtered # Sort: .flac first, then by size enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size'])) # Convert to SearchResult results = [] for idx, item in enumerate(enriched_results, 1): artist_display = item['artist'] if item['artist'] else "(no artist)" album_display = item['album'] if item['album'] else "(no album)" size_mb = int(item['size'] / 1024 / 1024) columns = [ ("Track", item['track_num'] or "?"), ("Title", item['title'][:40]), ("Artist", artist_display[:32]), ("Album", album_display[:32]), ("Size", f"{size_mb} MB"), ] results.append(SearchResult( origin="soulseek", title=item['title'], path=item['filename'], detail=f"{artist_display} - {album_display}", annotations=[f"{size_mb} MB", item['ext'].lstrip('.').upper()], media_kind="audio", size_bytes=item['size'], columns=columns, full_metadata={ "username": item['username'], "filename": item['filename'], "artist": item['artist'], "album": item['album'], "track_num": item['track_num'], "ext": item['ext'], }, )) return results except Exception as e: log(f"[soulseek] Search error: {e}", file=sys.stderr) return [] def validate(self) -> bool: try: from aioslsk.client import SoulSeekClient return True except ImportError: return False class Bandcamp(SearchProvider): """Search provider for Bandcamp.""" def search( self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs ) -> List[SearchResult]: if not PLAYWRIGHT_AVAILABLE: log("[bandcamp] Playwright not available. Install with: pip install playwright", file=sys.stderr) return [] results = [] try: with sync_playwright() as p: browser = p.chromium.launch(headless=True) page = browser.new_page() # Parse query for artist: prefix if query.strip().lower().startswith("artist:"): artist_name = query[7:].strip().strip('"') search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b" else: search_url = f"https://bandcamp.com/search?q={query}&item_type=a" results = self._scrape_url(page, search_url, limit) browser.close() except Exception as e: log(f"[bandcamp] Search error: {e}", file=sys.stderr) return [] return results def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: debug(f"[bandcamp] Scraping: {url}") page.goto(url) page.wait_for_load_state("domcontentloaded") results = [] # Check for search results search_results = page.query_selector_all(".searchresult") if search_results: for item in search_results[:limit]: try: heading = item.query_selector(".heading") if not heading: continue link = heading.query_selector("a") if not link: continue title = link.inner_text().strip() target_url = link.get_attribute("href") subhead = item.query_selector(".subhead") artist = subhead.inner_text().strip() if subhead else "Unknown" itemtype = item.query_selector(".itemtype") media_type = itemtype.inner_text().strip() if itemtype else "album" results.append(SearchResult( origin="bandcamp", title=title, path=target_url, detail=f"By: {artist}", annotations=[media_type], media_kind="audio", columns=[ ("Name", title), ("Artist", artist), ("Type", media_type), ], full_metadata={ "artist": artist, "type": media_type, }, )) except Exception as e: debug(f"[bandcamp] Error parsing result: {e}") continue return results def validate(self) -> bool: return PLAYWRIGHT_AVAILABLE class YouTube(SearchProvider): """Search provider for YouTube using yt-dlp.""" def search( self, query: str, limit: int = 10, filters: Optional[Dict[str, Any]] = None, **kwargs ) -> List[SearchResult]: ytdlp_path = shutil.which("yt-dlp") if not ytdlp_path: log("[youtube] yt-dlp not found in PATH", file=sys.stderr) return [] search_query = f"ytsearch{limit}:{query}" cmd = [ ytdlp_path, "--dump-json", "--flat-playlist", "--no-warnings", search_query ] try: process = subprocess.run( cmd, capture_output=True, text=True, encoding="utf-8", errors="replace" ) if process.returncode != 0: log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr) return [] results = [] for line in process.stdout.splitlines(): if not line.strip(): continue try: video_data = json.loads(line) title = video_data.get("title", "Unknown") video_id = video_data.get("id", "") url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}" uploader = video_data.get("uploader", "Unknown") duration = video_data.get("duration", 0) view_count = video_data.get("view_count", 0) duration_str = f"{int(duration//60)}:{int(duration%60):02d}" if duration else "" views_str = f"{view_count:,}" if view_count else "" results.append(SearchResult( origin="youtube", title=title, path=url, detail=f"By: {uploader}", annotations=[duration_str, f"{views_str} views"], media_kind="video", columns=[ ("Title", title), ("Uploader", uploader), ("Duration", duration_str), ("Views", views_str), ], full_metadata={ "video_id": video_id, "uploader": uploader, "duration": duration, "view_count": view_count, }, )) except json.JSONDecodeError: continue return results except Exception as e: log(f"[youtube] Error: {e}", file=sys.stderr) return [] def validate(self) -> bool: return shutil.which("yt-dlp") is not None def pipe(self, path: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: """Return the playable URL for MPV (just the path for YouTube).""" return path # Search provider registry _SEARCH_PROVIDERS = { "libgen": Libgen, "soulseek": Soulseek, "bandcamp": Bandcamp, "youtube": YouTube, } def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]: """Get a search provider by name.""" provider_class = _SEARCH_PROVIDERS.get(name.lower()) if provider_class is None: log(f"[provider] Unknown search provider: {name}", file=sys.stderr) return None try: provider = provider_class(config) if not provider.validate(): log(f"[provider] Provider '{name}' is not available", file=sys.stderr) return None return provider except Exception as e: log(f"[provider] Error initializing '{name}': {e}", file=sys.stderr) return None def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: """List all search providers and their availability.""" availability = {} for name, provider_class in _SEARCH_PROVIDERS.items(): try: provider = provider_class(config) availability[name] = provider.validate() except Exception: availability[name] = False return availability # ============================================================================ # FILE PROVIDERS # ============================================================================ class FileProvider(ABC): """Base class for file upload providers.""" def __init__(self, config: Optional[Dict[str, Any]] = None): self.config = config or {} self.name = self.__class__.__name__.lower() @abstractmethod def upload(self, file_path: str, **kwargs: Any) -> str: """Upload a file and return the URL.""" pass def validate(self) -> bool: """Check if provider is available/configured.""" return True class ZeroXZero(FileProvider): """File provider for 0x0.st.""" def upload(self, file_path: str, **kwargs: Any) -> str: from helper.http_client import HTTPClient if not os.path.exists(file_path): raise FileNotFoundError(f"File not found: {file_path}") try: headers = {"User-Agent": "Medeia-Macina/1.0"} with HTTPClient(headers=headers) as client: with open(file_path, 'rb') as f: response = client.post( "https://0x0.st", files={"file": f} ) if response.status_code == 200: return response.text.strip() else: raise Exception(f"Upload failed: {response.status_code} - {response.text}") except Exception as e: log(f"[0x0] Upload error: {e}", file=sys.stderr) raise def validate(self) -> bool: return True class Matrix(FileProvider): """File provider for Matrix (Element) chat rooms.""" def validate(self) -> bool: if not self.config: return False matrix_conf = self.config.get('storage', {}).get('matrix', {}) return bool( matrix_conf.get('homeserver') and matrix_conf.get('room_id') and (matrix_conf.get('access_token') or matrix_conf.get('password')) ) def upload(self, file_path: str, **kwargs: Any) -> str: from pathlib import Path path = Path(file_path) if not path.exists(): raise FileNotFoundError(f"File not found: {file_path}") matrix_conf = self.config.get('storage', {}).get('matrix', {}) homeserver = matrix_conf.get('homeserver') access_token = matrix_conf.get('access_token') room_id = matrix_conf.get('room_id') if not homeserver.startswith('http'): homeserver = f"https://{homeserver}" # Upload media upload_url = f"{homeserver}/_matrix/media/v3/upload" headers = { "Authorization": f"Bearer {access_token}", "Content-Type": "application/octet-stream" } mime_type, _ = mimetypes.guess_type(path) if mime_type: headers["Content-Type"] = mime_type filename = path.name with open(path, 'rb') as f: resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) if resp.status_code != 200: raise Exception(f"Matrix upload failed: {resp.text}") content_uri = resp.json().get('content_uri') if not content_uri: raise Exception("No content_uri returned") # Send message send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" # Determine message type msgtype = "m.file" ext = path.suffix.lower() AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'} VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'} IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'} if ext in AUDIO_EXTS: msgtype = "m.audio" elif ext in VIDEO_EXTS: msgtype = "m.video" elif ext in IMAGE_EXTS: msgtype = "m.image" info = { "mimetype": mime_type, "size": path.stat().st_size } payload = { "msgtype": msgtype, "body": filename, "url": content_uri, "info": info } resp = requests.post(send_url, headers=headers, json=payload) if resp.status_code != 200: raise Exception(f"Matrix send message failed: {resp.text}") event_id = resp.json().get('event_id') return f"https://matrix.to/#/{room_id}/{event_id}" # File provider registry _FILE_PROVIDERS = { "0x0": ZeroXZero, "matrix": Matrix, } def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: """Get a file provider by name.""" provider_class = _FILE_PROVIDERS.get(name.lower()) if provider_class is None: log(f"[provider] Unknown file provider: {name}", file=sys.stderr) return None try: provider = provider_class(config) if not provider.validate(): log(f"[provider] File provider '{name}' is not available", file=sys.stderr) return None return provider except Exception as e: log(f"[provider] Error initializing file provider '{name}': {e}", file=sys.stderr) return None def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: """List all file providers and their availability.""" availability = {} for name, provider_class in _FILE_PROVIDERS.items(): try: provider = provider_class(config) availability[name] = provider.validate() except Exception: availability[name] = False return availability