commit 5482ee5586
parent 5e4df11dbf
Author: nose
Date:   2025-12-05 03:42:57 -08:00

20 changed files with 911 additions and 223 deletions

View File

@@ -1397,6 +1397,10 @@ class FileStorage:
                 log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
             except Exception as e:
                 log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
 
+    def list_backends(self) -> list[str]:
+        """Return available backend keys for autocomplete and validation."""
+        return sorted(self._backends.keys())
+
     def __getitem__(self, backend_name: str) -> StorageBackend:
         """Get a storage backend by name.

View File

@@ -9,8 +9,8 @@ import logging
 import re
 import requests
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
-from urllib.parse import quote, urljoin
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from urllib.parse import quote, urljoin, urlparse, unquote
 
 # Optional dependencies
 try:
@@ -405,6 +405,61 @@ def _resolve_download_url(
     return None
 
+def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
+    """Guess the file extension from headers or the download URL."""
+    content_disposition = headers.get("content-disposition", "")
+    if content_disposition:
+        match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
+        if match:
+            filename = unquote(match.group(1).strip('"'))
+            suffix = Path(filename).suffix
+            if suffix:
+                return suffix.lstrip('.')
+
+    parsed = urlparse(download_url)
+    suffix = Path(parsed.path).suffix
+    if suffix:
+        return suffix.lstrip('.')
+
+    content_type = headers.get('content-type', '').lower()
+    mime_map = {
+        'application/pdf': 'pdf',
+        'application/epub+zip': 'epub',
+        'application/x-mobipocket-ebook': 'mobi',
+        'application/x-cbr': 'cbr',
+        'application/x-cbz': 'cbz',
+        'application/zip': 'zip',
+    }
+    for mime, ext in mime_map.items():
+        if mime in content_type:
+            return ext
+    return None
+
+def _apply_extension(path: Path, extension: Optional[str]) -> Path:
+    """Rename the path to match the detected extension, if needed."""
+    if not extension:
+        return path
+    suffix = extension if extension.startswith('.') else f'.{extension}'
+    if path.suffix.lower() == suffix.lower():
+        return path
+    candidate = path.with_suffix(suffix)
+    base_stem = path.stem
+    counter = 1
+    while candidate.exists() and counter < 100:
+        candidate = path.with_name(f"{base_stem}({counter}){suffix}")
+        counter += 1
+    try:
+        path.replace(candidate)
+        return candidate
+    except Exception:
+        return path
+
 def download_from_mirror(
     mirror_url: str,
     output_path: Path,
@@ -412,8 +467,9 @@ def download_from_mirror(
     log_info: LogFn = None,
     log_error: ErrorFn = None,
     session: Optional[requests.Session] = None,
-) -> bool:
-    """Download file from a LibGen mirror URL."""
+    progress_callback: Optional[Callable[[int, int], None]] = None,
+) -> Tuple[bool, Optional[Path]]:
+    """Download file from a LibGen mirror URL with optional progress tracking."""
     session = session or requests.Session()
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -425,33 +481,43 @@ def download_from_mirror(
         if not download_url:
             _call(log_error, "[download] Could not find direct download link")
-            return False
+            return False, None
 
         _call(log_info, f"[download] Downloading from: {download_url}")
 
         # Download the actual file
+        downloaded = 0
+        total_size = 0
+        headers: Dict[str, str] = {}
         with session.get(download_url, stream=True, timeout=60) as r:
             r.raise_for_status()
+            headers = dict(r.headers)
 
             # Verify it's not HTML (error page)
-            ct = r.headers.get("content-type", "").lower()
+            ct = headers.get("content-type", "").lower()
             if "text/html" in ct:
                 _call(log_error, "[download] Final URL returned HTML, not a file.")
-                return False
+                return False, None
 
-            total_size = int(r.headers.get("content-length", 0))
-            downloaded = 0
+            total_size = int(headers.get("content-length", 0) or 0)
 
             with open(output_path, "wb") as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     if chunk:
                         f.write(chunk)
                         downloaded += len(chunk)
-                        # Optional: progress logging
-        _call(log_info, f"[download] Saved to {output_path}")
-        return True
+                        if progress_callback:
+                            progress_callback(downloaded, total_size)
+
+        final_extension = _guess_filename_extension(download_url, headers)
+        final_path = _apply_extension(output_path, final_extension)
+        if progress_callback and total_size > 0:
+            progress_callback(downloaded, total_size)
+        _call(log_info, f"[download] Saved to {final_path}")
+        return True, final_path
     except Exception as e:
         _call(log_error, f"[download] Download failed: {e}")
-        return False
+        return False, None
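
A minimal usage sketch of the new return contract, assuming download_from_mirror has been imported from the helper module above (its path isn't shown in this diff); the mirror URL, output path, and progress printer are illustrative, and the saved path may differ from the requested one once the extension is applied:

    from pathlib import Path

    def print_progress(done: int, total: int) -> None:
        # total is 0 when the server omits Content-Length
        pct = f"{done * 100 // total}%" if total else f"{done} bytes"
        print(f"\rdownloading: {pct}", end="")

    ok, saved_path = download_from_mirror(
        "https://example.org/mirror-page",  # illustrative mirror URL
        Path("/tmp/book"),                  # extension is fixed up after download
        progress_callback=print_progress,
    )
    if ok and saved_path is not None:
        print(f"\nsaved as {saved_path}")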

View File

@@ -38,6 +38,9 @@ def log(*args, **kwargs) -> None:
     Example:
         log("Upload started")  # Output: [add_file.run] Upload started
     """
+    # When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
+    add_prefix = _DEBUG_ENABLED
+
     # Get the calling frame
     frame = inspect.currentframe()
     if frame is None:
@@ -60,11 +63,11 @@ def log(*args, **kwargs) -> None:
         if 'file' not in kwargs:
             kwargs['file'] = sys.stdout
 
-        # Build prefix
-        prefix = f"[{file_name}.{func_name}]"
-
-        # Print with prefix
-        print(prefix, *args, **kwargs)
+        if add_prefix:
+            prefix = f"[{file_name}.{func_name}]"
+            print(prefix, *args, **kwargs)
+        else:
+            print(*args, **kwargs)
     finally:
         del frame
         del caller_frame
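
A quick sketch of the resulting behavior, assuming _DEBUG_ENABLED is the module-level debug flag this diff references:

    from helper.logger import log

    # With _DEBUG_ENABLED = True:   [add_file.run] Upload started
    # With _DEBUG_ENABLED = False:  Upload started
    log("Upload started")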

helper/metadata_search.py (new file, +105 lines)
View File

@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Type
+
+import requests
+import sys
+
+from helper.logger import log, debug
+
+
+class MetadataProvider(ABC):
+    """Base class for metadata providers (music, movies, books, etc.)."""
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
+        self.config = config or {}
+
+    @property
+    def name(self) -> str:
+        return self.__class__.__name__.replace("Provider", "").lower()
+
+    @abstractmethod
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        """Return a list of candidate metadata records."""
+
+    def to_tags(self, item: Dict[str, Any]) -> List[str]:
+        """Convert a result item into a list of tags."""
+        tags: List[str] = []
+        title = item.get("title")
+        artist = item.get("artist")
+        album = item.get("album")
+        year = item.get("year")
+        if title:
+            tags.append(f"title:{title}")
+        if artist:
+            tags.append(f"artist:{artist}")
+        if album:
+            tags.append(f"album:{album}")
+        if year:
+            tags.append(f"year:{year}")
+        tags.append(f"source:{self.name}")
+        return tags
+
+
+class ITunesProvider(MetadataProvider):
+    """Metadata provider using the iTunes Search API."""
+
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        params = {"term": query, "media": "music", "entity": "song", "limit": limit}
+        try:
+            resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
+            resp.raise_for_status()
+            results = resp.json().get("results", [])
+        except Exception as exc:
+            log(f"iTunes search failed: {exc}", file=sys.stderr)
+            return []
+        items: List[Dict[str, Any]] = []
+        for r in results:
+            item = {
+                "title": r.get("trackName"),
+                "artist": r.get("artistName"),
+                "album": r.get("collectionName"),
+                "year": str(r.get("releaseDate", ""))[:4],
+                "provider": self.name,
+                "raw": r,
+            }
+            items.append(item)
+        debug(f"iTunes returned {len(items)} items for '{query}'")
+        return items
+
+
+# Registry ---------------------------------------------------------------
+
+_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
+    "itunes": ITunesProvider,
+}
+
+
+def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
+    _METADATA_PROVIDERS[name.lower()] = provider_cls
+
+
+def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
+    availability: Dict[str, bool] = {}
+    for name, cls in _METADATA_PROVIDERS.items():
+        try:
+            provider = cls(config)
+            # Basic availability check: perform lightweight validation if defined
+            availability[name] = True
+        except Exception:
+            availability[name] = False
+    return availability
+
+
+def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
+    cls = _METADATA_PROVIDERS.get(name.lower())
+    if not cls:
+        return None
+    try:
+        return cls(config)
+    except Exception as exc:
+        log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
+        return None
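
A short end-to-end sketch of the new module's surface; the query string is illustrative:

    from helper.metadata_search import (
        get_metadata_provider,
        list_metadata_providers,
        register_provider,
    )

    print(list_metadata_providers())  # e.g. {'itunes': True}

    provider = get_metadata_provider("itunes")
    if provider is not None:
        for item in provider.search("Siamese Dream", limit=3):  # illustrative query
            print(provider.to_tags(item))  # ['title:...', 'artist:...', ..., 'source:itunes']

    # Third-party providers plug in via the registry, e.g.:
    # register_provider("musicbrainz", MusicBrainzProvider)  # hypothetical class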

View File

@@ -73,7 +73,12 @@ class SearchResult:
             self.columns = []
 
     def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary for JSON serialization."""
+        """Convert to dictionary for JSON serialization.
+
+        Note: full_metadata is excluded from the dict to keep response size small
+        until the result is actually selected/used. This speeds up initial
+        search result display and piping.
+        """
data = {
"origin": self.origin,
"title": self.title,
@@ -83,10 +88,10 @@ class SearchResult:
             "media_kind": self.media_kind,
             "size_bytes": self.size_bytes,
             "tags": list(self.tags) if self.tags else [],
-            "full_metadata": self.full_metadata,
         }
         if self.columns:
             data["columns"] = list(self.columns)
+        # Note: full_metadata is NOT included in dict to keep payload small
         return data
@@ -377,6 +382,7 @@ class LibGenProvider(SearchProvider):
             if isbn:
                 annotations.append(f"ISBN: {isbn}")
 
+            # Store full book data without mirrors in metadata to avoid serialization overhead
             search_results.append(SearchResult(
                 origin="libgen",
                 title=title,
@@ -391,7 +397,8 @@ class LibGenProvider(SearchProvider):
                     "year": year,
                     "isbn": isbn,
                     "filesize": filesize,
-                    "mirrors": book.get("mirrors", {}),
+                    # Exclude mirrors dict from metadata to reduce serialization overhead
+                    # Mirrors can be re-fetched if the result is selected
                     "book_id": book.get("book_id", ""),
                     "md5": book.get("md5", ""),
                 },
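
A sketch of the serialization behavior after this change, assuming SearchResult is imported from the module above; the constructor arguments are assumptions based on the fields to_dict() emits:

    import json

    result = SearchResult(
        origin="libgen",
        title="Some Title",
        full_metadata={"md5": "...", "book_id": "..."},  # illustrative payload
    )
    payload = result.to_dict()
    assert "full_metadata" not in payload  # kept out until the result is selected
    print(json.dumps(payload))             # lean dict for initial display/piping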

View File

@@ -450,30 +450,31 @@ class UnifiedBookDownloader:
         if download_func is None:
             return False, "Download function not available"
 
-        download_callable = cast(Callable[[str, str], bool], download_func)
+        download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
 
         def download_wrapper():
             return download_callable(mirror_url, str(output_path))
 
         # Download (in thread)
         try:
-            success = await loop.run_in_executor(None, download_wrapper)
+            success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
             if success:
+                dest_path = Path(downloaded_path) if downloaded_path else output_path
                 # Validate downloaded file is not HTML (common Libgen issue)
-                if output_path.exists():
+                if dest_path.exists():
                     try:
-                        with open(output_path, 'rb') as f:
+                        with open(dest_path, 'rb') as f:
                             file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
                         if '<!doctype' in file_start or '<html' in file_start:
                             logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
-                            output_path.unlink()  # Delete the HTML file
+                            dest_path.unlink()  # Delete the HTML file
                             continue
                     except Exception as e:
                         logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
-                logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
-                return True, str(output_path)
+                logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
+                return True, str(dest_path)
             else:
                 logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
         except Exception as e:
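
For reference, the HTML sniff above extracted as a standalone sketch; is_html_error_page is a hypothetical helper, not part of this commit:

    from pathlib import Path

    def is_html_error_page(path: Path) -> bool:
        # Some mirrors return an HTML error page with a 200 status; sniff the
        # first KB for a doctype or <html> tag, as the downloader does above.
        try:
            with open(path, "rb") as f:
                head = f.read(1024).decode("utf-8", errors="ignore").lower()
        except OSError:
            return False
        return "<!doctype" in head or "<html" in head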