commit 5482ee5586
parent 5e4df11dbf
Author: nose
Date:   2025-12-05 03:42:57 -08:00

20 changed files with 911 additions and 223 deletions

View File

@@ -1397,6 +1397,10 @@ class FileStorage:
                 log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
             except Exception as e:
                 log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
 
+    def list_backends(self) -> list[str]:
+        """Return available backend keys for autocomplete and validation."""
+        return sorted(self._backends.keys())
+
     def __getitem__(self, backend_name: str) -> StorageBackend:
         """Get a storage backend by name.

View File

@@ -9,8 +9,8 @@ import logging
 import re
 import requests
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
-from urllib.parse import quote, urljoin
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from urllib.parse import quote, urljoin, urlparse, unquote
 
 # Optional dependencies
 try:
@@ -405,6 +405,61 @@ def _resolve_download_url(
     return None
 
+def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
+    """Guess the file extension from headers or the download URL."""
+    content_disposition = headers.get("content-disposition", "")
+    if content_disposition:
+        match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
+        if match:
+            filename = unquote(match.group(1).strip('"'))
+            suffix = Path(filename).suffix
+            if suffix:
+                return suffix.lstrip('.')
+
+    parsed = urlparse(download_url)
+    suffix = Path(parsed.path).suffix
+    if suffix:
+        return suffix.lstrip('.')
+
+    content_type = headers.get('content-type', '').lower()
+    mime_map = {
+        'application/pdf': 'pdf',
+        'application/epub+zip': 'epub',
+        'application/x-mobipocket-ebook': 'mobi',
+        'application/x-cbr': 'cbr',
+        'application/x-cbz': 'cbz',
+        'application/zip': 'zip',
+    }
+    for mime, ext in mime_map.items():
+        if mime in content_type:
+            return ext
+    return None
+
+def _apply_extension(path: Path, extension: Optional[str]) -> Path:
+    """Rename the path to match the detected extension, if needed."""
+    if not extension:
+        return path
+    suffix = extension if extension.startswith('.') else f'.{extension}'
+    if path.suffix.lower() == suffix.lower():
+        return path
+    candidate = path.with_suffix(suffix)
+    base_stem = path.stem
+    counter = 1
+    while candidate.exists() and counter < 100:
+        candidate = path.with_name(f"{base_stem}({counter}){suffix}")
+        counter += 1
+    try:
+        path.replace(candidate)
+        return candidate
+    except Exception:
+        return path
+
 def download_from_mirror(
     mirror_url: str,
     output_path: Path,
@@ -412,8 +467,9 @@ def download_from_mirror(
     log_info: LogFn = None,
     log_error: ErrorFn = None,
     session: Optional[requests.Session] = None,
-) -> bool:
-    """Download file from a LibGen mirror URL."""
+    progress_callback: Optional[Callable[[int, int], None]] = None,
+) -> Tuple[bool, Optional[Path]]:
+    """Download file from a LibGen mirror URL with optional progress tracking."""
     session = session or requests.Session()
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -425,33 +481,43 @@ def download_from_mirror(
         if not download_url:
             _call(log_error, "[download] Could not find direct download link")
-            return False
+            return False, None
 
         _call(log_info, f"[download] Downloading from: {download_url}")
 
         # Download the actual file
+        downloaded = 0
+        total_size = 0
+        headers: Dict[str, str] = {}
         with session.get(download_url, stream=True, timeout=60) as r:
             r.raise_for_status()
+            headers = dict(r.headers)
 
             # Verify it's not HTML (error page)
-            ct = r.headers.get("content-type", "").lower()
+            ct = headers.get("content-type", "").lower()
             if "text/html" in ct:
                 _call(log_error, "[download] Final URL returned HTML, not a file.")
-                return False
+                return False, None
 
-            total_size = int(r.headers.get("content-length", 0))
-            downloaded = 0
+            total_size = int(headers.get("content-length", 0) or 0)
 
             with open(output_path, "wb") as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     if chunk:
                         f.write(chunk)
                         downloaded += len(chunk)
-                        # Optional: progress logging
-        _call(log_info, f"[download] Saved to {output_path}")
-        return True
+                        if progress_callback:
+                            progress_callback(downloaded, total_size)
+
+        final_extension = _guess_filename_extension(download_url, headers)
+        final_path = _apply_extension(output_path, final_extension)
+        if progress_callback and total_size > 0:
+            progress_callback(downloaded, total_size)
+        _call(log_info, f"[download] Saved to {final_path}")
+        return True, final_path
     except Exception as e:
         _call(log_error, f"[download] Download failed: {e}")
-        return False
+        return False, None
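
A minimal usage sketch of the new return contract, assuming download_from_mirror has been imported from the helper module above (its path isn't shown in this diff); the mirror URL, output path, and progress printer are illustrative, and the saved path may differ from the requested one once the extension is applied:

    from pathlib import Path

    def print_progress(done: int, total: int) -> None:
        # total is 0 when the server omits Content-Length
        pct = f"{done * 100 // total}%" if total else f"{done} bytes"
        print(f"\rdownloading: {pct}", end="")

    ok, saved_path = download_from_mirror(
        "https://example.org/mirror-page",  # illustrative mirror URL
        Path("/tmp/book"),                  # extension is fixed up after download
        progress_callback=print_progress,
    )
    if ok and saved_path is not None:
        print(f"\nsaved as {saved_path}")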

View File

@@ -38,6 +38,9 @@ def log(*args, **kwargs) -> None:
     Example:
         log("Upload started")  # Output: [add_file.run] Upload started
     """
+    # When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
+    add_prefix = _DEBUG_ENABLED
+
     # Get the calling frame
     frame = inspect.currentframe()
     if frame is None:
@@ -60,11 +63,11 @@ def log(*args, **kwargs) -> None:
         if 'file' not in kwargs:
             kwargs['file'] = sys.stdout
 
-        # Build prefix
-        prefix = f"[{file_name}.{func_name}]"
-
-        # Print with prefix
-        print(prefix, *args, **kwargs)
+        if add_prefix:
+            prefix = f"[{file_name}.{func_name}]"
+            print(prefix, *args, **kwargs)
+        else:
+            print(*args, **kwargs)
     finally:
         del frame
         del caller_frame
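
A quick sketch of the resulting behavior, assuming _DEBUG_ENABLED is the module-level debug flag this diff references:

    from helper.logger import log

    # With _DEBUG_ENABLED = True:   [add_file.run] Upload started
    # With _DEBUG_ENABLED = False:  Upload started
    log("Upload started")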

helper/metadata_search.py (new file, +105 lines)
View File

@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Type
+
+import requests
+import sys
+
+from helper.logger import log, debug
+
+
+class MetadataProvider(ABC):
+    """Base class for metadata providers (music, movies, books, etc.)."""
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
+        self.config = config or {}
+
+    @property
+    def name(self) -> str:
+        return self.__class__.__name__.replace("Provider", "").lower()
+
+    @abstractmethod
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        """Return a list of candidate metadata records."""
+
+    def to_tags(self, item: Dict[str, Any]) -> List[str]:
+        """Convert a result item into a list of tags."""
+        tags: List[str] = []
+        title = item.get("title")
+        artist = item.get("artist")
+        album = item.get("album")
+        year = item.get("year")
+        if title:
+            tags.append(f"title:{title}")
+        if artist:
+            tags.append(f"artist:{artist}")
+        if album:
+            tags.append(f"album:{album}")
+        if year:
+            tags.append(f"year:{year}")
+        tags.append(f"source:{self.name}")
+        return tags
+
+
+class ITunesProvider(MetadataProvider):
+    """Metadata provider using the iTunes Search API."""
+
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        params = {"term": query, "media": "music", "entity": "song", "limit": limit}
+        try:
+            resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
+            resp.raise_for_status()
+            results = resp.json().get("results", [])
+        except Exception as exc:
+            log(f"iTunes search failed: {exc}", file=sys.stderr)
+            return []
+        items: List[Dict[str, Any]] = []
+        for r in results:
+            item = {
+                "title": r.get("trackName"),
+                "artist": r.get("artistName"),
+                "album": r.get("collectionName"),
+                "year": str(r.get("releaseDate", ""))[:4],
+                "provider": self.name,
+                "raw": r,
+            }
+            items.append(item)
+        debug(f"iTunes returned {len(items)} items for '{query}'")
+        return items
+
+
+# Registry ---------------------------------------------------------------
+
+_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
+    "itunes": ITunesProvider,
+}
+
+
+def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
+    _METADATA_PROVIDERS[name.lower()] = provider_cls
+
+
+def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
+    availability: Dict[str, bool] = {}
+    for name, cls in _METADATA_PROVIDERS.items():
+        try:
+            provider = cls(config)
+            # Basic availability check: perform lightweight validation if defined
+            availability[name] = True
+        except Exception:
+            availability[name] = False
+    return availability
+
+
+def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
+    cls = _METADATA_PROVIDERS.get(name.lower())
+    if not cls:
+        return None
+    try:
+        return cls(config)
+    except Exception as exc:
+        log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
+        return None
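
A short end-to-end sketch of the new module's surface; the query string is illustrative:

    from helper.metadata_search import (
        get_metadata_provider,
        list_metadata_providers,
        register_provider,
    )

    print(list_metadata_providers())  # e.g. {'itunes': True}

    provider = get_metadata_provider("itunes")
    if provider is not None:
        for item in provider.search("Siamese Dream", limit=3):  # illustrative query
            print(provider.to_tags(item))  # ['title:...', 'artist:...', ..., 'source:itunes']

    # Third-party providers plug in via the registry, e.g.:
    # register_provider("musicbrainz", MusicBrainzProvider)  # hypothetical class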

View File

@@ -73,7 +73,12 @@ class SearchResult:
             self.columns = []
 
     def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary for JSON serialization."""
+        """Convert to dictionary for JSON serialization.
+
+        Note: full_metadata is excluded from the dict to keep response size small
+        until the result is actually selected/used. This speeds up initial
+        search result display and piping.
+        """
data = {
"origin": self.origin,
"title": self.title,
@@ -83,10 +88,10 @@ class SearchResult:
             "media_kind": self.media_kind,
             "size_bytes": self.size_bytes,
             "tags": list(self.tags) if self.tags else [],
-            "full_metadata": self.full_metadata,
         }
         if self.columns:
             data["columns"] = list(self.columns)
+        # Note: full_metadata is NOT included in dict to keep payload small
         return data
@@ -377,6 +382,7 @@ class LibGenProvider(SearchProvider):
             if isbn:
                 annotations.append(f"ISBN: {isbn}")
 
+            # Store full book data without mirrors in metadata to avoid serialization overhead
             search_results.append(SearchResult(
                 origin="libgen",
                 title=title,
@@ -391,7 +397,8 @@ class LibGenProvider(SearchProvider):
                     "year": year,
                     "isbn": isbn,
                     "filesize": filesize,
-                    "mirrors": book.get("mirrors", {}),
+                    # Exclude mirrors dict from metadata to reduce serialization overhead
+                    # Mirrors can be re-fetched if the result is selected
                     "book_id": book.get("book_id", ""),
                     "md5": book.get("md5", ""),
                 },
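
A sketch of the serialization behavior after this change, assuming SearchResult is imported from the module above; the constructor arguments are assumptions based on the fields to_dict() emits:

    import json

    result = SearchResult(
        origin="libgen",
        title="Some Title",
        full_metadata={"md5": "...", "book_id": "..."},  # illustrative payload
    )
    payload = result.to_dict()
    assert "full_metadata" not in payload  # kept out until the result is selected
    print(json.dumps(payload))             # lean dict for initial display/piping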

View File

@@ -450,30 +450,31 @@ class UnifiedBookDownloader:
         if download_func is None:
             return False, "Download function not available"
 
-        download_callable = cast(Callable[[str, str], bool], download_func)
+        download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
 
         def download_wrapper():
             return download_callable(mirror_url, str(output_path))
 
         # Download (in thread)
         try:
-            success = await loop.run_in_executor(None, download_wrapper)
+            success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
             if success:
+                dest_path = Path(downloaded_path) if downloaded_path else output_path
                 # Validate downloaded file is not HTML (common Libgen issue)
-                if output_path.exists():
+                if dest_path.exists():
                     try:
-                        with open(output_path, 'rb') as f:
+                        with open(dest_path, 'rb') as f:
                             file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
                         if '<!doctype' in file_start or '<html' in file_start:
                             logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
-                            output_path.unlink()  # Delete the HTML file
+                            dest_path.unlink()  # Delete the HTML file
                             continue
                     except Exception as e:
                         logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
-                logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
-                return True, str(output_path)
+                logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
+                return True, str(dest_path)
             else:
                 logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
         except Exception as e:
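
For reference, the HTML sniff above extracted as a standalone sketch; is_html_error_page is a hypothetical helper, not part of this commit:

    from pathlib import Path

    def is_html_error_page(path: Path) -> bool:
        # Some mirrors return an HTML error page with a 200 status; sniff the
        # first KB for a doctype or <html> tag, as the downloader does above.
        try:
            with open(path, "rb") as f:
                head = f.read(1024).decode("utf-8", errors="ignore").lower()
        except OSError:
            return False
        return "<!doctype" in head or "<html" in head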