d
This commit is contained in:
@@ -1397,6 +1397,10 @@ class FileStorage:
|
||||
log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
|
||||
|
||||
def list_backends(self) -> list[str]:
    """List the registered backend keys in sorted order.

    Intended for autocomplete and validation; a new list is built on
    every call.
    """
    backend_names = list(self._backends)
    backend_names.sort()
    return backend_names
|
||||
|
||||
def __getitem__(self, backend_name: str) -> StorageBackend:
|
||||
"""Get a storage backend by name.
|
||||
|
||||
@@ -9,8 +9,8 @@ import logging
|
||||
import re
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
from urllib.parse import quote, urljoin
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote, urljoin, urlparse, unquote
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
@@ -405,6 +405,61 @@ def _resolve_download_url(
|
||||
return None
|
||||
|
||||
|
||||
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
    """Guess the file extension from headers or the download URL.

    Sources are consulted in this order, first hit wins:

    1. the filename advertised in the ``Content-Disposition`` header,
    2. the suffix of the URL path, unless it is a server-side script suffix
       such as ``php`` (which describes the endpoint, not the payload),
    3. a small table of known ``Content-Type`` values.

    Args:
        download_url: The URL the file was fetched from.
        headers: Response headers; keys are matched case-insensitively.

    Returns:
        The extension without its leading dot, or ``None`` when nothing
        could be determined.
    """
    # Header names are case-insensitive on the wire, but a plain ``dict`` copy
    # of the response headers keeps whatever casing the server sent.
    normalized = {str(key).lower(): value for key, value in (headers or {}).items()}

    content_disposition = normalized.get("content-disposition") or ""
    if content_disposition:
        match = re.search(
            r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)',
            content_disposition,
            flags=re.IGNORECASE,
        )
        if match:
            # Strip stray whitespace so "name.pdf ;" does not yield "pdf ".
            filename = unquote(match.group(1).strip().strip('"'))
            suffix = Path(filename).suffix
            if suffix:
                return suffix.lstrip('.')

    # Suffixes that identify the serving script rather than the downloaded file.
    script_suffixes = {'php', 'asp', 'aspx', 'jsp', 'cgi', 'htm', 'html'}
    url_suffix = Path(urlparse(download_url).path).suffix.lstrip('.')
    if url_suffix and url_suffix.lower() not in script_suffixes:
        return url_suffix

    content_type = str(normalized.get('content-type') or '').lower()
    mime_map = {
        'application/pdf': 'pdf',
        'application/epub+zip': 'epub',
        'application/x-mobipocket-ebook': 'mobi',
        'application/x-cbr': 'cbr',
        'application/x-cbz': 'cbz',
        'application/zip': 'zip',
    }
    # Substring match tolerates parameters such as "; charset=binary".
    for mime, ext in mime_map.items():
        if mime in content_type:
            return ext

    return None
|
||||
|
||||
|
||||
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
    """Rename ``path`` so its suffix matches the detected extension, if needed.

    When the target name is already taken, ``stem(1)``, ``stem(2)`` ... up to
    ``stem(99)`` are tried. An existing file is never overwritten: if no free
    name is found the file is left where it is.

    Args:
        path: Location of the file on disk.
        extension: Desired extension, with or without the leading dot.
            ``None`` or an empty/blank value means "leave the file alone".

    Returns:
        The new path after a successful rename, otherwise the original
        ``path`` (no extension given, suffix already matches, malformed
        extension, no free name, or the rename failed).
    """
    extension = (extension or "").strip()
    if not extension:
        return path

    suffix = extension if extension.startswith('.') else f'.{extension}'
    if path.suffix.lower() == suffix.lower():
        return path

    base_stem = path.stem
    try:
        candidate = path.with_suffix(suffix)
        counter = 1
        while candidate.exists():
            if counter >= 100:
                # Every probe name is taken; renaming now would clobber a file.
                return path
            candidate = path.with_name(f"{base_stem}({counter}){suffix}")
            counter += 1
    except ValueError:
        # pathlib rejects suffixes/names containing separators or lone dots.
        return path

    try:
        path.replace(candidate)
    except OSError:
        # Best effort: the download itself succeeded, keep the original name.
        return path
    return candidate
|
||||
|
||||
def download_from_mirror(
|
||||
mirror_url: str,
|
||||
output_path: Path,
|
||||
@@ -412,8 +467,9 @@ def download_from_mirror(
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
) -> bool:
|
||||
"""Download file from a LibGen mirror URL."""
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
) -> Tuple[bool, Optional[Path]]:
|
||||
"""Download file from a LibGen mirror URL with optional progress tracking."""
|
||||
session = session or requests.Session()
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -425,33 +481,43 @@ def download_from_mirror(
|
||||
|
||||
if not download_url:
|
||||
_call(log_error, "[download] Could not find direct download link")
|
||||
return False
|
||||
return False, None
|
||||
|
||||
_call(log_info, f"[download] Downloading from: {download_url}")
|
||||
|
||||
# Download the actual file
|
||||
downloaded = 0
|
||||
total_size = 0
|
||||
headers: Dict[str, str] = {}
|
||||
|
||||
with session.get(download_url, stream=True, timeout=60) as r:
|
||||
r.raise_for_status()
|
||||
headers = dict(r.headers)
|
||||
|
||||
# Verify it's not HTML (error page)
|
||||
ct = r.headers.get("content-type", "").lower()
|
||||
ct = headers.get("content-type", "").lower()
|
||||
if "text/html" in ct:
|
||||
_call(log_error, "[download] Final URL returned HTML, not a file.")
|
||||
return False
|
||||
return False, None
|
||||
|
||||
total_size = int(r.headers.get("content-length", 0))
|
||||
downloaded = 0
|
||||
total_size = int(headers.get("content-length", 0) or 0)
|
||||
|
||||
with open(output_path, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
downloaded += len(chunk)
|
||||
# Optional: progress logging
|
||||
|
||||
_call(log_info, f"[download] Saved to {output_path}")
|
||||
return True
|
||||
if progress_callback:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
final_extension = _guess_filename_extension(download_url, headers)
|
||||
final_path = _apply_extension(output_path, final_extension)
|
||||
|
||||
if progress_callback and total_size > 0:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
_call(log_info, f"[download] Saved to {final_path}")
|
||||
return True, final_path
|
||||
|
||||
except Exception as e:
|
||||
_call(log_error, f"[download] Download failed: {e}")
|
||||
return False
|
||||
return False, None
|
||||
|
||||
@@ -38,6 +38,9 @@ def log(*args, **kwargs) -> None:
|
||||
Example:
|
||||
log("Upload started") # Output: [add_file.run] Upload started
|
||||
"""
|
||||
# When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
|
||||
add_prefix = _DEBUG_ENABLED
|
||||
|
||||
# Get the calling frame
|
||||
frame = inspect.currentframe()
|
||||
if frame is None:
|
||||
@@ -60,11 +63,11 @@ def log(*args, **kwargs) -> None:
|
||||
if 'file' not in kwargs:
|
||||
kwargs['file'] = sys.stdout
|
||||
|
||||
# Build prefix
|
||||
prefix = f"[{file_name}.{func_name}]"
|
||||
|
||||
# Print with prefix
|
||||
print(prefix, *args, **kwargs)
|
||||
if add_prefix:
|
||||
prefix = f"[{file_name}.{func_name}]"
|
||||
print(prefix, *args, **kwargs)
|
||||
else:
|
||||
print(*args, **kwargs)
|
||||
finally:
|
||||
del frame
|
||||
del caller_frame
|
||||
|
||||
105
helper/metadata_search.py
Normal file
105
helper/metadata_search.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
import requests
|
||||
import sys
|
||||
|
||||
from helper.logger import log, debug
|
||||
|
||||
|
||||
class MetadataProvider(ABC):
    """Abstract base for metadata providers (music, movies, books, etc.)."""

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        # A missing or empty config falls back to a fresh empty dict.
        self.config = config if config else {}

    @property
    def name(self) -> str:
        """Provider key: the class name with "Provider" removed, lowercased."""
        class_name = self.__class__.__name__
        return class_name.replace("Provider", "").lower()

    @abstractmethod
    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Return a list of candidate metadata records."""

    def to_tags(self, item: Dict[str, Any]) -> List[str]:
        """Build ``key:value`` tags from a result item.

        Only truthy ``title``, ``artist``, ``album`` and ``year`` values are
        emitted, in that order, followed by a ``source:<provider name>`` tag.
        """
        tags: List[str] = []
        for field in ("title", "artist", "album", "year"):
            value = item.get(field)
            if value:
                tags.append(f"{field}:{value}")

        tags.append(f"source:{self.name}")
        return tags
|
||||
|
||||
|
||||
class ITunesProvider(MetadataProvider):
    """Metadata provider using the iTunes Search API."""

    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Search iTunes for songs matching ``query``.

        Args:
            query: Free-text search term.
            limit: Maximum number of results requested from the API.

        Returns:
            One dict per result with ``title``, ``artist``, ``album``,
            ``year`` (first four characters of the release date, or ``""``),
            ``provider`` and the untouched API record under ``raw``.
            An empty list is returned when the request or decoding fails.
        """
        params = {"term": query, "media": "music", "entity": "song", "limit": limit}
        try:
            resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
            resp.raise_for_status()
            results = resp.json().get("results", [])
        except Exception as exc:
            # Best-effort lookup: network, HTTP and JSON-shape errors all
            # degrade to "no results" rather than propagating to the caller.
            log(f"iTunes search failed: {exc}", file=sys.stderr)
            return []

        # A null or non-list "results" would otherwise raise outside the try.
        if not isinstance(results, list):
            results = []

        items: List[Dict[str, Any]] = []
        for r in results:
            if not isinstance(r, dict):
                continue
            item = {
                "title": r.get("trackName"),
                "artist": r.get("artistName"),
                "album": r.get("collectionName"),
                # ``or ""`` keeps a null releaseDate from becoming the text "None".
                "year": str(r.get("releaseDate") or "")[:4],
                "provider": self.name,
                "raw": r,
            }
            items.append(item)
        debug(f"iTunes returned {len(items)} items for '{query}'")
        return items
|
||||
|
||||
|
||||
# Registry ---------------------------------------------------------------

# Provider classes available for lookup, keyed by provider name.
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = dict(itunes=ITunesProvider)
|
||||
|
||||
|
||||
def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
    """Store ``provider_cls`` in the registry under the lowercased ``name``.

    An existing entry with the same key is replaced.
    """
    registry_key = name.lower()
    _METADATA_PROVIDERS[registry_key] = provider_cls
|
||||
|
||||
|
||||
def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report which registered providers can be constructed with ``config``.

    The only check performed is instantiation: a provider maps to ``True``
    when its constructor returns and to ``False`` when it raises.
    """

    def _constructible(provider_cls: Any) -> bool:
        # Instantiation is the whole availability probe; the instance is discarded.
        try:
            provider_cls(config)
        except Exception:
            return False
        return True

    return {
        key: _constructible(provider_cls)
        for key, provider_cls in _METADATA_PROVIDERS.items()
    }
|
||||
|
||||
|
||||
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
    """Instantiate the provider registered under ``name`` (case-insensitive).

    Returns ``None`` when no provider is registered under that name, or when
    the provider's constructor raises (the failure is logged to stderr).
    """
    provider_cls = _METADATA_PROVIDERS.get(name.lower())
    if provider_cls:
        try:
            return provider_cls(config)
        except Exception as exc:
            log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
    return None
|
||||
@@ -73,7 +73,12 @@ class SearchResult:
|
||||
self.columns = []
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
"""Convert to dictionary for JSON serialization.
|
||||
|
||||
Note: full_metadata is excluded from dict to keep response size small
|
||||
until the result is actually selected/used. This speeds up initial
|
||||
search result display and piping.
|
||||
"""
|
||||
data = {
|
||||
"origin": self.origin,
|
||||
"title": self.title,
|
||||
@@ -83,10 +88,10 @@ class SearchResult:
|
||||
"media_kind": self.media_kind,
|
||||
"size_bytes": self.size_bytes,
|
||||
"tags": list(self.tags) if self.tags else [],
|
||||
"full_metadata": self.full_metadata,
|
||||
}
|
||||
if self.columns:
|
||||
data["columns"] = list(self.columns)
|
||||
# Note: full_metadata is NOT included in dict to keep payload small
|
||||
return data
|
||||
|
||||
|
||||
@@ -377,6 +382,7 @@ class LibGenProvider(SearchProvider):
|
||||
if isbn:
|
||||
annotations.append(f"ISBN: {isbn}")
|
||||
|
||||
# Store full book data without mirrors in metadata to avoid serialization overhead
|
||||
search_results.append(SearchResult(
|
||||
origin="libgen",
|
||||
title=title,
|
||||
@@ -391,7 +397,8 @@ class LibGenProvider(SearchProvider):
|
||||
"year": year,
|
||||
"isbn": isbn,
|
||||
"filesize": filesize,
|
||||
"mirrors": book.get("mirrors", {}),
|
||||
# Exclude mirrors dict from metadata to reduce serialization overhead
|
||||
# Mirrors can be re-fetched if the result is selected
|
||||
"book_id": book.get("book_id", ""),
|
||||
"md5": book.get("md5", ""),
|
||||
},
|
||||
|
||||
@@ -450,30 +450,31 @@ class UnifiedBookDownloader:
|
||||
if download_func is None:
|
||||
return False, "Download function not available"
|
||||
|
||||
download_callable = cast(Callable[[str, str], bool], download_func)
|
||||
download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
|
||||
|
||||
def download_wrapper():
|
||||
return download_callable(mirror_url, str(output_path))
|
||||
|
||||
# Download (in thread)
|
||||
try:
|
||||
success = await loop.run_in_executor(None, download_wrapper)
|
||||
success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
|
||||
|
||||
if success:
|
||||
dest_path = Path(downloaded_path) if downloaded_path else output_path
|
||||
# Validate downloaded file is not HTML (common Libgen issue)
|
||||
if output_path.exists():
|
||||
if dest_path.exists():
|
||||
try:
|
||||
with open(output_path, 'rb') as f:
|
||||
with open(dest_path, 'rb') as f:
|
||||
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
|
||||
if '<!doctype' in file_start or '<html' in file_start:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
|
||||
output_path.unlink() # Delete the HTML file
|
||||
dest_path.unlink() # Delete the HTML file
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
|
||||
return True, str(output_path)
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
|
||||
return True, str(dest_path)
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user