commit f29709d951 (parent 5482ee5586)
Author: nose
Date: 2025-12-06 00:10:19 -08:00
20 changed files with 1353 additions and 419 deletions

View File

@@ -381,6 +381,81 @@ class LocalStorageBackend(StorageBackend):
"size_bytes": size_bytes,
"tags": tags,
})
if limit is not None and len(results) >= limit:
return results
# Title-tag search: treat freeform terms as title namespace queries (AND across terms)
if terms:
title_hits: dict[int, dict[str, Any]] = {}
for term in terms:
cursor.execute(
"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""",
(f"title:%{term}%", fetch_limit),
)
for file_id, file_path_str, size_bytes in cursor.fetchall():
if not file_path_str:
continue
entry = title_hits.get(file_id)
if entry:
entry["count"] += 1
if size_bytes is not None:
entry["size"] = size_bytes
else:
title_hits[file_id] = {
"path": file_path_str,
"size": size_bytes,
"count": 1,
}
if title_hits:
required = len(terms)
for file_id, info in title_hits.items():
if info.get("count") != required:
continue
file_path_str = info.get("path")
if not file_path_str or file_path_str in seen_files:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
seen_files.add(file_path_str)
size_bytes = info.get("size")
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
cursor.execute(
"""
SELECT tag FROM tags WHERE file_id = ?
""",
(file_id,),
)
tags = [row[0] for row in cursor.fetchall()]
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": str(file_path),
"target": str(file_path),
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if limit is not None and len(results) >= limit:
return results
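# Illustrative AND semantics for the title search above (assuming the upstream
# tokenizer lowercases terms): the query "dune messiah" yields terms
# ["dune", "messiah"]; a file tagged "title:Dune Messiah" matches both LIKE
# patterns 'title:%dune%' and 'title:%messiah%', so its count reaches
# len(terms) == 2 and it is emitted.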
# Also search for simple tags (without namespace) containing the query
# Only run the simple-tag pass for a single term; multi-term tag search could be supported later
@@ -697,28 +772,35 @@ class HydrusStorageBackend(StorageBackend):
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
if isinstance(tags_set, dict):
# debug(f"[HydrusBackend.search] Tags payload keys: {list(tags_set.keys())}")
# Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
def _collect(tag_list: Any) -> None:
nonlocal title, all_tags_str
if not isinstance(tag_list, list):
return
for tag in tag_list:
tag_text = str(tag) if tag else ""
if not tag_text:
continue
all_tags.append(tag_text)
all_tags_str += " " + tag_text.lower()
if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_text.split(":", 1)[1].strip()
for service_name, service_tags in tags_set.items():
# debug(f"[HydrusBackend.search] Processing service: {service_name}")
if isinstance(service_tags, dict):
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_type, tag_list in storage_tags.items():
# debug(f"[HydrusBackend.search] Tag type: {tag_type}, count: {len(tag_list) if isinstance(tag_list, list) else 0}")
if isinstance(tag_list, list):
for tag in tag_list:
tag_text = str(tag) if tag else ""
if tag_text:
# debug(f"[HydrusBackend.search] Tag: {tag_text}")
all_tags.append(tag_text)
all_tags_str += " " + tag_text.lower()
# Extract title: namespace
if tag_text.startswith("title:"):
title = tag_text[6:].strip() # Remove "title:" prefix
# debug(f"[HydrusBackend.search] ✓ Extracted title: {title}")
break
if title != f"Hydrus File {file_id}":
break
if not isinstance(service_tags, dict):
continue
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
    for tag_list in storage_tags.values():
        _collect(tag_list)
# display_tags may be keyed by status (like storage_tags) or be a flat list
display_tags = service_tags.get("display_tags", [])
if isinstance(display_tags, dict):
    for tag_list in display_tags.values():
        _collect(tag_list)
else:
    _collect(display_tags)
# Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
_collect(top_level_tags)
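# Payload shape consumed above (illustrative; the Hydrus API keys tag lists
# by status):
#   tags_set = {service_key: {"storage_tags": {"0": ["title:Example"]},
#                             "display_tags": {"0": ["title:Example"]}}}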
# Resolve extension from MIME type
mime_type = meta.get("mime")
@@ -796,202 +878,6 @@ class HydrusStorageBackend(StorageBackend):
import traceback
traceback.print_exc(file=sys.stderr)
raise
class DebridStorageBackend(StorageBackend):
"""File storage backend for Debrid services (AllDebrid, RealDebrid, etc.)."""
def __init__(self, api_key: Optional[str] = None) -> None:
"""Initialize Debrid storage backend.
Args:
api_key: API key for Debrid service (e.g., from config["Debrid"]["All-debrid"])
"""
self._api_key = api_key
def get_name(self) -> str:
return "debrid"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Debrid service.
Args:
file_path: Path to the file to upload
**kwargs: Debrid-specific options
Returns:
Debrid link/URL
Raises:
NotImplementedError: Debrid upload not yet implemented
"""
raise NotImplementedError("Debrid upload not yet implemented")
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search Debrid for files matching query.
Searches through available magnets in AllDebrid storage and returns
matching results with download links.
Args:
query: Search query string (filename or magnet name pattern)
limit: Maximum number of results to return (default: 50)
api_key: Optional override for API key (uses default if not provided)
Returns:
List of dicts with keys:
- 'name': File/magnet name
- 'title': Same as name (for compatibility)
- 'url': AllDebrid download link
- 'size': File size in bytes
- 'magnet_id': AllDebrid magnet ID
- 'origin': 'debrid'
- 'annotations': Status and seeders info
Example:
results = storage["debrid"].search("movie.mkv")
for result in results:
print(f"{result['name']} - {result['size']} bytes")
"""
api_key = kwargs.get("api_key") or self._api_key
if not api_key:
raise ValueError("'api_key' parameter required for Debrid search (not configured)")
limit = kwargs.get("limit", 50)
try:
from helper.alldebrid import AllDebridClient
debug(f"Searching AllDebrid for: {query}")
client = AllDebridClient(api_key=api_key)
# STEP 1: Get magnet status list
try:
response = client._request('magnet/status')
magnets_data = response.get('data', {})
magnets = magnets_data.get('magnets', [])
if not isinstance(magnets, list):
magnets = [magnets] if magnets else []
debug(f"[debrid_search] Got {len(magnets)} total magnets")
except Exception as e:
log(f"⚠ Failed to get magnets list: {e}", file=sys.stderr)
magnets = []
# Filter by query for relevant magnets
query_lower = query.lower()
matching_magnet_ids = []
magnet_info_map = {} # Store status info for later
# "*" means "match all" - include all magnets
match_all = query_lower == "*"
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
for magnet in magnets:
filename = magnet.get('filename', '').lower()
status_code = magnet.get('statusCode', 0)
magnet_id = magnet.get('id')
# Only include ready or nearly-ready magnets (skip error states 5+)
if status_code not in [0, 1, 2, 3, 4]:
continue
# Match query against filename (or match all if query is "*")
if not match_all:
if not all(term in filename for term in terms):
continue
matching_magnet_ids.append(magnet_id)
magnet_info_map[magnet_id] = magnet
debug(f"[debrid_search] ✓ Matched magnet {magnet_id}: {filename}")
debug(f"[debrid_search] Found {len(matching_magnet_ids)} matching magnets")
results = []
# Return one result per magnet (not per file)
# This keeps search results clean and lets the user download the entire magnet at once
for magnet_id in matching_magnet_ids:
magnet_status = magnet_info_map.get(magnet_id, {})
filename = magnet_status.get('filename', 'Unknown')
status = magnet_status.get('status', 'Unknown')
status_code = magnet_status.get('statusCode', 0)
size = magnet_status.get('size', 0)
seeders = magnet_status.get('seeders', 0)
# Format size nicely
size_label = f"{size / (1024**3):.2f}GB" if size > 0 else "Unknown"
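# e.g. size = 1_610_612_736 bytes -> size_label "1.50GB"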
# Create one result per magnet with aggregated info
results.append({
'name': filename,
'title': filename,
'url': '', # No direct file link for the magnet itself
'size': size,
'size_bytes': size,
'magnet_id': magnet_id,
'origin': 'debrid',
'annotations': [
status,
f"{seeders} seeders",
size_label,
],
'target': '',  # resolved later from magnet_id when the user downloads
})
debug(f"Found {len(results)} result(s) on AllDebrid")
return results[:limit]
except Exception as exc:
log(f"❌ Debrid search failed: {exc}", file=sys.stderr)
raise
def _flatten_file_tree(self, files: list[Any], prefix: str = '') -> list[Dict[str, Any]]:
"""Flatten AllDebrid's nested file tree structure.
AllDebrid returns files in a tree structure with folders ('e' key).
This flattens it to a list of individual files.
Args:
files: AllDebrid file tree structure
prefix: Current path prefix (used recursively)
Returns:
List of flattened file entries with 'name', 'size', 'link' keys
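Example (illustrative):
    tree = [{'n': 'Season 1', 'e': [{'n': 'ep1.mkv', 's': 100, 'l': 'https://host/ep1.mkv'}]}]
    self._flatten_file_tree(tree)
    # -> [{'name': 'Season 1/ep1.mkv', 'size': 100, 'link': 'https://host/ep1.mkv'}]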
"""
result = []
if not isinstance(files, list):
return result
for item in files:
if not isinstance(item, dict):
continue
name = item.get('n', '')
# Check if it's a folder (has 'e' key with entries)
if 'e' in item:
# Recursively flatten subfolder
subfolder_path = f"{prefix}/{name}" if prefix else name
subitems = item.get('e', [])
result.extend(self._flatten_file_tree(subitems, subfolder_path))
else:
# It's a file - add it to results
file_path = f"{prefix}/{name}" if prefix else name
result.append({
'name': file_path,
'size': item.get('s', 0),
'link': item.get('l', ''),
})
return result
class MatrixStorageBackend(StorageBackend):
"""File storage backend for Matrix (Element) chat rooms."""
@@ -1344,7 +1230,6 @@ class FileStorage:
# Search with searchable backends (uses configured locations)
results = storage["hydrus"].search("music")
results = storage["local"].search("song") # Uses config["Local"]["path"]
results = storage["debrid"].search("movie")
"""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
@@ -1356,13 +1241,11 @@ class FileStorage:
config = config or {}
# Extract backend-specific settings from config
from config import get_local_storage_path, get_debrid_api_key
from config import get_local_storage_path
local_path = get_local_storage_path(config)
local_path_str = str(local_path) if local_path else None
debrid_api_key = get_debrid_api_key(config)
self._backends: Dict[str, StorageBackend] = {}
# Always include local backend (even if no default path configured)
@@ -1372,10 +1255,6 @@ class FileStorage:
# Include Hydrus backend (configuration optional)
self._backends["hydrus"] = HydrusStorageBackend(config=config)
# Include Debrid backend (API key optional - will raise on use if not provided)
if debrid_api_key:
self._backends["debrid"] = DebridStorageBackend(api_key=debrid_api_key)
# Include Matrix backend
self._backends["matrix"] = MatrixStorageBackend()

View File

@@ -71,10 +71,208 @@ class ITunesProvider(MetadataProvider):
return items
class OpenLibraryMetadataProvider(MetadataProvider):
"""Metadata provider for OpenLibrary book metadata."""
@property
def name(self) -> str: # type: ignore[override]
return "openlibrary"
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
query_clean = (query or "").strip()
if not query_clean:
return []
try:
# Prefer ISBN-specific search when the query looks like one
# (an ISBN-10 may end in an "X" check digit, which isdigit() alone rejects)
digits = query_clean.replace("-", "")
is_isbn10_x = len(digits) == 10 and digits[:-1].isdigit() and digits[-1].upper() == "X"
if len(digits) in (10, 13) and (digits.isdigit() or is_isbn10_x):
    q = f"isbn:{digits}"
else:
    q = query_clean
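# Illustrative: "978-0-14-032872-1" -> digits "9780140328721" -> q = "isbn:9780140328721"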
resp = requests.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": limit},
timeout=10,
)
resp.raise_for_status()
data = resp.json()
except Exception as exc:
log(f"OpenLibrary search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for doc in data.get("docs", [])[:limit]:
authors = doc.get("author_name") or []
publisher = ""
publishers = doc.get("publisher") or []
if isinstance(publishers, list) and publishers:
publisher = publishers[0]
# Prefer 13-digit ISBN when available, otherwise 10-digit
isbn_list = doc.get("isbn") or []
isbn_13 = next((i for i in isbn_list if len(str(i)) == 13), None)
isbn_10 = next((i for i in isbn_list if len(str(i)) == 10), None)
# Derive OLID from key
olid = ""
key = doc.get("key", "")
if isinstance(key, str) and key:
olid = key.split("/")[-1]
items.append({
"title": doc.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": str(doc.get("first_publish_year") or ""),
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": {
"isbn_13": isbn_13,
"isbn_10": isbn_10,
"openlibrary": olid,
"oclc": (doc.get("oclc_numbers") or [None])[0],
"lccn": (doc.get("lccn") or [None])[0],
},
"description": None,
})
return items
def to_tags(self, item: Dict[str, Any]) -> List[str]:
tags: List[str] = []
title = item.get("title")
authors = item.get("authors") or []
publisher = item.get("publisher")
year = item.get("year")
description = item.get("description") or ""
if title:
tags.append(f"title:{title}")
for author in authors:
if author:
tags.append(f"author:{author}")
if publisher:
tags.append(f"publisher:{publisher}")
if year:
tags.append(f"year:{year}")
if description:
tags.append(f"description:{description[:200]}")
identifiers = item.get("identifiers") or {}
for key, value in identifiers.items():
if value:
tags.append(f"{key}:{value}")
tags.append(f"source:{self.name}")
return tags
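# Illustrative to_tags() output for a "Fantastic Mr Fox" hit (values assumed):
#   ["title:Fantastic Mr Fox", "author:Roald Dahl", "publisher:Puffin",
#    "year:1970", "isbn_13:9780140328721", "openlibrary:OL45883W",
#    "source:openlibrary"]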
class GoogleBooksMetadataProvider(MetadataProvider):
"""Metadata provider for Google Books volumes API."""
@property
def name(self) -> str: # type: ignore[override]
return "googlebooks"
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
query_clean = (query or "").strip()
if not query_clean:
return []
# Prefer ISBN queries when possible
# (an ISBN-10 may end in an "X" check digit, which isdigit() alone rejects)
digits = query_clean.replace("-", "")
is_isbn10_x = len(digits) == 10 and digits[:-1].isdigit() and digits[-1].upper() == "X"
if len(digits) in (10, 13) and (digits.isdigit() or is_isbn10_x):
    q = f"isbn:{digits}"
else:
    q = query_clean
try:
resp = requests.get(
"https://www.googleapis.com/books/v1/volumes",
params={"q": q, "maxResults": limit},
timeout=10,
)
resp.raise_for_status()
payload = resp.json()
except Exception as exc:
log(f"Google Books search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for volume in payload.get("items", [])[:limit]:
info = volume.get("volumeInfo") or {}
authors = info.get("authors") or []
publisher = info.get("publisher", "")
published_date = info.get("publishedDate", "")
year = str(published_date)[:4] if published_date else ""
identifiers_raw = info.get("industryIdentifiers") or []
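# industryIdentifiers arrive as e.g.:
#   [{"type": "ISBN_13", "identifier": "9780140328721"},
#    {"type": "ISBN_10", "identifier": "0140328726"}]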
identifiers: Dict[str, Optional[str]] = {"googlebooks": volume.get("id")}
for ident in identifiers_raw:
if not isinstance(ident, dict):
continue
ident_type = ident.get("type", "").lower()
ident_value = ident.get("identifier")
if not ident_value:
continue
if ident_type == "isbn_13":
identifiers.setdefault("isbn_13", ident_value)
elif ident_type == "isbn_10":
identifiers.setdefault("isbn_10", ident_value)
else:
identifiers.setdefault(ident_type, ident_value)
items.append({
"title": info.get("title") or "",
"artist": ", ".join(authors) if authors else "",
"album": publisher,
"year": year,
"provider": self.name,
"authors": authors,
"publisher": publisher,
"identifiers": identifiers,
"description": info.get("description", ""),
})
return items
def to_tags(self, item: Dict[str, Any]) -> List[str]:
tags: List[str] = []
title = item.get("title")
authors = item.get("authors") or []
publisher = item.get("publisher")
year = item.get("year")
description = item.get("description") or ""
if title:
tags.append(f"title:{title}")
for author in authors:
if author:
tags.append(f"author:{author}")
if publisher:
tags.append(f"publisher:{publisher}")
if year:
tags.append(f"year:{year}")
if description:
tags.append(f"description:{description[:200]}")
identifiers = item.get("identifiers") or {}
for key, value in identifiers.items():
if value:
tags.append(f"{key}:{value}")
tags.append(f"source:{self.name}")
return tags
# Registry ---------------------------------------------------------------
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"itunes": ITunesProvider,
"openlibrary": OpenLibraryMetadataProvider,
"googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider,
}
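# Illustrative lookup by name ("google" aliases the Google Books provider;
# a no-arg constructor is assumed):
#   provider_cls = _METADATA_PROVIDERS.get("google")  # GoogleBooksMetadataProvider
#   items = provider_cls().search("dune", limit=5) if provider_cls else []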

View File

@@ -293,13 +293,7 @@ class LocalStorageProvider(SearchProvider):
class LibGenProvider(SearchProvider):
"""Search provider for Library Genesis books."""
# Define fields to display (note: LibGen doesn't have API field mapping like OpenLibrary)
# These are extracted from the book dict directly
RESULT_FIELDS = [
("title", "Title", None),
("author", "Author(s)", None),
("year", "Year", None),
]
RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
@@ -363,15 +357,22 @@ class LibGenProvider(SearchProvider):
search_results = []
for idx, book in enumerate(books, 1):
# Build columns dynamically from RESULT_FIELDS
columns = self.build_columns_from_doc(book, idx)
title = book.get("title", "Unknown")
author = book.get("author", "Unknown")
year = book.get("year", "Unknown")
pages = book.get("pages") or book.get("pages_str") or ""
extension = book.get("extension", "") or book.get("ext", "")
filesize = book.get("filesize_str", "Unknown")
isbn = book.get("isbn", "")
mirror_url = book.get("mirror_url", "")
# Columns: Title, Author, Pages, Ext
columns = [
("Title", title),
("Author", author),
("Pages", str(pages)),
("Ext", str(extension)),
]
# Build detail with author and year
detail = f"By: {author}"
@@ -1077,12 +1078,7 @@ class OpenLibraryProvider(SearchProvider):
"""Search provider for OpenLibrary."""
# Define fields to request from API and how to display them
RESULT_FIELDS = [
("title", "Title", None),
("author_name", "Author", lambda x: ", ".join(x) if isinstance(x, list) else x),
("first_publish_year", "Year", None),
("status", "Status", None),
]
RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
@@ -1146,10 +1142,25 @@ class OpenLibraryProvider(SearchProvider):
return []
# Default to title/general search
requested_fields = [
"title",
"author_name",
"first_publish_year",
"number_of_pages_median",
"isbn",
"oclc_numbers",
"lccn",
"language",
"key",
"edition_key",
"ebook_access",
"ia",
"has_fulltext",
]
params = {
"q": query_clean,
"limit": limit,
"fields": f"{self.get_api_fields_string()},isbn,oclc_numbers,lccn,number_of_pages_median,language,key,ebook_access,ia,has_fulltext",
"fields": ",".join(requested_fields),
}
response = requests.get(search_url, params=params, timeout=9)
@@ -1158,16 +1169,18 @@ class OpenLibraryProvider(SearchProvider):
search_results = []
for idx, doc in enumerate(data.get("docs", []), 1):
# Extract OLID first (needed for metadata)
olid = doc.get("key", "").split("/")[-1]
# Prefer edition_key (books/OLxxxM). Fallback to work key.
edition_keys = doc.get("edition_key") or []
olid = ""
if isinstance(edition_keys, list) and edition_keys:
olid = str(edition_keys[0]).strip()
if not olid:
olid = doc.get("key", "").split("/")[-1]
# Determine status/availability
status, archive_id = self._derive_status(doc)
doc["status"] = status
# Build columns dynamically from RESULT_FIELDS (now includes status)
columns = self.build_columns_from_doc(doc, idx)
# Extract additional metadata
title = doc.get("title", "Unknown")
authors = doc.get("author_name", ["Unknown"])
@@ -1183,6 +1196,13 @@ class OpenLibraryProvider(SearchProvider):
language = languages[0] if languages else ""
author_str = ", ".join(authors) if authors else "Unknown"
# Columns: Title, Author, Pages
columns = [
("Title", title),
("Author", author_str),
("Pages", str(pages or "")),
]
# Build detail with author and year
detail = f"By: {author_str}"