"""File storage abstraction layer for uploading files to different services.
|
||
|
||
Supports multiple backend storage services (0x0.st, local directories, Hydrus, etc.)
|
||
with a unified interface.
|
||
|
||
Example:
|
||
storage = FileStorage()
|
||
|
||
# Upload to 0x0.st
|
||
url = storage["0x0"].upload(Path("file.mp3"))
|
||
|
||
# Copy to local directory
|
||
path = storage["local"].upload(Path("file.mp3"), location="/home/user/files")
|
||
|
||
# Upload to Hydrus
|
||
hash_result = storage["hydrus"].upload(file_path, config=config)
|
||
"""

from __future__ import annotations

import re
import shutil
import sys
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, Optional

import requests

from helper.logger import log, debug


class StorageBackend(ABC):
    """Abstract base class for file storage backends.

    Backends can optionally support searching by implementing the search() method.
    """

    @abstractmethod
    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload a file and return a result identifier (URL, hash, path, etc.).

        Args:
            file_path: Path to the file to upload
            **kwargs: Backend-specific options

        Returns:
            Result identifier (e.g., a URL, a Hydrus hash, or a local path)

        Raises:
            Exception: If upload fails
        """

    @abstractmethod
    def get_name(self) -> str:
        """Get the unique name of this backend."""

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search for files in backends that support it.

        This method is optional and only implemented by searchable backends
        (e.g., Hydrus, Debrid, Soulseek).

        Args:
            query: Search query string
            **kwargs: Backend-specific search options

        Returns:
            List of search results, each as a dict with backend-specific fields.
            Common fields: 'name', 'size', 'hash', 'url', 'id', etc.

        Raises:
            NotImplementedError: If the backend doesn't support searching
            Exception: If the search fails

        Example:
            results = storage["hydrus"].search("music artist:john")
            for result in results:
                print(result['name'], result['hash'])
        """
        raise NotImplementedError(f"{self.get_name()} backend does not support searching")

    def supports_search(self) -> bool:
        """Check if this backend supports searching.

        Returns:
            True if search() is implemented, False otherwise
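
        Example:
            # Usage sketch; assumes 'storage' is a FileStorage instance.
            if storage["hydrus"].supports_search():
                results = storage["hydrus"].search("music")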
        """
        return self.search.__func__ is not StorageBackend.search


class LocalStorageBackend(StorageBackend):
    """File storage backend for local file system copy."""

    def __init__(self, location: Optional[str] = None) -> None:
        """Initialize local storage backend.

        Args:
            location: Default directory path for storage operations
        """
        self._location = location

    def get_name(self) -> str:
        return "local"

    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Copy or move a file into a local directory.

        Args:
            file_path: Path to the file to upload
            location: Destination directory path (uses the default if not provided)
            move: When True, move the file instead of copying (default: False)

        Returns:
            Absolute path to the copied/moved file (or to the existing file
            if a duplicate is already present in local storage)

        Raises:
            ValueError: If location is not provided and no default is configured
            Exception: If the copy fails
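
        Example:
            # Usage sketch; assumes a default location was configured.
            dest = storage["local"].upload(Path("song.mp3"), move=True)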
        """
        from helper.utils import unique_path as utils_unique_path
        from helper.utils import sha256_file
        from helper.local_library import LocalLibraryDB

        location = kwargs.get("location") or self._location
        move_file = bool(kwargs.get("move"))
        if not location:
            raise ValueError("'location' parameter required for local storage (not configured)")

        try:
            # Compute file hash
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}", file=sys.stderr)

            dest_dir = Path(location).expanduser()
            dest_dir.mkdir(parents=True, exist_ok=True)

            # Check for duplicate files using LocalLibraryDB (fast - uses the index)
            try:
                db = LocalLibraryDB(dest_dir)
                existing_path = db.search_by_hash(file_hash)
                if existing_path and existing_path.exists():
                    log(
                        f"✓ File already in local storage: {existing_path}",
                        file=sys.stderr,
                    )
                    return str(existing_path)
            except Exception as exc:
                log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)

            dest_file = dest_dir / file_path.name
            dest_file = utils_unique_path(dest_file)

            if move_file:
                shutil.move(str(file_path), dest_file)
                debug(f"Local move: {dest_file}", file=sys.stderr)
            else:
                shutil.copy2(file_path, dest_file)
                debug(f"Local copy: {dest_file}", file=sys.stderr)
            return str(dest_file)
        except Exception as exc:
            debug(f"Local copy failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search the local database for files by tag or filename.

        Args:
            query: Search string. Supported forms:
                - Tag namespace search: "creator:Mac*" matches namespaced tags
                  in the database (wildcards allowed in the value part)
                - Plain text: matches filenames and simple (un-namespaced) tags
                - "*": matches all files
                If nothing matches in the database, the search falls back to a
                filesystem scan of the search directory.
            location: Directory to search in (uses the default if not provided)
            recursive: Search subdirectories (default: True)
            limit: Maximum number of results to return

        Returns:
            List of dicts with 'name', 'path', 'size' fields
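
        Example:
            # Usage sketch: namespace search with a wildcard value.
            results = storage["local"].search("creator:mac*", limit=10)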
        """
        from fnmatch import fnmatch
        from helper.local_library import LocalLibraryDB

        location = kwargs.get("location") or self._location
        if not location:
            raise ValueError("'location' parameter required for local search (not configured)")

        limit = kwargs.get("limit")
        try:
            limit = int(limit) if limit is not None else None
        except (TypeError, ValueError):
            limit = None
        if isinstance(limit, int) and limit <= 0:
            limit = None

        query_lower = query.lower()
        match_all = query_lower == "*"
        results = []
        search_dir = Path(location).expanduser()
        debug(f"Searching local storage at: {search_dir}")

        try:
            if not search_dir.exists():
                debug(f"Search directory does not exist: {search_dir}")
                return results

            # Try database search first (much faster than a filesystem scan)
            try:
                db = LocalLibraryDB(search_dir)
                cursor = db.connection.cursor()

                # Check if query is a tag namespace search (format: "namespace:pattern")
                if ":" in query and not query.startswith(":"):
                    namespace, pattern = query.split(":", 1)
                    namespace = namespace.strip().lower()
                    pattern = pattern.strip().lower()
                    debug(f"Performing namespace search: {namespace}:{pattern}")

                    # Search for tags matching the namespace and pattern
                    query_pattern = f"{namespace}:%"

                    cursor.execute("""
                        SELECT DISTINCT f.id, f.file_path, f.file_size
                        FROM files f
                        JOIN tags t ON f.id = t.file_id
                        WHERE LOWER(t.tag) LIKE ?
                        ORDER BY f.file_path
                        LIMIT ?
                    """, (query_pattern, limit or 1000))

                    rows = cursor.fetchall()
                    debug(f"Found {len(rows)} potential matches in DB")

                    # Filter results by pattern match
                    for file_id, file_path_str, size_bytes in rows:
                        if not file_path_str:
                            continue

                        # Get the file's tags and check if any match the pattern
                        cursor.execute("""
                            SELECT DISTINCT tag FROM tags
                            WHERE file_id = ?
                            AND LOWER(tag) LIKE ?
                        """, (file_id, query_pattern))

                        tags = [row[0] for row in cursor.fetchall()]

                        # Check if any tag matches the pattern (case-insensitive wildcard)
                        for tag in tags:
                            tag_lower = tag.lower()
                            # Extract the value part after "namespace:"
                            if tag_lower.startswith(f"{namespace}:"):
                                value = tag_lower[len(namespace)+1:]
                                # Use fnmatch for wildcard matching
                                if fnmatch(value, pattern):
                                    file_path = Path(file_path_str)
                                    if file_path.exists():
                                        path_str = str(file_path)
                                        if size_bytes is None:
                                            size_bytes = file_path.stat().st_size

                                        # Fetch all tags for this file
                                        cursor.execute("""
                                            SELECT tag FROM tags WHERE file_id = ?
                                        """, (file_id,))
                                        all_tags = [row[0] for row in cursor.fetchall()]

                                        results.append({
                                            "name": file_path.stem,
                                            "title": file_path.stem,
                                            "ext": file_path.suffix.lstrip('.'),
                                            "path": path_str,
                                            "target": path_str,
                                            "origin": "local",
                                            "size": size_bytes,
                                            "size_bytes": size_bytes,
                                            "tags": all_tags,
                                        })
                                    else:
                                        debug(f"File missing on disk: {file_path}")
                                    break  # Don't add the same file multiple times

                        if limit is not None and len(results) >= limit:
                            return results

                elif not match_all:
                    # Search by filename or simple tags (namespace-agnostic for plain text)
                    # For plain text search, match:
                    # 1. Filenames containing the query
                    # 2. Simple tags (without namespace) containing the query
                    # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
                    # Use an explicit namespace search for that (e.g., "channel:joe*")

                    # Split query into terms for AND logic
                    terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
                    if not terms:
                        terms = [query_lower]

                    debug(f"Performing filename/tag search for terms: {terms}")

                    # Fetch more results than requested to allow for filtering
                    fetch_limit = (limit or 45) * 50

                    # 1. Filename search (AND logic)
                    conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
                    params = [f"%{t}%" for t in terms]
                    where_clause = " AND ".join(conditions)

                    cursor.execute(f"""
                        SELECT DISTINCT f.id, f.file_path, f.file_size
                        FROM files f
                        WHERE {where_clause}
                        ORDER BY f.file_path
                        LIMIT ?
                    """, (*params, fetch_limit))

                    rows = cursor.fetchall()
                    debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")

                    # Compile a regex for whole-word matching (only if single term, otherwise skip)
                    word_regex = None
                    if len(terms) == 1:
                        term = terms[0]
                        # Check if the term contains wildcard characters
                        has_wildcard = '*' in term or '?' in term

                        if has_wildcard:
                            # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
                            try:
                                from fnmatch import translate
                                word_regex = re.compile(translate(term), re.IGNORECASE)
                            except Exception:
                                word_regex = None
                        else:
                            # Use word boundaries for exact terms (backwards compatibility)
                            try:
                                word_regex = re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)
                            except Exception:
                                word_regex = None

                    seen_files = set()
                    for file_id, file_path_str, size_bytes in rows:
                        if not file_path_str or file_path_str in seen_files:
                            continue

                        # Apply the whole-word filter on the filename if single term
                        if word_regex:
                            p = Path(file_path_str)
                            if not word_regex.search(p.name):
                                continue
                        seen_files.add(file_path_str)

                        file_path = Path(file_path_str)
                        if file_path.exists():
                            path_str = str(file_path)
                            if size_bytes is None:
                                size_bytes = file_path.stat().st_size

                            # Fetch tags for this file
                            cursor.execute("""
                                SELECT tag FROM tags WHERE file_id = ?
                            """, (file_id,))
                            tags = [row[0] for row in cursor.fetchall()]

                            results.append({
                                "name": file_path.stem,
                                "title": file_path.stem,
                                "ext": file_path.suffix.lstrip('.'),
                                "path": path_str,
                                "target": path_str,
                                "origin": "local",
                                "size": size_bytes,
                                "size_bytes": size_bytes,
                                "tags": tags,
                            })

                    # 2. Also search for simple tags (without namespace) containing the
                    # query. Multi-term queries fall back to a single substring pattern
                    # here ("term1 term2"), which is less useful for AND logic across
                    # multiple tags but consistent with previous behavior.
                    query_pattern = f"%{query_lower}%"

                    cursor.execute("""
                        SELECT DISTINCT f.id, f.file_path, f.file_size
                        FROM files f
                        JOIN tags t ON f.id = t.file_id
                        WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
                        ORDER BY f.file_path
                        LIMIT ?
                    """, (query_pattern, limit or 1000))

                    tag_rows = cursor.fetchall()
                    for file_id, file_path_str, size_bytes in tag_rows:
                        if not file_path_str or file_path_str in seen_files:
                            continue
                        seen_files.add(file_path_str)

                        file_path = Path(file_path_str)
                        if file_path.exists():
                            path_str = str(file_path)
                            if size_bytes is None:
                                size_bytes = file_path.stat().st_size

                            # Fetch tags for this file
                            cursor.execute("""
                                SELECT tag FROM tags WHERE file_id = ?
                            """, (file_id,))
                            tags = [row[0] for row in cursor.fetchall()]

                            results.append({
                                "name": file_path.stem,
                                "title": file_path.stem,
                                "ext": file_path.suffix.lstrip('.'),
                                "path": path_str,
                                "target": path_str,
                                "origin": "local",
                                "size": size_bytes,
                                "size_bytes": size_bytes,
                                "tags": tags,
                            })

                    if limit is not None and len(results) >= limit:
                        return results

                else:
                    # Match all - get all files from the database
                    cursor.execute("""
                        SELECT id, file_path, file_size
                        FROM files
                        ORDER BY file_path
                        LIMIT ?
                    """, (limit or 1000,))

                    rows = cursor.fetchall()
                    for file_id, file_path_str, size_bytes in rows:
                        if file_path_str:
                            file_path = Path(file_path_str)
                            if file_path.exists():
                                path_str = str(file_path)
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size

                                # Fetch tags for this file
                                cursor.execute("""
                                    SELECT tag FROM tags WHERE file_id = ?
                                """, (file_id,))
                                tags = [row[0] for row in cursor.fetchall()]

                                results.append({
                                    "name": file_path.stem,
                                    "title": file_path.stem,
                                    "ext": file_path.suffix.lstrip('.'),
                                    "path": path_str,
                                    "target": path_str,
                                    "origin": "local",
                                    "size": size_bytes,
                                    "size_bytes": size_bytes,
                                    "tags": tags,
                                })

                if results:
                    debug(f"Returning {len(results)} results from DB")
                    return results
                else:
                    debug("No results found in DB, falling back to filesystem scan")

            except Exception as e:
                log(f"⚠️ Database search failed: {e}", file=sys.stderr)
                debug(f"DB search exception details: {e}")

            # Fall back to a filesystem scan if the database search fails or returns nothing
            debug("Starting filesystem scan...")
            recursive = kwargs.get("recursive", True)
            pattern = "**/*" if recursive else "*"

            # Split query into terms for AND logic
            terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
            if not terms:
                terms = [query_lower]

            count = 0
            for file_path in search_dir.glob(pattern):
                if not file_path.is_file():
                    continue
                lower_name = file_path.name.lower()
                # Skip sidecar files
                if lower_name.endswith(('.tags', '.metadata', '.notes', '.tags.txt')):
                    continue

                if not match_all:
                    # Check that ALL terms are present in the filename.
                    # A single term with wildcards uses fnmatch; otherwise substring matching.
                    if len(terms) == 1 and ('*' in terms[0] or '?' in terms[0]):
                        # Wildcard pattern matching for a single term
                        # (fnmatch is already imported at the top of this method)
                        if not fnmatch(lower_name, terms[0]):
                            continue
                    else:
                        # Substring matching for all terms (AND logic)
                        if not all(term in lower_name for term in terms):
                            continue

                size_bytes = file_path.stat().st_size
                path_str = str(file_path)
                results.append({
                    "name": file_path.stem,
                    "title": file_path.stem,
                    "ext": file_path.suffix.lstrip('.'),
                    "path": path_str,
                    "target": path_str,
                    "origin": "local",
                    "size": size_bytes,
                    "size_bytes": size_bytes,
                })
                count += 1

                if limit is not None and len(results) >= limit:
                    break

            debug(f"Filesystem scan found {count} matches")

        except Exception as exc:
            log(f"❌ Local search failed: {exc}", file=sys.stderr)
            raise

        return results


class HydrusStorageBackend(StorageBackend):
    """File storage backend for the Hydrus client."""

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Initialize Hydrus storage backend.

        Args:
            config: Configuration dict with Hydrus settings (HydrusNetwork section)
        """
        self._config = config or {}

    def get_name(self) -> str:
        return "hydrus"

    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload a file to Hydrus.

        Args:
            file_path: Path to the file to upload
            tags: Optional list of tags to add (uses default config if not provided)
            config: Optional override for config (uses default if not provided)

        Returns:
            File hash from Hydrus

        Raises:
            Exception: If upload fails
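
        Example:
            # Usage sketch; assumes a reachable Hydrus client in config.
            file_hash = storage["hydrus"].upload(Path("file.mp3"), tags=["music"])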
        """
        from helper import hydrus as hydrus_wrapper
        from helper.utils import sha256_file

        config = kwargs.get("config") or self._config
        if not config:
            raise ValueError("'config' parameter required for Hydrus storage (not configured)")

        tags = kwargs.get("tags", [])

        try:
            # Compute file hash
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}")

            # Build Hydrus client
            client = hydrus_wrapper.get_client(config)
            if client is None:
                raise Exception("Hydrus client unavailable")

            # Check if the file already exists in Hydrus
            try:
                metadata = client.fetch_file_metadata(hashes=[file_hash])
                if metadata and isinstance(metadata, dict):
                    files = metadata.get("file_metadata", [])
                    if files:
                        log(
                            f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}",
                            file=sys.stderr,
                        )
                        # Even if duplicate, we should add tags if provided
                        if tags:
                            try:
                                service_name = hydrus_wrapper.get_tag_service_name(config)
                            except Exception:
                                service_name = "my tags"

                            try:
                                debug(f"Adding {len(tags)} tag(s) to existing file in Hydrus: {tags}")
                                client.add_tags(file_hash, tags, service_name)
                                log(f"✅ Tags added to existing file via '{service_name}'", file=sys.stderr)
                            except Exception as exc:
                                log(f"⚠️ Failed to add tags to existing file: {exc}", file=sys.stderr)

                        return file_hash
            except Exception:
                # Metadata lookup failed; fall through to a normal upload
                pass

            # Upload the file to Hydrus
            log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
            response = client.add_file(file_path)

            # Extract the hash from the response
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]

            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")

            file_hash = hydrus_hash
            log(f"Hydrus: {file_hash}", file=sys.stderr)

            # Add tags if provided
            if tags:
                try:
                    service_name = hydrus_wrapper.get_tag_service_name(config)
                except Exception:
                    service_name = "my tags"

                try:
                    debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}")
                    client.add_tags(file_hash, tags, service_name)
                    log(f"✅ Tags added via '{service_name}'", file=sys.stderr)
                except Exception as exc:
                    log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)

            return file_hash

        except Exception as exc:
            log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search the Hydrus database for files matching a query.

        Args:
            query: Search query (tags, filenames, hashes, etc.)
            limit: Maximum number of results to return (default: 100)
            config: Optional override for config (uses default if not provided)

        Returns:
            List of dicts with 'name', 'hash', 'size', 'tags' fields

        Example:
            results = storage["hydrus"].search("artist:john_doe music")
            results = storage["hydrus"].search("Simple Man")
        """
        from helper import hydrus as hydrus_wrapper

        config = kwargs.get("config") or self._config
        if not config:
            raise ValueError("'config' parameter required for Hydrus search (not configured)")

        limit = kwargs.get("limit", 100)

        try:
            client = hydrus_wrapper.get_client(config)
            if client is None:
                raise Exception("Hydrus client unavailable")

            debug(f"Searching Hydrus for: {query}")

            # Parse the query into tags, handling both simple tags and complex
            # queries. "*" means "match all" - use Hydrus's system:everything tag.
            if query.strip() == "*":
                tags = ["system:everything"]
            else:
                query_lower = query.lower().strip()
                if ':' not in query_lower:
                    # No namespace provided: search all files, then filter by
                    # title/tags containing the query
                    tags = ["system:everything"]
                else:
                    # Explicit namespace (e.g., "creator:john" or "system:has_audio"):
                    # use it as a tag search directly
                    tags = [query_lower]

            if not tags:
                debug("Found 0 result(s)")
                return []

            # Search files with the tags
            search_result = client.search_files(
                tags=tags,
                return_hashes=True,
                return_file_ids=True
            )

            # Extract file IDs from the search result
            file_ids = search_result.get("file_ids", [])
            hashes = search_result.get("hashes", [])

            if not file_ids and not hashes:
                debug("Found 0 result(s)")
                return []

            # Fetch metadata for the found files
            results = []
            query_lower = query.lower().strip()
            # Split by comma or space for AND logic
            search_terms = set(query_lower.replace(',', ' ').split())  # For substring matching

            if file_ids:
                metadata = client.fetch_file_metadata(file_ids=file_ids)
                metadata_list = metadata.get("metadata", [])

                for meta in metadata_list:
                    if len(results) >= limit:
                        break

                    file_id = meta.get("file_id")
                    hash_hex = meta.get("hash")
                    size = meta.get("size", 0)

                    # Get the tags for this file and extract a title
                    tags_set = meta.get("tags", {})
                    all_tags = []
                    title = f"Hydrus File {file_id}"  # Default fallback
                    all_tags_str = ""  # For substring matching

                    if isinstance(tags_set, dict):
                        for service_name, service_tags in tags_set.items():
                            if isinstance(service_tags, dict):
                                storage_tags = service_tags.get("storage_tags", {})
                                if isinstance(storage_tags, dict):
                                    for tag_type, tag_list in storage_tags.items():
                                        if isinstance(tag_list, list):
                                            for tag in tag_list:
                                                tag_text = str(tag) if tag else ""
                                                if tag_text:
                                                    all_tags.append(tag_text)
                                                    all_tags_str += " " + tag_text.lower()
                                                    # Extract the "title:" namespace
                                                    if tag_text.startswith("title:"):
                                                        title = tag_text[6:].strip()  # Remove "title:" prefix
                                                        # Stop scanning once a title is found
                                                        break
                                        if title != f"Hydrus File {file_id}":
                                            break

                    # Filter results based on query type. If the user provided an
                    # explicit namespace (has ':'), skip substring filtering - the
                    # Hydrus tag search has already filtered the results.
                    has_namespace = ':' in query_lower

                    if has_namespace:
                        # Explicit namespace search - include this result as-is
                        results.append({
                            "hash": hash_hex,
                            "hash_hex": hash_hex,
                            "target": hash_hex,
                            "name": title,
                            "title": title,
                            "size": size,
                            "size_bytes": size,
                            "origin": "hydrus",
                            "tags": all_tags,
                            "file_id": file_id,
                            "mime": meta.get("mime"),
                        })
                    else:
                        # Free-form search: match only if ALL search terms are found
                        # in the title or tags (AND logic), using whole-word matching.

                        # Combine title and tags for searching
                        searchable_text = (title + " " + all_tags_str).lower()

                        match = True
                        if query_lower != "*":
                            for term in search_terms:
                                # Whole-word regex: \bterm\b (escape the term to
                                # handle special characters)
                                pattern = r'\b' + re.escape(term) + r'\b'
                                if not re.search(pattern, searchable_text):
                                    match = False
                                    break

                        if match:
                            results.append({
                                "hash": hash_hex,
                                "hash_hex": hash_hex,
                                "target": hash_hex,
                                "name": title,
                                "title": title,
                                "size": size,
                                "size_bytes": size,
                                "origin": "hydrus",
                                "tags": all_tags,
                                "file_id": file_id,
                                "mime": meta.get("mime"),
                            })

            debug(f"Found {len(results)} result(s)")
            return results[:limit]

        except Exception as exc:
            log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            raise


class DebridStorageBackend(StorageBackend):
    """File storage backend for Debrid services (AllDebrid, RealDebrid, etc.)."""

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Initialize Debrid storage backend.

        Args:
            api_key: API key for the Debrid service (e.g., from config["Debrid"]["All-debrid"])
        """
        self._api_key = api_key

    def get_name(self) -> str:
        return "debrid"

    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload a file to a Debrid service.

        Args:
            file_path: Path to the file to upload
            **kwargs: Debrid-specific options

        Returns:
            Debrid link/URL

        Raises:
            NotImplementedError: Debrid upload not yet implemented
        """
        raise NotImplementedError("Debrid upload not yet implemented")

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search Debrid for files matching a query.

        Searches through the available magnets in AllDebrid storage and returns
        matching results with download links.

        Args:
            query: Search query string (filename or magnet name pattern)
            limit: Maximum number of results to return (default: 50)
            api_key: Optional override for the API key (uses default if not provided)

        Returns:
            List of dicts with keys:
            - 'name': File/magnet name
            - 'title': Same as name (for compatibility)
            - 'url': AllDebrid download link
            - 'size': File size in bytes
            - 'magnet_id': AllDebrid magnet ID
            - 'origin': 'debrid'
            - 'annotations': Status and seeders info

        Example:
            results = storage["debrid"].search("movie.mkv")
            for result in results:
                print(f"{result['name']} - {result['size']} bytes")
        """
        api_key = kwargs.get("api_key") or self._api_key
        if not api_key:
            raise ValueError("'api_key' parameter required for Debrid search (not configured)")

        limit = kwargs.get("limit", 50)

        try:
            from helper.alldebrid import AllDebridClient

            debug(f"Searching AllDebrid for: {query}")

            client = AllDebridClient(api_key=api_key)

            # STEP 1: Get the magnet status list
            try:
                response = client._request('magnet/status')
                magnets_data = response.get('data', {})
                magnets = magnets_data.get('magnets', [])
                if not isinstance(magnets, list):
                    magnets = [magnets] if magnets else []
                debug(f"[debrid_search] Got {len(magnets)} total magnets")
            except Exception as e:
                log(f"⚠ Failed to get magnets list: {e}", file=sys.stderr)
                magnets = []

            # Filter the magnets by the query
            query_lower = query.lower()
            matching_magnet_ids = []
            magnet_info_map = {}  # Store status info for later

            # "*" means "match all" - include all magnets
            match_all = query_lower == "*"

            # Split query into terms for AND logic
            terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
            if not terms:
                terms = [query_lower]

            for magnet in magnets:
                filename = magnet.get('filename', '').lower()
                status_code = magnet.get('statusCode', 0)
                magnet_id = magnet.get('id')

                # Only include ready or nearly-ready magnets (skip error states 5+)
                if status_code not in [0, 1, 2, 3, 4]:
                    continue

                # Match the query against the filename (or match all if query is "*")
                if not match_all:
                    if not all(term in filename for term in terms):
                        continue

                matching_magnet_ids.append(magnet_id)
                magnet_info_map[magnet_id] = magnet
                debug(f"[debrid_search] ✓ Matched magnet {magnet_id}: {filename}")

            debug(f"[debrid_search] Found {len(matching_magnet_ids)} matching magnets")

            results = []

            # Return one result per magnet (not per file). This keeps search
            # results clean and lets the user download an entire magnet at once.
            for magnet_id in matching_magnet_ids:
                magnet_status = magnet_info_map.get(magnet_id, {})
                filename = magnet_status.get('filename', 'Unknown')
                status = magnet_status.get('status', 'Unknown')
                status_code = magnet_status.get('statusCode', 0)
                size = magnet_status.get('size', 0)
                seeders = magnet_status.get('seeders', 0)

                # Format the size nicely
                size_label = f"{size / (1024**3):.2f}GB" if size > 0 else "Unknown"

                # Create one result per magnet with aggregated info
                results.append({
                    'name': filename,
                    'title': filename,
                    'url': '',  # No direct file link for the magnet itself
                    'size': size,
                    'size_bytes': size,
                    'magnet_id': magnet_id,
                    'origin': 'debrid',
                    'annotations': [
                        status,
                        f"{seeders} seeders",
                        size_label,
                    ],
                    'target': '',  # The magnet ID is stored; the user can then download it
                })

            debug(f"Found {len(results)} result(s) on AllDebrid")
            return results[:limit]

        except Exception as exc:
            log(f"❌ Debrid search failed: {exc}", file=sys.stderr)
            raise

    def _flatten_file_tree(self, files: list[Any], prefix: str = '') -> list[Dict[str, Any]]:
        """Flatten AllDebrid's nested file tree structure.

        AllDebrid returns files in a tree structure with folders ('e' key).
        This flattens it to a list of individual files.

        Args:
            files: AllDebrid file tree structure
            prefix: Current path prefix (used recursively)

        Returns:
            List of flattened file entries with 'name', 'size', 'link' keys
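
        Example:
            # Sketch of the tree shape this method expects
            # ('n' = name, 'e' = folder entries, 's' = size, 'l' = link):
            tree = [{'n': 'folder', 'e': [{'n': 'a.mkv', 's': 123, 'l': 'https://...'}]}]
            self._flatten_file_tree(tree)
            # -> [{'name': 'folder/a.mkv', 'size': 123, 'link': 'https://...'}]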
        """
        result = []

        if not isinstance(files, list):
            return result

        for item in files:
            if not isinstance(item, dict):
                continue

            name = item.get('n', '')

            # Check if it's a folder (has an 'e' key with entries)
            if 'e' in item:
                # Recursively flatten the subfolder
                subfolder_path = f"{prefix}/{name}" if prefix else name
                subitems = item.get('e', [])
                result.extend(self._flatten_file_tree(subitems, subfolder_path))
            else:
                # It's a file - add it to the results
                file_path = f"{prefix}/{name}" if prefix else name
                result.append({
                    'name': file_path,
                    'size': item.get('s', 0),
                    'link': item.get('l', ''),
                })

        return result


class MatrixStorageBackend(StorageBackend):
    """File storage backend for Matrix (Element) chat rooms."""

    def get_name(self) -> str:
        return "matrix"

    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload a file to a Matrix room.

        Requires 'config' in kwargs with 'storage.matrix' settings:
        - homeserver: URL of the homeserver (e.g. https://matrix.org)
        - user_id: User ID (e.g. @user:matrix.org); optional when a token is set
        - access_token: Access token (required; password login is not yet implemented)
        - room_id: Room ID to upload to (e.g. !roomid:matrix.org)
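
        Example (sketch of the expected config shape):
            config = {"storage": {"matrix": {
                "homeserver": "https://matrix.org",
                "access_token": "<token>",
                "room_id": "!roomid:matrix.org",
            }}}
            uri = storage["matrix"].upload(Path("file.mp3"), config=config)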
        """
        config = kwargs.get('config', {})
        if not config:
            raise ValueError("Config required for Matrix upload")

        matrix_conf = config.get('storage', {}).get('matrix', {})
        if not matrix_conf:
            raise ValueError("Matrix storage not configured in config.json")

        homeserver = matrix_conf.get('homeserver')
        # user_id = matrix_conf.get('user_id')  # Not strictly needed if we have a token
        access_token = matrix_conf.get('access_token')
        room_id = matrix_conf.get('room_id')

        if not homeserver or not room_id:
            raise ValueError("Matrix homeserver and room_id required")

        # Ensure the homeserver URL has a protocol
        if not homeserver.startswith('http'):
            homeserver = f"https://{homeserver}"

        # Login if no access token (not implemented; an access token is assumed)
        if not access_token:
            raise ValueError("Matrix access_token required (login not yet implemented)")

        # 1. Upload media
        upload_url = f"{homeserver}/_matrix/media/r0/upload"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/octet-stream"  # Overridden below if the MIME type is known
        }

        import mimetypes
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type:
            headers["Content-Type"] = mime_type

        filename = file_path.name

        try:
            with open(file_path, 'rb') as f:
                resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})

            if resp.status_code != 200:
                raise Exception(f"Matrix upload failed: {resp.text}")

            content_uri = resp.json().get('content_uri')
            if not content_uri:
                raise Exception("No content_uri returned from Matrix upload")

            # 2. Send the message event
            send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message"

            # Determine msgtype from the MIME type
            msgtype = "m.file"
            if mime_type:
                if mime_type.startswith("image/"):
                    msgtype = "m.image"
                elif mime_type.startswith("video/"):
                    msgtype = "m.video"
                elif mime_type.startswith("audio/"):
                    msgtype = "m.audio"

            payload = {
                "msgtype": msgtype,
                "body": filename,
                "url": content_uri,
                "info": {
                    "mimetype": mime_type,
                    "size": file_path.stat().st_size
                }
            }

            resp = requests.post(send_url, headers=headers, json=payload)
            if resp.status_code != 200:
                raise Exception(f"Matrix send message failed: {resp.text}")

            event_id = resp.json().get('event_id')
            return f"matrix://{room_id}/{event_id}"

        except Exception as e:
            log(f"❌ Matrix upload error: {e}", file=sys.stderr)
            raise


class FileStorage:
    """Unified file storage interface supporting multiple backend services.

    Example:
        storage = FileStorage(config)

        # Upload to different backends (uses configured locations)
        local_path = storage["local"].upload(Path("file.mp3"))  # Uses config["Local"]["path"]
        hydrus_hash = storage["hydrus"].upload(Path("file.mp3"), tags=["music"])

        # Search with searchable backends (uses configured locations)
        results = storage["hydrus"].search("music")
        results = storage["local"].search("song")  # Uses config["Local"]["path"]
        results = storage["debrid"].search("movie")
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        """Initialize the file storage system with available backends.

        Args:
            config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.)
        """
        config = config or {}

        # Extract backend-specific settings from config
        from config import get_local_storage_path, get_debrid_api_key

        local_path = get_local_storage_path(config)
        local_path_str = str(local_path) if local_path else None

        debrid_api_key = get_debrid_api_key(config)

        self._backends: Dict[str, StorageBackend] = {}

        # Always include the local backend (even if no default path is configured);
        # the location can be specified at upload time if not configured globally
        self._backends["local"] = LocalStorageBackend(location=local_path_str)

        # Include the Hydrus backend (configuration optional)
        self._backends["hydrus"] = HydrusStorageBackend(config=config)

        # Include the Debrid backend only when an API key is configured
        if debrid_api_key:
            self._backends["debrid"] = DebridStorageBackend(api_key=debrid_api_key)

        # Include the Matrix backend
        self._backends["matrix"] = MatrixStorageBackend()

    def __getitem__(self, backend_name: str) -> StorageBackend:
        """Get a storage backend by name.

        Args:
            backend_name: Name of the backend ('local', 'hydrus', 'debrid', 'matrix')

        Returns:
            StorageBackend instance

        Raises:
            KeyError: If backend not found
        """
        if backend_name not in self._backends:
            raise KeyError(
                f"Unknown storage backend: {backend_name}. "
                f"Available: {list(self._backends.keys())}"
            )
        return self._backends[backend_name]

    def register(self, backend: StorageBackend) -> None:
        """Register a custom storage backend.

        Args:
            backend: StorageBackend instance to register
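
        Example:
            # Sketch: MyBackend is a hypothetical StorageBackend subclass.
            storage.register(MyBackend())
            result = storage["mybackend"].upload(Path("file.mp3"))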
        """
        name = backend.get_name()
        self._backends[name] = backend
        log(f"Registered storage backend: {name}", file=sys.stderr)

    def list_backends(self) -> list[str]:
        """Get the list of available backend names.

        Returns:
            List of backend names
        """
        return list(self._backends.keys())

    def is_available(self, backend_name: str) -> bool:
        """Check if a backend is available.

        Args:
            backend_name: Name of the backend

        Returns:
            True if the backend is registered
        """
        return backend_name in self._backends

    def list_searchable_backends(self) -> list[str]:
        """Get the list of backends that support searching.

        Returns:
            List of searchable backend names
        """
        return [
            name for name, backend in self._backends.items()
            if backend.supports_search()
        ]

    def supports_search(self, backend_name: str) -> bool:
        """Check if a backend supports searching.

        Args:
            backend_name: Name of the backend

        Returns:
            True if backend supports search(), False otherwise
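
        Example:
            if storage.supports_search("debrid"):
                results = storage["debrid"].search("movie")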
        """
        if backend_name not in self._backends:
            return False
        return self._backends[backend_name].supports_search()