Medios-Macina/helper/file_storage.py

"""File storage abstraction layer for uploading files to different services.
Supports multiple backend storage services (0x0.st, local directories, Hydrus, etc.)
with a unified interface.
Example:
storage = FileStorage()
# Upload to 0x0.st
url = storage["0x0"].upload(Path("file.mp3"))
# Copy to local directory
path = storage["local"].upload(Path("file.mp3"), location="/home/user/files")
# Upload to Hydrus
hash_result = storage["hydrus"].upload(file_path, config=config)
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, Optional
import sys
import shutil
import requests
import re
from helper.logger import log, debug
class StorageBackend(ABC):
"""Abstract base class for file storage backends.
Backends can optionally support searching by implementing the search() method.
"""
@abstractmethod
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload a file and return a result identifier (URL, hash, path, etc.).
Args:
file_path: Path to the file to upload
**kwargs: Backend-specific options
Returns:
Result identifier (e.g., URL for 0x0.st, hash for Hydrus, path for local)
Raises:
Exception: If upload fails
"""
@abstractmethod
def get_name(self) -> str:
"""Get the unique name of this backend."""
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search for files in backends that support it.
This method is optional and only implemented by searchable backends
(e.g., Hydrus, Debrid, Soulseek).
Args:
query: Search query string
**kwargs: Backend-specific search options
Returns:
List of search results, each as a dict with backend-specific fields.
Common fields: 'name', 'size', 'hash', 'url', 'id', etc.
Raises:
NotImplementedError: If backend doesn't support searching
Exception: If search fails
Example:
results = storage["hydrus"].search("music artist:john")
for result in results:
print(result['name'], result['hash'])
"""
raise NotImplementedError(f"{self.get_name()} backend does not support searching")
def supports_search(self) -> bool:
"""Check if this backend supports searching.
Returns:
True if search() is implemented, False otherwise
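        Example (illustrative; assumes a FileStorage instance named storage):
            if storage["hydrus"].supports_search():
                results = storage["hydrus"].search("music")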
"""
return self.search.__func__ is not StorageBackend.search
class LocalStorageBackend(StorageBackend):
"""File storage backend for local file system copy."""
def __init__(self, location: Optional[str] = None) -> None:
"""Initialize local storage backend.
Args:
location: Default directory path for storage operations
"""
self._location = location
def get_name(self) -> str:
return "local"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Copy or move file to a local directory.
Args:
file_path: Path to the file to upload
location: Destination directory path (uses default if not provided)
move: When True, move the file instead of copying (default: False)
Returns:
Absolute path to the copied/moved file
Raises:
ValueError: If location not provided and no default configured
Exception: If copy fails or duplicate detected
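        Example (illustrative; paths are hypothetical):
            # Copy into the configured default location
            path = storage["local"].upload(Path("song.mp3"))
            # Move into an explicit directory instead of copying
            path = storage["local"].upload(
                Path("song.mp3"), location="/mnt/media", move=True
            )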
"""
from helper.utils import unique_path as utils_unique_path
from helper.utils import sha256_file
from helper.local_library import LocalLibraryDB
location = kwargs.get("location") or self._location
move_file = bool(kwargs.get("move"))
if not location:
raise ValueError("'location' parameter required for local storage (not configured)")
try:
# Compute file hash
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}", file=sys.stderr)
dest_dir = Path(location).expanduser()
dest_dir.mkdir(parents=True, exist_ok=True)
# Check for duplicate files using LocalLibraryDB (fast - uses index)
try:
db = LocalLibraryDB(dest_dir)
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
except Exception as exc:
log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)
dest_file = dest_dir / file_path.name
dest_file = utils_unique_path(dest_file)
if move_file:
shutil.move(str(file_path), dest_file)
debug(f"Local move: {dest_file}", file=sys.stderr)
else:
shutil.copy2(file_path, dest_file)
debug(f"Local copy: {dest_file}", file=sys.stderr)
return str(dest_file)
except Exception as exc:
debug(f"Local copy failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search local database for files by title tag or filename.
Args:
            query: Search string supporting:
                - "*": match all files
                - Namespace search (e.g. "creator:Mac*"): matches namespaced
                  tags in the database, with wildcard support
                - Plain text: matches filenames and simple (non-namespaced)
                  tags; multiple terms are ANDed
                Falls back to a filesystem scan if the database search fails
                or returns nothing.
            location: Directory to search in (uses default if not provided)
            recursive: Search subdirectories during the filesystem fallback
                (default: True)
            limit: Maximum number of results to return (optional)
        Returns:
            List of dicts with 'name', 'title', 'ext', 'path', 'target',
            'origin', 'size', and 'size_bytes' fields ('tags' included for
            database-backed results)
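        Example (illustrative; assumes an indexed library at the configured
        location):
            results = storage["local"].search("creator:Mac*", limit=10)
            results = storage["local"].search("live concert")  # terms ANDed
            for r in results:
                print(r["path"], r["size"])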
"""
from fnmatch import fnmatch
from helper.local_library import LocalLibraryDB
location = kwargs.get("location") or self._location
if not location:
raise ValueError("'location' parameter required for local search (not configured)")
limit = kwargs.get("limit")
try:
limit = int(limit) if limit is not None else None
except (TypeError, ValueError):
limit = None
if isinstance(limit, int) and limit <= 0:
limit = None
query_lower = query.lower()
match_all = query_lower == "*"
results = []
search_dir = Path(location).expanduser()
debug(f"Searching local storage at: {search_dir}")
try:
if not search_dir.exists():
debug(f"Search directory does not exist: {search_dir}")
return results
# Try database search first (much faster than filesystem scan)
try:
db = LocalLibraryDB(search_dir)
cursor = db.connection.cursor()
# Check if query is a tag namespace search (format: "namespace:pattern")
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")
# Filter results by pattern match
for file_id, file_path_str, size_bytes in rows:
if not file_path_str:
continue
# Get the file's tags and check if any match the pattern
cursor.execute("""
SELECT DISTINCT tag FROM tags
WHERE file_id = ?
AND LOWER(tag) LIKE ?
""", (file_id, query_pattern))
tags = [row[0] for row in cursor.fetchall()]
# Check if any tag matches the pattern (case-insensitive wildcard)
for tag in tags:
tag_lower = tag.lower()
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch all tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": all_tags,
})
else:
debug(f"File missing on disk: {file_path}")
break # Don't add same file multiple times
if limit is not None and len(results) >= limit:
return results
elif not match_all:
# Search by filename or simple tags (namespace-agnostic for plain text)
# For plain text search, match:
# 1. Filenames containing the query
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
# Compile regex for whole word matching (only if single term, otherwise skip)
word_regex = None
if len(terms) == 1:
term = terms[0]
# Check if term contains wildcard characters
has_wildcard = '*' in term or '?' in term
if has_wildcard:
# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
# Use word boundary for exact terms (backwards compatibility)
try:
word_regex = re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files:
continue
# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
                    # Also search simple (non-namespaced) tags containing the query.
                    # The full query string is matched as a single substring
                    # pattern; multi-term AND logic is not applied across tags,
                    # consistent with previous behavior.
query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tag_rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in tag_rows:
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if limit is not None and len(results) >= limit:
return results
else:
# Match all - get all files from database
cursor.execute("""
SELECT id, file_path, file_size
FROM files
ORDER BY file_path
LIMIT ?
""", (limit or 1000,))
rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in rows:
if file_path_str:
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if results:
debug(f"Returning {len(results)} results from DB")
return results
else:
debug("No results found in DB, falling back to filesystem scan")
except Exception as e:
log(f"⚠️ Database search failed: {e}", file=sys.stderr)
debug(f"DB search exception details: {e}")
# Fallback to filesystem search if database search fails or returns nothing
debug("Starting filesystem scan...")
recursive = kwargs.get("recursive", True)
pattern = "**/*" if recursive else "*"
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
count = 0
for file_path in search_dir.glob(pattern):
if not file_path.is_file():
continue
lower_name = file_path.name.lower()
if lower_name.endswith('.tags') or lower_name.endswith('.metadata') \
or lower_name.endswith('.notes') or lower_name.endswith('.tags.txt'):
continue
if not match_all:
# Check if ALL terms are present in the filename
                    # For a single wildcard term, use fnmatch (imported at the
                    # top of this method); otherwise use substring matching
                    if len(terms) == 1 and ('*' in terms[0] or '?' in terms[0]):
                        # Wildcard pattern matching for single term
                        if not fnmatch(lower_name, terms[0]):
                            continue
else:
# Substring matching for all terms (AND logic)
if not all(term in lower_name for term in terms):
continue
size_bytes = file_path.stat().st_size
path_str = str(file_path)
results.append({
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
})
count += 1
if limit is not None and len(results) >= limit:
break
debug(f"Filesystem scan found {count} matches")
except Exception as exc:
log(f"❌ Local search failed: {exc}", file=sys.stderr)
raise
return results
class HydrusStorageBackend(StorageBackend):
"""File storage backend for Hydrus client."""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
"""Initialize Hydrus storage backend.
Args:
config: Configuration dict with Hydrus settings (HydrusNetwork section)
"""
self._config = config or {}
def get_name(self) -> str:
return "hydrus"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Hydrus.
Args:
file_path: Path to the file to upload
tags: Optional list of tags to add (uses default config if not provided)
config: Optional override for config (uses default if not provided)
Returns:
File hash from Hydrus
Raises:
Exception: If upload fails
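        Example (illustrative; assumes a valid HydrusNetwork config):
            file_hash = storage["hydrus"].upload(
                Path("song.mp3"),
                tags=["creator:someone", "title:Some Song"],
            )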
"""
from helper import hydrus as hydrus_wrapper
from helper.utils import sha256_file
config = kwargs.get("config") or self._config
if not config:
raise ValueError("'config' parameter required for Hydrus storage (not configured)")
tags = kwargs.get("tags", [])
try:
# Compute file hash
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}")
# Build Hydrus client
client = hydrus_wrapper.get_client(config)
if client is None:
raise Exception("Hydrus client unavailable")
# Check if file already exists in Hydrus
try:
metadata = client.fetch_file_metadata(hashes=[file_hash])
if metadata and isinstance(metadata, dict):
files = metadata.get("file_metadata", [])
if files:
log(
f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
file=sys.stderr,
)
# Even if duplicate, we should add tags if provided
if tags:
try:
service_name = hydrus_wrapper.get_tag_service_name(config)
except Exception:
service_name = "my tags"
try:
debug(f"Adding {len(tags)} tag(s) to existing file in Hydrus: {tags}")
client.add_tags(file_hash, tags, service_name)
log(f"✅ Tags added to existing file via '{service_name}'", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Failed to add tags to existing file: {exc}", file=sys.stderr)
return file_hash
except Exception:
pass
# Upload file to Hydrus
log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
response = client.add_file(file_path)
# Extract hash from response
hydrus_hash: Optional[str] = None
if isinstance(response, dict):
hydrus_hash = response.get("hash") or response.get("file_hash")
if not hydrus_hash:
hashes = response.get("hashes")
if isinstance(hashes, list) and hashes:
hydrus_hash = hashes[0]
if not hydrus_hash:
raise Exception(f"Hydrus response missing file hash: {response}")
file_hash = hydrus_hash
log(f"Hydrus: {file_hash}", file=sys.stderr)
# Add tags if provided
if tags:
try:
service_name = hydrus_wrapper.get_tag_service_name(config)
except Exception:
service_name = "my tags"
try:
debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}")
client.add_tags(file_hash, tags, service_name)
log(f"✅ Tags added via '{service_name}'", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)
return file_hash
except Exception as exc:
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search Hydrus database for files matching query.
Args:
query: Search query (tags, filenames, hashes, etc.)
limit: Maximum number of results to return (default: 100)
config: Optional override for config (uses default if not provided)
Returns:
List of dicts with 'name', 'hash', 'size', 'tags' fields
Example:
results = storage["hydrus"].search("artist:john_doe music")
results = storage["hydrus"].search("Simple Man")
"""
from helper import hydrus as hydrus_wrapper
config = kwargs.get("config") or self._config
if not config:
raise ValueError("'config' parameter required for Hydrus search (not configured)")
limit = kwargs.get("limit", 100)
try:
client = hydrus_wrapper.get_client(config)
if client is None:
raise Exception("Hydrus client unavailable")
debug(f"Searching Hydrus for: {query}")
# Parse the query into tags
# Handle both simple tags and complex queries
# "*" means "match all" - use system:everything tag in Hydrus
if query.strip() == "*":
# Use system:everything to match all files in Hydrus
tags = ["system:everything"]
else:
query_lower = query.lower().strip()
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
# If query has explicit namespace, use it as a tag search
if ':' not in query_lower:
# No namespace provided: search all files, then filter by title/tags containing the query
tags = ["system:everything"]
else:
# User provided explicit namespace (e.g., "creator:john" or "system:has_audio")
# Use it as a tag search
tags = [query_lower]
if not tags:
debug(f"Found 0 result(s)")
return []
# Search files with the tags
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
# Extract file IDs from search result
file_ids = search_result.get("file_ids", [])
hashes = search_result.get("hashes", [])
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
# Fetch metadata for the found files
results = []
query_lower = query.lower().strip()
            # Split by comma or space into terms for AND logic
            search_terms = set(query_lower.replace(',', ' ').split())  # whole-word matched below
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
for meta in metadata_list:
if len(results) >= limit:
break
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
size = meta.get("size", 0)
# Get tags for this file and extract title
tags_set = meta.get("tags", {})
all_tags = []
title = f"Hydrus File {file_id}" # Default fallback
                    all_tags_str = ""  # Concatenated tag text for whole-word matching
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
if isinstance(tags_set, dict):
# debug(f"[HydrusBackend.search] Tags payload keys: {list(tags_set.keys())}")
for service_name, service_tags in tags_set.items():
# debug(f"[HydrusBackend.search] Processing service: {service_name}")
if isinstance(service_tags, dict):
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_type, tag_list in storage_tags.items():
# debug(f"[HydrusBackend.search] Tag type: {tag_type}, count: {len(tag_list) if isinstance(tag_list, list) else 0}")
if isinstance(tag_list, list):
for tag in tag_list:
tag_text = str(tag) if tag else ""
if tag_text:
# debug(f"[HydrusBackend.search] Tag: {tag_text}")
all_tags.append(tag_text)
all_tags_str += " " + tag_text.lower()
# Extract title: namespace
if tag_text.startswith("title:"):
title = tag_text[6:].strip() # Remove "title:" prefix
# debug(f"[HydrusBackend.search] ✓ Extracted title: {title}")
break
if title != f"Hydrus File {file_id}":
break
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ':' in query_lower
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
results.append({
"hash": hash_hex,
"hash_hex": hash_hex,
"target": hash_hex,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"origin": "hydrus",
"tags": all_tags,
"file_id": file_id,
"mime": meta.get("mime"),
})
else:
# Free-form search: check if search terms match the title or tags
# Match if ALL search terms are found in title or tags (AND logic)
# AND use whole word matching
# Combine title and tags for searching
searchable_text = (title + " " + all_tags_str).lower()
match = True
if query_lower != "*":
for term in search_terms:
# Regex for whole word: \bterm\b
# Escape term to handle special chars
pattern = r'\b' + re.escape(term) + r'\b'
if not re.search(pattern, searchable_text):
match = False
break
if match:
results.append({
"hash": hash_hex,
"hash_hex": hash_hex,
"target": hash_hex,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"origin": "hydrus",
"tags": all_tags,
"file_id": file_id,
"mime": meta.get("mime"),
})
debug(f"Found {len(results)} result(s)")
return results[:limit]
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
raise
class DebridStorageBackend(StorageBackend):
"""File storage backend for Debrid services (AllDebrid, RealDebrid, etc.)."""
def __init__(self, api_key: Optional[str] = None) -> None:
"""Initialize Debrid storage backend.
Args:
api_key: API key for Debrid service (e.g., from config["Debrid"]["All-debrid"])
"""
self._api_key = api_key
def get_name(self) -> str:
return "debrid"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Debrid service.
Args:
file_path: Path to the file to upload
**kwargs: Debrid-specific options
Returns:
Debrid link/URL
Raises:
NotImplementedError: Debrid upload not yet implemented
"""
raise NotImplementedError("Debrid upload not yet implemented")
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search Debrid for files matching query.
Searches through available magnets in AllDebrid storage and returns
matching results with download links.
Args:
query: Search query string (filename or magnet name pattern)
limit: Maximum number of results to return (default: 50)
api_key: Optional override for API key (uses default if not provided)
Returns:
List of dicts with keys:
- 'name': File/magnet name
- 'title': Same as name (for compatibility)
- 'url': AllDebrid download link
- 'size': File size in bytes
- 'magnet_id': AllDebrid magnet ID
- 'origin': 'debrid'
- 'annotations': Status and seeders info
Example:
results = storage["debrid"].search("movie.mkv")
for result in results:
print(f"{result['name']} - {result['size']} bytes")
"""
api_key = kwargs.get("api_key") or self._api_key
if not api_key:
raise ValueError("'api_key' parameter required for Debrid search (not configured)")
limit = kwargs.get("limit", 50)
try:
from helper.alldebrid import AllDebridClient
debug(f"Searching AllDebrid for: {query}")
client = AllDebridClient(api_key=api_key)
# STEP 1: Get magnet status list
try:
response = client._request('magnet/status')
magnets_data = response.get('data', {})
magnets = magnets_data.get('magnets', [])
if not isinstance(magnets, list):
magnets = [magnets] if magnets else []
debug(f"[debrid_search] Got {len(magnets)} total magnets")
except Exception as e:
log(f"⚠ Failed to get magnets list: {e}", file=sys.stderr)
magnets = []
# Filter by query for relevant magnets
query_lower = query.lower()
matching_magnet_ids = []
magnet_info_map = {} # Store status info for later
# "*" means "match all" - include all magnets
match_all = query_lower == "*"
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
for magnet in magnets:
filename = magnet.get('filename', '').lower()
status_code = magnet.get('statusCode', 0)
magnet_id = magnet.get('id')
# Only include ready or nearly-ready magnets (skip error states 5+)
if status_code not in [0, 1, 2, 3, 4]:
continue
# Match query against filename (or match all if query is "*")
if not match_all:
if not all(term in filename for term in terms):
continue
matching_magnet_ids.append(magnet_id)
magnet_info_map[magnet_id] = magnet
debug(f"[debrid_search] ✓ Matched magnet {magnet_id}: {filename}")
debug(f"[debrid_search] Found {len(matching_magnet_ids)} matching magnets")
results = []
# Return one result per magnet (not per file)
# This keeps search results clean and allows user to download entire magnet at once
for magnet_id in matching_magnet_ids:
magnet_status = magnet_info_map.get(magnet_id, {})
filename = magnet_status.get('filename', 'Unknown')
status = magnet_status.get('status', 'Unknown')
status_code = magnet_status.get('statusCode', 0)
size = magnet_status.get('size', 0)
seeders = magnet_status.get('seeders', 0)
# Format size nicely
size_label = f"{size / (1024**3):.2f}GB" if size > 0 else "Unknown"
# Create one result per magnet with aggregated info
results.append({
'name': filename,
'title': filename,
'url': '', # No direct file link for the magnet itself
'size': size,
'size_bytes': size,
'magnet_id': magnet_id,
'origin': 'debrid',
'annotations': [
status,
f"{seeders} seeders",
size_label,
],
'target': '', # Magnet ID is stored, user can then download it
})
debug(f"Found {len(results)} result(s) on AllDebrid")
return results[:limit]
except Exception as exc:
log(f"❌ Debrid search failed: {exc}", file=sys.stderr)
raise
def _flatten_file_tree(self, files: list[Any], prefix: str = '') -> list[Dict[str, Any]]:
"""Flatten AllDebrid's nested file tree structure.
AllDebrid returns files in a tree structure with folders ('e' key).
This flattens it to a list of individual files.
Args:
files: AllDebrid file tree structure
prefix: Current path prefix (used recursively)
Returns:
List of flattened file entries with 'name', 'size', 'link' keys
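        Example (illustrative input/output):
            [{'n': 'S1', 'e': [{'n': 'ep1.mkv', 's': 100, 'l': 'https://...'}]}]
            flattens to:
            [{'name': 'S1/ep1.mkv', 'size': 100, 'link': 'https://...'}]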
"""
result = []
if not isinstance(files, list):
return result
for item in files:
if not isinstance(item, dict):
continue
name = item.get('n', '')
# Check if it's a folder (has 'e' key with entries)
if 'e' in item:
# Recursively flatten subfolder
subfolder_path = f"{prefix}/{name}" if prefix else name
subitems = item.get('e', [])
result.extend(self._flatten_file_tree(subitems, subfolder_path))
else:
# It's a file - add it to results
file_path = f"{prefix}/{name}" if prefix else name
result.append({
'name': file_path,
'size': item.get('s', 0),
'link': item.get('l', ''),
})
return result
class MatrixStorageBackend(StorageBackend):
"""File storage backend for Matrix (Element) chat rooms."""
def get_name(self) -> str:
return "matrix"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Matrix room.
Requires 'config' in kwargs with 'storage.matrix' settings:
- homeserver: URL of homeserver (e.g. https://matrix.org)
- user_id: User ID (e.g. @user:matrix.org)
- access_token: Access token (preferred) OR password
- room_id: Room ID to upload to (e.g. !roomid:matrix.org)
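        Example config (illustrative values):
            "storage": {
                "matrix": {
                    "homeserver": "https://matrix.org",
                    "access_token": "syt_...",
                    "room_id": "!roomid:matrix.org"
                }
            }
        Returns:
            A "matrix://{room_id}/{event_id}" identifier for the sent event.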
"""
config = kwargs.get('config', {})
if not config:
raise ValueError("Config required for Matrix upload")
matrix_conf = config.get('storage', {}).get('matrix', {})
if not matrix_conf:
raise ValueError("Matrix storage not configured in config.json")
homeserver = matrix_conf.get('homeserver')
# user_id = matrix_conf.get('user_id') # Not strictly needed if we have token
access_token = matrix_conf.get('access_token')
room_id = matrix_conf.get('room_id')
if not homeserver or not room_id:
raise ValueError("Matrix homeserver and room_id required")
# Ensure homeserver has protocol
if not homeserver.startswith('http'):
homeserver = f"https://{homeserver}"
# Login if no access token (optional implementation, for now assume token)
if not access_token:
raise ValueError("Matrix access_token required (login not yet implemented)")
        # 1. Upload media to the homeserver's media repository
        upload_url = f"{homeserver}/_matrix/media/v3/upload"
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/octet-stream" # Or guess mime type
}
import mimetypes
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type:
headers["Content-Type"] = mime_type
filename = file_path.name
try:
with open(file_path, 'rb') as f:
resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = resp.json().get('content_uri')
if not content_uri:
raise Exception("No content_uri returned from Matrix upload")
# 2. Send Message
send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message"
            # Determine msgtype from the mime type
            msgtype = "m.file"
            if mime_type:
                if mime_type.startswith("image/"):
                    msgtype = "m.image"
                elif mime_type.startswith("video/"):
                    msgtype = "m.video"
                elif mime_type.startswith("audio/"):
                    msgtype = "m.audio"
payload = {
"msgtype": msgtype,
"body": filename,
"url": content_uri,
"info": {
"mimetype": mime_type,
"size": file_path.stat().st_size
}
}
resp = requests.post(send_url, headers=headers, json=payload)
if resp.status_code != 200:
raise Exception(f"Matrix send message failed: {resp.text}")
event_id = resp.json().get('event_id')
return f"matrix://{room_id}/{event_id}"
except Exception as e:
log(f"❌ Matrix upload error: {e}", file=sys.stderr)
raise
class FileStorage:
"""Unified file storage interface supporting multiple backend services.
Example:
storage = FileStorage(config)
# Upload to different backends (uses configured locations)
url = storage["0x0"].upload(Path("file.mp3"))
local_path = storage["local"].upload(Path("file.mp3")) # Uses config["Local"]["path"]
hydrus_hash = storage["hydrus"].upload(Path("file.mp3"), tags=["music"])
# Search with searchable backends (uses configured locations)
results = storage["hydrus"].search("music")
results = storage["local"].search("song") # Uses config["Local"]["path"]
results = storage["debrid"].search("movie")
"""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
"""Initialize the file storage system with available backends.
Args:
config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.)
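        Example config shape (illustrative; key layout inferred from usage in
        this module):
            {
                "Local": {"path": "~/media"},
                "HydrusNetwork": {...},
                "Debrid": {"All-debrid": "YOUR_API_KEY"}
            }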
"""
config = config or {}
# Extract backend-specific settings from config
from config import get_local_storage_path, get_debrid_api_key
local_path = get_local_storage_path(config)
local_path_str = str(local_path) if local_path else None
debrid_api_key = get_debrid_api_key(config)
self._backends: Dict[str, StorageBackend] = {}
# Always include local backend (even if no default path configured)
# The location can be specified at upload time if not configured globally
self._backends["local"] = LocalStorageBackend(location=local_path_str)
# Include Hydrus backend (configuration optional)
self._backends["hydrus"] = HydrusStorageBackend(config=config)
# Include Debrid backend (API key optional - will raise on use if not provided)
if debrid_api_key:
self._backends["debrid"] = DebridStorageBackend(api_key=debrid_api_key)
# Include Matrix backend
self._backends["matrix"] = MatrixStorageBackend()
def __getitem__(self, backend_name: str) -> StorageBackend:
"""Get a storage backend by name.
Args:
            backend_name: Name of the backend (e.g. 'local', 'hydrus', 'debrid', 'matrix')
Returns:
StorageBackend instance
Raises:
KeyError: If backend not found
"""
if backend_name not in self._backends:
raise KeyError(
f"Unknown storage backend: {backend_name}. "
f"Available: {list(self._backends.keys())}"
)
return self._backends[backend_name]
def register(self, backend: StorageBackend) -> None:
"""Register a custom storage backend.
Args:
backend: StorageBackend instance to register
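        Example (minimal sketch of a hypothetical custom backend):
            class NullBackend(StorageBackend):
                def get_name(self) -> str:
                    return "null"
                def upload(self, file_path: Path, **kwargs: Any) -> str:
                    return str(file_path)  # no-op: just report the source path
            storage.register(NullBackend())
            path = storage["null"].upload(Path("file.mp3"))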
"""
name = backend.get_name()
self._backends[name] = backend
log(f"Registered storage backend: {name}", file=sys.stderr)
def list_backends(self) -> list[str]:
"""Get list of available backend names.
Returns:
List of backend names
"""
return list(self._backends.keys())
def is_available(self, backend_name: str) -> bool:
"""Check if a backend is available.
Args:
backend_name: Name of the backend
Returns:
True if backend is registered
"""
return backend_name in self._backends
def list_searchable_backends(self) -> list[str]:
"""Get list of backends that support searching.
Returns:
List of searchable backend names
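        Example (illustrative):
            for name in storage.list_searchable_backends():
                results = storage[name].search("*", limit=5)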
"""
return [
name for name, backend in self._backends.items()
if backend.supports_search()
]
def supports_search(self, backend_name: str) -> bool:
"""Check if a backend supports searching.
Args:
backend_name: Name of the backend
Returns:
True if backend supports search(), False otherwise
"""
if backend_name not in self._backends:
return False
return self._backends[backend_name].supports_search()