"""File storage abstraction layer for uploading files to different services. """ from __future__ import annotations from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Dict, Optional, Tuple, List import sys import shutil import requests import re from helper.logger import log, debug from helper.utils_constant import mime_maps from helper.utils import sha256_file from helper.folder_store import FolderDB from config import get_local_storage_path HEX_DIGITS = set("0123456789abcdef") def _normalize_hex_hash(value: Optional[str]) -> Optional[str]: """Return a normalized 64-character lowercase hash or None.""" if value is None: return None try: cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS) except Exception: return None if len(cleaned) == 64: return cleaned return None def _resolve_file_hash(candidate: Optional[str], path: Path) -> Optional[str]: """Return the given hash if valid, otherwise compute sha256 from disk.""" normalized = _normalize_hex_hash(candidate) if normalized is not None: return normalized if not path.exists(): return None try: return sha256_file(path) except Exception as exc: debug(f"Failed to compute hash for {path}: {exc}") return None class store(ABC): """""" @abstractmethod def add_file(self, file_path: Path, **kwargs: Any) -> str: """""" @abstractmethod def name(self) -> str: """""" def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """""" raise NotImplementedError(f"{self.name()} backend does not support searching") @abstractmethod def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: """Retrieve file by hash, returning path to the file. Args: file_hash: SHA256 hash of the file (64-char hex string) Returns: Path to the file or None if not found """ raise NotImplementedError(f"{self.name()} backend does not support get_file") @abstractmethod def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: """Get metadata for a file by hash. Args: file_hash: SHA256 hash of the file (64-char hex string) Returns: Dict with metadata fields or None if not found """ raise NotImplementedError(f"{self.name()} backend does not support get_metadata") @abstractmethod def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: """""" raise NotImplementedError(f"{self.name()} backend does not support get_tags") @abstractmethod def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """""" raise NotImplementedError(f"{self.name()} backend does not support add_tag") @abstractmethod def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """""" raise NotImplementedError(f"{self.name()} backend does not support delete_tag") @abstractmethod def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: """""" raise NotImplementedError(f"{self.name()} backend does not support get_url") @abstractmethod def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """""" raise NotImplementedError(f"{self.name()} backend does not support add_url") @abstractmethod def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """""" raise NotImplementedError(f"{self.name()} backend does not support delete_url") class Folder(store): """""" # Track which locations have already been migrated to avoid repeated migrations _migrated_locations = set() def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None: self._location = location self._name = name if self._location: try: from helper.folder_store import FolderDB from pathlib import Path location_path = Path(self._location).expanduser() # Use context manager to ensure connection is properly closed with FolderDB(location_path) as db: if db.connection: db.connection.commit() # Call migration and discovery at startup Folder.migrate_location(self._location) except Exception as exc: debug(f"Failed to initialize database for '{name}': {exc}") @classmethod def migrate_location(cls, location: Optional[str]) -> None: """Migrate a location to hash-based storage (one-time operation, call explicitly at startup).""" if not location: return from pathlib import Path location_path = Path(location).expanduser() location_str = str(location_path) # Only migrate once per location if location_str in cls._migrated_locations: return cls._migrated_locations.add(location_str) # Create a temporary instance just to call the migration temp_instance = cls(location=location) temp_instance._migrate_to_hash_storage(location_path) def _migrate_to_hash_storage(self, location_path: Path) -> None: """Migrate existing files from filename-based to hash-based storage. Checks for sidecars (.metadata, .tag) and imports them before renaming. Also ensures all files have a title: tag. """ from helper.folder_store import read_sidecar, write_sidecar, find_sidecar try: with FolderDB(location_path) as db: cursor = db.connection.cursor() # First pass: migrate filename-based files and add title tags # Scan all files in the storage directory for file_path in sorted(location_path.iterdir()): if not file_path.is_file(): continue # Skip database files and sidecars if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'): continue # Also skip if the file ends with -shm or -wal (SQLite journal files) if file_path.name.endswith(('-shm', '-wal')): continue # Check if filename is already a hash (without extension) if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()): continue # Already migrated, will process in second pass try: # Compute file hash file_hash = sha256_file(file_path) # Preserve extension in the hash-based filename file_ext = file_path.suffix # e.g., '.mp4' hash_filename = file_hash + file_ext if file_ext else file_hash hash_path = location_path / hash_filename # Check for sidecars and import them sidecar_path = find_sidecar(file_path) tags_to_add = [] url_to_add = [] has_title_tag = False if sidecar_path and sidecar_path.exists(): try: _, tags, url = read_sidecar(sidecar_path) if tags: tags_to_add = list(tags) # Check if title tag exists has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add) if url: url_to_add = list(url) debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr) # Delete the sidecar after importing sidecar_path.unlink() except Exception as exc: debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr) # Ensure there's a title tag (use original filename if not present) if not has_title_tag: tags_to_add.append(f"title:{file_path.name}") # Rename file to hash if needed if hash_path != file_path and not hash_path.exists(): debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr) file_path.rename(hash_path) # Create or update database entry using FolderDB methods db.get_or_create_file_entry(hash_path) # Save extension metadata ext_clean = file_ext.lstrip('.') if file_ext else '' db.save_metadata(hash_path, { 'hash': file_hash, 'ext': ext_clean, 'size': hash_path.stat().st_size }) # Add all tags (including title tag) if tags_to_add: db.save_tags(hash_path, tags_to_add) debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr) # Note: url would need a separate table if you want to store them # For now, we're just noting them in debug if url_to_add: debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr) except Exception as exc: debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr) # Second pass: ensure all files in database have a title: tag db.connection.commit() cursor.execute(''' SELECT f.hash, f.file_path FROM files f WHERE NOT EXISTS ( SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%' ) ''') files_without_title = cursor.fetchall() for file_hash, file_path_str in files_without_title: try: file_path = Path(file_path_str) if file_path.exists(): # Use the filename as the title title_tag = f"title:{file_path.name}" db.save_tags(file_path, [title_tag]) debug(f"Added title tag to {file_path.name}", file=sys.stderr) except Exception as exc: debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr) db.connection.commit() # Third pass: discover files on disk that aren't in the database yet # These are hash-named files that were added after initial indexing cursor.execute('SELECT LOWER(hash) FROM files') db_hashes = {row[0] for row in cursor.fetchall()} discovered = 0 for file_path in sorted(location_path.rglob("*")): if file_path.is_file(): # Check if file name (without extension) is a 64-char hex hash name_without_ext = file_path.stem if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()): file_hash = name_without_ext.lower() # Skip if already in DB if file_hash in db_hashes: continue try: # Add file to DB (creates entry and auto-adds title: tag) db.get_or_create_file_entry(file_path) # Save extension metadata file_ext = file_path.suffix ext_clean = file_ext.lstrip('.') if file_ext else '' db.save_metadata(file_path, { 'hash': file_hash, 'ext': ext_clean, 'size': file_path.stat().st_size }) discovered += 1 except Exception as e: debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr) if discovered > 0: debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr) db.connection.commit() except Exception as exc: debug(f"Migration to hash storage failed: {exc}", file=sys.stderr) def location(self) -> str: return self._location def name(self) -> str: return self._name def add_file(self, file_path: Path, **kwargs: Any) -> str: """Add file to local folder storage with full metadata support. Args: file_path: Path to the file to add move: If True, move file instead of copy (default: False) tags: Optional list of tags to add url: Optional list of url to associate with the file title: Optional title (will be added as 'title:value' tag) Returns: File hash (SHA256 hex string) as identifier """ move_file = bool(kwargs.get("move")) tags = kwargs.get("tags", []) url = kwargs.get("url", []) title = kwargs.get("title") # Extract title from tags if not explicitly provided if not title: for tag in tags: if isinstance(tag, str) and tag.lower().startswith("title:"): title = tag.split(":", 1)[1].strip() break # Fallback to filename if no title if not title: title = file_path.name # Ensure title is in tags title_tag = f"title:{title}" if not any(str(tag).lower().startswith("title:") for tag in tags): tags = [title_tag] + list(tags) try: file_hash = sha256_file(file_path) debug(f"File hash: {file_hash}", file=sys.stderr) # Preserve extension in the stored filename file_ext = file_path.suffix # e.g., '.mp4' save_filename = file_hash + file_ext if file_ext else file_hash save_file = Path(self._location) / save_filename # Check if file already exists with FolderDB(Path(self._location)) as db: existing_path = db.search_hash(file_hash) if existing_path and existing_path.exists(): log( f"✓ File already in local storage: {existing_path}", file=sys.stderr, ) # Still add tags and url if provided if tags: self.add_tag(file_hash, tags) if url: self.add_url(file_hash, url) return file_hash # Move or copy file if move_file: shutil.move(str(file_path), str(save_file)) debug(f"Local move: {save_file}", file=sys.stderr) else: shutil.copy2(str(file_path), str(save_file)) debug(f"Local copy: {save_file}", file=sys.stderr) # Save to database with FolderDB(Path(self._location)) as db: db.get_or_create_file_entry(save_file) # Save metadata including extension ext_clean = file_ext.lstrip('.') if file_ext else '' db.save_metadata(save_file, { 'hash': file_hash, 'ext': ext_clean, 'size': file_path.stat().st_size }) # Add tags if provided if tags: self.add_tag(file_hash, tags) # Add url if provided if url: self.add_url(file_hash, url) log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr) return file_hash except Exception as exc: log(f"❌ Local storage failed: {exc}", file=sys.stderr) raise def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Search local database for files by title tag or filename.""" from fnmatch import fnmatch from helper.folder_store import DatabaseAPI limit = kwargs.get("limit") try: limit = int(limit) if limit is not None else None except (TypeError, ValueError): limit = None if isinstance(limit, int) and limit <= 0: limit = None query = query.lower() query_lower = query # Ensure query_lower is defined for all code paths match_all = query == "*" results = [] search_dir = Path(self._location).expanduser() tokens = [t.strip() for t in query.split(',') if t.strip()] if not match_all and len(tokens) == 1 and _normalize_hex_hash(query): debug("Hash queries require 'hash:' prefix for local search") return results if not match_all and _normalize_hex_hash(query): debug("Hash queries require 'hash:' prefix for local search") return results def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]: path_str = str(file_path) # Get title from tags if available, otherwise use hash as fallback title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) if not title: # Fallback to hash if no title tag exists hash_value = _resolve_file_hash(db_hash, file_path) title = hash_value if hash_value else file_path.stem # Extract extension from file path ext = file_path.suffix.lstrip('.') if not ext: # Fallback: try to extract from title (original filename might be in title) title_path = Path(title) ext = title_path.suffix.lstrip('.') # Build clean entry with only necessary fields hash_value = _resolve_file_hash(db_hash, file_path) entry = { "title": title, "ext": ext, "path": path_str, "target": path_str, "store": self._name, "size": size_bytes, "hash": hash_value, "tag": tags, } return entry try: if not search_dir.exists(): debug(f"Search directory does not exist: {search_dir}") return results try: with DatabaseAPI(search_dir) as api: if tokens and len(tokens) > 1: def _like_pattern(term: str) -> str: return term.replace('*', '%').replace('?', '_') def _ids_for_token(token: str) -> set[int]: token = token.strip() if not token: return set() if ':' in token and not token.startswith(':'): namespace, pattern = token.split(':', 1) namespace = namespace.strip().lower() pattern = pattern.strip().lower() if namespace == 'hash': normalized_hash = _normalize_hex_hash(pattern) if not normalized_hash: return set() h = api.get_file_hash_by_hash(normalized_hash) return {h} if h else set() if namespace == 'store': if pattern not in {'local', 'file', 'filesystem'}: return set() return api.get_all_file_hashes() query_pattern = f"{namespace}:%" tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern) matched: set[str] = set() for file_hash, tag_val in tag_rows: if not tag_val: continue tag_lower = str(tag_val).lower() if not tag_lower.startswith(f"{namespace}:"): continue value = tag_lower[len(namespace)+1:] if fnmatch(value, pattern): matched.add(file_hash) return matched term = token.lower() like_pattern = f"%{_like_pattern(term)}%" hashes = api.get_file_hashes_by_path_pattern(like_pattern) hashes.update(api.get_file_hashes_by_tag_substring(like_pattern)) return hashes try: matching_hashes: set[str] | None = None for token in tokens: hashes = _ids_for_token(token) matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes if not matching_hashes: return results if not matching_hashes: return results rows = api.get_file_metadata(matching_hashes, limit) for file_hash, file_path_str, size_bytes, ext in rows: if not file_path_str: continue file_path = Path(file_path_str) if not file_path.exists(): continue if size_bytes is None: try: size_bytes = file_path.stat().st_size except OSError: size_bytes = None tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) results.append(entry) if limit is not None and len(results) >= limit: return results return results except Exception as exc: log(f"⚠️ AND search failed: {exc}", file=sys.stderr) debug(f"AND search exception details: {exc}") return [] if ":" in query and not query.startswith(":"): namespace, pattern = query.split(":", 1) namespace = namespace.strip().lower() pattern = pattern.strip().lower() debug(f"Performing namespace search: {namespace}:{pattern}") if namespace == "hash": normalized_hash = _normalize_hex_hash(pattern) if not normalized_hash: return results h = api.get_file_hash_by_hash(normalized_hash) hashes = {h} if h else set() rows = api.get_file_metadata(hashes, limit) for file_hash, file_path_str, size_bytes, ext in rows: if not file_path_str: continue file_path = Path(file_path_str) if not file_path.exists(): continue if size_bytes is None: try: size_bytes = file_path.stat().st_size except OSError: size_bytes = None tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) results.append(entry) if limit is not None and len(results) >= limit: return results return results query_pattern = f"{namespace}:%" rows = api.get_files_by_namespace_pattern(query_pattern, limit) debug(f"Found {len(rows)} potential matches in DB") for file_hash, file_path_str, size_bytes, ext in rows: if not file_path_str: continue tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern) for tag in tags: tag_lower = tag.lower() if tag_lower.startswith(f"{namespace}:"): value = tag_lower[len(namespace)+1:] if fnmatch(value, pattern): file_path = Path(file_path_str) if file_path.exists(): if size_bytes is None: size_bytes = file_path.stat().st_size all_tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, all_tags, size_bytes, file_hash) results.append(entry) else: debug(f"File missing on disk: {file_path}") break if limit is not None and len(results) >= limit: return results elif not match_all: terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] if not terms: terms = [query_lower] debug(f"Performing filename/tag search for terms: {terms}") fetch_limit = (limit or 45) * 50 conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] params = [f"%{t}%" for t in terms] rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit) debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") word_regex = None if len(terms) == 1: term = terms[0] has_wildcard = '*' in term or '?' in term if has_wildcard: try: from fnmatch import translate word_regex = re.compile(translate(term), re.IGNORECASE) except Exception: word_regex = None else: try: pattern = r'(?= limit: return results if terms: title_hits: dict[str, dict[str, Any]] = {} for term in terms: title_pattern = f"title:%{term}%" title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit) for file_hash, file_path_str, size_bytes, ext in title_rows: if not file_path_str: continue entry = title_hits.get(file_hash) if entry: entry["count"] += 1 if size_bytes is not None: entry["size"] = size_bytes else: title_hits[file_hash] = { "path": file_path_str, "size": size_bytes, "hash": file_hash, "count": 1, } if title_hits: required = len(terms) for file_hash, info in title_hits.items(): if info.get("count") != required: continue file_path_str = info.get("path") if not file_path_str or file_path_str in seen_files: continue file_path = Path(file_path_str) if not file_path.exists(): continue seen_files.add(file_path_str) size_bytes = info.get("size") if size_bytes is None: try: size_bytes = file_path.stat().st_size except OSError: size_bytes = None tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, info.get("hash")) results.append(entry) if limit is not None and len(results) >= limit: return results query_pattern = f"%{query_lower}%" tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit) for file_hash, file_path_str, size_bytes, ext in tag_rows: if not file_path_str or file_path_str in seen_files: continue seen_files.add(file_path_str) file_path = Path(file_path_str) if file_path.exists(): if size_bytes is None: size_bytes = file_path.stat().st_size tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) results.append(entry) if limit is not None and len(results) >= limit: return results else: rows = api.get_all_files(limit) for file_hash, file_path_str, size_bytes, ext in rows: if file_path_str: file_path = Path(file_path_str) if file_path.exists(): if size_bytes is None: size_bytes = file_path.stat().st_size tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) results.append(entry) if results: debug(f"Returning {len(results)} results from DB") else: debug("No results found in DB") return results except Exception as e: log(f"⚠️ Database search failed: {e}", file=sys.stderr) debug(f"DB search exception details: {e}") return [] except Exception as exc: log(f"❌ Local search failed: {exc}", file=sys.stderr) raise def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Alias for search_file to match the interface expected by FileStorage.""" return self.search_file(query, **kwargs) def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]: """Return the library root containing medios-macina.db. Prefer the store's configured location, then config override, then walk parents of the file path to find a directory with medios-macina.db.""" candidates: list[Path] = [] if self._location: candidates.append(Path(self._location).expanduser()) cfg_root = get_local_storage_path(config) if config else None if cfg_root: candidates.append(Path(cfg_root).expanduser()) for root in candidates: db_path = root / "medios-macina.db" if db_path.exists(): return root try: for parent in [file_path] + list(file_path.parents): db_path = parent / "medios-macina.db" if db_path.exists(): return parent except Exception: pass return None def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: """Retrieve file by hash, returning path to the file. Args: file_hash: SHA256 hash of the file (64-char hex string) Returns: Path to the file or None if not found """ try: # Normalize the hash normalized_hash = _normalize_hex_hash(file_hash) if not normalized_hash: return None search_dir = Path(self._location).expanduser() from helper.folder_store import FolderDB with FolderDB(search_dir) as db: # Search for file by hash file_path = db.search_hash(normalized_hash) if file_path and file_path.exists(): return file_path return None except Exception as exc: debug(f"Failed to get file for hash {file_hash}: {exc}") return None def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: """Get a playable path for a file in this folder store. For folder stores, this resolves the hash to the actual file path on disk. Args: file_hash: SHA256 hash of the file config: Optional config dict (unused for folder stores) Returns: Absolute file path as string, or None if file not found """ file_path = self.get_file(file_hash) if file_path: return str(file_path.absolute()) return None def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]: """Get metadata for a file from the database by hash. Args: file_hash: SHA256 hash of the file (64-char hex string) Returns: Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found """ try: # Normalize the hash normalized_hash = _normalize_hex_hash(file_hash) if not normalized_hash: return None search_dir = Path(self._location).expanduser() from helper.folder_store import DatabaseAPI with DatabaseAPI(search_dir) as api: # Get file hash file_hash_result = api.get_file_hash_by_hash(normalized_hash) if not file_hash_result: return None # Query metadata directly from database cursor = api.get_cursor() cursor.execute(""" SELECT * FROM metadata WHERE hash = ? """, (file_hash_result,)) row = cursor.fetchone() if not row: return None metadata = dict(row) # Parse JSON fields for field in ['url', 'relationships']: if metadata.get(field): try: import json metadata[field] = json.loads(metadata[field]) except (json.JSONDecodeError, TypeError): metadata[field] = [] if field == 'url' else [] return metadata except Exception as exc: debug(f"Failed to get metadata for hash {file_hash}: {exc}") return None def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: """Get tags for a local file by hash. Returns: Tuple of (tags_list, store_name) where store_name is the actual store name """ from helper.folder_store import FolderDB try: file_hash = file_identifier if self._location: try: with FolderDB(Path(self._location)) as db: db_tags = db.get_tags(file_hash) if db_tags: # Return actual store name instead of generic "local_db" store_name = self._name if self._name else "local" return list(db_tags), store_name except Exception as exc: debug(f"Local DB lookup failed: {exc}") return [], "unknown" except Exception as exc: debug(f"get_tags failed for local file: {exc}") return [], "unknown" def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool: """Add tags to a local file by hash (via FolderDB). Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags. Returns True if tags were successfully added. """ from helper.folder_store import FolderDB try: if not self._location: return False try: with FolderDB(Path(self._location)) as db: # Get existing tags existing_tags = list(db.get_tags(hash) or []) original_tags_lower = {t.lower() for t in existing_tags} # Merge new tags, handling namespace overwrites for new_tag in tag: if ':' in new_tag: namespace = new_tag.split(':', 1)[0] # Remove existing tags in same namespace existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')] # Add new tag if not already present (case-insensitive check) if new_tag.lower() not in original_tags_lower: existing_tags.append(new_tag) # Save merged tags db.add_tags_to_hash(hash, existing_tags) return True except Exception as exc: debug(f"Local DB add_tags failed: {exc}") return False except Exception as exc: debug(f"add_tag failed for local file: {exc}") return False def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Remove tags from a local file by hash.""" from helper.folder_store import FolderDB try: file_hash = file_identifier if self._location: try: with FolderDB(Path(self._location)) as db: db.remove_tags_from_hash(file_hash, list(tags)) return True except Exception as exc: debug(f"Local DB remove_tags failed: {exc}") return False except Exception as exc: debug(f"delete_tag failed for local file: {exc}") return False def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: """Get known url for a local file by hash.""" from helper.folder_store import FolderDB try: file_hash = file_identifier if self._location: try: with FolderDB(Path(self._location)) as db: meta = db.get_metadata(file_hash) or {} return list(meta.get("url") or []) except Exception as exc: debug(f"Local DB get_metadata failed: {exc}") return [] except Exception as exc: debug(f"get_url failed for local file: {exc}") return [] def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Add known url to a local file by hash.""" from helper.folder_store import FolderDB try: file_hash = file_identifier if self._location: try: with FolderDB(Path(self._location)) as db: meta = db.get_metadata(file_hash) or {} url = list(meta.get("url") or []) changed = False for u in url: if u not in url: url.append(u) changed = True if changed: db.update_metadata_by_hash(file_hash, {"url": url}) return True except Exception as exc: debug(f"Local DB add_url failed: {exc}") return False except Exception as exc: debug(f"add_url failed for local file: {exc}") return False def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Delete known url from a local file by hash.""" from helper.folder_store import FolderDB try: file_hash = file_identifier if self._location: try: with FolderDB(Path(self._location)) as db: meta = db.get_metadata(file_hash) or {} url = list(meta.get("url") or []) changed = False for u in url: if u in url: url.remove(u) changed = True if changed: db.update_metadata_by_hash(file_hash, {"url": url}) return True except Exception as exc: debug(f"Local DB delete_url failed: {exc}") return False except Exception as exc: debug(f"delete_url failed for local file: {exc}") return False def delete_file(self, file_identifier: str, **kwargs: Any) -> bool: """Delete a file from the folder store. Args: file_identifier: The file path (as string) or hash of the file to delete **kwargs: Optional parameters Returns: True if deletion succeeded, False otherwise """ from helper.folder_store import FolderDB try: file_path = Path(file_identifier) # Delete from database with FolderDB(Path(self._location)) as db: db.delete_file(file_path) # Delete the actual file from disk if file_path.exists(): file_path.unlink() debug(f"Deleted file: {file_path}") return True else: debug(f"File not found on disk: {file_path}") return True # Already gone except Exception as exc: debug(f"delete_file failed: {exc}") return False class HydrusNetwork(store): """File storage backend for Hydrus client. Each instance represents a specific Hydrus client connection. Maintains its own HydrusClient with session key. """ def __init__(self, instance_name: str, api_key: str, url: str) -> None: """Initialize Hydrus storage backend. Args: instance_name: Name of this Hydrus instance (e.g., 'home', 'work') api_key: Hydrus Client API access key url: Hydrus client URL (e.g., 'http://192.168.1.230:45869') """ from helper import hydrus as hydrus_wrapper self._instance_name = instance_name self._api_key = api_key self._url = url # Create persistent client with session key for this instance self._client = hydrus_wrapper.HydrusClient(url=url, access_key=api_key) def name(self) -> str: return self._instance_name def get_name(self) -> str: return self._instance_name def add_file(self, file_path: Path, **kwargs: Any) -> str: """Upload file to Hydrus with full metadata support. Args: file_path: Path to the file to upload tags: Optional list of tags to add url: Optional list of url to associate with the file title: Optional title (will be added as 'title:value' tag) Returns: File hash from Hydrus Raises: Exception: If upload fails """ from helper import hydrus as hydrus_wrapper from helper.utils import sha256_file tags = kwargs.get("tags", []) url = kwargs.get("url", []) title = kwargs.get("title") # Add title to tags if provided and not already present if title: title_tag = f"title:{title}" if not any(str(tag).lower().startswith("title:") for tag in tags): tags = [title_tag] + list(tags) try: # Compute file hash file_hash = sha256_file(file_path) debug(f"File hash: {file_hash}") # Use persistent client with session key client = self._client if client is None: raise Exception("Hydrus client unavailable") # Check if file already exists in Hydrus file_exists = False try: metadata = client.fetch_file_metadata(hashes=[file_hash]) if metadata and isinstance(metadata, dict): files = metadata.get("file_metadata", []) if files: file_exists = True log( f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}", file=sys.stderr, ) except Exception: pass # Upload file if not already present if not file_exists: log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr) response = client.add_file(file_path) # Extract hash from response hydrus_hash: Optional[str] = None if isinstance(response, dict): hydrus_hash = response.get("hash") or response.get("file_hash") if not hydrus_hash: hashes = response.get("hashes") if isinstance(hashes, list) and hashes: hydrus_hash = hashes[0] if not hydrus_hash: raise Exception(f"Hydrus response missing file hash: {response}") file_hash = hydrus_hash log(f"Hydrus: {file_hash}", file=sys.stderr) # Add tags if provided (both for new and existing files) if tags: try: # Use default tag service service_name = "my tags" except Exception: service_name = "my tags" try: debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}") client.add_tags(file_hash, tags, service_name) log(f"Tags added via '{service_name}'", file=sys.stderr) except Exception as exc: log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr) # Associate url if provided (both for new and existing files) if url: log(f"Associating {len(url)} URL(s) with file", file=sys.stderr) for url in url: if url: try: client.associate_url(file_hash, str(url)) debug(f"Associated URL: {url}") except Exception as exc: log(f"⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr) return file_hash except Exception as exc: log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr) raise def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Search Hydrus database for files matching query. Args: query: Search query (tags, filenames, hashes, etc.) limit: Maximum number of results to return (default: 100) Returns: List of dicts with 'name', 'hash', 'size', 'tags' fields Example: results = storage["hydrus"].search("artist:john_doe music") results = storage["hydrus"].search("Simple Man") """ limit = kwargs.get("limit", 100) try: client = self._client if client is None: raise Exception("Hydrus client unavailable") debug(f"Searching Hydrus for: {query}") # Parse the query into tags # Handle both simple tags and complex queries # "*" means "match all" - use system:everything tag in Hydrus if query.strip() == "*": # Use system:everything to match all files in Hydrus tags = ["system:everything"] else: query_lower = query.lower().strip() # If query doesn't have a namespace (no ':'), search all files and filter by title/tags # If query has explicit namespace, use it as a tag search if ':' not in query_lower: # No namespace provided: search all files, then filter by title/tags containing the query tags = ["system:everything"] else: # User provided explicit namespace (e.g., "creator:john" or "system:has_audio") # Use it as a tag search tags = [query_lower] if not tags: debug(f"Found 0 result(s)") return [] # Search files with the tags search_result = client.search_files( tags=tags, return_hashes=True, return_file_ids=True ) # Extract file IDs from search result file_ids = search_result.get("file_ids", []) hashes = search_result.get("hashes", []) if not file_ids and not hashes: debug(f"Found 0 result(s)") return [] # Fetch metadata for the found files results = [] query_lower = query.lower().strip() # Split by comma or space for AND logic search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching if file_ids: metadata = client.fetch_file_metadata(file_ids=file_ids) metadata_list = metadata.get("metadata", []) for meta in metadata_list: if len(results) >= limit: break file_id = meta.get("file_id") hash_hex = meta.get("hash") size = meta.get("size", 0) # Get tags for this file and extract title tags_set = meta.get("tags", {}) all_tags = [] title = f"Hydrus File {file_id}" # Default fallback all_tags_str = "" # For substring matching # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}") if isinstance(tags_set, dict): # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen def _collect(tag_list: Any) -> None: nonlocal title, all_tags_str if not isinstance(tag_list, list): return for tag in tag_list: tag_text = str(tag) if tag else "" if not tag_text: continue all_tags.append(tag_text) all_tags_str += " " + tag_text.lower() if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}": title = tag_text.split(":", 1)[1].strip() for service_name, service_tags in tags_set.items(): if not isinstance(service_tags, dict): continue storage_tags = service_tags.get("storage_tags", {}) if isinstance(storage_tags, dict): for tag_list in storage_tags.values(): _collect(tag_list) display_tags = service_tags.get("display_tags", []) _collect(display_tags) # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it) top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) _collect(top_level_tags) # Resolve extension from MIME type mime_type = meta.get("mime") ext = "" if mime_type: for category in mime_maps.values(): for ext_key, info in category.items(): if mime_type in info.get("mimes", []): ext = info.get("ext", "").lstrip('.') break if ext: break # Filter results based on query type # If user provided explicit namespace (has ':'), don't do substring filtering # Just include what the tag search returned has_namespace = ':' in query_lower if has_namespace: # Explicit namespace search - already filtered by Hydrus tag search # Include this result as-is results.append({ "hash": hash_hex, "hash_hex": hash_hex, "target": hash_hex, "name": title, "title": title, "size": size, "size_bytes": size, "origin": self._instance_name, "tags": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, }) else: # Free-form search: check if search terms match the title or tags # Match if ALL search terms are found in title or tags (AND logic) # AND use whole word matching # Combine title and tags for searching searchable_text = (title + " " + all_tags_str).lower() match = True if query_lower != "*": for term in search_terms: # Regex for whole word: \bterm\b # Escape term to handle special chars pattern = r'\b' + re.escape(term) + r'\b' if not re.search(pattern, searchable_text): match = False break if match: results.append({ "hash": hash_hex, "hash_hex": hash_hex, "target": hash_hex, "name": title, "title": title, "size": size, "size_bytes": size, "origin": self._instance_name, "tags": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, }) debug(f"Found {len(results)} result(s)") return results[:limit] except Exception as exc: log(f"❌ Hydrus search failed: {exc}", file=sys.stderr) import traceback traceback.print_exc(file=sys.stderr) raise def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: """Open file in browser via Hydrus client API URL.""" import tempfile import webbrowser debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...") # Build browser URL with access key base_url = self._client.url.rstrip('/') access_key = self._client.access_key browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}") # Open in default browser webbrowser.open(browser_url) debug(f"[HydrusNetwork.get_file] Browser opened successfully") # Return the URL string instead of downloading debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}") return browser_url def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: """Get a playable path for a file in this Hydrus instance. For Hydrus stores, this builds a file URL with authentication. Args: file_hash: SHA256 hash of the file config: Optional config dict (unused, URL and key are from instance) Returns: Hydrus API file URL with embedded access key, or None if client unavailable """ try: if not self._client: return None base_url = self._client.url.rstrip('/') access_key = self._client.access_key # Build Hydrus file URL with access key url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" return url except Exception as e: debug(f"Error building Hydrus URL for {file_hash}: {e}") return None def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: """Get metadata for a file from Hydrus by hash. Args: file_hash: SHA256 hash of the file (64-char hex string) Returns: Dict with metadata fields or None if not found """ try: client = self._client if not client: debug("get_metadata: Hydrus client unavailable") return None # Fetch file metadata payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) if not payload or not payload.get("metadata"): return None meta = payload["metadata"][0] # Extract title from tags title = f"Hydrus_{file_hash[:12]}" tags_payload = meta.get("tags", {}) if isinstance(tags_payload, dict): for service_data in tags_payload.values(): if isinstance(service_data, dict): display_tags = service_data.get("display_tags", {}) if isinstance(display_tags, dict): current_tags = display_tags.get("0", []) if isinstance(current_tags, list): for tag in current_tags: if str(tag).lower().startswith("title:"): title = tag.split(":", 1)[1].strip() break if title != f"Hydrus_{file_hash[:12]}": break # Determine extension from mime type mime_type = meta.get("mime", "") ext = "" if mime_type: from helper.utils_constant import mime_maps for category, extensions in mime_maps.items(): for extension, mime in extensions.items(): if mime == mime_type: ext = extension.lstrip(".") break if ext: break return { "hash": file_hash, "title": title, "ext": ext, "size": meta.get("size", 0), "mime": mime_type, } except Exception as exc: debug(f"Failed to get metadata from Hydrus: {exc}") return None def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: """Get tags for a file from Hydrus by hash. Args: file_identifier: File hash (SHA256 hex string) **kwargs: Optional service_name parameter Returns: Tuple of (tags_list, source_description) where source is always "hydrus" """ try: file_hash = str(file_identifier) # Get Hydrus client and service info client = self._client if not client: debug("get_tags: Hydrus client unavailable") return [], "unknown" # Fetch file metadata payload = client.fetch_file_metadata( hashes=[file_hash], include_service_keys_to_tags=True, include_file_url=False ) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: debug(f"get_tags: No metadata returned for hash {file_hash}") return [], "unknown" meta = items[0] if isinstance(items[0], dict) else None if not isinstance(meta, dict) or meta.get("file_id") is None: debug(f"get_tags: Invalid metadata for hash {file_hash}") return [], "unknown" # Extract tags using service name service_name = "my tags" service_key = hydrus_wrapper.get_tag_service_key(client, service_name) # Extract tags from metadata tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) return tags, "hydrus" except Exception as exc: debug(f"get_tags failed for Hydrus file: {exc}") return [], "unknown" def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Add tags to a Hydrus file. """ try: client = self._client if client is None: debug("add_tag: Hydrus client unavailable") return False service_name = kwargs.get("service_name") or "my tags" # Ensure tags is a list tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] if not tag_list: return False client.add_tags(file_identifier, tag_list, service_name) return True except Exception as exc: debug(f"Hydrus add_tag failed: {exc}") return False def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Delete tags from a Hydrus file. """ try: client = self._client if client is None: debug("delete_tag: Hydrus client unavailable") return False service_name = kwargs.get("service_name") or "my tags" tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] if not tag_list: return False client.delete_tags(file_identifier, tag_list, service_name) return True except Exception as exc: debug(f"Hydrus delete_tag failed: {exc}") return False def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: """Get known url for a Hydrus file. """ try: client = self._client if client is None: debug("get_url: Hydrus client unavailable") return [] payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: return [] meta = items[0] url = meta.get("url") or [] return list(url) except Exception as exc: debug(f"Hydrus get_url failed: {exc}") return [] def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Associate one or more url with a Hydrus file. """ try: client = self._client if client is None: debug("add_url: Hydrus client unavailable") return False for u in url: client.associate_url(file_identifier, u) return True except Exception as exc: debug(f"Hydrus add_url failed: {exc}") return False def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Delete one or more url from a Hydrus file. """ try: client = self._client if client is None: debug("delete_url: Hydrus client unavailable") return False for u in url: client.delete_url(file_identifier, u) return True except Exception as exc: debug(f"Hydrus delete_url failed: {exc}") return False @staticmethod def _extract_tags_from_hydrus_meta( meta: Dict[str, Any], service_key: Optional[str], service_name: str ) -> List[str]: """Extract current tags from Hydrus metadata dict. Prefers display_tags (includes siblings/parents, excludes deleted). Falls back to storage_tags status '0' (current). """ tags_payload = meta.get("tags") if not isinstance(tags_payload, dict): return [] svc_data = None if service_key: svc_data = tags_payload.get(service_key) if not isinstance(svc_data, dict): return [] # Prefer display_tags (Hydrus computes siblings/parents) display = svc_data.get("display_tags") if isinstance(display, list) and display: return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] # Fallback to storage_tags status '0' (current) storage = svc_data.get("storage_tags") if isinstance(storage, dict): current_list = storage.get("0") or storage.get(0) if isinstance(current_list, list): return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] return [] class MatrixStorageBackend(store): """File storage backend for Matrix (Element) chat rooms.""" def get_name(self) -> str: return "matrix" def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]: """List joined rooms with their names.""" matrix_conf = config.get('storage', {}).get('matrix', {}) homeserver = matrix_conf.get('homeserver') access_token = matrix_conf.get('access_token') if not homeserver or not access_token: return [] if not homeserver.startswith('http'): homeserver = f"https://{homeserver}" headers = {"Authorization": f"Bearer {access_token}"} try: # Get joined rooms resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10) if resp.status_code != 200: return [] room_ids = resp.json().get('joined_rooms', []) rooms = [] for rid in room_ids: # Try to get room name name = "Unknown Room" try: # Get state event for name name_resp = requests.get( f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name", headers=headers, timeout=2 ) if name_resp.status_code == 200: name = name_resp.json().get('name', name) else: # Try canonical alias alias_resp = requests.get( f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias", headers=headers, timeout=2 ) if alias_resp.status_code == 200: name = alias_resp.json().get('alias', name) except Exception: pass rooms.append({'id': rid, 'name': name}) return rooms except Exception as e: log(f"Error listing Matrix rooms: {e}", file=sys.stderr) return [] def upload(self, file_path: Path, **kwargs: Any) -> str: """Upload file to Matrix room. Requires 'config' in kwargs with 'storage.matrix' settings: - homeserver: URL of homeserver (e.g. https://matrix.org) - user_id: User ID (e.g. @user:matrix.org) - access_token: Access token (preferred) OR password - room_id: Room ID to upload to (e.g. !roomid:matrix.org) """ config = kwargs.get('config', {}) if not config: raise ValueError("Config required for Matrix upload") matrix_conf = config.get('storage', {}).get('matrix', {}) if not matrix_conf: raise ValueError("Matrix storage not configured in config.json") homeserver = matrix_conf.get('homeserver') # user_id = matrix_conf.get('user_id') # Not strictly needed if we have token access_token = matrix_conf.get('access_token') room_id = matrix_conf.get('room_id') if not homeserver: raise ValueError("Matrix homeserver required") # Ensure homeserver has protocol if not homeserver.startswith('http'): homeserver = f"https://{homeserver}" # Login if no access token (optional implementation, for now assume token) if not access_token: raise ValueError("Matrix access_token required (login not yet implemented)") # Handle room selection if not provided if not room_id: log("No room_id configured. Fetching joined rooms...", file=sys.stderr) rooms = self.list_rooms(config) if not rooms: raise ValueError("No joined rooms found or failed to fetch rooms.") from result_table import ResultTable table = ResultTable("Matrix Rooms") for i, room in enumerate(rooms): row = table.add_row() row.add_column("#", str(i + 1)) row.add_column("Name", room['name']) row.add_column("ID", room['id']) print(table) # Simple interactive selection try: selection = input("Select room # to upload to: ") idx = int(selection) - 1 if 0 <= idx < len(rooms): room_id = rooms[idx]['id'] log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr) else: raise ValueError("Invalid selection") except Exception: raise ValueError("Invalid room selection") if not room_id: raise ValueError("Matrix room_id required") # 1. Upload Media upload_url = f"{homeserver}/_matrix/media/r3/upload" headers = { "Authorization": f"Bearer {access_token}", "Content-Type": "application/octet-stream" # Or guess mime type } import mimetypes mime_type, _ = mimetypes.guess_type(file_path) if mime_type: headers["Content-Type"] = mime_type filename = file_path.name try: with open(file_path, 'rb') as f: resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) if resp.status_code != 200: raise Exception(f"Matrix upload failed: {resp.text}") content_uri = resp.json().get('content_uri') if not content_uri: raise Exception("No content_uri returned from Matrix upload") # 2. Send Message send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message" # Determine msgtype msgtype = "m.file" if mime_type: if mime_type.startswith("image/"): msgtype = "m.image" elif mime_type.startswith("video/"): msgtype = "m.video" elif mime_type.startswith("audio/"): msgtype = "m.audio" payload = { "msgtype": msgtype, "body": filename, "url": content_uri, "info": { "mimetype": mime_type, "size": file_path.stat().st_size } } resp = requests.post(send_url, headers=headers, json=payload) if resp.status_code != 200: raise Exception(f"Matrix send message failed: {resp.text}") event_id = resp.json().get('event_id') return f"matrix://{room_id}/{event_id}" except Exception as e: log(f"❌ Matrix upload error: {e}", file=sys.stderr) raise # --- Not supported for Matrix: tagging & URL operations (return safe defaults) --- def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: return [], "matrix" def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: return False def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: return False def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: return [] def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: return False def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: return False class RemoteStorageBackend(store): """File storage backend for remote Android/network storage servers. Connects to a remote storage server (e.g., running on Android phone) via REST API. All operations are proxied to the remote server. """ def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None: """Initialize remote storage backend. Args: server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000) timeout: Request timeout in seconds api_key: Optional API key for authentication """ try: import requests except ImportError: raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests") self.server_url = server_url.rstrip('/') self.timeout = timeout self.api_key = api_key self._session = requests.Session() # Add API key to default headers if provided if self.api_key: self._session.headers.update({'X-API-Key': self.api_key}) def get_name(self) -> str: return "remote" def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]: """Make HTTP request to remote server.""" import requests from urllib.parse import urljoin url = urljoin(self.server_url, endpoint) try: response = self._session.request( method, url, timeout=self.timeout, **kwargs ) if response.status_code == 404: raise Exception(f"Remote resource not found: {endpoint}") if response.status_code >= 400: try: error_data = response.json() error_msg = error_data.get('error', response.text) except: error_msg = response.text raise Exception(f"Remote server error {response.status_code}: {error_msg}") return response.json() except requests.exceptions.RequestException as e: raise Exception(f"Connection to {self.server_url} failed: {e}") def upload(self, file_path: Path, **kwargs: Any) -> str: """Upload file to remote storage. Args: file_path: Path to the file to upload tags: Optional list of tags to add url: Optional list of known url Returns: Remote file hash """ from helper.utils import sha256_file if not file_path.exists(): raise ValueError(f"File not found: {file_path}") try: # Index the file on remote server data = {"path": str(file_path)} tags = kwargs.get("tags", []) if tags: data["tags"] = tags url = kwargs.get("url", []) if url: data["url"] = url result = self._request('POST', '/files/index', json=data) file_hash = result.get('hash') if file_hash: log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr) return file_hash else: raise Exception("Remote server did not return file hash") except Exception as exc: debug(f"Remote upload failed: {exc}", file=sys.stderr) raise # Tag and URL operations - Remote server default: not supported def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: return [], "remote" def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: return False def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: return False def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: return [] def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: return False def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: return False def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Search files on remote storage. Args: query: Search query limit: Maximum results Returns: List of search results """ limit = kwargs.get("limit") try: limit = int(limit) if limit is not None else 100 except (TypeError, ValueError): limit = 100 if limit <= 0: limit = 100 try: response = self._request('GET', '/files/search', params={ 'q': query, 'limit': limit }) files = response.get('files', []) # Transform remote format to standard result format results = [] for f in files: results.append({ "name": f.get('name', '').split('/')[-1], # Get filename from path "title": f.get('name', f.get('path', '')).split('/')[-1], "ext": f.get('ext', ''), "path": f.get('path', ''), "target": f.get('path', ''), "hash": f.get('hash', ''), "origin": "remote", "size": f.get('size', 0), "size_bytes": f.get('size', 0), "tags": f.get('tags', []), }) debug(f"Remote search found {len(results)} results", file=sys.stderr) return results except Exception as exc: log(f"❌ Remote search failed: {exc}", file=sys.stderr) raise class FileStorage: """Unified file storage interface supporting multiple backend instances. Each backend type (folder, hydrusnetwork) can have multiple named instances. Access backends by their configured names. Config structure: { "store": { "folder": { "default": {"path": "C:\\Media Machina"}, "test": {"path": "C:\\Users\\Admin\\Downloads\\Video"} }, "hydrusnetwork": { "home": { "Hydrus-Client-API-Access-Key": "d4321f...", "url": "http://192.168.1.230:45869" }, "work": { "Hydrus-Client-API-Access-Key": "abc123...", "url": "http://192.168.1.100:45869" } } } } Example: storage = FileStorage(config) # Upload to different named instances hash1 = storage["test"].add_file(Path("file.mp3"), tags=["music"]) hash2 = storage["home"].add_file(Path("file.mp3"), tags=["music"]) hash3 = storage["work"].add_file(Path("file.mp3"), tags=["music"]) # Search across different instances results = storage["home"].search("music") results = storage["test"].search("song") """ def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None: """Initialize the file storage system with available backends. Args: config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.) suppress_debug: If True, suppress debug output during initialization (useful for autocomplete) """ self.suppress_debug = suppress_debug config = config or {} # Extract backend-specific settings from config from config import get_local_storage_path local_path = get_local_storage_path(config) local_path_str = str(local_path) if local_path else None self._backends: Dict[str, store] = {} # Build folder stores from config (support both 'storage' and legacy 'store' top-level keys) folder_sources = None cfg_storage = config.get("storage") or config.get("store") or {} if isinstance(cfg_storage, dict): val = cfg_storage.get("folder") if isinstance(val, dict): folder_sources = val # If folder sources provided, create backends for each entry if folder_sources: # Normalize into name -> path mapping folder_map: Dict[str, str] = {} for key, value in folder_sources.items(): if isinstance(value, dict): path_val = value.get("path") elif isinstance(value, (str, bytes)): path_val = str(value) else: path_val = None if path_val: folder_map[str(key)] = str(Path(path_val).expanduser()) # Register all folder stores by their explicit names from config for name, path in folder_map.items(): self._backends[name] = Folder(location=path, name=name) else: # Fallback: use legacy single local path if present if local_path_str: self._backends["default"] = Folder(location=local_path_str, name="default") # Matrix (chat room) acts as a provider, not a persistent storage backend. # We no longer register Matrix as a storage backend here; providers should be separate classes. # Build HydrusNetwork backends from config['store']['hydrusnetwork'] # Register all instances regardless of current connectivity - connection errors # will be caught when actually trying to use the backend hydrus_sources = cfg_storage.get("hydrusnetwork") if isinstance(hydrus_sources, dict): for instance_name, instance_config in hydrus_sources.items(): if isinstance(instance_config, dict): api_key = instance_config.get("Hydrus-Client-API-Access-Key") url = instance_config.get("url") # Skip if missing credentials - don't register instances without full config if not api_key or not url: continue # Register the instance - connection will be tested when actually used try: self._backends[instance_name] = HydrusNetwork( instance_name=instance_name, api_key=api_key, url=url ) if not self.suppress_debug: debug(f"[FileStorage] Registered Hydrus instance '{instance_name}': {url}") except Exception as e: if not self.suppress_debug: debug(f"[FileStorage] Failed to register Hydrus instance '{instance_name}': {e}") continue # Include remote storage backends from config (for Android/network servers) remote_storages = config.get("remote_storages", []) if isinstance(remote_storages, list): for remote_config in remote_storages: if isinstance(remote_config, dict): name = remote_config.get("name", "remote") url = remote_config.get("url") timeout = remote_config.get("timeout", 30) api_key = remote_config.get("api_key") if url: try: backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key) self._backends[name] = backend auth_status = " (with auth)" if api_key else " (no auth)" log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr) except Exception as e: log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr) def list_backends(self) -> list[str]: """Return available backend keys for autocomplete and validation.""" return sorted(self._backends.keys()) def list_searchable_backends(self) -> list[str]: """Return backend names that support searching.""" searchable = [] for name, backend in self._backends.items(): if callable(getattr(backend, 'search', None)): searchable.append(name) return sorted(searchable) def __getitem__(self, backend_name: str) -> store: """Get a storage backend by name. Args: backend_name: Name of the backend ('0x0', 'local', 'hydrus') Returns: StorageBackend instance Raises: KeyError: If backend not found """ if backend_name not in self._backends: raise KeyError( f"Unknown storage backend: {backend_name}. " f"Available: {list(self._backends.keys())}" ) return self._backends[backend_name] def register(self, backend: store) -> None: """Register a custom storage backend. Args: backend: StorageBackend instance to register """ name = backend.get_name() self._backends[name] = backend log(f"Registered storage backend: {name}", file=sys.stderr) def is_available(self, backend_name: str) -> bool: """Check if a backend is available. Args: backend_name: Name of the backend Returns: True if backend is registered """ return backend_name in self._backends def list_searchable_backends(self) -> list[str]: """Get list of backends that support searching. Returns: List of searchable backend names """ return [ name for name, backend in self._backends.items() if callable(getattr(backend, 'search', None)) or callable(getattr(backend, 'search_file', None)) ] # --- remaining FileStorage methods ---