"""File storage abstraction layer for uploading files to different services.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
from abc import ABC, abstractmethod
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import Any, Dict, Optional, Tuple, List
|
|||
|
|
import sys
|
|||
|
|
import shutil
|
|||
|
|
import requests
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
from helper.logger import log, debug
|
|||
|
|
from helper.utils_constant import mime_maps
|
|||
|
|
from helper.utils import sha256_file
|
|||
|
|
from helper.folder_store import FolderDB
|
|||
|
|
from config import get_local_storage_path
|
|||
|
|
|
|||
|
|
|
|||
|
|
HEX_DIGITS = set("0123456789abcdef")
|
|||
|
|
|
|||
|
|
|
|||
|
|


def _normalize_hex_hash(value: Optional[str]) -> Optional[str]:
    """Return a normalized 64-character lowercase hash or None."""
    if value is None:
        return None

    try:
        cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS)
    except Exception:
        return None

    if len(cleaned) == 64:
        return cleaned
    return None


def _resolve_file_hash(candidate: Optional[str], path: Path) -> Optional[str]:
    """Return the given hash if valid, otherwise compute sha256 from disk."""
    normalized = _normalize_hex_hash(candidate)
    if normalized is not None:
        return normalized

    if not path.exists():
        return None

    try:
        return sha256_file(path)
    except Exception as exc:
        debug(f"Failed to compute hash for {path}: {exc}")
        return None
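
# A quick sketch of how the two helpers above compose (the hash and path
# values here are hypothetical):
#
#   _normalize_hex_hash(" ABC123... ")          # -> 64-char lowercase hex, else None
#   _resolve_file_hash(None, Path("clip.mp4"))  # falls back to sha256_file() on disk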


class store(ABC):
    """Abstract interface that every file-storage backend implements."""

    @abstractmethod
    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Add a file to the backend and return its identifier (hash)."""

    @abstractmethod
    def name(self) -> str:
        """Return the backend's instance name."""

    def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search the backend for files matching ``query``."""
        raise NotImplementedError(f"{self.name()} backend does not support searching")

    @abstractmethod
    def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]:
        """Retrieve file by hash, returning path to the file.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Path to the file or None if not found
        """
        raise NotImplementedError(f"{self.name()} backend does not support get_file")

    @abstractmethod
    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        """Get metadata for a file by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with metadata fields or None if not found
        """
        raise NotImplementedError(f"{self.name()} backend does not support get_metadata")

    @abstractmethod
    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        """Return (tags, store_name) for a file."""
        raise NotImplementedError(f"{self.name()} backend does not support get_tag")

    @abstractmethod
    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Add tags to a file; return True on success."""
        raise NotImplementedError(f"{self.name()} backend does not support add_tag")

    @abstractmethod
    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Remove tags from a file; return True on success."""
        raise NotImplementedError(f"{self.name()} backend does not support delete_tag")

    @abstractmethod
    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Return the known URLs associated with a file."""
        raise NotImplementedError(f"{self.name()} backend does not support get_url")

    @abstractmethod
    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Associate URLs with a file; return True on success."""
        raise NotImplementedError(f"{self.name()} backend does not support add_url")

    @abstractmethod
    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Remove URL associations from a file; return True on success."""
        raise NotImplementedError(f"{self.name()} backend does not support delete_url")
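
# Minimal usage sketch of the interface (Folder is the concrete backend
# defined below; the location, path, and tags are illustrative):
#
#   backend: store = Folder(location="~/media", name="local")
#   file_hash = backend.add_file(Path("clip.mp4"), tags=["title:Clip"])
#   backend.get_tag(file_hash)  # -> (["title:Clip", ...], "local")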


class Folder(store):
    """Folder-backed store that keeps files on disk, indexed by a local SQLite DB."""

    # Track which locations have already been migrated to avoid repeated migrations
    _migrated_locations = set()

    def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None:
        self._location = location
        self._name = name

        if self._location:
            try:
                location_path = Path(self._location).expanduser()

                # Use context manager to ensure connection is properly closed
                with FolderDB(location_path) as db:
                    if db.connection:
                        db.connection.commit()

                # Call migration and discovery at startup
                Folder.migrate_location(self._location)
            except Exception as exc:
                debug(f"Failed to initialize database for '{name}': {exc}")

    @classmethod
    def migrate_location(cls, location: Optional[str]) -> None:
        """Migrate a location to hash-based storage (one-time operation, call explicitly at startup)."""
        if not location:
            return

        location_path = Path(location).expanduser()
        location_str = str(location_path)

        # Only migrate once per location. Marking the location *before* creating
        # the temporary instance below also stops __init__ from re-entering this
        # classmethod and recursing.
        if location_str in cls._migrated_locations:
            return

        cls._migrated_locations.add(location_str)

        # Create a temporary instance just to call the migration
        temp_instance = cls(location=location)
        temp_instance._migrate_to_hash_storage(location_path)
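
    # Hypothetical startup call, assuming a library configured at ~/media:
    #
    #   Folder.migrate_location("~/media")   # renames files to <sha256><ext>,
    #                                        # imports sidecars, indexes new files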

    def _migrate_to_hash_storage(self, location_path: Path) -> None:
        """Migrate existing files from filename-based to hash-based storage.

        Checks for sidecars (.metadata, .tag) and imports them before renaming.
        Also ensures all files have a title: tag.
        """
        from helper.folder_store import read_sidecar, find_sidecar

        try:
            with FolderDB(location_path) as db:
                cursor = db.connection.cursor()

                # First pass: migrate filename-based files and add title tags.
                # Scan all files in the storage directory.
                for file_path in sorted(location_path.iterdir()):
                    if not file_path.is_file():
                        continue

                    # Skip database files and sidecars
                    if file_path.suffix in ('.db', '.metadata', '.tag'):
                        continue
                    # Skip SQLite journal files (their names end with -shm/-wal,
                    # which never appear as a Path.suffix)
                    if file_path.name.endswith(('-shm', '-wal')):
                        continue

                    # Check if filename is already a hash (without extension)
                    if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()):
                        continue  # Already migrated, will process in second pass

                    try:
                        # Compute file hash
                        file_hash = sha256_file(file_path)
                        # Preserve extension in the hash-based filename
                        file_ext = file_path.suffix  # e.g., '.mp4'
                        hash_filename = file_hash + file_ext if file_ext else file_hash
                        hash_path = location_path / hash_filename

                        # Check for sidecars and import them
                        sidecar_path = find_sidecar(file_path)
                        tags_to_add = []
                        url_to_add = []
                        has_title_tag = False

                        if sidecar_path and sidecar_path.exists():
                            try:
                                _, tags, url = read_sidecar(sidecar_path)
                                if tags:
                                    tags_to_add = list(tags)
                                    # Check if a title tag already exists
                                    has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add)
                                if url:
                                    url_to_add = list(url)
                                debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr)
                                # Delete the sidecar after importing
                                sidecar_path.unlink()
                            except Exception as exc:
                                debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr)

                        # Ensure there's a title tag (use original filename if not present)
                        if not has_title_tag:
                            tags_to_add.append(f"title:{file_path.name}")

                        # Rename file to hash if needed
                        if hash_path != file_path and not hash_path.exists():
                            debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr)
                            file_path.rename(hash_path)

                        # Create or update database entry using FolderDB methods
                        db.get_or_create_file_entry(hash_path)

                        # Save extension metadata
                        ext_clean = file_ext.lstrip('.') if file_ext else ''
                        db.save_metadata(hash_path, {
                            'hash': file_hash,
                            'ext': ext_clean,
                            'size': hash_path.stat().st_size
                        })

                        # Add all tags (including title tag)
                        if tags_to_add:
                            db.save_tags(hash_path, tags_to_add)
                            debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr)

                        # Note: url would need a separate table to be stored here;
                        # for now they are only noted in debug output
                        if url_to_add:
                            debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr)

                    except Exception as exc:
                        debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr)

                # Second pass: ensure all files in database have a title: tag
                db.connection.commit()
                cursor.execute('''
                    SELECT f.hash, f.file_path
                    FROM files f
                    WHERE NOT EXISTS (
                        SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%'
                    )
                ''')
                files_without_title = cursor.fetchall()

                for file_hash, file_path_str in files_without_title:
                    try:
                        file_path = Path(file_path_str)
                        if file_path.exists():
                            # Use the filename as the title
                            title_tag = f"title:{file_path.name}"
                            db.save_tags(file_path, [title_tag])
                            debug(f"Added title tag to {file_path.name}", file=sys.stderr)
                    except Exception as exc:
                        debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr)

                db.connection.commit()

                # Third pass: discover files on disk that aren't in the database yet.
                # These are hash-named files that were added after initial indexing.
                cursor.execute('SELECT LOWER(hash) FROM files')
                db_hashes = {row[0] for row in cursor.fetchall()}

                discovered = 0
                for file_path in sorted(location_path.rglob("*")):
                    if file_path.is_file():
                        # Check if file name (without extension) is a 64-char hex hash
                        name_without_ext = file_path.stem
                        if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()):
                            file_hash = name_without_ext.lower()

                            # Skip if already in DB
                            if file_hash in db_hashes:
                                continue

                            try:
                                # Add file to DB (creates entry and auto-adds title: tag)
                                db.get_or_create_file_entry(file_path)

                                # Save extension metadata
                                file_ext = file_path.suffix
                                ext_clean = file_ext.lstrip('.') if file_ext else ''
                                db.save_metadata(file_path, {
                                    'hash': file_hash,
                                    'ext': ext_clean,
                                    'size': file_path.stat().st_size
                                })

                                discovered += 1
                            except Exception as e:
                                debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr)

                if discovered > 0:
                    debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr)
                db.connection.commit()
        except Exception as exc:
            debug(f"Migration to hash storage failed: {exc}", file=sys.stderr)

    def location(self) -> Optional[str]:
        return self._location

    def name(self) -> str:
        return self._name

    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Add file to local folder storage with full metadata support.

        Args:
            file_path: Path to the file to add
            move: If True, move file instead of copy (default: False)
            tags: Optional list of tags to add
            url: Optional list of URLs to associate with the file
            title: Optional title (will be added as 'title:value' tag)

        Returns:
            File hash (SHA256 hex string) as identifier
        """
        move_file = bool(kwargs.get("move"))
        tags = kwargs.get("tags", [])
        url = kwargs.get("url", [])
        title = kwargs.get("title")

        # Extract title from tags if not explicitly provided
        if not title:
            for tag in tags:
                if isinstance(tag, str) and tag.lower().startswith("title:"):
                    title = tag.split(":", 1)[1].strip()
                    break

        # Fallback to filename if no title
        if not title:
            title = file_path.name

        # Ensure title is in tags
        title_tag = f"title:{title}"
        if not any(str(tag).lower().startswith("title:") for tag in tags):
            tags = [title_tag] + list(tags)

        try:
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}", file=sys.stderr)

            # Preserve extension in the stored filename
            file_ext = file_path.suffix  # e.g., '.mp4'
            save_filename = file_hash + file_ext if file_ext else file_hash
            save_file = Path(self._location) / save_filename

            # Check if file already exists
            with FolderDB(Path(self._location)) as db:
                existing_path = db.search_hash(file_hash)
                if existing_path and existing_path.exists():
                    log(
                        f"✓ File already in local storage: {existing_path}",
                        file=sys.stderr,
                    )
                    # Still add tags and url if provided
                    if tags:
                        self.add_tag(file_hash, tags)
                    if url:
                        self.add_url(file_hash, url)
                    return file_hash

            # Move or copy file
            if move_file:
                shutil.move(str(file_path), str(save_file))
                debug(f"Local move: {save_file}", file=sys.stderr)
            else:
                shutil.copy2(str(file_path), str(save_file))
                debug(f"Local copy: {save_file}", file=sys.stderr)

            # Save to database
            with FolderDB(Path(self._location)) as db:
                db.get_or_create_file_entry(save_file)
                # Save metadata including extension; stat the stored copy, since
                # the source path no longer exists after a move
                ext_clean = file_ext.lstrip('.') if file_ext else ''
                db.save_metadata(save_file, {
                    'hash': file_hash,
                    'ext': ext_clean,
                    'size': save_file.stat().st_size
                })

            # Add tags if provided
            if tags:
                self.add_tag(file_hash, tags)

            # Add url if provided
            if url:
                self.add_url(file_hash, url)

            log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr)
            return file_hash

        except Exception as exc:
            log(f"❌ Local storage failed: {exc}", file=sys.stderr)
            raise
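
    # Illustrative call (paths and tags are made up):
    #
    #   folder = Folder(location="~/media", name="local")
    #   h = folder.add_file(Path("song.mp3"), tags=["artist:someone"],
    #                       url=["https://example.com/song"], move=False)
    #   # h is the sha256 hex digest; the file is stored as <hash>.mp3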

    def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search local database for files by title tag or filename."""
        from fnmatch import fnmatch
        from helper.folder_store import DatabaseAPI

        limit = kwargs.get("limit")
        try:
            limit = int(limit) if limit is not None else None
        except (TypeError, ValueError):
            limit = None
        if isinstance(limit, int) and limit <= 0:
            limit = None

        query = query.lower()
        query_lower = query
        match_all = query == "*"
        results = []
        search_dir = Path(self._location).expanduser()

        tokens = [t.strip() for t in query.split(',') if t.strip()]

        # Bare hashes are rejected here; hash lookups must use the 'hash:' prefix
        if not match_all and _normalize_hex_hash(query):
            debug("Hash queries require 'hash:' prefix for local search")
            return results

        def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]:
            path_str = str(file_path)
            # Resolve the hash once; it doubles as the title fallback below
            hash_value = _resolve_file_hash(db_hash, file_path)

            # Get title from tags if available, otherwise fall back to hash or stem
            title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
            if not title:
                title = hash_value if hash_value else file_path.stem

            # Extract extension from file path
            ext = file_path.suffix.lstrip('.')
            if not ext:
                # Fallback: try to extract from title (original filename might be in title)
                title_path = Path(title)
                ext = title_path.suffix.lstrip('.')

            # Build clean entry with only necessary fields
            entry = {
                "title": title,
                "ext": ext,
                "path": path_str,
                "target": path_str,
                "store": self._name,
                "size": size_bytes,
                "hash": hash_value,
                "tag": tags,
            }
            return entry

        try:
            if not search_dir.exists():
                debug(f"Search directory does not exist: {search_dir}")
                return results

            try:
                with DatabaseAPI(search_dir) as api:
                    if tokens and len(tokens) > 1:
                        def _like_pattern(term: str) -> str:
                            return term.replace('*', '%').replace('?', '_')

                        def _ids_for_token(token: str) -> set[str]:
                            token = token.strip()
                            if not token:
                                return set()

                            if ':' in token and not token.startswith(':'):
                                namespace, pattern = token.split(':', 1)
                                namespace = namespace.strip().lower()
                                pattern = pattern.strip().lower()

                                if namespace == 'hash':
                                    normalized_hash = _normalize_hex_hash(pattern)
                                    if not normalized_hash:
                                        return set()
                                    h = api.get_file_hash_by_hash(normalized_hash)
                                    return {h} if h else set()

                                if namespace == 'store':
                                    if pattern not in {'local', 'file', 'filesystem'}:
                                        return set()
                                    return api.get_all_file_hashes()

                                query_pattern = f"{namespace}:%"
                                tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern)
                                matched: set[str] = set()
                                for file_hash, tag_val in tag_rows:
                                    if not tag_val:
                                        continue
                                    tag_lower = str(tag_val).lower()
                                    if not tag_lower.startswith(f"{namespace}:"):
                                        continue
                                    value = tag_lower[len(namespace)+1:]
                                    if fnmatch(value, pattern):
                                        matched.add(file_hash)
                                return matched

                            term = token.lower()
                            like_pattern = f"%{_like_pattern(term)}%"
                            hashes = api.get_file_hashes_by_path_pattern(like_pattern)
                            hashes.update(api.get_file_hashes_by_tag_substring(like_pattern))
                            return hashes

                        try:
                            # Intersect the hash sets for every token (AND logic);
                            # bail out as soon as the intersection is empty
                            matching_hashes: set[str] | None = None
                            for token in tokens:
                                hashes = _ids_for_token(token)
                                matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes
                                if not matching_hashes:
                                    return results

                            rows = api.get_file_metadata(matching_hashes, limit)
                            for file_hash, file_path_str, size_bytes, ext in rows:
                                if not file_path_str:
                                    continue
                                file_path = Path(file_path_str)
                                if not file_path.exists():
                                    continue
                                if size_bytes is None:
                                    try:
                                        size_bytes = file_path.stat().st_size
                                    except OSError:
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
                            return results
                        except Exception as exc:
                            log(f"⚠️ AND search failed: {exc}", file=sys.stderr)
                            debug(f"AND search exception details: {exc}")
                            return []
if ":" in query and not query.startswith(":"):
|
|||
|
|
namespace, pattern = query.split(":", 1)
|
|||
|
|
namespace = namespace.strip().lower()
|
|||
|
|
pattern = pattern.strip().lower()
|
|||
|
|
debug(f"Performing namespace search: {namespace}:{pattern}")
|
|||
|
|
|
|||
|
|
if namespace == "hash":
|
|||
|
|
normalized_hash = _normalize_hex_hash(pattern)
|
|||
|
|
if not normalized_hash:
|
|||
|
|
return results
|
|||
|
|
h = api.get_file_hash_by_hash(normalized_hash)
|
|||
|
|
hashes = {h} if h else set()
|
|||
|
|
rows = api.get_file_metadata(hashes, limit)
|
|||
|
|
for file_hash, file_path_str, size_bytes, ext in rows:
|
|||
|
|
if not file_path_str:
|
|||
|
|
continue
|
|||
|
|
file_path = Path(file_path_str)
|
|||
|
|
if not file_path.exists():
|
|||
|
|
continue
|
|||
|
|
if size_bytes is None:
|
|||
|
|
try:
|
|||
|
|
size_bytes = file_path.stat().st_size
|
|||
|
|
except OSError:
|
|||
|
|
size_bytes = None
|
|||
|
|
tags = api.get_tags_for_file(file_hash)
|
|||
|
|
entry = _create_entry(file_path, tags, size_bytes, file_hash)
|
|||
|
|
results.append(entry)
|
|||
|
|
if limit is not None and len(results) >= limit:
|
|||
|
|
return results
|
|||
|
|
return results
|
|||
|
|
|
|||
|
|
query_pattern = f"{namespace}:%"
|
|||
|
|
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
|
|||
|
|
debug(f"Found {len(rows)} potential matches in DB")
|
|||
|
|
|
|||
|
|
for file_hash, file_path_str, size_bytes, ext in rows:
|
|||
|
|
if not file_path_str:
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern)
|
|||
|
|
|
|||
|
|
for tag in tags:
|
|||
|
|
tag_lower = tag.lower()
|
|||
|
|
if tag_lower.startswith(f"{namespace}:"):
|
|||
|
|
value = tag_lower[len(namespace)+1:]
|
|||
|
|
if fnmatch(value, pattern):
|
|||
|
|
file_path = Path(file_path_str)
|
|||
|
|
if file_path.exists():
|
|||
|
|
if size_bytes is None:
|
|||
|
|
size_bytes = file_path.stat().st_size
|
|||
|
|
all_tags = api.get_tags_for_file(file_hash)
|
|||
|
|
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
|
|||
|
|
results.append(entry)
|
|||
|
|
else:
|
|||
|
|
debug(f"File missing on disk: {file_path}")
|
|||
|
|
break
|
|||
|
|
|
|||
|
|
if limit is not None and len(results) >= limit:
|
|||
|
|
return results
|
|||
|
|

                    elif not match_all:
                        terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
                        if not terms:
                            terms = [query_lower]

                        debug(f"Performing filename/tag search for terms: {terms}")

                        # Over-fetch so the whole-word filter below still has
                        # enough candidates to fill the requested limit
                        fetch_limit = (limit or 45) * 50

                        conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
                        params = [f"%{t}%" for t in terms]

                        rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit)
                        debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")

                        word_regex = None
                        if len(terms) == 1:
                            term = terms[0]
                            has_wildcard = '*' in term or '?' in term

                            if has_wildcard:
                                try:
                                    from fnmatch import translate
                                    word_regex = re.compile(translate(term), re.IGNORECASE)
                                except Exception:
                                    word_regex = None
                            else:
                                try:
                                    pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
                                    word_regex = re.compile(pattern, re.IGNORECASE)
                                except Exception:
                                    word_regex = None

                        seen_files = set()
                        for file_id, file_path_str, size_bytes, file_hash in rows:
                            if not file_path_str or file_path_str in seen_files:
                                continue

                            if word_regex:
                                p = Path(file_path_str)
                                if not word_regex.search(p.name):
                                    continue
                            seen_files.add(file_path_str)

                            file_path = Path(file_path_str)
                            if file_path.exists():
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size

                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results

                        if terms:
                            title_hits: dict[str, dict[str, Any]] = {}
                            for term in terms:
                                title_pattern = f"title:%{term}%"
                                title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit)
                                for file_hash, file_path_str, size_bytes, ext in title_rows:
                                    if not file_path_str:
                                        continue
                                    entry = title_hits.get(file_hash)
                                    if entry:
                                        entry["count"] += 1
                                        if size_bytes is not None:
                                            entry["size"] = size_bytes
                                    else:
                                        title_hits[file_hash] = {
                                            "path": file_path_str,
                                            "size": size_bytes,
                                            "hash": file_hash,
                                            "count": 1,
                                        }

                            if title_hits:
                                # Keep only files whose title matched every term
                                required = len(terms)
                                for file_hash, info in title_hits.items():
                                    if info.get("count") != required:
                                        continue
                                    file_path_str = info.get("path")
                                    if not file_path_str or file_path_str in seen_files:
                                        continue
                                    file_path = Path(file_path_str)
                                    if not file_path.exists():
                                        continue
                                    seen_files.add(file_path_str)

                                    size_bytes = info.get("size")
                                    if size_bytes is None:
                                        try:
                                            size_bytes = file_path.stat().st_size
                                        except OSError:
                                            size_bytes = None

                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
                                    results.append(entry)
                                    if limit is not None and len(results) >= limit:
                                        return results

                        query_pattern = f"%{query_lower}%"
                        tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit)

                        for file_hash, file_path_str, size_bytes, ext in tag_rows:
                            if not file_path_str or file_path_str in seen_files:
                                continue
                            seen_files.add(file_path_str)

                            file_path = Path(file_path_str)
                            if file_path.exists():
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size

                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)

                            if limit is not None and len(results) >= limit:
                                return results

                    else:
                        rows = api.get_all_files(limit)
                        for file_hash, file_path_str, size_bytes, ext in rows:
                            if file_path_str:
                                file_path = Path(file_path_str)
                                if file_path.exists():
                                    if size_bytes is None:
                                        size_bytes = file_path.stat().st_size

                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                    results.append(entry)

                    if results:
                        debug(f"Returning {len(results)} results from DB")
                    else:
                        debug("No results found in DB")
                    return results

            except Exception as e:
                log(f"⚠️ Database search failed: {e}", file=sys.stderr)
                debug(f"DB search exception details: {e}")
                return []

        except Exception as exc:
            log(f"❌ Local search failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Alias for search_file to match the interface expected by FileStorage."""
        return self.search_file(query, **kwargs)
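
    # Query syntax accepted by search_file, per the branches above (the
    # values are illustrative):
    #
    #   folder.search_file("*")                    # every indexed file
    #   folder.search_file("hash:3f2a...")         # exact hash lookup ('hash:' required)
    #   folder.search_file("creator:some*")        # namespace search with wildcards
    #   folder.search_file("simple man")           # whole-word filename/title/tag match
    #   folder.search_file("creator:x, title:y*")  # comma-separated tokens AND together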

    def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]:
        """Return the library root containing medios-macina.db.

        Prefer the store's configured location, then the config override, then walk
        parents of the file path to find a directory with medios-macina.db.
        """
        candidates: list[Path] = []
        if self._location:
            candidates.append(Path(self._location).expanduser())
        cfg_root = get_local_storage_path(config) if config else None
        if cfg_root:
            candidates.append(Path(cfg_root).expanduser())

        for root in candidates:
            db_path = root / "medios-macina.db"
            if db_path.exists():
                return root

        try:
            for parent in [file_path] + list(file_path.parents):
                db_path = parent / "medios-macina.db"
                if db_path.exists():
                    return parent
        except Exception:
            pass
        return None

    def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]:
        """Retrieve file by hash, returning path to the file.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Path to the file or None if not found
        """
        try:
            # Normalize the hash
            normalized_hash = _normalize_hex_hash(file_hash)
            if not normalized_hash:
                return None

            search_dir = Path(self._location).expanduser()

            with FolderDB(search_dir) as db:
                # Search for file by hash
                file_path = db.search_hash(normalized_hash)

                if file_path and file_path.exists():
                    return file_path

            return None

        except Exception as exc:
            debug(f"Failed to get file for hash {file_hash}: {exc}")
            return None

    def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]:
        """Get a playable path for a file in this folder store.

        For folder stores, this resolves the hash to the actual file path on disk.

        Args:
            file_hash: SHA256 hash of the file
            config: Optional config dict (unused for folder stores)

        Returns:
            Absolute file path as string, or None if file not found
        """
        file_path = self.get_file(file_hash)
        if file_path:
            return str(file_path.absolute())
        return None

    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from the database by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found
        """
        import json

        from helper.folder_store import DatabaseAPI

        try:
            # Normalize the hash
            normalized_hash = _normalize_hex_hash(file_hash)
            if not normalized_hash:
                return None

            search_dir = Path(self._location).expanduser()

            with DatabaseAPI(search_dir) as api:
                # Resolve the canonical hash key
                file_hash_result = api.get_file_hash_by_hash(normalized_hash)
                if not file_hash_result:
                    return None

                # Query metadata directly from database
                cursor = api.get_cursor()
                cursor.execute("""
                    SELECT * FROM metadata WHERE hash = ?
                """, (file_hash_result,))

                row = cursor.fetchone()
                if not row:
                    return None

                metadata = dict(row)

                # Parse JSON-encoded list fields
                for field in ['url', 'relationships']:
                    if metadata.get(field):
                        try:
                            metadata[field] = json.loads(metadata[field])
                        except (json.JSONDecodeError, TypeError):
                            metadata[field] = []

                return metadata
        except Exception as exc:
            debug(f"Failed to get metadata for hash {file_hash}: {exc}")
            return None

    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        """Get tags for a local file by hash.

        Returns:
            Tuple of (tags_list, store_name) where store_name is the actual store name
        """
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with FolderDB(Path(self._location)) as db:
                        db_tags = db.get_tags(file_hash)
                        if db_tags:
                            # Return actual store name instead of generic "local_db"
                            store_name = self._name if self._name else "local"
                            return list(db_tags), store_name
                except Exception as exc:
                    debug(f"Local DB lookup failed: {exc}")
            return [], "unknown"
        except Exception as exc:
            debug(f"get_tag failed for local file: {exc}")
            return [], "unknown"

    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Add tags to a local file by hash (via FolderDB).

        Handles namespace collapsing: when adding namespace:value, removes existing
        namespace:* tags. Returns True if tags were successfully added.
        """
        try:
            if not self._location:
                return False

            try:
                with FolderDB(Path(self._location)) as db:
                    # Get existing tags
                    existing_tags = list(db.get_tags(file_identifier) or [])
                    original_tags_lower = {t.lower() for t in existing_tags}

                    # Merge new tags, handling namespace overwrites
                    for new_tag in tags:
                        if ':' in new_tag:
                            namespace = new_tag.split(':', 1)[0]
                            # Remove existing tags in the same namespace
                            existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
                        # Add new tag if not already present (case-insensitive check)
                        if new_tag.lower() not in original_tags_lower:
                            existing_tags.append(new_tag)

                    # Save merged tags
                    db.add_tags_to_hash(file_identifier, existing_tags)
                    return True
            except Exception as exc:
                debug(f"Local DB add_tags failed: {exc}")
                return False
        except Exception as exc:
            debug(f"add_tag failed for local file: {exc}")
            return False
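
    # Namespace collapsing in add_tag, with made-up values: if the file
    # already carries "status:draft", then
    #
    #   folder.add_tag(file_hash, ["status:final"])
    #
    # drops "status:draft" and stores "status:final" in its place.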

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Remove tags from a local file by hash."""
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with FolderDB(Path(self._location)) as db:
                        db.remove_tags_from_hash(file_hash, list(tags))
                        return True
                except Exception as exc:
                    debug(f"Local DB remove_tags failed: {exc}")
            return False
        except Exception as exc:
            debug(f"delete_tag failed for local file: {exc}")
            return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Get known URLs for a local file by hash."""
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with FolderDB(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        return list(meta.get("url") or [])
                except Exception as exc:
                    debug(f"Local DB get_metadata failed: {exc}")
            return []
        except Exception as exc:
            debug(f"get_url failed for local file: {exc}")
            return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Add known URLs to a local file by hash."""
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with FolderDB(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        # Keep the stored list separate from the incoming one;
                        # reusing the name `url` here would shadow the parameter
                        existing = list(meta.get("url") or [])
                        changed = False
                        for u in url:
                            if u not in existing:
                                existing.append(u)
                                changed = True
                        if changed:
                            db.update_metadata_by_hash(file_hash, {"url": existing})
                        return True
                except Exception as exc:
                    debug(f"Local DB add_url failed: {exc}")
            return False
        except Exception as exc:
            debug(f"add_url failed for local file: {exc}")
            return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete known URLs from a local file by hash."""
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with FolderDB(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        existing = list(meta.get("url") or [])
                        changed = False
                        # Remove the requested URLs from the stored list rather
                        # than mutating the list being iterated, which would
                        # skip entries
                        for u in url:
                            if u in existing:
                                existing.remove(u)
                                changed = True
                        if changed:
                            db.update_metadata_by_hash(file_hash, {"url": existing})
                        return True
                except Exception as exc:
                    debug(f"Local DB delete_url failed: {exc}")
            return False
        except Exception as exc:
            debug(f"delete_url failed for local file: {exc}")
            return False
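
    # Round-trip sketch for the URL helpers (the URL is illustrative):
    #
    #   folder.add_url(file_hash, ["https://example.com/post/1"])
    #   folder.get_url(file_hash)     # -> ["https://example.com/post/1"]
    #   folder.delete_url(file_hash, ["https://example.com/post/1"])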

    def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
        """Delete a file from the folder store.

        Args:
            file_identifier: The file path (as string) or hash of the file to delete
            **kwargs: Optional parameters

        Returns:
            True if deletion succeeded, False otherwise
        """
        try:
            file_path = Path(file_identifier)

            # Delete from database
            with FolderDB(Path(self._location)) as db:
                db.delete_file(file_path)

            # Delete the actual file from disk
            if file_path.exists():
                file_path.unlink()
                debug(f"Deleted file: {file_path}")
                return True
            else:
                debug(f"File not found on disk: {file_path}")
                return True  # Already gone
        except Exception as exc:
            debug(f"delete_file failed: {exc}")
            return False


class HydrusNetwork(store):
    """File storage backend for Hydrus client.

    Each instance represents a specific Hydrus client connection.
    Maintains its own HydrusClient with session key.
    """

    def __init__(self, instance_name: str, api_key: str, url: str) -> None:
        """Initialize Hydrus storage backend.

        Args:
            instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
            api_key: Hydrus Client API access key
            url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
        """
        from helper import hydrus as hydrus_wrapper

        self._instance_name = instance_name
        self._api_key = api_key
        self._url = url
        # Create persistent client with session key for this instance
        self._client = hydrus_wrapper.HydrusClient(url=url, access_key=api_key)
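
    # Hypothetical construction (the key and address are placeholders):
    #
    #   hydrus = HydrusNetwork(instance_name="home",
    #                          api_key="0123abcd...",
    #                          url="http://127.0.0.1:45869")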

    def name(self) -> str:
        return self._instance_name

    def get_name(self) -> str:
        return self._instance_name

    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Upload file to Hydrus with full metadata support.

        Args:
            file_path: Path to the file to upload
            tags: Optional list of tags to add
            url: Optional list of URLs to associate with the file
            title: Optional title (will be added as 'title:value' tag)

        Returns:
            File hash from Hydrus

        Raises:
            Exception: If upload fails
        """
        tags = kwargs.get("tags", [])
        url = kwargs.get("url", [])
        title = kwargs.get("title")

        # Add title to tags if provided and not already present
        if title:
            title_tag = f"title:{title}"
            if not any(str(tag).lower().startswith("title:") for tag in tags):
                tags = [title_tag] + list(tags)

        try:
            # Compute file hash
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}")

            # Use persistent client with session key
            client = self._client
            if client is None:
                raise Exception("Hydrus client unavailable")

            # Check if file already exists in Hydrus
            file_exists = False
            try:
                metadata = client.fetch_file_metadata(hashes=[file_hash])
                if metadata and isinstance(metadata, dict):
                    files = metadata.get("file_metadata", [])
                    if files:
                        file_exists = True
                        log(
                            f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}",
                            file=sys.stderr,
                        )
            except Exception:
                pass

            # Upload file if not already present
            if not file_exists:
                log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
                response = client.add_file(file_path)

                # Extract hash from response
                hydrus_hash: Optional[str] = None
                if isinstance(response, dict):
                    hydrus_hash = response.get("hash") or response.get("file_hash")
                    if not hydrus_hash:
                        hashes = response.get("hashes")
                        if isinstance(hashes, list) and hashes:
                            hydrus_hash = hashes[0]

                if not hydrus_hash:
                    raise Exception(f"Hydrus response missing file hash: {response}")

                file_hash = hydrus_hash
                log(f"Hydrus: {file_hash}", file=sys.stderr)

            # Add tags if provided (both for new and existing files)
            if tags:
                # Use the default tag service
                service_name = "my tags"
                try:
                    debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}")
                    client.add_tags(file_hash, tags, service_name)
                    log(f"Tags added via '{service_name}'", file=sys.stderr)
                except Exception as exc:
                    log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)

            # Associate URLs if provided (both for new and existing files)
            if url:
                log(f"Associating {len(url)} URL(s) with file", file=sys.stderr)
                for u in url:
                    if u:
                        try:
                            client.associate_url(file_hash, str(u))
                            debug(f"Associated URL: {u}")
                        except Exception as exc:
                            log(f"⚠️ Failed to associate URL {u}: {exc}", file=sys.stderr)

            return file_hash

        except Exception as exc:
            log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
            raise
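
    # Illustrative upload (file name, tags, and URL are made up):
    #
    #   h = hydrus.add_file(Path("clip.mp4"),
    #                       tags=["title:Clip", "creator:someone"],
    #                       url=["https://example.com/source"])
    #   # returns the sha256 hash Hydrus reports for the file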

    def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search Hydrus database for files matching query.

        Args:
            query: Search query (tags, filenames, hashes, etc.)
            limit: Maximum number of results to return (default: 100)

        Returns:
            List of dicts with 'name', 'hash', 'size', 'tags' fields

        Example:
            results = storage["hydrus"].search("artist:john_doe music")
            results = storage["hydrus"].search("Simple Man")
        """
        limit = kwargs.get("limit", 100)

        try:
            client = self._client
            if client is None:
                raise Exception("Hydrus client unavailable")

            debug(f"Searching Hydrus for: {query}")

            # Parse the query into tags. "*" means "match all", which Hydrus
            # expresses as the system:everything tag.
            if query.strip() == "*":
                tags = ["system:everything"]
            else:
                query_lower = query.lower().strip()
                if ':' not in query_lower:
                    # No namespace provided: search all files, then filter by
                    # title/tags containing the query below
                    tags = ["system:everything"]
                else:
                    # User provided an explicit namespace (e.g., "creator:john"
                    # or "system:has_audio"); use it as a tag search
                    tags = [query_lower]

            if not tags:
                debug("Found 0 result(s)")
                return []

            # Search files with the tags
            search_result = client.search_files(
                tags=tags,
                return_hashes=True,
                return_file_ids=True
            )

            # Extract file IDs from search result
            file_ids = search_result.get("file_ids", [])
            hashes = search_result.get("hashes", [])

            if not file_ids and not hashes:
                debug("Found 0 result(s)")
                return []

            # Fetch metadata for the found files
            results = []
            query_lower = query.lower().strip()
            # Split by comma or space for AND logic during substring matching
            search_terms = set(query_lower.replace(',', ' ').split())

            if file_ids:
                metadata = client.fetch_file_metadata(file_ids=file_ids)
                metadata_list = metadata.get("metadata", [])

                for meta in metadata_list:
                    if len(results) >= limit:
                        break

                    file_id = meta.get("file_id")
                    hash_hex = meta.get("hash")
                    size = meta.get("size", 0)

                    # Get tags for this file and extract title
                    tags_set = meta.get("tags", {})
                    all_tags = []
                    title = f"Hydrus File {file_id}"  # Default fallback
                    all_tags_str = ""  # For substring matching

                    if isinstance(tags_set, dict):
                        # Collect both storage_tags and display_tags to capture
                        # siblings/parents and ensure title: is seen
                        def _collect(tag_list: Any) -> None:
                            nonlocal title, all_tags_str
                            if not isinstance(tag_list, list):
                                return
                            for tag in tag_list:
                                tag_text = str(tag) if tag else ""
                                if not tag_text:
                                    continue
                                all_tags.append(tag_text)
                                all_tags_str += " " + tag_text.lower()
                                if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}":
                                    title = tag_text.split(":", 1)[1].strip()

                        for service_name, service_tags in tags_set.items():
                            if not isinstance(service_tags, dict):
                                continue

                            storage_tags = service_tags.get("storage_tags", {})
                            if isinstance(storage_tags, dict):
                                for tag_list in storage_tags.values():
                                    _collect(tag_list)

                            display_tags = service_tags.get("display_tags", [])
                            _collect(display_tags)

                        # Also consider a top-level flattened tags payload if
                        # provided (the Hydrus API sometimes includes it)
                        top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
                        _collect(top_level_tags)

                    # Resolve extension from MIME type
                    mime_type = meta.get("mime")
                    ext = ""
                    if mime_type:
                        for category in mime_maps.values():
                            for ext_key, info in category.items():
                                if mime_type in info.get("mimes", []):
                                    ext = info.get("ext", "").lstrip('.')
                                    break
                            if ext:
                                break

                    # Decide whether this file belongs in the results. An explicit
                    # namespace query (contains ':') was already filtered by the
                    # Hydrus tag search; a free-form query must match every term
                    # as a whole word in the title or tags (AND logic).
                    has_namespace = ':' in query_lower

                    match = True
                    if not has_namespace and query_lower != "*":
                        searchable_text = (title + " " + all_tags_str).lower()
                        for term in search_terms:
                            # Whole-word regex; escape the term to handle special chars
                            pattern = r'\b' + re.escape(term) + r'\b'
                            if not re.search(pattern, searchable_text):
                                match = False
                                break

                    if match:
                        results.append({
                            "hash": hash_hex,
                            "hash_hex": hash_hex,
                            "target": hash_hex,
                            "name": title,
                            "title": title,
                            "size": size,
                            "size_bytes": size,
                            "origin": self._instance_name,
                            "tags": all_tags,
                            "file_id": file_id,
                            "mime": mime_type,
                            "ext": ext,
                        })

            debug(f"Found {len(results)} result(s)")
            return results[:limit]

        except Exception as exc:
            log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            raise

    def get_file(self, file_hash: str, **kwargs: Any) -> Optional[str]:
        """Open the file in the default browser via the Hydrus client API URL.

        Returns the URL string rather than downloading the file.
        """
        import webbrowser

        debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...")

        # Build browser URL with access key
        base_url = self._client.url.rstrip('/')
        access_key = self._client.access_key
        browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
        debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}")

        # Open in default browser
        webbrowser.open(browser_url)
        debug("[HydrusNetwork.get_file] Browser opened successfully")

        # Return the URL string instead of downloading
        debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}")
        return browser_url

    def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]:
        """Get a playable path for a file in this Hydrus instance.

        For Hydrus stores, this builds a file URL with authentication.

        Args:
            file_hash: SHA256 hash of the file
            config: Optional config dict (unused, URL and key are from instance)

        Returns:
            Hydrus API file URL with embedded access key, or None if client unavailable
        """
        try:
            if not self._client:
                return None

            base_url = self._client.url.rstrip('/')
            access_key = self._client.access_key

            # Build Hydrus file URL with access key
            url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
            return url
        except Exception as e:
            debug(f"Error building Hydrus URL for {file_hash}: {e}")
            return None
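
    # The returned URL can be handed straight to a player; a hypothetical
    # example with mpv (assumes mpv is installed and subprocess is imported):
    #
    #   playable = hydrus.pipe(file_hash)
    #   if playable:
    #       subprocess.run(["mpv", playable])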

    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from Hydrus by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with metadata fields or None if not found
        """
        try:
            client = self._client
            if not client:
                debug("get_metadata: Hydrus client unavailable")
                return None

            # Fetch file metadata
            payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True)

            if not payload or not payload.get("metadata"):
                return None

            meta = payload["metadata"][0]

            # Extract title from tags
            title = f"Hydrus_{file_hash[:12]}"
            tags_payload = meta.get("tags", {})
            if isinstance(tags_payload, dict):
                for service_data in tags_payload.values():
                    if isinstance(service_data, dict):
                        display_tags = service_data.get("display_tags", {})
                        if isinstance(display_tags, dict):
                            current_tags = display_tags.get("0", [])
                            if isinstance(current_tags, list):
                                for tag in current_tags:
                                    if str(tag).lower().startswith("title:"):
                                        title = tag.split(":", 1)[1].strip()
                                        break
                    if title != f"Hydrus_{file_hash[:12]}":
                        break

            # Determine extension from mime type, using the same mime_maps
            # structure as search_file (each entry maps ext -> {"mimes": [...], "ext": ...})
            mime_type = meta.get("mime", "")
            ext = ""
            if mime_type:
                for category in mime_maps.values():
                    for ext_key, info in category.items():
                        if mime_type in info.get("mimes", []):
                            ext = info.get("ext", "").lstrip('.')
                            break
                    if ext:
                        break

            return {
                "hash": file_hash,
                "title": title,
                "ext": ext,
                "size": meta.get("size", 0),
                "mime": mime_type,
            }

        except Exception as exc:
            debug(f"Failed to get metadata from Hydrus: {exc}")
            return None
|
|||
|
|
|
|||
|
|
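    # The parsing in get_metadata() assumes a Hydrus /get_files/file_metadata
    # response shaped roughly like (abbreviated):
    #
    #   {"metadata": [{"hash": "...", "mime": "video/mp4", "size": 1234,
    #                  "tags": {"<service_key>": {"display_tags": {"0": ["title:..."]}}}}]}
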
    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        """Get tags for a file from Hydrus by hash.

        Args:
            file_identifier: File hash (SHA256 hex string)
            **kwargs: Optional service_name parameter (defaults to "my tags")

        Returns:
            Tuple of (tags_list, source_description), where source is
            "hydrus" on success and "unknown" on failure
        """
        try:
            file_hash = str(file_identifier)

            # Get Hydrus client and service info
            client = self._client
            if not client:
                debug("get_tags: Hydrus client unavailable")
                return [], "unknown"

            # Fetch file metadata
            payload = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=True,
                include_file_url=False
            )

            items = payload.get("metadata") if isinstance(payload, dict) else None
            if not isinstance(items, list) or not items:
                debug(f"get_tags: No metadata returned for hash {file_hash}")
                return [], "unknown"

            meta = items[0] if isinstance(items[0], dict) else None
            if not isinstance(meta, dict) or meta.get("file_id") is None:
                debug(f"get_tags: Invalid metadata for hash {file_hash}")
                return [], "unknown"

            # Resolve the tag service, honouring an optional service_name kwarg
            service_name = kwargs.get("service_name") or "my tags"
            service_key = hydrus_wrapper.get_tag_service_key(client, service_name)

            # Extract tags from metadata
            tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)

            return tags, "hydrus"

        except Exception as exc:
            debug(f"get_tags failed for Hydrus file: {exc}")
            return [], "unknown"

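    # Illustrative call (hash and tag values are placeholders):
    #
    #   tags, source = backend.get_tag("d4e1...", service_name="my tags")
    #   # tags -> ["title:Some Song", "creator:someone"], source -> "hydrus"
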
    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Add tags to a Hydrus file."""
        try:
            client = self._client
            if client is None:
                debug("add_tag: Hydrus client unavailable")
                return False
            service_name = kwargs.get("service_name") or "my tags"
            # Ensure tags is a list
            tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
            if not tag_list:
                return False
            client.add_tags(file_identifier, tag_list, service_name)
            return True
        except Exception as exc:
            debug(f"Hydrus add_tag failed: {exc}")
            return False

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Delete tags from a Hydrus file."""
        try:
            client = self._client
            if client is None:
                debug("delete_tag: Hydrus client unavailable")
                return False
            service_name = kwargs.get("service_name") or "my tags"
            tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
            if not tag_list:
                return False
            client.delete_tags(file_identifier, tag_list, service_name)
            return True
        except Exception as exc:
            debug(f"Hydrus delete_tag failed: {exc}")
            return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Get known URLs for a Hydrus file."""
        try:
            client = self._client
            if client is None:
                debug("get_url: Hydrus client unavailable")
                return []
            payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True)
            items = payload.get("metadata") if isinstance(payload, dict) else None
            if not isinstance(items, list) or not items:
                return []
            meta = items[0]
            url = meta.get("url") or []
            return list(url)
        except Exception as exc:
            debug(f"Hydrus get_url failed: {exc}")
            return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Associate one or more URLs with a Hydrus file."""
        try:
            client = self._client
            if client is None:
                debug("add_url: Hydrus client unavailable")
                return False
            for u in url:
                client.associate_url(file_identifier, u)
            return True
        except Exception as exc:
            debug(f"Hydrus add_url failed: {exc}")
            return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete one or more URLs from a Hydrus file."""
        try:
            client = self._client
            if client is None:
                debug("delete_url: Hydrus client unavailable")
                return False
            for u in url:
                client.delete_url(file_identifier, u)
            return True
        except Exception as exc:
            debug(f"Hydrus delete_url failed: {exc}")
            return False

    @staticmethod
    def _extract_tags_from_hydrus_meta(
        meta: Dict[str, Any],
        service_key: Optional[str],
        service_name: str
    ) -> List[str]:
        """Extract current tags from Hydrus metadata dict.

        Prefers display_tags (includes siblings/parents, excludes deleted).
        Falls back to storage_tags status '0' (current).
        """
        tags_payload = meta.get("tags")
        if not isinstance(tags_payload, dict):
            return []

        svc_data = None
        if service_key:
            svc_data = tags_payload.get(service_key)
        if not isinstance(svc_data, dict):
            return []

        # Prefer display_tags (Hydrus computes siblings/parents)
        display = svc_data.get("display_tags")
        if isinstance(display, list) and display:
            return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()]

        # Fallback to storage_tags status '0' (current)
        storage = svc_data.get("storage_tags")
        if isinstance(storage, dict):
            current_list = storage.get("0") or storage.get(0)
            if isinstance(current_list, list):
                return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()]

        return []
class MatrixStorageBackend(store):
    """File storage backend for Matrix (Element) chat rooms."""

    def get_name(self) -> str:
        return "matrix"

    def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]:
        """List joined rooms with their names."""
        matrix_conf = config.get('storage', {}).get('matrix', {})
        homeserver = matrix_conf.get('homeserver')
        access_token = matrix_conf.get('access_token')

        if not homeserver or not access_token:
            return []

        if not homeserver.startswith('http'):
            homeserver = f"https://{homeserver}"

        headers = {"Authorization": f"Bearer {access_token}"}

        try:
            # Get joined rooms
            resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
            if resp.status_code != 200:
                return []

            room_ids = resp.json().get('joined_rooms', [])
            rooms = []

            for rid in room_ids:
                # Try to get room name
                name = "Unknown Room"
                try:
                    # Get state event for name
                    name_resp = requests.get(
                        f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name",
                        headers=headers,
                        timeout=2
                    )
                    if name_resp.status_code == 200:
                        name = name_resp.json().get('name', name)
                    else:
                        # Try canonical alias
                        alias_resp = requests.get(
                            f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias",
                            headers=headers,
                            timeout=2
                        )
                        if alias_resp.status_code == 200:
                            name = alias_resp.json().get('alias', name)
                except Exception:
                    pass

                rooms.append({'id': rid, 'name': name})

            return rooms
        except Exception as e:
            log(f"Error listing Matrix rooms: {e}", file=sys.stderr)
            return []

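    # joined_rooms returns {"joined_rooms": ["!abc:example.org", ...]}; the two
    # state lookups read the m.room.name / m.room.canonical_alias events, whose
    # bodies look like {"name": "Media"} or {"alias": "#media:example.org"}.
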
    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload file to Matrix room.

        Requires 'config' in kwargs with 'storage.matrix' settings:
        - homeserver: URL of homeserver (e.g. https://matrix.org)
        - user_id: User ID (e.g. @user:matrix.org)
        - access_token: Access token (preferred) OR password
        - room_id: Room ID to upload to (e.g. !roomid:matrix.org)
        """
        config = kwargs.get('config', {})
        if not config:
            raise ValueError("Config required for Matrix upload")

        matrix_conf = config.get('storage', {}).get('matrix', {})
        if not matrix_conf:
            raise ValueError("Matrix storage not configured in config.json")

        homeserver = matrix_conf.get('homeserver')
        # user_id = matrix_conf.get('user_id')  # Not strictly needed if we have a token
        access_token = matrix_conf.get('access_token')
        room_id = matrix_conf.get('room_id')

        if not homeserver:
            raise ValueError("Matrix homeserver required")

        # Ensure homeserver has protocol
        if not homeserver.startswith('http'):
            homeserver = f"https://{homeserver}"

        # Login if no access token (not yet implemented; for now require a token)
        if not access_token:
            raise ValueError("Matrix access_token required (login not yet implemented)")

        # Handle room selection if not provided
        if not room_id:
            log("No room_id configured. Fetching joined rooms...", file=sys.stderr)
            rooms = self.list_rooms(config)

            if not rooms:
                raise ValueError("No joined rooms found or failed to fetch rooms.")

            from result_table import ResultTable
            table = ResultTable("Matrix Rooms")
            for i, room in enumerate(rooms):
                row = table.add_row()
                row.add_column("#", str(i + 1))
                row.add_column("Name", room['name'])
                row.add_column("ID", room['id'])

            print(table)

            # Simple interactive selection
            try:
                selection = input("Select room # to upload to: ")
                idx = int(selection) - 1
                if 0 <= idx < len(rooms):
                    room_id = rooms[idx]['id']
                    log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr)
                else:
                    raise ValueError("Invalid selection")
            except Exception:
                raise ValueError("Invalid room selection")

        if not room_id:
            raise ValueError("Matrix room_id required")

        # 1. Upload media (the media repo endpoint is /_matrix/media/v3/upload)
        upload_url = f"{homeserver}/_matrix/media/v3/upload"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/octet-stream"  # Overridden below if the mime type is known
        }

        import mimetypes
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type:
            headers["Content-Type"] = mime_type

        filename = file_path.name

        try:
            with open(file_path, 'rb') as f:
                resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})

            if resp.status_code != 200:
                raise Exception(f"Matrix upload failed: {resp.text}")

            content_uri = resp.json().get('content_uri')
            if not content_uri:
                raise Exception("No content_uri returned from Matrix upload")

            # 2. Send message referencing the uploaded media. Use auth-only headers
            # so the JSON body is sent with the correct Content-Type (the upload
            # headers above still carry the file's mime type).
            send_headers = {"Authorization": f"Bearer {access_token}"}
            send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message"

            # Determine msgtype from the mime type
            msgtype = "m.file"
            if mime_type:
                if mime_type.startswith("image/"):
                    msgtype = "m.image"
                elif mime_type.startswith("video/"):
                    msgtype = "m.video"
                elif mime_type.startswith("audio/"):
                    msgtype = "m.audio"

            payload = {
                "msgtype": msgtype,
                "body": filename,
                "url": content_uri,
                "info": {
                    "mimetype": mime_type,
                    "size": file_path.stat().st_size
                }
            }

            resp = requests.post(send_url, headers=send_headers, json=payload)
            if resp.status_code != 200:
                raise Exception(f"Matrix send message failed: {resp.text}")

            event_id = resp.json().get('event_id')
            return f"matrix://{room_id}/{event_id}"

        except Exception as e:
            log(f"❌ Matrix upload error: {e}", file=sys.stderr)
            raise

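    # Illustrative config.json fragment consumed by upload() (all values are
    # placeholders):
    #
    #   "storage": {"matrix": {"homeserver": "https://matrix.org",
    #                          "access_token": "syt_...",
    #                          "room_id": "!roomid:matrix.org"}}
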
    # --- Not supported for Matrix: tagging & URL operations (return safe defaults) ---
    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        return [], "matrix"

    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        return False

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        return False


class RemoteStorageBackend(store):
    """File storage backend for remote Android/network storage servers.

    Connects to a remote storage server (e.g., running on an Android phone)
    via REST API. All operations are proxied to the remote server.
    """

    def __init__(self, server_url: str, timeout: int = 30, api_key: Optional[str] = None) -> None:
        """Initialize remote storage backend.

        Args:
            server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000)
            timeout: Request timeout in seconds
            api_key: Optional API key for authentication
        """
        try:
            import requests
        except ImportError:
            raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests")

        self.server_url = server_url.rstrip('/')
        self.timeout = timeout
        self.api_key = api_key
        self._session = requests.Session()

        # Add API key to default headers if provided
        if self.api_key:
            self._session.headers.update({'X-API-Key': self.api_key})

    def get_name(self) -> str:
        return "remote"

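    # Illustrative instantiation (address and key are placeholders):
    #
    #   backend = RemoteStorageBackend("http://192.168.1.100:5000", api_key="secret")
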
    def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
        """Make an HTTP request to the remote server."""
        import requests
        from urllib.parse import urljoin

        url = urljoin(self.server_url, endpoint)

        try:
            response = self._session.request(
                method,
                url,
                timeout=self.timeout,
                **kwargs
            )

            if response.status_code == 404:
                raise Exception(f"Remote resource not found: {endpoint}")

            if response.status_code >= 400:
                try:
                    error_data = response.json()
                    error_msg = error_data.get('error', response.text)
                except Exception:
                    error_msg = response.text
                raise Exception(f"Remote server error {response.status_code}: {error_msg}")

            return response.json()

        except requests.exceptions.RequestException as e:
            raise Exception(f"Connection to {self.server_url} failed: {e}")

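    # Note: endpoints here always start with "/", so urljoin(server_url, endpoint)
    # resolves against the host root, e.g. urljoin("http://host:5000", "/files/search")
    # -> "http://host:5000/files/search".
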
    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload file to remote storage.

        Args:
            file_path: Path to the file to upload
            tags: Optional list of tags to add
            url: Optional list of known URLs

        Returns:
            Remote file hash
        """
        if not file_path.exists():
            raise ValueError(f"File not found: {file_path}")

        try:
            # Index the file on the remote server
            data = {"path": str(file_path)}

            tags = kwargs.get("tags", [])
            if tags:
                data["tags"] = tags

            url = kwargs.get("url", [])
            if url:
                data["url"] = url

            result = self._request('POST', '/files/index', json=data)
            file_hash = result.get('hash')

            if file_hash:
                log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr)
                return file_hash
            else:
                raise Exception("Remote server did not return file hash")

        except Exception as exc:
            debug(f"Remote upload failed: {exc}")
            raise

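    # The index call above exchanges JSON shaped like (inferred from the code;
    # the exact contract lives with the remote storage server):
    #   request:  POST /files/index  {"path": "...", "tags": [...], "url": [...]}
    #   response: {"hash": "<sha256>"}
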
    # Tag and URL operations - Remote server default: not supported
    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        return [], "remote"

    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        return False

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        return False

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search files on remote storage.

        Args:
            query: Search query
            limit: Maximum number of results (keyword argument, defaults to 100)

        Returns:
            List of search results
        """
        limit = kwargs.get("limit")
        try:
            limit = int(limit) if limit is not None else 100
        except (TypeError, ValueError):
            limit = 100

        if limit <= 0:
            limit = 100

        try:
            response = self._request('GET', '/files/search', params={
                'q': query,
                'limit': limit
            })

            files = response.get('files', [])

            # Transform remote format to the standard result format
            results = []
            for f in files:
                results.append({
                    "name": f.get('name', '').split('/')[-1],  # Filename portion of the path
                    "title": f.get('name', f.get('path', '')).split('/')[-1],
                    "ext": f.get('ext', ''),
                    "path": f.get('path', ''),
                    "target": f.get('path', ''),
                    "hash": f.get('hash', ''),
                    "origin": "remote",
                    "size": f.get('size', 0),
                    "size_bytes": f.get('size', 0),
                    "tags": f.get('tags', []),
                })

            debug(f"Remote search found {len(results)} results")
            return results

        except Exception as exc:
            log(f"❌ Remote search failed: {exc}", file=sys.stderr)
            raise

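# Illustrative search call against a remote backend (placeholder query):
#
#   results = backend.search("music", limit=20)
#   # -> [{"name": "song.mp3", "hash": "...", "origin": "remote", ...}, ...]

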
class FileStorage:
    """Unified file storage interface supporting multiple backend instances.

    Each backend type (folder, hydrusnetwork) can have multiple named instances.
    Access backends by their configured names.

    Config structure (under the top-level "storage" key; the legacy "store"
    key is also accepted):
    {
        "store": {
            "folder": {
                "default": {"path": "C:\\Media Machina"},
                "test": {"path": "C:\\Users\\Admin\\Downloads\\Video"}
            },
            "hydrusnetwork": {
                "home": {
                    "Hydrus-Client-API-Access-Key": "d4321f...",
                    "url": "http://192.168.1.230:45869"
                },
                "work": {
                    "Hydrus-Client-API-Access-Key": "abc123...",
                    "url": "http://192.168.1.100:45869"
                }
            }
        }
    }

    Example:
        storage = FileStorage(config)

        # Upload to different named instances
        hash1 = storage["test"].add_file(Path("file.mp3"), tags=["music"])
        hash2 = storage["home"].add_file(Path("file.mp3"), tags=["music"])
        hash3 = storage["work"].add_file(Path("file.mp3"), tags=["music"])

        # Search across different instances
        results = storage["home"].search("music")
        results = storage["test"].search("song")
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None:
        """Initialize the file storage system with available backends.

        Args:
            config: Configuration dict with backend settings (folder paths,
                Hydrus instances, remote storage servers)
            suppress_debug: If True, suppress debug output during initialization (useful for autocomplete)
        """
        self.suppress_debug = suppress_debug
        config = config or {}

        # Extract backend-specific settings from config
        local_path = get_local_storage_path(config)
        local_path_str = str(local_path) if local_path else None

        self._backends: Dict[str, store] = {}

        # Build folder stores from config (support both 'storage' and legacy 'store' top-level keys)
        folder_sources = None
        cfg_storage = config.get("storage") or config.get("store") or {}
        if isinstance(cfg_storage, dict):
            val = cfg_storage.get("folder")
            if isinstance(val, dict):
                folder_sources = val

        # If folder sources provided, create backends for each entry
        if folder_sources:
            # Normalize into name -> path mapping
            folder_map: Dict[str, str] = {}
            for key, value in folder_sources.items():
                if isinstance(value, dict):
                    path_val = value.get("path")
                elif isinstance(value, (str, bytes)):
                    path_val = str(value)
                else:
                    path_val = None
                if path_val:
                    folder_map[str(key)] = str(Path(path_val).expanduser())

            # Register all folder stores by their explicit names from config
            for name, path in folder_map.items():
                self._backends[name] = Folder(location=path, name=name)
        else:
            # Fallback: use legacy single local path if present
            if local_path_str:
                self._backends["default"] = Folder(location=local_path_str, name="default")

        # Matrix (chat room) acts as a provider, not a persistent storage backend.
        # We no longer register Matrix as a storage backend here; providers should be separate classes.

        # Build HydrusNetwork backends from cfg_storage["hydrusnetwork"].
        # Register all instances regardless of current connectivity - connection errors
        # will be caught when actually trying to use the backend
        hydrus_sources = cfg_storage.get("hydrusnetwork")
        if isinstance(hydrus_sources, dict):
            for instance_name, instance_config in hydrus_sources.items():
                if isinstance(instance_config, dict):
                    api_key = instance_config.get("Hydrus-Client-API-Access-Key")
                    url = instance_config.get("url")

                    # Skip if missing credentials - don't register instances without full config
                    if not api_key or not url:
                        continue

                    # Register the instance - connection will be tested when actually used
                    try:
                        self._backends[instance_name] = HydrusNetwork(
                            instance_name=instance_name,
                            api_key=api_key,
                            url=url
                        )
                        if not self.suppress_debug:
                            debug(f"[FileStorage] Registered Hydrus instance '{instance_name}': {url}")
                    except Exception as e:
                        if not self.suppress_debug:
                            debug(f"[FileStorage] Failed to register Hydrus instance '{instance_name}': {e}")
                        continue

        # Include remote storage backends from config (for Android/network servers)
        remote_storages = config.get("remote_storages", [])
        if isinstance(remote_storages, list):
            for remote_config in remote_storages:
                if isinstance(remote_config, dict):
                    name = remote_config.get("name", "remote")
                    url = remote_config.get("url")
                    timeout = remote_config.get("timeout", 30)
                    api_key = remote_config.get("api_key")

                    if url:
                        try:
                            backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key)
                            self._backends[name] = backend
                            auth_status = " (with auth)" if api_key else " (no auth)"
                            log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
                        except Exception as e:
                            log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)

    def list_backends(self) -> list[str]:
        """Return available backend keys for autocomplete and validation."""
        return sorted(self._backends.keys())

    def __getitem__(self, backend_name: str) -> store:
        """Get a storage backend by name.

        Args:
            backend_name: Configured name of the backend (e.g. 'default', 'home', 'work')

        Returns:
            StorageBackend instance

        Raises:
            KeyError: If backend not found
        """
        if backend_name not in self._backends:
            raise KeyError(
                f"Unknown storage backend: {backend_name}. "
                f"Available: {list(self._backends.keys())}"
            )
        return self._backends[backend_name]

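    # Illustrative lookup guarded by is_available() (backend name from user input):
    #
    #   if storage.is_available(name):
    #       backend = storage[name]
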
    def register(self, backend: store) -> None:
        """Register a custom storage backend.

        Args:
            backend: StorageBackend instance to register
        """
        name = backend.get_name()
        self._backends[name] = backend
        log(f"Registered storage backend: {name}", file=sys.stderr)

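    # Illustrative custom registration (URL is a placeholder):
    #
    #   storage.register(RemoteStorageBackend("http://10.0.0.2:5000"))
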
    def is_available(self, backend_name: str) -> bool:
        """Check if a backend is available.

        Args:
            backend_name: Name of the backend

        Returns:
            True if backend is registered
        """
        return backend_name in self._backends

    def list_searchable_backends(self) -> list[str]:
        """Get list of backends that support searching.

        Returns:
            Sorted list of searchable backend names
        """
        return sorted(
            name for name, backend in self._backends.items()
            if callable(getattr(backend, 'search', None)) or callable(getattr(backend, 'search_file', None))
        )

    # --- remaining FileStorage methods ---