This commit is contained in:
nose
2025-12-11 19:04:02 -08:00
parent 6863c6c7ea
commit 16d8a763cd
103 changed files with 4759 additions and 9156 deletions

Store/Folder.py Normal file

@@ -0,0 +1,977 @@
from __future__ import annotations
import json
import re
import shutil
import sys
from fnmatch import translate
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from SYS.logger import debug, log
from SYS.utils import sha256_file
from Store._base import StoreBackend
def _normalize_hash(value: Any) -> Optional[str]:
candidate = str(value or '').strip().lower()
if len(candidate) != 64:
return None
if any(ch not in '0123456789abcdef' for ch in candidate):
return None
return candidate
def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]:
normalized = _normalize_hash(db_hash) if db_hash else None
if normalized:
return normalized
return _normalize_hash(file_path.stem)
class Folder(StoreBackend):
""""""
# Track which locations have already been migrated to avoid repeated migrations
_migrated_locations = set()
def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None:
self._location = location
self._name = name
if self._location:
try:
from API.folder import API_folder_store
location_path = Path(self._location).expanduser()
# Use context manager to ensure connection is properly closed
with API_folder_store(location_path) as db:
if db.connection:
db.connection.commit()
# Call migration and discovery at startup
Folder.migrate_location(self._location)
except Exception as exc:
debug(f"Failed to initialize database for '{name}': {exc}")
@classmethod
def migrate_location(cls, location: Optional[str]) -> None:
"""Migrate a location to hash-based storage (one-time operation, call explicitly at startup)."""
if not location:
return
location_path = Path(location).expanduser()
location_str = str(location_path)
# Only migrate once per location
if location_str in cls._migrated_locations:
return
cls._migrated_locations.add(location_str)
# Create a temporary instance to run the migration. Re-entrancy is safe:
# the location was just added to _migrated_locations, so the nested
# migrate_location call from __init__ returns immediately.
temp_instance = cls(location=location)
temp_instance._migrate_to_hash_storage(location_path)
def _migrate_to_hash_storage(self, location_path: Path) -> None:
"""Migrate existing files from filename-based to hash-based storage.
Checks for sidecars (.metadata, .tag) and imports them before renaming.
Also ensures all files have a title: tag.
"""
from API.folder import API_folder_store, read_sidecar, write_sidecar, find_sidecar
try:
with API_folder_store(location_path) as db:
cursor = db.connection.cursor()
# First pass: migrate filename-based files and add title tags
# Scan all files in the storage directory
for file_path in sorted(location_path.iterdir()):
if not file_path.is_file():
continue
# Skip database files and sidecars
if file_path.suffix in ('.db', '.metadata', '.tag'):
    continue
# Skip SQLite journal files (their names end with -shm or -wal;
# Path.suffix always starts with a dot, so check the full name)
if file_path.name.endswith(('-shm', '-wal')):
    continue
# Check if filename is already a hash (without extension)
if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()):
continue # Already migrated, will process in second pass
try:
# Compute file hash
file_hash = sha256_file(file_path)
# Preserve extension in the hash-based filename
file_ext = file_path.suffix # e.g., '.mp4'
hash_filename = file_hash + file_ext if file_ext else file_hash
hash_path = location_path / hash_filename
# Check for sidecars and import them
sidecar_path = find_sidecar(file_path)
tags_to_add = []
url_to_add = []
has_title_tag = False
if sidecar_path and sidecar_path.exists():
try:
_, tags, url = read_sidecar(sidecar_path)
if tags:
tags_to_add = list(tags)
# Check if title tag exists
has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add)
if url:
url_to_add = list(url)
debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr)
# Delete the sidecar after importing
sidecar_path.unlink()
except Exception as exc:
debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr)
# Ensure there's a title tag (use original filename if not present)
if not has_title_tag:
tags_to_add.append(f"title:{file_path.name}")
# Rename file to hash if needed
if hash_path != file_path and not hash_path.exists():
debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr)
file_path.rename(hash_path)
# Create or update database entry
db.get_or_create_file_entry(hash_path)
# Save extension metadata
ext_clean = file_ext.lstrip('.') if file_ext else ''
db.save_metadata(hash_path, {
'hash': file_hash,
'ext': ext_clean,
'size': hash_path.stat().st_size
})
# Add all tags (including title tag)
if tags_to_add:
db.save_tags(hash_path, tags_to_add)
debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr)
# URLs would need a separate table to be persisted; for now they are
# only recorded in the debug log
if url_to_add:
    debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr)
except Exception as exc:
debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr)
# Second pass: ensure all files in database have a title: tag
db.connection.commit()
cursor.execute('''
SELECT f.hash, f.file_path
FROM files f
WHERE NOT EXISTS (
SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%'
)
''')
files_without_title = cursor.fetchall()
for file_hash, file_path_str in files_without_title:
try:
file_path = Path(file_path_str)
if file_path.exists():
# Use the filename as the title
title_tag = f"title:{file_path.name}"
db.save_tags(file_path, [title_tag])
debug(f"Added title tag to {file_path.name}", file=sys.stderr)
except Exception as exc:
debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr)
db.connection.commit()
# Third pass: discover files on disk that aren't in the database yet
# These are hash-named files that were added after initial indexing
cursor.execute('SELECT LOWER(hash) FROM files')
db_hashes = {row[0] for row in cursor.fetchall()}
discovered = 0
for file_path in sorted(location_path.rglob("*")):
if file_path.is_file():
# Check if file name (without extension) is a 64-char hex hash
name_without_ext = file_path.stem
if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()):
file_hash = name_without_ext.lower()
# Skip if already in DB
if file_hash in db_hashes:
continue
try:
# Add file to DB (creates entry and auto-adds title: tag)
db.get_or_create_file_entry(file_path)
# Save extension metadata
file_ext = file_path.suffix
ext_clean = file_ext.lstrip('.') if file_ext else ''
db.save_metadata(file_path, {
'hash': file_hash,
'ext': ext_clean,
'size': file_path.stat().st_size
})
discovered += 1
except Exception as e:
debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr)
if discovered > 0:
debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr)
db.connection.commit()
except Exception as exc:
debug(f"Migration to hash storage failed: {exc}", file=sys.stderr)
def location(self) -> str:
return self._location
def name(self) -> str:
return self._name
def add_file(self, file_path: Path, **kwargs: Any) -> str:
"""Add file to local folder storage with full metadata support.
Args:
file_path: Path to the file to add
move: If True, move file instead of copy (default: False)
tags: Optional list of tags to add
url: Optional list of URLs to associate with the file
title: Optional title (will be added as 'title:value' tag)
Returns:
File hash (SHA256 hex string) as identifier
"""
move_file = bool(kwargs.get("move"))
tags = kwargs.get("tags", [])
url = kwargs.get("url", [])
title = kwargs.get("title")
# Extract title from tags if not explicitly provided
if not title:
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith("title:"):
title = tag.split(":", 1)[1].strip()
break
# Fallback to filename if no title
if not title:
title = file_path.name
# Ensure title is in tags
title_tag = f"title:{title}"
if not any(str(tag).lower().startswith("title:") for tag in tags):
tags = [title_tag] + list(tags)
try:
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}", file=sys.stderr)
# Preserve extension in the stored filename
file_ext = file_path.suffix # e.g., '.mp4'
save_filename = file_hash + file_ext if file_ext else file_hash
save_file = Path(self._location) / save_filename
# Check if file already exists
from API.folder import API_folder_store
with API_folder_store(Path(self._location)) as db:
existing_path = db.search_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
# Still add tags and url if provided
if tags:
self.add_tag(file_hash, tags)
if url:
self.add_url(file_hash, url)
return file_hash
# Move or copy file
if move_file:
shutil.move(str(file_path), str(save_file))
debug(f"Local move: {save_file}", file=sys.stderr)
else:
shutil.copy2(str(file_path), str(save_file))
debug(f"Local copy: {save_file}", file=sys.stderr)
# Save to database
with API_folder_store(Path(self._location)) as db:
db.get_or_create_file_entry(save_file)
# Save metadata including extension
ext_clean = file_ext.lstrip('.') if file_ext else ''
db.save_metadata(save_file, {
'hash': file_hash,
'ext': ext_clean,
'size': file_path.stat().st_size
})
# Add tags if provided
if tags:
self.add_tag(file_hash, tags)
# Add url if provided
if url:
self.add_url(file_hash, url)
log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr)
return file_hash
except Exception as exc:
log(f"❌ Local storage failed: {exc}", file=sys.stderr)
raise
def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search local database for files by title tag or filename."""
from fnmatch import fnmatch
from API.folder import DatabaseAPI
limit = kwargs.get("limit")
try:
limit = int(limit) if limit is not None else None
except (TypeError, ValueError):
limit = None
if isinstance(limit, int) and limit <= 0:
limit = None
query = query.lower()
query_lower = query  # alias used by the non-namespace search paths below
match_all = query == "*"
results = []
search_dir = Path(self._location).expanduser()
tokens = [t.strip() for t in query.split(',') if t.strip()]
# A bare 64-char hex query is ambiguous; require an explicit 'hash:' prefix
if not match_all and _normalize_hash(query):
    debug("Hash queries require 'hash:' prefix for local search")
    return results
def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]:
    path_str = str(file_path)
    hash_value = _resolve_file_hash(db_hash, file_path)
    # Prefer the title: tag; fall back to the hash, then the filename stem
    title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
    if not title:
        title = hash_value if hash_value else file_path.stem
    # Extension from the path; fall back to the title, which may hold the original filename
    ext = file_path.suffix.lstrip('.')
    if not ext:
        ext = Path(title).suffix.lstrip('.')
    # Build a clean entry with only the necessary fields
    return {
        "title": title,
        "ext": ext,
        "path": path_str,
        "target": path_str,
        "store": self._name,
        "size": size_bytes,
        "hash": hash_value,
        "tag": tags,
    }
try:
if not search_dir.exists():
debug(f"Search directory does not exist: {search_dir}")
return results
try:
with DatabaseAPI(search_dir) as api:
if tokens and len(tokens) > 1:
def _like_pattern(term: str) -> str:
return term.replace('*', '%').replace('?', '_')
def _ids_for_token(token: str) -> set[int]:
token = token.strip()
if not token:
return set()
if ':' in token and not token.startswith(':'):
namespace, pattern = token.split(':', 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
if namespace == 'hash':
normalized_hash = _normalize_hash(pattern)
if not normalized_hash:
return set()
h = api.get_file_hash_by_hash(normalized_hash)
return {h} if h else set()
if namespace == 'store':
if pattern not in {'local', 'file', 'filesystem'}:
return set()
return api.get_all_file_hashes()
query_pattern = f"{namespace}:%"
tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern)
matched: set[str] = set()
for file_hash, tag_val in tag_rows:
if not tag_val:
continue
tag_lower = str(tag_val).lower()
if not tag_lower.startswith(f"{namespace}:"):
continue
value = tag_lower[len(namespace)+1:]
if fnmatch(value, pattern):
matched.add(file_hash)
return matched
term = token.lower()
like_pattern = f"%{_like_pattern(term)}%"
hashes = api.get_file_hashes_by_path_pattern(like_pattern)
hashes.update(api.get_file_hashes_by_tag_substring(like_pattern))
return hashes
try:
matching_hashes: set[str] | None = None
for token in tokens:
hashes = _ids_for_token(token)
matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes
if not matching_hashes:
    return results
rows = api.get_file_metadata(matching_hashes, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
except Exception as exc:
log(f"⚠️ AND search failed: {exc}", file=sys.stderr)
debug(f"AND search exception details: {exc}")
return []
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
if namespace == "hash":
normalized_hash = _normalize_hash(pattern)
if not normalized_hash:
return results
h = api.get_file_hash_by_hash(normalized_hash)
hashes = {h} if h else set()
rows = api.get_file_metadata(hashes, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
query_pattern = f"{namespace}:%"
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
debug(f"Found {len(rows)} potential matches in DB")
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern)
for tag in tags:
tag_lower = tag.lower()
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
all_tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
results.append(entry)
else:
debug(f"File missing on disk: {file_path}")
break
if limit is not None and len(results) >= limit:
return results
elif not match_all:
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
# Over-fetch so post-filtering (whole-word matching) can still fill the limit
fetch_limit = (limit or 45) * 50
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit)
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
word_regex = None
if len(terms) == 1:
term = terms[0]
has_wildcard = '*' in term or '?' in term
if has_wildcard:
try:
    word_regex = re.compile(translate(term), re.IGNORECASE)  # translate imported at module top
except Exception:
    word_regex = None
else:
try:
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes, file_hash in rows:
if not file_path_str or file_path_str in seen_files:
continue
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
if terms:
title_hits: dict[str, dict[str, Any]] = {}
for term in terms:
title_pattern = f"title:%{term}%"
title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit)
for file_hash, file_path_str, size_bytes, ext in title_rows:
if not file_path_str:
continue
entry = title_hits.get(file_hash)
if entry:
entry["count"] += 1
if size_bytes is not None:
entry["size"] = size_bytes
else:
title_hits[file_hash] = {
"path": file_path_str,
"size": size_bytes,
"hash": file_hash,
"count": 1,
}
if title_hits:
required = len(terms)
for file_hash, info in title_hits.items():
if info.get("count") != required:
continue
file_path_str = info.get("path")
if not file_path_str or file_path_str in seen_files:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
seen_files.add(file_path_str)
size_bytes = info.get("size")
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
results.append(entry)
if limit is not None and len(results) >= limit:
return results
query_pattern = f"%{query_lower}%"
tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit)
for file_hash, file_path_str, size_bytes, ext in tag_rows:
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
else:
rows = api.get_all_files(limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if file_path_str:
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if results:
debug(f"Returning {len(results)} results from DB")
else:
debug("No results found in DB")
return results
except Exception as e:
log(f"⚠️ Database search failed: {e}", file=sys.stderr)
debug(f"DB search exception details: {e}")
return []
except Exception as exc:
log(f"❌ Local search failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Alias for search_file to match the interface expected by FileStorage."""
return self.search_store(query, **kwargs)
def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]:
"""Return the library root containing medios-macina.db.
Prefer the store's configured location, then config override, then walk parents
of the file path to find a directory with medios-macina.db."""
candidates: list[Path] = []
if self._location:
candidates.append(Path(self._location).expanduser())
# get_local_storage_path comes from the project's config helpers and is not
# imported at module top here; guard the lookup so resolution degrades
# gracefully if it is unavailable
cfg_root = None
if config:
    try:
        cfg_root = get_local_storage_path(config)
    except NameError:
        cfg_root = None
if cfg_root:
candidates.append(Path(cfg_root).expanduser())
for root in candidates:
db_path = root / "medios-macina.db"
if db_path.exists():
return root
try:
for parent in [file_path] + list(file_path.parents):
db_path = parent / "medios-macina.db"
if db_path.exists():
return parent
except Exception:
pass
return None
def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]:
"""Retrieve file by hash, returning path to the file.
Args:
file_hash: SHA256 hash of the file (64-char hex string)
Returns:
Path to the file or None if not found
"""
try:
# Normalize the hash
normalized_hash = _normalize_hash(file_hash)
if not normalized_hash:
return None
search_dir = Path(self._location).expanduser()
from API.folder import API_folder_store
with API_folder_store(search_dir) as db:
# Search for file by hash
file_path = db.search_hash(normalized_hash)
if file_path and file_path.exists():
return file_path
return None
except Exception as exc:
debug(f"Failed to get file for hash {file_hash}: {exc}")
return None
def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]:
"""Get metadata for a file from the database by hash.
Args:
file_hash: SHA256 hash of the file (64-char hex string)
Returns:
Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found
"""
try:
# Normalize the hash
normalized_hash = _normalize_hash(file_hash)
if not normalized_hash:
return None
search_dir = Path(self._location).expanduser()
from API.folder import DatabaseAPI
with DatabaseAPI(search_dir) as api:
# Get file hash
file_hash_result = api.get_file_hash_by_hash(normalized_hash)
if not file_hash_result:
return None
# Query metadata directly from database
cursor = api.get_cursor()
cursor.execute("""
SELECT * FROM metadata WHERE hash = ?
""", (file_hash_result,))
row = cursor.fetchone()
if not row:
return None
metadata = dict(row)
# Canonicalize metadata keys (no legacy aliases)
if "file_path" in metadata and "path" not in metadata:
metadata["path"] = metadata.get("file_path")
metadata.pop("file_path", None)
# Parse JSON fields
for field in ['url', 'relationships']:
if metadata.get(field):
try:
metadata[field] = json.loads(metadata[field])
except (json.JSONDecodeError, TypeError):
metadata[field] = [] if field == 'url' else []
return metadata
except Exception as exc:
debug(f"Failed to get metadata for hash {file_hash}: {exc}")
return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
"""Get tags for a local file by hash.
Returns:
Tuple of (tags_list, store_name) where store_name is the actual store name
"""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
db_tags = db.get_tags(file_hash)
if db_tags:
# Return actual store name instead of generic "local_db"
store_name = self._name if self._name else "local"
return list(db_tags), store_name
except Exception as exc:
debug(f"Local DB lookup failed: {exc}")
return [], "unknown"
except Exception as exc:
debug(f"get_tags failed for local file: {exc}")
return [], "unknown"
def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool:
"""Add tags to a local file by hash (via API_folder_store).
Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags.
Returns True if tags were successfully added.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
try:
with API_folder_store(Path(self._location)) as db:
# Get existing tags
existing_tags = list(db.get_tags(hash) or [])
original_tags_lower = {t.lower() for t in existing_tags}
# Merge new tags, handling namespace overwrites
for new_tag in tag:
if ':' in new_tag:
namespace = new_tag.split(':', 1)[0]
# Remove existing tags in same namespace
existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
# Add new tag if not already present (case-insensitive check)
if new_tag.lower() not in original_tags_lower:
existing_tags.append(new_tag)
# Save merged tags
db.add_tags_to_hash(hash, existing_tags)
return True
except Exception as exc:
debug(f"Local DB add_tags failed: {exc}")
return False
except Exception as exc:
debug(f"add_tag failed for local file: {exc}")
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Remove tags from a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
db.remove_tags_from_hash(file_hash, list(tags))
return True
except Exception as exc:
debug(f"Local DB remove_tags failed: {exc}")
return False
except Exception as exc:
debug(f"delete_tag failed for local file: {exc}")
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
"""Get known url for a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
return list(meta.get("url") or [])
except Exception as exc:
debug(f"Local DB get_metadata failed: {exc}")
return []
except Exception as exc:
debug(f"get_url failed for local file: {exc}")
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Add known url to a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = list(meta.get("url") or [])
changed = False
for u in list(url or []):
if not u:
continue
if u not in existing_urls:
existing_urls.append(u)
changed = True
if changed:
db.update_metadata_by_hash(file_hash, {"url": existing_urls})
return True
except Exception as exc:
debug(f"Local DB add_url failed: {exc}")
return False
except Exception as exc:
debug(f"add_url failed for local file: {exc}")
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete known url from a local file by hash."""
from API.folder import API_folder_store
try:
file_hash = file_identifier
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = list(meta.get("url") or [])
remove_set = {u for u in (url or []) if u}
if not remove_set:
return False
new_urls = [u for u in existing_urls if u not in remove_set]
if new_urls != existing_urls:
db.update_metadata_by_hash(file_hash, {"url": new_urls})
return True
except Exception as exc:
debug(f"Local DB delete_url failed: {exc}")
return False
except Exception as exc:
debug(f"delete_url failed for local file: {exc}")
return False
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
"""Delete a file from the folder store.
Args:
file_identifier: The file path (as string) or hash of the file to delete
**kwargs: Optional parameters
Returns:
True if deletion succeeded, False otherwise
"""
from API.folder import API_folder_store
try:
file_path = Path(file_identifier)
# Delete from database
with API_folder_store(Path(self._location)) as db:
db.delete_file(file_path)
# Delete the actual file from disk
if file_path.exists():
file_path.unlink()
debug(f"Deleted file: {file_path}")
return True
else:
debug(f"File not found on disk: {file_path}")
return True # Already gone
except Exception as exc:
debug(f"delete_file failed: {exc}")
return False

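A minimal usage sketch for the Folder backend, assuming the target directory already exists; the file path, tags, and query below are illustrative, not part of the commit:

from pathlib import Path
from Store.Folder import Folder

# Instantiating the backend initializes the SQLite database and runs the
# one-time hash-storage migration for this location.
store = Folder(location="C:/Media", name="default")

# add_file copies by default (move=True moves) and returns the SHA256 hash.
file_hash = store.add_file(
    Path("song.mp3"),               # hypothetical input file
    tags=["creator:someone"],
    title="Simple Man",
    url=["https://example.com/song"],
)

# Tags, URLs, and metadata are all keyed by that hash.
tags, store_name = store.get_tag(file_hash)
meta = store.get_metadata(file_hash)

# Search by free text, by namespace pattern, or with an explicit hash: prefix
# (bare hex hashes are rejected by search_store).
results = store.search_store("simple man", limit=10)
by_hash = store.search_store(f"hash:{file_hash}")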
Store/HydrusNetwork.py Normal file

@@ -0,0 +1,597 @@
from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
from Store._base import StoreBackend
class HydrusNetwork(StoreBackend):
"""File storage backend for Hydrus client.
Each instance represents a specific Hydrus client connection.
Maintains its own HydrusClient with session key.
"""
def __init__(self, instance_name: str, api_key: str, url: str) -> None:
"""Initialize Hydrus storage backend.
Args:
instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
api_key: Hydrus Client API access key
url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
"""
from API.HydrusNetwork import HydrusClient
self._instance_name = instance_name
self._api_key = api_key
self._url = url
# Create persistent client with session key for this instance
self._client = HydrusClient(url=url, access_key=api_key)
def name(self) -> str:
return self._instance_name
def get_name(self) -> str:
return self._instance_name
def add_file(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Hydrus with full metadata support.
Args:
file_path: Path to the file to upload
tags: Optional list of tags to add
url: Optional list of URLs to associate with the file
title: Optional title (will be added as 'title:value' tag)
Returns:
File hash from Hydrus
Raises:
Exception: If upload fails
"""
from SYS.utils import sha256_file
tags = kwargs.get("tags", [])
url = kwargs.get("url", [])
title = kwargs.get("title")
# Add title to tags if provided and not already present
if title:
title_tag = f"title:{title}"
if not any(str(tag).lower().startswith("title:") for tag in tags):
tags = [title_tag] + list(tags)
try:
# Compute file hash
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}")
# Use persistent client with session key
client = self._client
if client is None:
raise Exception("Hydrus client unavailable")
# Check if file already exists in Hydrus
file_exists = False
try:
metadata = client.fetch_file_metadata(hashes=[file_hash])
if metadata and isinstance(metadata, dict):
files = metadata.get("file_metadata", [])
if files:
file_exists = True
log(
f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
file=sys.stderr,
)
except Exception:
pass
# Upload file if not already present
if not file_exists:
log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
response = client.add_file(file_path)
# Extract hash from response
hydrus_hash: Optional[str] = None
if isinstance(response, dict):
hydrus_hash = response.get("hash") or response.get("file_hash")
if not hydrus_hash:
hashes = response.get("hashes")
if isinstance(hashes, list) and hashes:
hydrus_hash = hashes[0]
if not hydrus_hash:
raise Exception(f"Hydrus response missing file hash: {response}")
file_hash = hydrus_hash
log(f"Hydrus: {file_hash}", file=sys.stderr)
# Add tags if provided (both for new and existing files)
if tags:
# Use the default tag service
service_name = "my tags"
try:
debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}")
client.add_tags(file_hash, tags, service_name)
log(f"Tags added via '{service_name}'", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)
# Associate url if provided (both for new and existing files)
if url:
log(f"Associating {len(url)} URL(s) with file", file=sys.stderr)
for u in url:
    if u:
        try:
            client.associate_url(file_hash, str(u))
            debug(f"Associated URL: {u}")
        except Exception as exc:
            log(f"⚠️ Failed to associate URL {u}: {exc}", file=sys.stderr)
return file_hash
except Exception as exc:
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
raise
def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search Hydrus database for files matching query.
Args:
query: Search query (tags, filenames, hashes, etc.)
limit: Maximum number of results to return (default: 100)
Returns:
List of dicts with 'name', 'hash', 'size', 'tags' fields
Example:
results = storage["hydrus"].search("artist:john_doe music")
results = storage["hydrus"].search("Simple Man")
"""
limit = kwargs.get("limit", 100)
try:
client = self._client
if client is None:
raise Exception("Hydrus client unavailable")
debug(f"Searching Hydrus for: {query}")
# Parse the query into tags
# Handle both simple tags and complex queries
# "*" means "match all" - use system:everything tag in Hydrus
if query.strip() == "*":
# Use system:everything to match all files in Hydrus
tags = ["system:everything"]
else:
query_lower = query.lower().strip()
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
# If query has explicit namespace, use it as a tag search
if ':' not in query_lower:
# No namespace provided: search all files, then filter by title/tags containing the query
tags = ["system:everything"]
else:
# User provided explicit namespace (e.g., "creator:john" or "system:has_audio")
# Use it as a tag search
tags = [query_lower]
if not tags:
debug(f"Found 0 result(s)")
return []
# Search files with the tags
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
# Extract file IDs from search result
file_ids = search_result.get("file_ids", [])
hashes = search_result.get("hashes", [])
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
# Fetch metadata for the found files
results = []
query_lower = query.lower().strip()
# Split by comma or space for AND logic
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
for meta in metadata_list:
if len(results) >= limit:
break
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
size = meta.get("size", 0)
# Get tags for this file and extract title
tags_set = meta.get("tags", {})
all_tags = []
title = f"Hydrus File {file_id}" # Default fallback
all_tags_str = "" # For substring matching
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
if isinstance(tags_set, dict):
# Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
def _collect(tag_list: Any) -> None:
nonlocal title, all_tags_str
if not isinstance(tag_list, list):
return
for tag in tag_list:
tag_text = str(tag) if tag else ""
if not tag_text:
continue
all_tags.append(tag_text)
all_tags_str += " " + tag_text.lower()
if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_text.split(":", 1)[1].strip()
for _service_name, service_tags in tags_set.items():
if not isinstance(service_tags, dict):
continue
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
_collect(tag_list)
display_tags = service_tags.get("display_tags", [])
_collect(display_tags)
# Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
_collect(top_level_tags)
# Resolve extension from MIME type
mime_type = meta.get("mime")
ext = ""
if mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = info.get("ext", "").lstrip('.')
break
if ext:
break
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ':' in query_lower
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
file_url = f"{self._url.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append({
"hash": hash_hex,
"url": file_url,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"store": self._instance_name,
"tags": all_tags,
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
else:
# Free-form search: check if search terms match the title or tags
# Match if ALL search terms are found in title or tags (AND logic)
# AND use whole word matching
# Combine title and tags for searching
searchable_text = (title + " " + all_tags_str).lower()
match = True
if query_lower != "*":
for term in search_terms:
# Regex for whole word: \bterm\b
# Escape term to handle special chars
pattern = r'\b' + re.escape(term) + r'\b'
if not re.search(pattern, searchable_text):
match = False
break
if match:
file_url = f"{self._url.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append({
"hash": hash_hex,
"url": file_url,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"store": self._instance_name,
"tags": all_tags,
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
debug(f"Found {len(results)} result(s)")
return results[:limit]
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
raise
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
"""Open file in browser via Hydrus client API URL."""
import webbrowser
debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...")
# Build browser URL with access key
base_url = self._client.url.rstrip('/')
access_key = self._client.access_key
browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}")
# Open in the default browser and return the URL instead of downloading
webbrowser.open(browser_url)
debug("[HydrusNetwork.get_file] Browser opened; returning URL")
return browser_url
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
"""Get metadata for a file from Hydrus by hash.
Args:
file_hash: SHA256 hash of the file (64-char hex string)
Returns:
Dict with metadata fields or None if not found
"""
try:
client = self._client
if not client:
debug("get_metadata: Hydrus client unavailable")
return None
# Fetch file metadata
payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True)
if not payload or not payload.get("metadata"):
return None
meta = payload["metadata"][0]
# Extract title from tags
title = f"Hydrus_{file_hash[:12]}"
tags_payload = meta.get("tags", {})
if isinstance(tags_payload, dict):
for service_data in tags_payload.values():
if isinstance(service_data, dict):
display_tags = service_data.get("display_tags", {})
if isinstance(display_tags, dict):
current_tags = display_tags.get("0", [])
if isinstance(current_tags, list):
for tag in current_tags:
if str(tag).lower().startswith("title:"):
title = tag.split(":", 1)[1].strip()
break
if title != f"Hydrus_{file_hash[:12]}":
break
# Determine extension from mime type
mime_type = meta.get("mime", "")
ext = ""
if mime_type:
# mime_maps values are category dicts of {ext_key: {"ext": ..., "mimes": [...]}}
# (same structure as used in search_store above)
for category in mime_maps.values():
    for _ext_key, info in category.items():
        if mime_type in info.get("mimes", []):
            ext = info.get("ext", "").lstrip(".")
            break
    if ext:
        break
return {
"hash": file_hash,
"title": title,
"ext": ext,
"size": meta.get("size", 0),
"mime": mime_type,
}
except Exception as exc:
debug(f"Failed to get metadata from Hydrus: {exc}")
return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
"""Get tags for a file from Hydrus by hash.
Args:
file_identifier: File hash (SHA256 hex string)
**kwargs: Optional service_name parameter
Returns:
Tuple of (tags_list, source_description)
where source is always "hydrus"
"""
try:
from API import HydrusNetwork as hydrus_wrapper
file_hash = str(file_identifier)
# Get Hydrus client and service info
client = self._client
if not client:
debug("get_tags: Hydrus client unavailable")
return [], "unknown"
# Fetch file metadata
payload = client.fetch_file_metadata(
hashes=[file_hash],
include_service_keys_to_tags=True,
include_file_url=False
)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
debug(f"get_tags: No metadata returned for hash {file_hash}")
return [], "unknown"
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict) or meta.get("file_id") is None:
debug(f"get_tags: Invalid metadata for hash {file_hash}")
return [], "unknown"
# Extract tags using service name
service_name = "my tags"
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
# Extract tags from metadata
tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
return tags, "hydrus"
except Exception as exc:
debug(f"get_tags failed for Hydrus file: {exc}")
return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Add tags to a Hydrus file.
"""
try:
client = self._client
if client is None:
debug("add_tag: Hydrus client unavailable")
return False
service_name = kwargs.get("service_name") or "my tags"
# Ensure tags is a list
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
if not tag_list:
return False
client.add_tags(file_identifier, tag_list, service_name)
return True
except Exception as exc:
debug(f"Hydrus add_tag failed: {exc}")
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Delete tags from a Hydrus file.
"""
try:
client = self._client
if client is None:
debug("delete_tag: Hydrus client unavailable")
return False
service_name = kwargs.get("service_name") or "my tags"
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
if not tag_list:
return False
client.delete_tags(file_identifier, tag_list, service_name)
return True
except Exception as exc:
debug(f"Hydrus delete_tag failed: {exc}")
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
"""Get known url for a Hydrus file.
"""
try:
client = self._client
if client is None:
debug("get_url: Hydrus client unavailable")
return []
payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return []
meta = items[0]
url = meta.get("url") or []
return list(url)
except Exception as exc:
debug(f"Hydrus get_url failed: {exc}")
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Associate one or more url with a Hydrus file.
"""
try:
client = self._client
if client is None:
debug("add_url: Hydrus client unavailable")
return False
for u in url:
client.associate_url(file_identifier, u)
return True
except Exception as exc:
debug(f"Hydrus add_url failed: {exc}")
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete one or more url from a Hydrus file.
"""
try:
client = self._client
if client is None:
debug("delete_url: Hydrus client unavailable")
return False
for u in url:
client.delete_url(file_identifier, u)
return True
except Exception as exc:
debug(f"Hydrus delete_url failed: {exc}")
return False
@staticmethod
def _extract_tags_from_hydrus_meta(
meta: Dict[str, Any],
service_key: Optional[str],
service_name: str
) -> List[str]:
"""Extract current tags from Hydrus metadata dict.
Prefers display_tags (includes siblings/parents, excludes deleted).
Falls back to storage_tags status '0' (current).
"""
tags_payload = meta.get("tags")
if not isinstance(tags_payload, dict):
return []
svc_data = None
if service_key:
svc_data = tags_payload.get(service_key)
if not isinstance(svc_data, dict):
return []
# Prefer display_tags (Hydrus computes siblings/parents)
display = svc_data.get("display_tags")
if isinstance(display, list) and display:
return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()]
# Fallback to storage_tags status '0' (current)
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
current_list = storage.get("0") or storage.get(0)
if isinstance(current_list, list):
return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()]
return []

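A similar sketch for the Hydrus backend; the URL and access key are placeholders and a reachable Hydrus client is assumed:

from pathlib import Path
from Store.HydrusNetwork import HydrusNetwork

hydrus = HydrusNetwork(
    instance_name="home",
    api_key="replace-with-real-access-key",  # placeholder
    url="http://192.168.1.230:45869",
)

# Upload (or detect a duplicate), then tag and associate URLs in one call.
file_hash = hydrus.add_file(Path("clip.mp4"), tags=["creator:someone"], title="My Clip")

# Free-form queries whole-word match against title/tags; namespaced queries
# (e.g. 'system:has_audio') are passed straight to Hydrus as tag searches.
results = hydrus.search_store("my clip", limit=25)

# Note the side effect: get_file opens the file in the default browser and
# returns the client-API URL rather than downloading the file.
browser_url = hydrus.get_file(file_hash)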
Store/__init__.py Normal file

@@ -0,0 +1,7 @@
from Store._base import StoreBackend
from Store.registry import Store
__all__ = [
"StoreBackend",
"Store",
]

Store/_base.py Normal file

@@ -0,0 +1,55 @@
"""Store backend base types.
Concrete store implementations live in the `Store/` package.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
class StoreBackend(ABC):
@abstractmethod
def add_file(self, file_path: Path, **kwargs: Any) -> str:
raise NotImplementedError
@abstractmethod
def name(self) -> str:
raise NotImplementedError
def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
raise NotImplementedError(f"{self.name()} backend does not support searching")
@abstractmethod
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
raise NotImplementedError
@abstractmethod
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
raise NotImplementedError
@abstractmethod
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
raise NotImplementedError
@abstractmethod
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
raise NotImplementedError
@abstractmethod
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
raise NotImplementedError
@abstractmethod
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
raise NotImplementedError
@abstractmethod
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
raise NotImplementedError
@abstractmethod
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
raise NotImplementedError

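A sketch of the minimum a new backend must implement; NullStore is a hypothetical stub that satisfies every abstract method. search_store is deliberately left unimplemented, so the registry's list_searchable_backends() would exclude it:

from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from Store._base import StoreBackend

class NullStore(StoreBackend):
    """Stub backend: accepts every call, stores nothing."""
    def name(self) -> str:
        return "null"
    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        return "0" * 64  # pretend SHA256 hash
    def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
        return None
    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        return None
    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        return [], "null"
    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        return False
    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        return False
    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        return []
    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        return False
    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        return False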
Store/registry.py Normal file

@@ -0,0 +1,99 @@
"""Store registry.
Concrete store implementations live in the `Store/` package.
This module is the single source of truth for store discovery.
Config schema (canonical):
{
"store": {
"folder": {
"default": {"path": "C:/Media"},
"test": {"path": "C:/Temp"}
},
"hydrusnetwork": {
"home": {"Hydrus-Client-API-Access-Key": "...", "url": "http://..."}
}
}
}
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional
from SYS.logger import debug
from Store._base import StoreBackend
from Store.Folder import Folder
from Store.HydrusNetwork import HydrusNetwork
class Store:
def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None:
self._config = config or {}
self._suppress_debug = suppress_debug
self._backends: Dict[str, StoreBackend] = {}
self._load_backends()
def _load_backends(self) -> None:
store_cfg = self._config.get("store")
if not isinstance(store_cfg, dict):
store_cfg = {}
folder_cfg = store_cfg.get("folder")
if isinstance(folder_cfg, dict):
for name, value in folder_cfg.items():
path_val: Optional[str]
if isinstance(value, dict):
path_val = value.get("path")
elif isinstance(value, (str, bytes)):
path_val = str(value)
else:
path_val = None
if not path_val:
continue
location = str(Path(str(path_val)).expanduser())
self._backends[str(name)] = Folder(location=location, name=str(name))
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
for instance_name, instance_config in hydrus_cfg.items():
if not isinstance(instance_config, dict):
continue
api_key = instance_config.get("Hydrus-Client-API-Access-Key")
url = instance_config.get("url")
if not api_key or not url:
continue
try:
self._backends[str(instance_name)] = HydrusNetwork(
instance_name=str(instance_name),
api_key=str(api_key),
url=str(url),
)
except Exception as exc:
if not self._suppress_debug:
debug(f"[Store] Failed to register Hydrus instance '{instance_name}': {exc}")
def list_backends(self) -> list[str]:
return sorted(self._backends.keys())
def list_searchable_backends(self) -> list[str]:
searchable: list[str] = []
for name, backend in self._backends.items():
if type(backend).search_store is not StoreBackend.search_store:
searchable.append(name)
return sorted(searchable)
def __getitem__(self, backend_name: str) -> StoreBackend:
if backend_name not in self._backends:
raise KeyError(f"Unknown store backend: {backend_name}. Available: {list(self._backends.keys())}")
return self._backends[backend_name]
def is_available(self, backend_name: str) -> bool:
return backend_name in self._backends
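Tying it together, a sketch of the registry end to end; paths and credentials mirror the canonical config schema in the module docstring and are illustrative only:

from Store import Store

config = {
    "store": {
        "folder": {
            "default": {"path": "C:/Media"},
        },
        "hydrusnetwork": {
            "home": {
                "Hydrus-Client-API-Access-Key": "replace-with-real-key",
                "url": "http://192.168.1.230:45869",
            },
        },
    }
}

store = Store(config)
print(store.list_backends())             # e.g. ['default', 'home']
print(store.list_searchable_backends())  # both override search_store

# Backends are addressed by name; unknown names raise KeyError.
if store.is_available("default"):
    results = store["default"].search_store("*", limit=5)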