Store/Folder.py (new file, 977 lines)
@@ -0,0 +1,977 @@
from __future__ import annotations

import json
import re
import shutil
import sys
from fnmatch import translate
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from SYS.logger import debug, log
from SYS.utils import sha256_file

from Store._base import StoreBackend


def _normalize_hash(value: Any) -> Optional[str]:
    candidate = str(value or '').strip().lower()
    if len(candidate) != 64:
        return None
    if any(ch not in '0123456789abcdef' for ch in candidate):
        return None
    return candidate


def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]:
    normalized = _normalize_hash(db_hash) if db_hash else None
    if normalized:
        return normalized
    return _normalize_hash(file_path.stem)
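# Illustrative behaviour of the two helpers above (comments only, not executed):
#   _normalize_hash("abc") -> None (not a 64-character hex digest)
#   _normalize_hash("<64 hex chars, any case>") -> the same digest lowercased
#   _resolve_file_hash(None, Path("<64 hex chars>.mp4")) falls back to the filename
#   stem, which is how hash-named files on disk are recognised without a DB row.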


class Folder(StoreBackend):
    """Folder-backed store: files are kept under their SHA-256 hash in a local
    directory and indexed in a per-location SQLite database (tags, urls, metadata).
    """
    # Track which locations have already been migrated to avoid repeated migrations
    _migrated_locations = set()
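    # Typical usage (illustrative sketch only; the location/name values are hypothetical
    # and the surrounding application normally constructs this via its store registry):
    #   store = Folder(location="~/media", name="local")
    #   file_hash = store.add_file(Path("clip.mp4"), tags=["title:My Clip"])
    #   hits = store.search_store("title:my*", limit=10)
    #   path = store.get_file(file_hash)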

    def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None:
        self._location = location
        self._name = name

        if self._location:
            try:
                from API.folder import API_folder_store
                from pathlib import Path
                location_path = Path(self._location).expanduser()

                # Use context manager to ensure connection is properly closed
                with API_folder_store(location_path) as db:
                    if db.connection:
                        db.connection.commit()

                # Call migration and discovery at startup
                Folder.migrate_location(self._location)
            except Exception as exc:
                debug(f"Failed to initialize database for '{name}': {exc}")

    @classmethod
    def migrate_location(cls, location: Optional[str]) -> None:
        """Migrate a location to hash-based storage (one-time operation, call explicitly at startup)."""
        if not location:
            return

        from pathlib import Path
        location_path = Path(location).expanduser()
        location_str = str(location_path)

        # Only migrate once per location
        if location_str in cls._migrated_locations:
            return

        cls._migrated_locations.add(location_str)

        # Create a temporary instance just to call the migration
        temp_instance = cls(location=location)
        temp_instance._migrate_to_hash_storage(location_path)
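    # Startup sketch (illustrative path): a caller runs the one-time migration with
    #   Folder.migrate_location("~/media")
    # Repeated calls for the same location are no-ops thanks to _migrated_locations.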

    def _migrate_to_hash_storage(self, location_path: Path) -> None:
        """Migrate existing files from filename-based to hash-based storage.

        Checks for sidecars (.metadata, .tag) and imports them before renaming.
        Also ensures all files have a title: tag.
        """
        from API.folder import API_folder_store, read_sidecar, write_sidecar, find_sidecar

        try:
            with API_folder_store(location_path) as db:
                cursor = db.connection.cursor()

                # First pass: migrate filename-based files and add title tags
                # Scan all files in the storage directory
                for file_path in sorted(location_path.iterdir()):
                    if not file_path.is_file():
                        continue

                    # Skip database files and sidecars
                    if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'):
                        continue
                    # Also skip if the file ends with -shm or -wal (SQLite journal files)
                    if file_path.name.endswith(('-shm', '-wal')):
                        continue

                    # Check if filename is already a hash (without extension)
                    if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()):
                        continue  # Already migrated, will process in second pass

                    try:
                        # Compute file hash
                        file_hash = sha256_file(file_path)
                        # Preserve extension in the hash-based filename
                        file_ext = file_path.suffix  # e.g., '.mp4'
                        hash_filename = file_hash + file_ext if file_ext else file_hash
                        hash_path = location_path / hash_filename

                        # Check for sidecars and import them
                        sidecar_path = find_sidecar(file_path)
                        tags_to_add = []
                        url_to_add = []
                        has_title_tag = False

                        if sidecar_path and sidecar_path.exists():
                            try:
                                _, tags, url = read_sidecar(sidecar_path)
                                if tags:
                                    tags_to_add = list(tags)
                                    # Check if title tag exists
                                    has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add)
                                if url:
                                    url_to_add = list(url)
                                debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr)
                                # Delete the sidecar after importing
                                sidecar_path.unlink()
                            except Exception as exc:
                                debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr)

                        # Ensure there's a title tag (use original filename if not present)
                        if not has_title_tag:
                            tags_to_add.append(f"title:{file_path.name}")

                        # Rename file to hash if needed
                        if hash_path != file_path and not hash_path.exists():
                            debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr)
                            file_path.rename(hash_path)

                        # Create or update database entry
                        db.get_or_create_file_entry(hash_path)

                        # Save extension metadata
                        ext_clean = file_ext.lstrip('.') if file_ext else ''
                        db.save_metadata(hash_path, {
                            'hash': file_hash,
                            'ext': ext_clean,
                            'size': hash_path.stat().st_size
                        })

                        # Add all tags (including title tag)
                        if tags_to_add:
                            db.save_tags(hash_path, tags_to_add)
                            debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr)

                        # Note: url would need a separate table if you want to store them
                        # For now, we're just noting them in debug
                        if url_to_add:
                            debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr)

                    except Exception as exc:
                        debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr)

                # Second pass: ensure all files in database have a title: tag
                db.connection.commit()
                cursor.execute('''
                    SELECT f.hash, f.file_path
                    FROM files f
                    WHERE NOT EXISTS (
                        SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%'
                    )
                ''')
                files_without_title = cursor.fetchall()

                for file_hash, file_path_str in files_without_title:
                    try:
                        file_path = Path(file_path_str)
                        if file_path.exists():
                            # Use the filename as the title
                            title_tag = f"title:{file_path.name}"
                            db.save_tags(file_path, [title_tag])
                            debug(f"Added title tag to {file_path.name}", file=sys.stderr)
                    except Exception as exc:
                        debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr)

                db.connection.commit()

                # Third pass: discover files on disk that aren't in the database yet
                # These are hash-named files that were added after initial indexing
                cursor.execute('SELECT LOWER(hash) FROM files')
                db_hashes = {row[0] for row in cursor.fetchall()}

                discovered = 0
                for file_path in sorted(location_path.rglob("*")):
                    if file_path.is_file():
                        # Check if file name (without extension) is a 64-char hex hash
                        name_without_ext = file_path.stem
                        if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()):
                            file_hash = name_without_ext.lower()

                            # Skip if already in DB
                            if file_hash in db_hashes:
                                continue

                            try:
                                # Add file to DB (creates entry and auto-adds title: tag)
                                db.get_or_create_file_entry(file_path)

                                # Save extension metadata
                                file_ext = file_path.suffix
                                ext_clean = file_ext.lstrip('.') if file_ext else ''
                                db.save_metadata(file_path, {
                                    'hash': file_hash,
                                    'ext': ext_clean,
                                    'size': file_path.stat().st_size
                                })

                                discovered += 1
                            except Exception as e:
                                debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr)

                if discovered > 0:
                    debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr)
                    db.connection.commit()
        except Exception as exc:
            debug(f"Migration to hash storage failed: {exc}", file=sys.stderr)


    def location(self) -> str:
        return self._location

    def name(self) -> str:
        return self._name

    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Add file to local folder storage with full metadata support.

        Args:
            file_path: Path to the file to add
            move: If True, move file instead of copy (default: False)
            tags: Optional list of tags to add
            url: Optional list of url to associate with the file
            title: Optional title (will be added as 'title:value' tag)

        Returns:
            File hash (SHA256 hex string) as identifier
        """
        move_file = bool(kwargs.get("move"))
        tags = kwargs.get("tags", [])
        url = kwargs.get("url", [])
        title = kwargs.get("title")

        # Extract title from tags if not explicitly provided
        if not title:
            for tag in tags:
                if isinstance(tag, str) and tag.lower().startswith("title:"):
                    title = tag.split(":", 1)[1].strip()
                    break

        # Fallback to filename if no title
        if not title:
            title = file_path.name

        # Ensure title is in tags
        title_tag = f"title:{title}"
        if not any(str(tag).lower().startswith("title:") for tag in tags):
            tags = [title_tag] + list(tags)

        try:
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}", file=sys.stderr)

            # Preserve extension in the stored filename
            file_ext = file_path.suffix  # e.g., '.mp4'
            save_filename = file_hash + file_ext if file_ext else file_hash
            save_file = Path(self._location) / save_filename

            # Check if file already exists
            from API.folder import API_folder_store
            with API_folder_store(Path(self._location)) as db:
                existing_path = db.search_hash(file_hash)
                if existing_path and existing_path.exists():
                    log(
                        f"✓ File already in local storage: {existing_path}",
                        file=sys.stderr,
                    )
                    # Still add tags and url if provided
                    if tags:
                        self.add_tag(file_hash, tags)
                    if url:
                        self.add_url(file_hash, url)
                    return file_hash

            # Move or copy file
            if move_file:
                shutil.move(str(file_path), str(save_file))
                debug(f"Local move: {save_file}", file=sys.stderr)
            else:
                shutil.copy2(str(file_path), str(save_file))
                debug(f"Local copy: {save_file}", file=sys.stderr)

            # Save to database
            with API_folder_store(Path(self._location)) as db:
                db.get_or_create_file_entry(save_file)
                # Save metadata including extension
                ext_clean = file_ext.lstrip('.') if file_ext else ''
                db.save_metadata(save_file, {
                    'hash': file_hash,
                    'ext': ext_clean,
                    # Stat the stored copy: the source path may already have been moved away.
                    'size': save_file.stat().st_size
                })

            # Add tags if provided
            if tags:
                self.add_tag(file_hash, tags)

            # Add url if provided
            if url:
                self.add_url(file_hash, url)

            log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr)
            return file_hash

        except Exception as exc:
            log(f"❌ Local storage failed: {exc}", file=sys.stderr)
            raise
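    # Example call (sketch; keyword arguments mirror the docstring above, values are made up):
    #   h = store.add_file(Path("/tmp/clip.mp4"), move=True,
    #                      tags=["artist:someone"], url=["https://example.com/clip"])
    # Re-adding identical bytes returns the same hash and only merges the supplied tags/url.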

    def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search local database for files by title tag or filename."""
        from fnmatch import fnmatch
        from API.folder import DatabaseAPI

        limit = kwargs.get("limit")
        try:
            limit = int(limit) if limit is not None else None
        except (TypeError, ValueError):
            limit = None
        if isinstance(limit, int) and limit <= 0:
            limit = None

        query = query.lower()
        query_lower = query  # Ensure query_lower is defined for all code paths
        match_all = query == "*"
        results = []
        search_dir = Path(self._location).expanduser()

        tokens = [t.strip() for t in query.split(',') if t.strip()]

        # Bare 64-char hex queries are rejected; the 'hash:' namespace must be used instead.
        if not match_all and _normalize_hash(query):
            debug("Hash queries require 'hash:' prefix for local search")
            return results

        def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]:
            path_str = str(file_path)
            # Get title from tags if available, otherwise use hash as fallback
            title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
            if not title:
                # Fallback to hash if no title tag exists
                hash_value = _resolve_file_hash(db_hash, file_path)
                title = hash_value if hash_value else file_path.stem

            # Extract extension from file path
            ext = file_path.suffix.lstrip('.')
            if not ext:
                # Fallback: try to extract from title (original filename might be in title)
                title_path = Path(title)
                ext = title_path.suffix.lstrip('.')

            # Build clean entry with only necessary fields
            hash_value = _resolve_file_hash(db_hash, file_path)
            entry = {
                "title": title,
                "ext": ext,
                "path": path_str,
                "target": path_str,
                "store": self._name,
                "size": size_bytes,
                "hash": hash_value,
                "tag": tags,
            }
            return entry

        try:
            if not search_dir.exists():
                debug(f"Search directory does not exist: {search_dir}")
                return results

            try:
                with DatabaseAPI(search_dir) as api:
                    if tokens and len(tokens) > 1:
                        def _like_pattern(term: str) -> str:
                            return term.replace('*', '%').replace('?', '_')

                        def _ids_for_token(token: str) -> set[str]:
                            token = token.strip()
                            if not token:
                                return set()

                            if ':' in token and not token.startswith(':'):
                                namespace, pattern = token.split(':', 1)
                                namespace = namespace.strip().lower()
                                pattern = pattern.strip().lower()

                                if namespace == 'hash':
                                    normalized_hash = _normalize_hash(pattern)
                                    if not normalized_hash:
                                        return set()
                                    h = api.get_file_hash_by_hash(normalized_hash)
                                    return {h} if h else set()

                                if namespace == 'store':
                                    if pattern not in {'local', 'file', 'filesystem'}:
                                        return set()
                                    return api.get_all_file_hashes()

                                query_pattern = f"{namespace}:%"
                                tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern)
                                matched: set[str] = set()
                                for file_hash, tag_val in tag_rows:
                                    if not tag_val:
                                        continue
                                    tag_lower = str(tag_val).lower()
                                    if not tag_lower.startswith(f"{namespace}:"):
                                        continue
                                    value = tag_lower[len(namespace)+1:]
                                    if fnmatch(value, pattern):
                                        matched.add(file_hash)
                                return matched

                            term = token.lower()
                            like_pattern = f"%{_like_pattern(term)}%"
                            hashes = api.get_file_hashes_by_path_pattern(like_pattern)
                            hashes.update(api.get_file_hashes_by_tag_substring(like_pattern))
                            return hashes

                        try:
                            matching_hashes: set[str] | None = None
                            for token in tokens:
                                hashes = _ids_for_token(token)
                                matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes
                                if not matching_hashes:
                                    return results

                            if not matching_hashes:
                                return results

                            rows = api.get_file_metadata(matching_hashes, limit)
                            for file_hash, file_path_str, size_bytes, ext in rows:
                                if not file_path_str:
                                    continue
                                file_path = Path(file_path_str)
                                if not file_path.exists():
                                    continue
                                if size_bytes is None:
                                    try:
                                        size_bytes = file_path.stat().st_size
                                    except OSError:
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
                            return results
                        except Exception as exc:
                            log(f"⚠️ AND search failed: {exc}", file=sys.stderr)
                            debug(f"AND search exception details: {exc}")
                            return []

                    if ":" in query and not query.startswith(":"):
                        namespace, pattern = query.split(":", 1)
                        namespace = namespace.strip().lower()
                        pattern = pattern.strip().lower()
                        debug(f"Performing namespace search: {namespace}:{pattern}")

                        if namespace == "hash":
                            normalized_hash = _normalize_hash(pattern)
                            if not normalized_hash:
                                return results
                            h = api.get_file_hash_by_hash(normalized_hash)
                            hashes = {h} if h else set()
                            rows = api.get_file_metadata(hashes, limit)
                            for file_hash, file_path_str, size_bytes, ext in rows:
                                if not file_path_str:
                                    continue
                                file_path = Path(file_path_str)
                                if not file_path.exists():
                                    continue
                                if size_bytes is None:
                                    try:
                                        size_bytes = file_path.stat().st_size
                                    except OSError:
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
                            return results

                        query_pattern = f"{namespace}:%"
                        rows = api.get_files_by_namespace_pattern(query_pattern, limit)
                        debug(f"Found {len(rows)} potential matches in DB")

                        for file_hash, file_path_str, size_bytes, ext in rows:
                            if not file_path_str:
                                continue

                            tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern)

                            for tag in tags:
                                tag_lower = tag.lower()
                                if tag_lower.startswith(f"{namespace}:"):
                                    value = tag_lower[len(namespace)+1:]
                                    if fnmatch(value, pattern):
                                        file_path = Path(file_path_str)
                                        if file_path.exists():
                                            if size_bytes is None:
                                                size_bytes = file_path.stat().st_size
                                            all_tags = api.get_tags_for_file(file_hash)
                                            entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
                                            results.append(entry)
                                        else:
                                            debug(f"File missing on disk: {file_path}")
                                        break

                            if limit is not None and len(results) >= limit:
                                return results
                    elif not match_all:
                        terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
                        if not terms:
                            terms = [query_lower]

                        debug(f"Performing filename/tag search for terms: {terms}")

                        fetch_limit = (limit or 45) * 50

                        conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
                        params = [f"%{t}%" for t in terms]

                        rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit)
                        debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")

                        word_regex = None
                        if len(terms) == 1:
                            term = terms[0]
                            has_wildcard = '*' in term or '?' in term

                            if has_wildcard:
                                try:
                                    from fnmatch import translate
                                    word_regex = re.compile(translate(term), re.IGNORECASE)
                                except Exception:
                                    word_regex = None
                            else:
                                try:
                                    pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
                                    word_regex = re.compile(pattern, re.IGNORECASE)
                                except Exception:
                                    word_regex = None

                        seen_files = set()
                        for file_id, file_path_str, size_bytes, file_hash in rows:
                            if not file_path_str or file_path_str in seen_files:
                                continue

                            if word_regex:
                                p = Path(file_path_str)
                                if not word_regex.search(p.name):
                                    continue
                            seen_files.add(file_path_str)

                            file_path = Path(file_path_str)
                            if file_path.exists():
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size

                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results

                        if terms:
                            title_hits: dict[str, dict[str, Any]] = {}
                            for term in terms:
                                title_pattern = f"title:%{term}%"
                                title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit)
                                for file_hash, file_path_str, size_bytes, ext in title_rows:
                                    if not file_path_str:
                                        continue
                                    entry = title_hits.get(file_hash)
                                    if entry:
                                        entry["count"] += 1
                                        if size_bytes is not None:
                                            entry["size"] = size_bytes
                                    else:
                                        title_hits[file_hash] = {
                                            "path": file_path_str,
                                            "size": size_bytes,
                                            "hash": file_hash,
                                            "count": 1,
                                        }

                            if title_hits:
                                required = len(terms)
                                for file_hash, info in title_hits.items():
                                    if info.get("count") != required:
                                        continue
                                    file_path_str = info.get("path")
                                    if not file_path_str or file_path_str in seen_files:
                                        continue
                                    file_path = Path(file_path_str)
                                    if not file_path.exists():
                                        continue
                                    seen_files.add(file_path_str)

                                    size_bytes = info.get("size")
                                    if size_bytes is None:
                                        try:
                                            size_bytes = file_path.stat().st_size
                                        except OSError:
                                            size_bytes = None

                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
                                    results.append(entry)
                                    if limit is not None and len(results) >= limit:
                                        return results

                        query_pattern = f"%{query_lower}%"
                        tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit)

                        for file_hash, file_path_str, size_bytes, ext in tag_rows:
                            if not file_path_str or file_path_str in seen_files:
                                continue
                            seen_files.add(file_path_str)

                            file_path = Path(file_path_str)
                            if file_path.exists():
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size

                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)

                                if limit is not None and len(results) >= limit:
                                    return results

                    else:
                        rows = api.get_all_files(limit)
                        for file_hash, file_path_str, size_bytes, ext in rows:
                            if file_path_str:
                                file_path = Path(file_path_str)
                                if file_path.exists():
                                    if size_bytes is None:
                                        size_bytes = file_path.stat().st_size

                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                    results.append(entry)

                    if results:
                        debug(f"Returning {len(results)} results from DB")
                    else:
                        debug("No results found in DB")
                    return results

            except Exception as e:
                log(f"⚠️ Database search failed: {e}", file=sys.stderr)
                debug(f"DB search exception details: {e}")
                return []

        except Exception as exc:
            log(f"❌ Local search failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Alias for search_store to match the interface expected by FileStorage."""
        return self.search_store(query, **kwargs)
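    # Query syntax handled above (examples only):
    #   "*"                      -> list everything (up to limit)
    #   "hash:<64-hex digest>"   -> exact hash lookup
    #   "title:intro*"           -> namespace search with fnmatch-style wildcards
    #   "intro, artist:someone"  -> comma-separated tokens are ANDed together
    # A bare 64-character hex string is rejected and must use the "hash:" prefix.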

    def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]:
        """Return the library root containing medios-macina.db.

        Prefer the store's configured location, then config override, then walk parents
        of the file path to find a directory with medios-macina.db."""
        candidates: list[Path] = []
        if self._location:
            candidates.append(Path(self._location).expanduser())
        # NOTE: get_local_storage_path is expected to be provided by the project's
        # config helpers; it is not imported in this module.
        cfg_root = get_local_storage_path(config) if config else None
        if cfg_root:
            candidates.append(Path(cfg_root).expanduser())

        for root in candidates:
            db_path = root / "medios-macina.db"
            if db_path.exists():
                return root

        try:
            for parent in [file_path] + list(file_path.parents):
                db_path = parent / "medios-macina.db"
                if db_path.exists():
                    return parent
        except Exception:
            pass
        return None

    def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]:
        """Retrieve file by hash, returning path to the file.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Path to the file or None if not found
        """
        try:
            # Normalize the hash
            normalized_hash = _normalize_hash(file_hash)
            if not normalized_hash:
                return None

            search_dir = Path(self._location).expanduser()
            from API.folder import API_folder_store

            with API_folder_store(search_dir) as db:
                # Search for file by hash
                file_path = db.search_hash(normalized_hash)

                if file_path and file_path.exists():
                    return file_path

                return None

        except Exception as exc:
            debug(f"Failed to get file for hash {file_hash}: {exc}")
            return None

    def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from the database by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found
        """
        try:
            # Normalize the hash
            normalized_hash = _normalize_hash(file_hash)
            if not normalized_hash:
                return None

            search_dir = Path(self._location).expanduser()
            from API.folder import DatabaseAPI

            with DatabaseAPI(search_dir) as api:
                # Get file hash
                file_hash_result = api.get_file_hash_by_hash(normalized_hash)
                if not file_hash_result:
                    return None

                # Query metadata directly from database
                cursor = api.get_cursor()
                cursor.execute("""
                    SELECT * FROM metadata WHERE hash = ?
                """, (file_hash_result,))

                row = cursor.fetchone()
                if not row:
                    return None

                metadata = dict(row)

                # Canonicalize metadata keys (no legacy aliases)
                if "file_path" in metadata and "path" not in metadata:
                    metadata["path"] = metadata.get("file_path")
                    metadata.pop("file_path", None)

                # Parse JSON fields
                for field in ['url', 'relationships']:
                    if metadata.get(field):
                        try:
                            metadata[field] = json.loads(metadata[field])
                        except (json.JSONDecodeError, TypeError):
                            metadata[field] = []

                return metadata
        except Exception as exc:
            debug(f"Failed to get metadata for hash {file_hash}: {exc}")
            return None
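    # Example result shape (illustrative; the real keys are whatever columns the
    # metadata table defines, with "file_path" canonicalised to "path"):
    #   store.get_metadata(h) -> {"hash": h, "ext": "mp4", "size": 1234,
    #                             "path": "/media/<hash>.mp4", "url": [...]}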

    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        """Get tags for a local file by hash.

        Returns:
            Tuple of (tags_list, store_name) where store_name is the actual store name
        """
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        db_tags = db.get_tags(file_hash)
                        if db_tags:
                            # Return actual store name instead of generic "local_db"
                            store_name = self._name if self._name else "local"
                            return list(db_tags), store_name
                except Exception as exc:
                    debug(f"Local DB lookup failed: {exc}")
            return [], "unknown"
        except Exception as exc:
            debug(f"get_tags failed for local file: {exc}")
            return [], "unknown"

    def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool:
        """Add tags to a local file by hash (via API_folder_store).

        Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags.
        Returns True if tags were successfully added.
        """
        from API.folder import API_folder_store
        try:
            if not self._location:
                return False

            try:
                with API_folder_store(Path(self._location)) as db:
                    # Get existing tags
                    existing_tags = list(db.get_tags(hash) or [])

                    # Merge new tags, handling namespace overwrites
                    for new_tag in tag:
                        if ':' in new_tag:
                            namespace = new_tag.split(':', 1)[0]
                            # Remove existing tags in same namespace
                            existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
                        # Add new tag if not already present (case-insensitive check against the
                        # merged list, so a re-added namespace:value tag is not silently dropped)
                        if new_tag.lower() not in {t.lower() for t in existing_tags}:
                            existing_tags.append(new_tag)

                    # Save merged tags
                    db.add_tags_to_hash(hash, existing_tags)
                    return True
            except Exception as exc:
                debug(f"Local DB add_tags failed: {exc}")
                return False
        except Exception as exc:
            debug(f"add_tag failed for local file: {exc}")
            return False
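    # Namespace collapsing (sketch): with existing tags ["title:old", "artist:a"],
    #   store.add_tag(h, ["title:new"]) leaves ["artist:a", "title:new"]
    # i.e. adding namespace:value replaces any earlier tag in that namespace.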

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Remove tags from a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        db.remove_tags_from_hash(file_hash, list(tags))
                        return True
                except Exception as exc:
                    debug(f"Local DB remove_tags failed: {exc}")
            return False
        except Exception as exc:
            debug(f"delete_tag failed for local file: {exc}")
            return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Get known url for a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        return list(meta.get("url") or [])
                except Exception as exc:
                    debug(f"Local DB get_metadata failed: {exc}")
            return []
        except Exception as exc:
            debug(f"get_url failed for local file: {exc}")
            return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Add known url to a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        existing_urls = list(meta.get("url") or [])
                        changed = False
                        for u in list(url or []):
                            if not u:
                                continue
                            if u not in existing_urls:
                                existing_urls.append(u)
                                changed = True
                        if changed:
                            db.update_metadata_by_hash(file_hash, {"url": existing_urls})
                        return True
                except Exception as exc:
                    debug(f"Local DB add_url failed: {exc}")
            return False
        except Exception as exc:
            debug(f"add_url failed for local file: {exc}")
            return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete known url from a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        existing_urls = list(meta.get("url") or [])
                        remove_set = {u for u in (url or []) if u}
                        if not remove_set:
                            return False
                        new_urls = [u for u in existing_urls if u not in remove_set]
                        if new_urls != existing_urls:
                            db.update_metadata_by_hash(file_hash, {"url": new_urls})
                        return True
                except Exception as exc:
                    debug(f"Local DB delete_url failed: {exc}")
            return False
        except Exception as exc:
            debug(f"delete_url failed for local file: {exc}")
            return False
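    # URL bookkeeping (sketch): urls are kept as a list in the metadata row, so
    #   store.add_url(h, ["https://example.com/a"])     # appends if missing
    #   store.delete_url(h, ["https://example.com/a"])  # filters it back out
    # Both return False when the store has no configured location.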

    def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
        """Delete a file from the folder store.

        Args:
            file_identifier: The file path (as string) or hash of the file to delete
            **kwargs: Optional parameters

        Returns:
            True if deletion succeeded, False otherwise
        """
        from API.folder import API_folder_store
        try:
            file_path = Path(file_identifier)

            # Delete from database
            with API_folder_store(Path(self._location)) as db:
                db.delete_file(file_path)

            # Delete the actual file from disk
            if file_path.exists():
                file_path.unlink()
                debug(f"Deleted file: {file_path}")
                return True
            else:
                debug(f"File not found on disk: {file_path}")
                return True  # Already gone
        except Exception as exc:
            debug(f"delete_file failed: {exc}")
            return False