Medios-Macina/Store/Folder.py

from __future__ import annotations

import json
import re
import shutil
import sys
from fnmatch import translate
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from SYS.logger import debug, log
from SYS.utils import sha256_file

from Store._base import Store


def _normalize_hash(value: Any) -> Optional[str]:
    candidate = str(value or '').strip().lower()
    if len(candidate) != 64:
        return None
    if any(ch not in '0123456789abcdef' for ch in candidate):
        return None
    return candidate


def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]:
    normalized = _normalize_hash(db_hash) if db_hash else None
    if normalized:
        return normalized
    return _normalize_hash(file_path.stem)


class Folder(Store):
    """"""
    # Track which locations have already been migrated to avoid repeated migrations
    _migrated_locations = set()
    
    def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None:
        self._location = location
        self._name = name
        
        if self._location:
            try:
                from API.folder import API_folder_store
                from pathlib import Path
                location_path = Path(self._location).expanduser()
                
                # Use context manager to ensure connection is properly closed
                with API_folder_store(location_path) as db:
                    if db.connection:
                        db.connection.commit()
                
                # Call migration and discovery at startup
                Folder.migrate_location(self._location)
            except Exception as exc:
                debug(f"Failed to initialize database for '{name}': {exc}")
    
    @classmethod
    def migrate_location(cls, location: Optional[str]) -> None:
        """Migrate a location to hash-based storage (one-time operation, call explicitly at startup)."""
        if not location:
            return
        
        from pathlib import Path
        location_path = Path(location).expanduser()
        location_str = str(location_path)
        
        # Only migrate once per location
        if location_str in cls._migrated_locations:
            return
        
        cls._migrated_locations.add(location_str)
        
        # Create a temporary instance just to call the migration
        temp_instance = cls(location=location)
        temp_instance._migrate_to_hash_storage(location_path)

    def _migrate_to_hash_storage(self, location_path: Path) -> None:
        """Migrate existing files from filename-based to hash-based storage.
        
        Checks for sidecars (.metadata, .tag) and imports them before renaming.
        Also ensures all files have a title: tag.
        """
        from API.folder import API_folder_store, read_sidecar, write_sidecar, find_sidecar
        
        try:
            with API_folder_store(location_path) as db:
                cursor = db.connection.cursor()
                
                # First pass: migrate filename-based files and add title tags
                # Scan all files in the storage directory
                for file_path in sorted(location_path.iterdir()):
                    if not file_path.is_file():
                        continue
                    
                    # Skip database files and sidecars
                    if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'):
                        continue
                    # Also skip if the file ends with -shm or -wal (SQLite journal files)
                    if file_path.name.endswith(('-shm', '-wal')):
                        continue
                    
                    # Check if filename is already a hash (without extension)
                    if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()):
                        continue  # Already migrated, will process in second pass
                    
                    try:
                        # Compute file hash
                        file_hash = sha256_file(file_path)
                        # Preserve extension in the hash-based filename
                        file_ext = file_path.suffix  # e.g., '.mp4'
                        hash_filename = file_hash + file_ext if file_ext else file_hash
                        hash_path = location_path / hash_filename
                        
                        # Check for sidecars and import them
                        sidecar_path = find_sidecar(file_path)
                        tags_to_add = []
                        url_to_add = []
                        has_title_tag = False
                        
                        if sidecar_path and sidecar_path.exists():
                            try:
                                _, tags, url = read_sidecar(sidecar_path)
                                if tags:
                                    tags_to_add = list(tags)
                                    # Check if title tag exists
                                    has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add)
                                if url:
                                    url_to_add = list(url)
                                debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr)
                                # Delete the sidecar after importing
                                sidecar_path.unlink()
                            except Exception as exc:
                                debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr)
                        
                        # Ensure there's a title tag (use original filename if not present)
                        if not has_title_tag:
                            tags_to_add.append(f"title:{file_path.name}")
                        
                        # Rename file to hash if needed
                        if hash_path != file_path and not hash_path.exists():
                            debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr)
                            file_path.rename(hash_path)
                            
                            # Create or update database entry
                            db.get_or_create_file_entry(hash_path)
                            
                            # Save extension metadata
                            ext_clean = file_ext.lstrip('.') if file_ext else ''
                            db.save_metadata(hash_path, {
                                'hash': file_hash,
                                'ext': ext_clean,
                                'size': hash_path.stat().st_size
                            })
                            
                            # Add all tags (including title tag)
                            if tags_to_add:
                                db.save_tags(hash_path, tags_to_add)
                                debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr)
                            
                            # Note: url would need a separate table if you want to store them
                            # For now, we're just noting them in debug
                            if url_to_add:
                                debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr)
                    
                    except Exception as exc:
                        debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr)
                
                # Second pass: ensure all files in database have a title: tag
                db.connection.commit()
                cursor.execute('''
                    SELECT f.hash, f.file_path 
                    FROM files f 
                    WHERE NOT EXISTS (
                        SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%'
                    )
                ''')
                files_without_title = cursor.fetchall()
                
                for file_hash, file_path_str in files_without_title:
                    try:
                        file_path = Path(file_path_str)
                        if file_path.exists():
                            # Use the filename as the title
                            title_tag = f"title:{file_path.name}"
                            db.save_tags(file_path, [title_tag])
                            debug(f"Added title tag to {file_path.name}", file=sys.stderr)
                    except Exception as exc:
                        debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr)
                
                db.connection.commit()
                
                # Third pass: discover files on disk that aren't in the database yet
                # These are hash-named files that were added after initial indexing
                cursor.execute('SELECT LOWER(hash) FROM files')
                db_hashes = {row[0] for row in cursor.fetchall()}
                
                discovered = 0
                for file_path in sorted(location_path.rglob("*")):
                    if file_path.is_file():
                        # Check if file name (without extension) is a 64-char hex hash
                        name_without_ext = file_path.stem
                        if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()):
                            file_hash = name_without_ext.lower()
                            
                            # Skip if already in DB
                            if file_hash in db_hashes:
                                continue
                            
                            try:
                                # Add file to DB (creates entry and auto-adds title: tag)
                                db.get_or_create_file_entry(file_path)
                                
                                # Save extension metadata
                                file_ext = file_path.suffix
                                ext_clean = file_ext.lstrip('.') if file_ext else ''
                                db.save_metadata(file_path, {
                                    'hash': file_hash,
                                    'ext': ext_clean,
                                    'size': file_path.stat().st_size
                                })
                                
                                discovered += 1
                            except Exception as e:
                                debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr)
                
                if discovered > 0:
                    debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr)
                    db.connection.commit()
        except Exception as exc:
            debug(f"Migration to hash storage failed: {exc}", file=sys.stderr)


    def location(self) -> str:
        return self._location

    def name(self) -> str:
        return self._name

    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Add file to local folder storage with full metadata support.
        
        Args:
            file_path: Path to the file to add
            move: If True, move file instead of copy (default: False)
            tag: Optional list of tag values to add
            url: Optional list of url to associate with the file
            title: Optional title (will be added as 'title:value' tag)
            
        Returns:
            File hash (SHA256 hex string) as identifier
        """
        move_file = bool(kwargs.get("move"))
        tag_list = kwargs.get("tag", [])
        url = kwargs.get("url", [])
        title = kwargs.get("title")
        
        # Extract title from tags if not explicitly provided
        if not title:
            for candidate in tag_list:
                if isinstance(candidate, str) and candidate.lower().startswith("title:"):
                    title = candidate.split(":", 1)[1].strip()
                    break
        
        # Fallback to filename if no title
        if not title:
            title = file_path.name
        
        # Ensure title is in tags
        title_tag = f"title:{title}"
        if not any(str(candidate).lower().startswith("title:") for candidate in tag_list):
            tag_list = [title_tag] + list(tag_list)
        
        try:
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}", file=sys.stderr)
            
            # Preserve extension in the stored filename
            file_ext = file_path.suffix  # e.g., '.mp4'
            save_filename = file_hash + file_ext if file_ext else file_hash
            save_file = Path(self._location) / save_filename
            
            # Check if file already exists
            from API.folder import API_folder_store
            with API_folder_store(Path(self._location)) as db:
                existing_path = db.search_hash(file_hash)
                if existing_path and existing_path.exists():
                    log(
                        f"✓ File already in local storage: {existing_path}",
                        file=sys.stderr,
                    )
                    # Still add tags and url if provided
                    if tag_list:
                        self.add_tag(file_hash, tag_list)
                    if url:
                        self.add_url(file_hash, url)
                    return file_hash

            # Move or copy file
            if move_file:
                shutil.move(str(file_path), str(save_file))
                debug(f"Local move: {save_file}", file=sys.stderr)
            else:
                shutil.copy2(str(file_path), str(save_file))
                debug(f"Local copy: {save_file}", file=sys.stderr)
                
            # Save to database
            with API_folder_store(Path(self._location)) as db:
                db.get_or_create_file_entry(save_file)
                # Save metadata including extension
                ext_clean = file_ext.lstrip('.') if file_ext else ''
                db.save_metadata(save_file, {
                    'hash': file_hash,
                    'ext': ext_clean,
                    'size': file_path.stat().st_size
                })
            
            # Add tags if provided
            if tag_list:
                self.add_tag(file_hash, tag_list)
            
            # Add url if provided
            if url:
                self.add_url(file_hash, url)
            
            log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr)
            return file_hash
            
        except Exception as exc:
            log(f"❌ Local storage failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search local database for files by title tag or filename."""
        from fnmatch import fnmatch
        from API.folder import DatabaseAPI

        limit = kwargs.get("limit")
        try:
            limit = int(limit) if limit is not None else None
        except (TypeError, ValueError):
            limit = None
        if isinstance(limit, int) and limit <= 0:
            limit = None

        query = query.lower()
        query_lower = query  # Ensure query_lower is defined for all code paths
        match_all = query == "*"
        results = []
        search_dir = Path(self._location).expanduser()

        tokens = [t.strip() for t in query.split(',') if t.strip()]

        if not match_all and len(tokens) == 1 and _normalize_hash(query):
            debug("Hash queries require 'hash:' prefix for local search")
            return results

        if not match_all and _normalize_hash(query):
            debug("Hash queries require 'hash:' prefix for local search")
            return results

        def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]:
            path_str = str(file_path)
            # Get title from tags if available, otherwise use hash as fallback
            title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
            if not title:
                # Fallback to hash if no title tag exists
                hash_value = _resolve_file_hash(db_hash, file_path)
                title = hash_value if hash_value else file_path.stem
            
            # Extract extension from file path
            ext = file_path.suffix.lstrip('.')
            if not ext:
                # Fallback: try to extract from title (original filename might be in title)
                title_path = Path(title)
                ext = title_path.suffix.lstrip('.')
            
            # Build clean entry with only necessary fields
            hash_value = _resolve_file_hash(db_hash, file_path)
            entry = {
                "title": title,
                "ext": ext,
                "path": path_str,
                "target": path_str,
                "store": self._name,
                "size": size_bytes,
                "hash": hash_value,
                "tag": tags,
            }
            return entry

        try:
            if not search_dir.exists():
                debug(f"Search directory does not exist: {search_dir}")
                return results

            try:
                with DatabaseAPI(search_dir) as api:
                    if tokens and len(tokens) > 1:
                        def _like_pattern(term: str) -> str:
                            return term.replace('*', '%').replace('?', '_')

                        def _ids_for_token(token: str) -> set[int]:
                            token = token.strip()
                            if not token:
                                return set()

                            if ':' in token and not token.startswith(':'):
                                namespace, pattern = token.split(':', 1)
                                namespace = namespace.strip().lower()
                                pattern = pattern.strip().lower()

                                if namespace == 'hash':
                                    normalized_hash = _normalize_hash(pattern)
                                    if not normalized_hash:
                                        return set()
                                    h = api.get_file_hash_by_hash(normalized_hash)
                                    return {h} if h else set()

                                if namespace == 'store':
                                    if pattern not in {'local', 'file', 'filesystem'}:
                                        return set()
                                    return api.get_all_file_hashes()

                                query_pattern = f"{namespace}:%"
                                tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern)
                                matched: set[str] = set()
                                for file_hash, tag_val in tag_rows:
                                    if not tag_val:
                                        continue
                                    tag_lower = str(tag_val).lower()
                                    if not tag_lower.startswith(f"{namespace}:"):
                                        continue
                                    value = tag_lower[len(namespace)+1:]
                                    if fnmatch(value, pattern):
                                        matched.add(file_hash)
                                return matched

                            term = token.lower()
                            like_pattern = f"%{_like_pattern(term)}%"
                            hashes = api.get_file_hashes_by_path_pattern(like_pattern)
                            hashes.update(api.get_file_hashes_by_tag_substring(like_pattern))
                            return hashes

                        try:
                            matching_hashes: set[str] | None = None
                            for token in tokens:
                                hashes = _ids_for_token(token)
                                matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes
                                if not matching_hashes:
                                    return results

                            if not matching_hashes:
                                return results

                            rows = api.get_file_metadata(matching_hashes, limit)
                            for file_hash, file_path_str, size_bytes, ext in rows:
                                if not file_path_str:
                                    continue
                                file_path = Path(file_path_str)
                                if not file_path.exists():
                                    continue
                                if size_bytes is None:
                                    try:
                                        size_bytes = file_path.stat().st_size
                                    except OSError:
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
                            return results
                        except Exception as exc:
                            log(f"⚠️  AND search failed: {exc}", file=sys.stderr)
                            debug(f"AND search exception details: {exc}")
                            return []

                    if ":" in query and not query.startswith(":"):
                        namespace, pattern = query.split(":", 1)
                        namespace = namespace.strip().lower()
                        pattern = pattern.strip().lower()
                        debug(f"Performing namespace search: {namespace}:{pattern}")

                        if namespace == "hash":
                            normalized_hash = _normalize_hash(pattern)
                            if not normalized_hash:
                                return results
                            h = api.get_file_hash_by_hash(normalized_hash)
                            hashes = {h} if h else set()
                            rows = api.get_file_metadata(hashes, limit)
                            for file_hash, file_path_str, size_bytes, ext in rows:
                                if not file_path_str:
                                    continue
                                file_path = Path(file_path_str)
                                if not file_path.exists():
                                    continue
                                if size_bytes is None:
                                    try:
                                        size_bytes = file_path.stat().st_size
                                    except OSError:
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
                            return results
                        
                        query_pattern = f"{namespace}:%"
                        rows = api.get_files_by_namespace_pattern(query_pattern, limit)
                        debug(f"Found {len(rows)} potential matches in DB")
                        
                        for file_hash, file_path_str, size_bytes, ext in rows:
                            if not file_path_str:
                                continue
                            
                            tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern)
                            
                            for tag in tags:
                                tag_lower = tag.lower()
                                if tag_lower.startswith(f"{namespace}:"):
                                    value = tag_lower[len(namespace)+1:]
                                    if fnmatch(value, pattern):
                                        file_path = Path(file_path_str)
                                        if file_path.exists():
                                            if size_bytes is None:
                                                size_bytes = file_path.stat().st_size
                                            all_tags = api.get_tags_for_file(file_hash)
                                            entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
                                            results.append(entry)
                                        else:
                                            debug(f"File missing on disk: {file_path}")
                                        break
                            
                            if limit is not None and len(results) >= limit:
                                return results
                    elif not match_all:
                        terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
                        if not terms:
                            terms = [query_lower]
                        
                        debug(f"Performing filename/tag search for terms: {terms}")
                        
                        fetch_limit = (limit or 45) * 50
                        
                        conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
                        params = [f"%{t}%" for t in terms]
                        
                        rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit)
                        debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
                        
                        word_regex = None
                        if len(terms) == 1:
                            term = terms[0]
                            has_wildcard = '*' in term or '?' in term
                            
                            if has_wildcard:
                                try:
                                    from fnmatch import translate
                                    word_regex = re.compile(translate(term), re.IGNORECASE)
                                except Exception:
                                    word_regex = None
                            else:
                                try:
                                    pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
                                    word_regex = re.compile(pattern, re.IGNORECASE)
                                except Exception:
                                    word_regex = None

                        seen_files = set()
                        for file_id, file_path_str, size_bytes, file_hash in rows:
                            if not file_path_str or file_path_str in seen_files:
                                continue
                            
                            if word_regex:
                                p = Path(file_path_str)
                                if not word_regex.search(p.name):
                                    continue
                            seen_files.add(file_path_str)
                            
                            file_path = Path(file_path_str)
                            if file_path.exists():
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size
                                
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results

                        if terms:
                            title_hits: dict[str, dict[str, Any]] = {}
                            for term in terms:
                                title_pattern = f"title:%{term}%"
                                title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit)
                                for file_hash, file_path_str, size_bytes, ext in title_rows:
                                    if not file_path_str:
                                        continue
                                    entry = title_hits.get(file_hash)
                                    if entry:
                                        entry["count"] += 1
                                        if size_bytes is not None:
                                            entry["size"] = size_bytes
                                    else:
                                        title_hits[file_hash] = {
                                            "path": file_path_str,
                                            "size": size_bytes,
                                            "hash": file_hash,
                                            "count": 1,
                                        }

                            if title_hits:
                                required = len(terms)
                                for file_hash, info in title_hits.items():
                                    if info.get("count") != required:
                                        continue
                                    file_path_str = info.get("path")
                                    if not file_path_str or file_path_str in seen_files:
                                        continue
                                    file_path = Path(file_path_str)
                                    if not file_path.exists():
                                        continue
                                    seen_files.add(file_path_str)

                                    size_bytes = info.get("size")
                                    if size_bytes is None:
                                        try:
                                            size_bytes = file_path.stat().st_size
                                        except OSError:
                                            size_bytes = None

                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
                                    results.append(entry)
                                    if limit is not None and len(results) >= limit:
                                        return results
                        
                        query_pattern = f"%{query_lower}%"
                        tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit)
                        
                        for file_hash, file_path_str, size_bytes, ext in tag_rows:
                            if not file_path_str or file_path_str in seen_files:
                                continue
                            seen_files.add(file_path_str)
                            
                            file_path = Path(file_path_str)
                            if file_path.exists():
                                if size_bytes is None:
                                    size_bytes = file_path.stat().st_size
                                
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                results.append(entry)
                                
                                if limit is not None and len(results) >= limit:
                                    return results
                    
                    else:
                        rows = api.get_all_files(limit)
                        for file_hash, file_path_str, size_bytes, ext in rows:
                            if file_path_str:
                                file_path = Path(file_path_str)
                                if file_path.exists():
                                    if size_bytes is None:
                                        size_bytes = file_path.stat().st_size
                                    
                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, file_hash)
                                    results.append(entry)
                    
                    if results:
                        debug(f"Returning {len(results)} results from DB")
                    else:
                        debug("No results found in DB")
                    return results
                    
            except Exception as e:
                log(f"⚠️  Database search failed: {e}", file=sys.stderr)
                debug(f"DB search exception details: {e}")
                return []
        
        except Exception as exc:
            log(f"❌ Local search failed: {exc}", file=sys.stderr)
            raise


    def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]:
        """Return the library root containing medios-macina.db.

        Prefer the store's configured location, then config override, then walk parents
        of the file path to find a directory with medios-macina.db."""
        candidates: list[Path] = []
        if self._location:
            candidates.append(Path(self._location).expanduser())
        cfg_root = get_local_storage_path(config) if config else None
        if cfg_root:
            candidates.append(Path(cfg_root).expanduser())

        for root in candidates:
            db_path = root / "medios-macina.db"
            if db_path.exists():
                return root

        try:
            for parent in [file_path] + list(file_path.parents):
                db_path = parent / "medios-macina.db"
                if db_path.exists():
                    return parent
        except Exception:
            pass
        return None

    def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]:
        """Retrieve file by hash, returning path to the file.
        
        Args:
            file_hash: SHA256 hash of the file (64-char hex string)
            
        Returns:
            Path to the file or None if not found
        """
        try:
            # Normalize the hash
            normalized_hash = _normalize_hash(file_hash)
            if not normalized_hash:
                return None
            
            search_dir = Path(self._location).expanduser()
            from API.folder import API_folder_store
            
            with API_folder_store(search_dir) as db:
                # Search for file by hash
                file_path = db.search_hash(normalized_hash)
                
                if file_path and file_path.exists():
                    return file_path
                
                return None
                
        except Exception as exc:
            debug(f"Failed to get file for hash {file_hash}: {exc}")
            return None

    def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from the database by hash.
        
        Args:
            file_hash: SHA256 hash of the file (64-char hex string)
            
        Returns:
            Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found
        """
        try:
            # Normalize the hash
            normalized_hash = _normalize_hash(file_hash)
            if not normalized_hash:
                return None
            
            search_dir = Path(self._location).expanduser()
            from API.folder import DatabaseAPI
            
            with DatabaseAPI(search_dir) as api:
                # Get file hash
                file_hash_result = api.get_file_hash_by_hash(normalized_hash)
                if not file_hash_result:
                    return None
                
                # Query metadata directly from database
                cursor = api.get_cursor()
                cursor.execute("""
                    SELECT * FROM metadata WHERE hash = ?
                """, (file_hash_result,))
                
                row = cursor.fetchone()
                if not row:
                    return None
                
                metadata = dict(row)

                # Canonicalize metadata keys (no legacy aliases)
                if "file_path" in metadata and "path" not in metadata:
                    metadata["path"] = metadata.get("file_path")
                metadata.pop("file_path", None)
                
                # Parse JSON fields
                for field in ['url', 'relationships']:
                    if metadata.get(field):
                        try:
                            metadata[field] = json.loads(metadata[field])
                        except (json.JSONDecodeError, TypeError):
                            metadata[field] = [] if field == 'url' else []
                
                return metadata
        except Exception as exc:
            debug(f"Failed to get metadata for hash {file_hash}: {exc}")
            return None

    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        """Get tags for a local file by hash.
        
        Returns:
            Tuple of (tags_list, store_name) where store_name is the actual store name
        """
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        db_tags = db.get_tags(file_hash)
                        if db_tags:
                            # Return actual store name instead of generic "local_db"
                            store_name = self._name if self._name else "local"
                            return list(db_tags), store_name
                except Exception as exc:
                    debug(f"Local DB lookup failed: {exc}")
            return [], "unknown"
        except Exception as exc:
            debug(f"get_tags failed for local file: {exc}")
            return [], "unknown"

    def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool:
        """Add tags to a local file by hash (via API_folder_store).
        
        Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags.
        Returns True if tags were successfully added.
        """
        from API.folder import API_folder_store
        try:
            if not self._location:
                return False
            
            try:
                with API_folder_store(Path(self._location)) as db:
                    # Get existing tags
                    existing_tags = list(db.get_tags(hash) or [])
                    original_tags_lower = {t.lower() for t in existing_tags}
                    
                    # Merge new tags, handling namespace overwrites
                    for new_tag in tag:
                        if ':' in new_tag:
                            namespace = new_tag.split(':', 1)[0]
                            # Remove existing tags in same namespace
                            existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
                        # Add new tag if not already present (case-insensitive check)
                        if new_tag.lower() not in original_tags_lower:
                            existing_tags.append(new_tag)
                    
                    # Save merged tags
                    db.add_tags_to_hash(hash, existing_tags)
                    return True
            except Exception as exc:
                debug(f"Local DB add_tags failed: {exc}")
                return False
        except Exception as exc:
            debug(f"add_tag failed for local file: {exc}")
            return False

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Remove tags from a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        db.remove_tags_from_hash(file_hash, list(tags))
                        return True
                except Exception as exc:
                    debug(f"Local DB remove_tags failed: {exc}")
            return False
        except Exception as exc:
            debug(f"delete_tag failed for local file: {exc}")
            return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Get known url for a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        return list(meta.get("url") or [])
                except Exception as exc:
                    debug(f"Local DB get_metadata failed: {exc}")
            return []
        except Exception as exc:
            debug(f"get_url failed for local file: {exc}")
            return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Add known url to a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        existing_urls = list(meta.get("url") or [])
                        changed = False
                        for u in list(url or []):
                            if not u:
                                continue
                            if u not in existing_urls:
                                existing_urls.append(u)
                                changed = True
                        if changed:
                            db.update_metadata_by_hash(file_hash, {"url": existing_urls})
                            return True
                except Exception as exc:
                    debug(f"Local DB add_url failed: {exc}")
            return False
        except Exception as exc:
            debug(f"add_url failed for local file: {exc}")
            return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete known url from a local file by hash."""
        from API.folder import API_folder_store
        try:
            file_hash = file_identifier
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
                        meta = db.get_metadata(file_hash) or {}
                        existing_urls = list(meta.get("url") or [])
                        remove_set = {u for u in (url or []) if u}
                        if not remove_set:
                            return False
                        new_urls = [u for u in existing_urls if u not in remove_set]
                        if new_urls != existing_urls:
                            db.update_metadata_by_hash(file_hash, {"url": new_urls})
                            return True
                except Exception as exc:
                    debug(f"Local DB delete_url failed: {exc}")
            return False
        except Exception as exc:
            debug(f"delete_url failed for local file: {exc}")
            return False

    def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
        """Delete a file from the folder store.
        
        Args:
            file_identifier: The file path (as string) or hash of the file to delete
            **kwargs: Optional parameters
            
        Returns:
            True if deletion succeeded, False otherwise
        """
        from API.folder import API_folder_store
        try:
            file_path = Path(file_identifier)
            
            # Delete from database
            with API_folder_store(Path(self._location)) as db:
                db.delete_file(file_path)
            
            # Delete the actual file from disk
            if file_path.exists():
                file_path.unlink()
                debug(f"Deleted file: {file_path}")
                return True
            else:
                debug(f"File not found on disk: {file_path}")
                return True  # Already gone
        except Exception as exc:
            debug(f"delete_file failed: {exc}")
            return False