dfdf

2025-12-20 23:57:44 -08:00
parent b75faa49a2
commit 8ca5783970
39 changed files with 4294 additions and 1722 deletions
--- a/Store/Folder.py
+++ b/Store/Folder.py
@@ -452,7 +452,44 @@ class Folder(Store):

        query = query.lower()
        query_lower = query  # Ensure query_lower is defined for all code paths
-        match_all = query == "*"
+
+        def _normalize_ext_filter(value: str) -> str:
+            """Lower-case *value*, drop surrounding blanks and leading dots, keep only alphanumerics."""
+            cleaned = str(value or "").strip().lower().lstrip('.')
+            return "".join(filter(str.isalnum, cleaned))
+
+        def _extract_system_filetype_ext(text: str) -> Optional[str]:
+            """Return the normalized extension named by a `system:filetype` term, or None.
+
+            Matches e.g. `system:filetype = png`; the '=' is optional and spacing is flexible.
+            """
+            # One pattern with an optional '=' already covers the explicit '=' spelling.
+            m = re.search(r"\bsystem:filetype\s*(?:=\s*)?([^\s,]+)", text)
+            return (_normalize_ext_filter(m.group(1)) or None) if m else None
+
+        # Support `ext:<value>` and Hydrus-style `system:filetype = <value>` anywhere
+        # in the query (space or comma separated).
+        ext_filter: Optional[str] = None
+        try:
+            sys_ext = _extract_system_filetype_ext(query_lower)
+            if sys_ext:
+                ext_filter = sys_ext
+                query_lower = re.sub(r"\s*\bsystem:filetype\s*(?:=\s*)?[^\s,]+", " ", query_lower)
+                query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
+                query = query_lower
+
+            m = re.search(r"\bext:([^\s,]+)", query_lower)
+            if not m:
+                m = re.search(r"\bextension:([^\s,]+)", query_lower)
+            if m:
+                ext_filter = _normalize_ext_filter(m.group(1)) or None
+                query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
+                query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
+                query = query_lower
+        except Exception:
+            ext_filter = None
+
+        match_all = query == "*" or (not query and bool(ext_filter))
        results = []
        search_dir = Path(self._location).expanduser()

@@ -518,6 +555,41 @@ class Folder(Store):

            try:
                with DatabaseAPI(search_dir) as api:
+                    ext_hashes: set[str] | None = None
+                    if ext_filter:
+                        # Fetch a bounded set of hashes to intersect with other filters.
+                        ext_fetch_limit = (limit or 45) * 50
+                        ext_hashes = api.get_file_hashes_by_ext(ext_filter, limit=ext_fetch_limit)
+
+                    # ext-only search: query is empty (or coerced to match_all above).
+                    if ext_filter and (not query_lower or query_lower == "*"):
+                        rows = api.get_files_by_ext(ext_filter, limit)
+                        for file_hash, file_path_str, size_bytes, ext in rows:
+                            if not file_path_str:
+                                continue
+                            file_path = Path(file_path_str)
+                            if not file_path.exists():
+                                continue
+                            if size_bytes is None:
+                                try:
+                                    size_bytes = file_path.stat().st_size
+                                except OSError:
+                                    size_bytes = None
+                            tags = api.get_tags_for_file(file_hash)
+                            entry = _create_entry(file_path, tags, size_bytes, file_hash)
+                            try:
+                                db_ext = str(ext or "").strip().lstrip('.')
+                                if db_ext:
+                                    entry["ext"] = db_ext
+                            except Exception:
+                                pass
+                            results.append(entry)
+                            if limit is not None and len(results) >= limit:
+                                return results
+                        backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
+                        debug(f"[folder:{backend_label}] {len(results)} result(s)")
+                        return results
+
                    if tokens and len(tokens) > 1:
                        url_fetch_limit = (limit or 45) * 50

@@ -546,6 +618,22 @@ class Folder(Store):
                                        return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
                                    return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)

+                                if namespace == 'system':
+                                    # Hydrus-compatible query: system:filetype = png
+                                    m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
+                                    if m_ft:
+                                        normalized_ext = _normalize_ext_filter(m_ft.group(1))
+                                        if not normalized_ext:
+                                            return set()
+                                        return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
+                                    return set()
+
+                                if namespace in {'ext', 'extension'}:
+                                    normalized_ext = _normalize_ext_filter(pattern)
+                                    if not normalized_ext:
+                                        return set()
+                                    return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
+
                                if namespace == 'store':
                                    if pattern not in {'local', 'file', 'filesystem'}:
                                        return set()
@@ -579,6 +667,11 @@ class Folder(Store):
                                if not matching_hashes:
                                    return results

+                            if ext_hashes is not None:
+                                matching_hashes = (matching_hashes or set()) & ext_hashes
+                                if not matching_hashes:
+                                    return results
+
                            if not matching_hashes:
                                return results

@@ -596,6 +689,12 @@ class Folder(Store):
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
+                                try:
+                                    db_ext = str(ext or "").strip().lstrip('.')
+                                    if db_ext:
+                                        entry["ext"] = db_ext
+                                except Exception:
+                                    pass
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
@@ -631,6 +730,12 @@ class Folder(Store):
                                        size_bytes = None
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
+                                try:
+                                    db_ext = str(ext or "").strip().lstrip('.')
+                                    if db_ext:
+                                        entry["ext"] = db_ext
+                                except Exception:
+                                    pass
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
@@ -658,6 +763,67 @@ class Folder(Store):
                                if limit is not None and len(results) >= limit:
                                    return results
                            return results
+
+                        if namespace == "system":
+                            # Hydrus-compatible query: system:filetype = png
+                            m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
+                            if m_ft:
+                                normalized_ext = _normalize_ext_filter(m_ft.group(1))
+                                if not normalized_ext:
+                                    return results
+                                rows = api.get_files_by_ext(normalized_ext, limit)
+                                for file_hash, file_path_str, size_bytes, ext in rows:
+                                    if not file_path_str:
+                                        continue
+                                    file_path = Path(file_path_str)
+                                    if not file_path.exists():
+                                        continue
+                                    if size_bytes is None:
+                                        try:
+                                            size_bytes = file_path.stat().st_size
+                                        except OSError:
+                                            size_bytes = None
+                                    tags = api.get_tags_for_file(file_hash)
+                                    entry = _create_entry(file_path, tags, size_bytes, file_hash)
+                                    try:
+                                        db_ext = str(ext or "").strip().lstrip('.')
+                                        if db_ext:
+                                            entry["ext"] = db_ext
+                                    except Exception:
+                                        pass
+                                    results.append(entry)
+                                    if limit is not None and len(results) >= limit:
+                                        return results
+                                return results
+
+                        if namespace in {"ext", "extension"}:
+                            normalized_ext = _normalize_ext_filter(pattern)
+                            if not normalized_ext:
+                                return results
+                            rows = api.get_files_by_ext(normalized_ext, limit)
+                            for file_hash, file_path_str, size_bytes, ext in rows:
+                                if not file_path_str:
+                                    continue
+                                file_path = Path(file_path_str)
+                                if not file_path.exists():
+                                    continue
+                                if size_bytes is None:
+                                    try:
+                                        size_bytes = file_path.stat().st_size
+                                    except OSError:
+                                        size_bytes = None
+                                tags = api.get_tags_for_file(file_hash)
+                                entry = _create_entry(file_path, tags, size_bytes, file_hash)
+                                try:
+                                    db_ext = str(ext or "").strip().lstrip('.')
+                                    if db_ext:
+                                        entry["ext"] = db_ext
+                                except Exception:
+                                    pass
+                                results.append(entry)
+                                if limit is not None and len(results) >= limit:
+                                    return results
+                            return results
                        
                        query_pattern = f"{namespace}:%"
                        rows = api.get_files_by_namespace_pattern(query_pattern, limit)
@@ -674,12 +840,20 @@ class Folder(Store):
                                if tag_lower.startswith(f"{namespace}:"):
                                    value = tag_lower[len(namespace)+1:]
                                    if fnmatch(value, pattern):
+                                        if ext_hashes is not None and file_hash not in ext_hashes:
+                                            break
                                        file_path = Path(file_path_str)
                                        if file_path.exists():
                                            if size_bytes is None:
                                                size_bytes = file_path.stat().st_size
                                            all_tags = api.get_tags_for_file(file_hash)
                                            entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
+                                            try:
+                                                db_ext = str(ext or "").strip().lstrip('.')
+                                                if db_ext:
+                                                    entry["ext"] = db_ext
+                                            except Exception:
+                                                pass
                                            results.append(entry)
                                        else:
                                            debug(f"File missing on disk: {file_path}")
@@ -703,6 +877,8 @@ class Folder(Store):
                            for file_hash, file_path_str, size_bytes, ext in term_rows:
                                if not file_path_str:
                                    continue
+                                if ext_hashes is not None and file_hash not in ext_hashes:
+                                    continue
                                entry = hits.get(file_hash)
                                if entry:
                                    entry["count"] += 1
@@ -746,6 +922,8 @@ class Folder(Store):
                        rows = api.get_all_files(limit)
                        for file_hash, file_path_str, size_bytes, ext in rows:
                            if file_path_str:
+                                if ext_hashes is not None and file_hash not in ext_hashes:
+                                    continue
                                file_path = Path(file_path_str)
                                if file_path.exists():
                                    if size_bytes is None:
@@ -753,6 +931,12 @@ class Folder(Store):
                                    
                                    tags = api.get_tags_for_file(file_hash)
                                    entry = _create_entry(file_path, tags, size_bytes, file_hash)
+                                    try:
+                                        db_ext = str(ext or "").strip().lstrip('.')
+                                        if db_ext:
+                                            entry["ext"] = db_ext
+                                    except Exception:
+                                        pass
                                    results.append(entry)
                    
                    backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
@@ -896,7 +1080,7 @@ class Folder(Store):
                        if db_tags:
                            # Return actual store name instead of generic "local_db"
                            store_name = self._name if self._name else "local"
-                            return list(db_tags), store_name
+                            return [str(t).strip().lower() for t in db_tags if isinstance(t, str) and t.strip()], store_name
                except Exception as exc:
                    debug(f"Local DB lookup failed: {exc}")
            return [], "unknown"
@@ -917,22 +1101,30 @@ class Folder(Store):
            
            try:
                with API_folder_store(Path(self._location)) as db:
-                    # Get existing tags
-                    existing_tags = list(db.get_tags(hash) or [])
-                    original_tags_lower = {t.lower() for t in existing_tags}
-                    
-                    # Merge new tags, handling namespace overwrites
-                    for new_tag in tag:
-                        if ':' in new_tag:
-                            namespace = new_tag.split(':', 1)[0]
-                            # Remove existing tags in same namespace
-                            existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
-                        # Add new tag if not already present (case-insensitive check)
-                        if new_tag.lower() not in original_tags_lower:
-                            existing_tags.append(new_tag)
-                    
-                    # Save merged tags
-                    db.add_tags_to_hash(hash, existing_tags)
+                    existing_tags = [t for t in (db.get_tags(hash) or []) if isinstance(t, str) and t.strip()]
+
+                    from metadata import compute_namespaced_tag_overwrite
+
+                    _to_remove, _to_add, merged = compute_namespaced_tag_overwrite(existing_tags, tag or [])
+                    if not _to_remove and not _to_add:
+                        return True
+
+                    # Folder DB tag table is case-sensitive and add_tags_to_hash() is additive.
+                    # To enforce lowercase-only tags and namespace overwrites, rewrite the full tag set.
+                    cursor = db.connection.cursor()
+                    cursor.execute("DELETE FROM tags WHERE hash = ?", (hash,))
+                    for t in merged:
+                        t = str(t).strip().lower()
+                        if t:
+                            cursor.execute(
+                                "INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)",
+                                (hash, t),
+                            )
+                    db.connection.commit()
+                    try:
+                        db._update_metadata_modified_time(hash)
+                    except Exception:
+                        pass
                    return True
            except Exception as exc:
                debug(f"Local DB add_tags failed: {exc}")
@@ -949,7 +1141,10 @@ class Folder(Store):
            if self._location:
                try:
                    with API_folder_store(Path(self._location)) as db:
-                        db.remove_tags_from_hash(file_hash, list(tags))
+                        tag_list = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
+                        if not tag_list:
+                            return True
+                        db.remove_tags_from_hash(file_hash, tag_list)
                        return True
                except Exception as exc:
                    debug(f"Local DB remove_tags failed: {exc}")
@@ -1006,6 +1201,130 @@ class Folder(Store):
            debug(f"add_url failed for local file: {exc}")
            return False

+    def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
+        """Add known urls to many local files in one DB session.
+
+        This is a performance optimization used by cmdlets that receive many PipeObjects.
+
+        Args:
+            items: ``(file_hash, urls)`` pairs. Hashes are stripped and lower-cased;
+                urls given for a repeated hash are merged in first-seen order.
+            **kwargs: Accepted for interface compatibility; not used here.
+
+        Returns:
+            True when every requested hash was processed (including the case where
+            there was nothing to do). False when the store has no location, the DB
+            object exposes no connection, the existing urls of some hashes could
+            not be read (those hashes are left untouched), or an error was raised.
+        """
+        from API.folder import API_folder_store
+        try:
+            if not self._location:
+                return False
+
+            import json
+
+            # Normalize + coalesce duplicates per hash.
+            try:
+                from metadata import normalize_urls
+            except Exception:
+                normalize_urls = None  # type: ignore
+
+            merged_by_hash: Dict[str, List[str]] = {}
+            for file_identifier, url_list in (items or []):
+                file_hash = str(file_identifier or "").strip().lower()
+                if not file_hash:
+                    continue
+
+                incoming: List[str]
+                if normalize_urls is not None:
+                    try:
+                        incoming = normalize_urls(url_list)
+                    except Exception:
+                        incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
+                else:
+                    incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
+
+                if not incoming:
+                    continue
+
+                bucket = merged_by_hash.setdefault(file_hash, [])
+                for u in incoming:
+                    if u and u not in bucket:
+                        bucket.append(u)
+
+            if not merged_by_hash:
+                return True
+
+            with API_folder_store(Path(self._location)) as db:
+                conn = getattr(db, "connection", None)
+                if conn is None:
+                    return False
+                cursor = conn.cursor()
+                hashes = list(merged_by_hash)
+
+                # Ensure metadata rows exist (may be needed for older entries).
+                for file_hash in hashes:
+                    try:
+                        cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
+                    except Exception:
+                        continue
+
+                # Load existing urls for all hashes in chunks. A hash may only be
+                # written back if its chunk was read successfully: treating a failed
+                # read as "no urls stored" would replace the stored list with just
+                # the incoming urls and silently drop the rest.
+                existing_urls_by_hash: Dict[str, List[str]] = {}
+                unreadable: set[str] = set()
+                chunk_size = 400
+                for i in range(0, len(hashes), chunk_size):
+                    chunk = hashes[i : i + chunk_size]
+                    placeholders = ",".join(["?"] * len(chunk))
+                    try:
+                        cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
+                        rows = cursor.fetchall() or []
+                    except Exception as exc:
+                        debug(f"add_url_bulk: could not load existing urls: {exc}")
+                        unreadable.update(chunk)
+                        continue
+
+                    for row in rows:
+                        try:
+                            row_hash = str(row[0]).strip().lower()
+                        except Exception:
+                            continue
+                        try:
+                            raw_urls = row[1]
+                        except Exception:
+                            raw_urls = None
+
+                        parsed_urls: List[str] = []
+                        if raw_urls:
+                            try:
+                                parsed = json.loads(raw_urls)
+                                if normalize_urls is not None:
+                                    parsed_urls = normalize_urls(parsed)
+                                elif isinstance(parsed, list):
+                                    parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
+                            except Exception:
+                                parsed_urls = []
+
+                        existing_urls_by_hash[row_hash] = parsed_urls
+
+                # Compute updates and write in one commit.
+                updates: List[tuple[str, str]] = []
+                for file_hash, incoming_urls in merged_by_hash.items():
+                    if file_hash in unreadable:
+                        # Stored urls are unknown; do not risk overwriting them.
+                        continue
+                    existing_urls = existing_urls_by_hash.get(file_hash) or []
+                    final = list(existing_urls)
+                    for u in incoming_urls:
+                        if u and u not in final:
+                            final.append(u)
+                    if final != existing_urls:
+                        try:
+                            updates.append((json.dumps(final), file_hash))
+                        except Exception:
+                            continue
+
+                if updates:
+                    cursor.executemany(
+                        "UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
+                        updates,
+                    )
+
+                conn.commit()
+                # Report partial application honestly so callers can retry.
+                return not unreadable
+        except Exception as exc:
+            debug(f"add_url_bulk failed for local file: {exc}")
+            return False
+
    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete known url from a local file by hash."""
        from API.folder import API_folder_store
@@ -1031,6 +1350,119 @@ class Folder(Store):
            debug(f"delete_url failed for local file: {exc}")
            return False

+    def delete_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
+        """Remove known urls from many local files using a single DB session.
+
+        Each item is a ``(file_hash, urls)`` pair. Returns True on success or when
+        there is nothing to remove; returns False when the store has no location,
+        the DB object exposes no connection, or an error is raised along the way.
+        """
+        from API.folder import API_folder_store
+        try:
+            if not self._location:
+                return False
+
+            import json
+
+            try:
+                from metadata import normalize_urls
+            except Exception:
+                normalize_urls = None  # type: ignore
+
+            def _stripped(values: Any) -> List[str]:
+                # Plain fallback used when the project normalizer is missing or raises.
+                return [str(u).strip() for u in (values or []) if str(u).strip()]
+
+            # Group the urls to drop by lower-cased hash.
+            remove_by_hash: Dict[str, set[str]] = {}
+            for file_identifier, url_list in (items or []):
+                file_hash = str(file_identifier or "").strip().lower()
+                if not file_hash:
+                    continue
+
+                if normalize_urls is None:
+                    candidates = _stripped(url_list)
+                else:
+                    try:
+                        candidates = normalize_urls(url_list)
+                    except Exception:
+                        candidates = _stripped(url_list)
+
+                doomed = {u for u in candidates if u}
+                if doomed:
+                    remove_by_hash.setdefault(file_hash, set()).update(doomed)
+
+            if not remove_by_hash:
+                return True
+
+            with API_folder_store(Path(self._location)) as db:
+                conn = getattr(db, "connection", None)
+                if conn is None:
+                    return False
+                cursor = conn.cursor()
+                hashes = list(remove_by_hash)
+
+                # Make sure every hash has a metadata row before reading it back.
+                for file_hash in hashes:
+                    try:
+                        cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
+                    except Exception:
+                        pass
+
+                # Read the stored url lists, 400 hashes per query; a failed query
+                # leaves the affected hashes with an empty list.
+                existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in hashes}
+                chunk_size = 400
+                for start in range(0, len(hashes), chunk_size):
+                    chunk = hashes[start : start + chunk_size]
+                    if not chunk:
+                        continue
+                    placeholders = ",".join(["?"] * len(chunk))
+                    try:
+                        cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
+                        rows = cursor.fetchall() or []
+                    except Exception:
+                        rows = []
+
+                    for row in rows:
+                        try:
+                            row_hash = str(row[0]).strip().lower()
+                        except Exception:
+                            continue
+                        try:
+                            raw_urls = row[1]
+                        except Exception:
+                            raw_urls = None
+
+                        stored: List[str] = []
+                        if raw_urls:
+                            try:
+                                decoded = json.loads(raw_urls)
+                                if normalize_urls is not None:
+                                    stored = normalize_urls(decoded)
+                                elif isinstance(decoded, list):
+                                    stored = [str(u).strip() for u in decoded if str(u).strip()]
+                            except Exception:
+                                stored = []
+
+                        existing_urls_by_hash[row_hash] = stored
+
+                # Apply removals; only rows whose list actually changes are rewritten.
+                updates: List[tuple[str, str]] = []
+                for file_hash, remove_set in remove_by_hash.items():
+                    before = existing_urls_by_hash.get(file_hash) or []
+                    after = [u for u in before if u not in remove_set]
+                    if after == before:
+                        continue
+                    try:
+                        updates.append((json.dumps(after), file_hash))
+                    except Exception:
+                        continue
+
+                if updates:
+                    cursor.executemany(
+                        "UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
+                        updates,
+                    )
+
+                conn.commit()
+                return True
+        except Exception as exc:
+            debug(f"delete_url_bulk failed for local file: {exc}")
+            return False
+
    def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
        """Get notes for a local file by hash."""
        from API.folder import API_folder_store
@@ -1077,6 +1509,94 @@ class Folder(Store):
            debug(f"set_note failed for local file: {exc}")
            return False

+    def set_note_bulk(self, items: List[tuple[str, str, str]], **kwargs: Any) -> bool:
+        """Set notes for many local files in one DB session.
+
+        Preserves existing semantics by only setting notes for hashes that still
+        map to a file path that exists on disk.
+
+        Args:
+            items: ``(file_hash, note_name, note_text)`` triples. Entries with an
+                empty hash, a hash rejected by ``_normalize_hash``, or an empty
+                note name are skipped.
+            **kwargs: Accepted for interface compatibility; not used here.
+
+        Returns:
+            True when there was nothing valid to write or the batch was
+            committed. False when the store has no location, the DB connection
+            is unavailable, no requested hash maps to an existing file, or any
+            unexpected error occurs (the error is logged via ``debug``).
+        """
+        from API.folder import API_folder_store
+        try:
+            if not self._location:
+                return False
+
+            # Normalize input.
+            normalized: List[tuple[str, str, str]] = []
+            for file_identifier, name, text in (items or []):
+                file_hash = str(file_identifier or "").strip().lower()
+                note_name = str(name or "").strip()
+                note_text = str(text or "")
+                if not file_hash or not _normalize_hash(file_hash) or not note_name:
+                    continue
+                normalized.append((file_hash, note_name, note_text))
+
+            if not normalized:
+                return True
+
+            with API_folder_store(Path(self._location)) as db:
+                conn = getattr(db, "connection", None)
+                if conn is None:
+                    return False
+                cursor = conn.cursor()
+
+                # Look up file paths for hashes in chunks (to verify existence).
+                # NOTE(review): the chunk size presumably keeps each IN (...) list
+                # under SQLite's bound-parameter limit - confirm before raising it.
+                wanted_hashes = sorted({h for (h, _n, _t) in normalized})
+                hash_to_path: Dict[str, str] = {}
+                chunk_size = 400
+                for i in range(0, len(wanted_hashes), chunk_size):
+                    # range() never starts past the end, so the chunk is never empty.
+                    chunk = wanted_hashes[i : i + chunk_size]
+                    placeholders = ",".join(["?"] * len(chunk))
+                    try:
+                        cursor.execute(f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})", chunk)
+                        rows = cursor.fetchall() or []
+                    except Exception as exc:
+                        # Best-effort: a failed lookup skips this chunk, but leave a trace.
+                        debug(f"set_note_bulk: file lookup failed for local file: {exc}")
+                        rows = []
+                    for row in rows:
+                        try:
+                            h = str(row[0]).strip().lower()
+                            p = str(row[1]).strip()
+                        except Exception:
+                            continue
+                        if h and p:
+                            hash_to_path[h] = p
+
+                # Ensure notes rows exist and only write for existing files.
+                # The on-disk check is done once per hash, not once per note.
+                exists_by_hash: Dict[str, bool] = {}
+                inserts: List[tuple[str, str, str]] = []
+                for h, note_name, note_text in normalized:
+                    present = exists_by_hash.get(h)
+                    if present is None:
+                        p = hash_to_path.get(h)
+                        try:
+                            present = bool(p) and Path(p).exists()
+                        except Exception:
+                            present = False
+                        exists_by_hash[h] = present
+                    if present:
+                        inserts.append((h, note_name, note_text))
+
+                if not inserts:
+                    return False
+
+                # Prefer upsert when supported, else fall back to INSERT OR REPLACE.
+                try:
+                    cursor.executemany(
+                        "INSERT INTO notes (hash, name, note) VALUES (?, ?, ?) "
+                        "ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP",
+                        inserts,
+                    )
+                except Exception as exc:
+                    debug(f"set_note_bulk: upsert failed, falling back to INSERT OR REPLACE: {exc}")
+                    cursor.executemany(
+                        "INSERT OR REPLACE INTO notes (hash, name, note) VALUES (?, ?, ?)",
+                        inserts,
+                    )
+
+                conn.commit()
+                return True
+        except Exception as exc:
+            debug(f"set_note_bulk failed for local file: {exc}")
+            return False
+
    def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
        """Delete a named note for a local file by hash."""
        from API.folder import API_folder_store
--- a/Store/HydrusNetwork.py
+++ b/Store/HydrusNetwork.py
@@ -217,10 +217,13 @@ class HydrusNetwork(Store):
        
        # Add title to tags if provided and not already present
        if title:
-            title_tag = f"title:{title}"
+            title_tag = f"title:{title}".strip().lower()
            if not any(str(candidate).lower().startswith("title:") for candidate in tag_list):
                tag_list = [title_tag] + list(tag_list)

+        # Hydrus tags are lowercase-only; normalize here for consistency.
+        tag_list = [str(t).strip().lower() for t in (tag_list or []) if isinstance(t, str) and str(t).strip()]
+
        try:
            # Compute file hash
            file_hash = sha256_file(file_path)
@@ -445,6 +448,36 @@ class HydrusNetwork(Store):

            query_lower = query.lower().strip()

+            # Support `ext:<value>` anywhere in the query. We filter results by the
+            # Hydrus metadata extension field.
+            def _normalize_ext_filter(value: str) -> str:
+                """Return *value* trimmed, lowercased, without leading dots, keeping only alphanumerics."""
+                trimmed = str(value or "").strip().lower().lstrip('.')
+                return "".join(filter(str.isalnum, trimmed))
+
+            ext_filter: str | None = None
+            ext_only: bool = False
+            try:
+                m = re.search(r"\bext:([^\s,]+)", query_lower)
+                if not m:
+                    m = re.search(r"\bextension:([^\s,]+)", query_lower)
+                if m:
+                    ext_filter = _normalize_ext_filter(m.group(1)) or None
+                    query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
+                    query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
+                    query = query_lower
+                    if ext_filter and not query_lower:
+                        query = "*"
+                        query_lower = "*"
+                        ext_only = True
+            except Exception:
+                ext_filter = None
+                ext_only = False
+
+            # Split into meaningful terms for AND logic.
+            # Avoid punctuation tokens like '-' that would make matching brittle.
+            search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
+
            # Special case: url:* and url:<value>
            metadata_list: list[dict[str, Any]] | None = None
            if ":" in query_lower and not query_lower.startswith(":"):
@@ -508,54 +541,268 @@ class HydrusNetwork(Store):
                            metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)

            # Parse the query into tags
-            # Handle both simple tags and complex queries
            # "*" means "match all" - use system:everything tag in Hydrus
+            # If query has explicit namespace, use it as a tag search.
+            # If query is free-form, search BOTH:
+            #   - title:*term*  (title: is the only namespace searched implicitly)
+            #   - *term*        (freeform tags; we will filter out other namespace matches client-side)
+            tags: list[str] = []
+            freeform_union_search: bool = False
+            title_predicates: list[str] = []
+            freeform_predicates: list[str] = []
+
            if query.strip() == "*":
-                # Use system:everything to match all files in Hydrus
                tags = ["system:everything"]
+            elif ':' in query_lower:
+                tags = [query_lower]
            else:
-                # If query doesn't have a namespace (no ':'), search all files and filter by title/tags
-                # If query has explicit namespace, use it as a tag search
-                if ':' not in query_lower:
-                    # No namespace provided: search all files, then filter by title/tags containing the query
-                    tags = ["system:everything"]
+                freeform_union_search = True
+                if search_terms:
+                    # Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
+                    # Use per-term prefix matching for both title: and freeform tags.
+                    title_predicates = [f"title:{term}*" for term in search_terms]
+                    freeform_predicates = [f"{term}*" for term in search_terms]
                else:
-                    # User provided explicit namespace (e.g., "creator:john" or "system:has_audio")
-                    # Use it as a tag search
-                    tags = [query_lower]
-            
-            if not tags:
-                debug(f"{prefix} 0 result(s)")
-                return []
+                    # If we can't extract alnum terms, fall back to the raw query text.
+                    title_predicates = [f"title:{query_lower}*"]
+                    freeform_predicates = [f"{query_lower}*"]
            
            # Search files with the tags (unless url: search already produced metadata)
            results = []
-            # Split by comma or space for AND logic
-            search_terms = set(query_lower.replace(',', ' ').split())  # For substring matching
+
+            def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
+                """Pull ``(file_ids, hashes)`` out of a search response dict.
+
+                Anything that is not a dict yields two empty lists. Ids that
+                cannot be converted with ``int()`` and hashes that are not
+                non-blank strings are dropped.
+                """
+                if not isinstance(payload, dict):
+                    return [], []
+
+                ids_out: list[int] = []
+                id_candidates = payload.get("file_ids", [])
+                if isinstance(id_candidates, list):
+                    for candidate in id_candidates:
+                        try:
+                            as_int = int(candidate)
+                        except (TypeError, ValueError):
+                            continue
+                        ids_out.append(as_int)
+
+                hashes_out: list[str] = []
+                hash_candidates = payload.get("hashes", [])
+                if isinstance(hash_candidates, list):
+                    for candidate in hash_candidates:
+                        if not isinstance(candidate, str):
+                            continue
+                        trimmed = candidate.strip()
+                        if trimmed:
+                            hashes_out.append(trimmed)
+
+                return ids_out, hashes_out

            if metadata_list is None:
-                search_result = client.search_files(
-                    tags=tags,
-                    return_hashes=True,
-                    return_file_ids=True
-                )
+                file_ids: list[int] = []
+                hashes: list[str] = []

-                file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else []
-                hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
+                if freeform_union_search:
+                    if not title_predicates and not freeform_predicates:
+                        debug(f"{prefix} 0 result(s)")
+                        return []
+
+                    payloads: list[Any] = []
+                    try:
+                        payloads.append(
+                            client.search_files(
+                                tags=title_predicates,
+                                return_hashes=True,
+                                return_file_ids=True,
+                            )
+                        )
+                    except Exception:
+                        pass
+
+                    try:
+                        payloads.append(
+                            client.search_files(
+                                tags=freeform_predicates,
+                                return_hashes=True,
+                                return_file_ids=True,
+                            )
+                        )
+                    except Exception:
+                        pass
+
+                    id_set: set[int] = set()
+                    hash_set: set[str] = set()
+                    for payload in payloads:
+                        ids_part, hashes_part = _extract_search_ids(payload)
+                        for fid in ids_part:
+                            id_set.add(fid)
+                        for hh in hashes_part:
+                            hash_set.add(hh)
+                    file_ids = list(id_set)
+                    hashes = list(hash_set)
+                else:
+                    if not tags:
+                        debug(f"{prefix} 0 result(s)")
+                        return []
+
+                    search_result = client.search_files(
+                        tags=tags,
+                        return_hashes=True,
+                        return_file_ids=True
+                    )
+                    file_ids, hashes = _extract_search_ids(search_result)
+
+                # Fast path: ext-only search. Avoid fetching metadata for an unbounded
+                # system:everything result set; fetch in chunks until we have enough.
+                if ext_only and ext_filter:
+                    results: list[dict[str, Any]] = []
+                    if not file_ids and not hashes:
+                        debug(f"{prefix} 0 result(s)")
+                        return []
+
+                    # Prefer file_ids if available.
+                    if file_ids:
+                        chunk_size = 200
+                        for start in range(0, len(file_ids), chunk_size):
+                            if len(results) >= limit:
+                                break
+                            chunk = file_ids[start : start + chunk_size]
+                            try:
+                                payload = client.fetch_file_metadata(
+                                    file_ids=chunk,
+                                    include_service_keys_to_tags=True,
+                                    include_file_url=False,
+                                    include_duration=True,
+                                    include_size=True,
+                                    include_mime=True,
+                                )
+                            except Exception:
+                                continue
+                            metas = payload.get("metadata", []) if isinstance(payload, dict) else []
+                            if not isinstance(metas, list):
+                                continue
+                            for meta in metas:
+                                if len(results) >= limit:
+                                    break
+                                if not isinstance(meta, dict):
+                                    continue
+                                mime_type = meta.get("mime")
+                                ext = str(meta.get("ext") or "").strip().lstrip('.')
+                                if not ext and mime_type:
+                                    for category in mime_maps.values():
+                                        for _ext_key, info in category.items():
+                                            if mime_type in info.get("mimes", []):
+                                                ext = str(info.get("ext", "")).strip().lstrip('.')
+                                                break
+                                        if ext:
+                                            break
+                                if _normalize_ext_filter(ext) != ext_filter:
+                                    continue
+
+                                file_id = meta.get("file_id")
+                                hash_hex = meta.get("hash")
+                                size = meta.get("size", 0)
+
+                                tags_set = meta.get("tags", {})
+                                all_tags: list[str] = []
+                                title = f"Hydrus File {file_id}"
+                                if isinstance(tags_set, dict):
+                                    def _collect(tag_list: Any) -> None:
+                                        nonlocal title
+                                        if not isinstance(tag_list, list):
+                                            return
+                                        for tag in tag_list:
+                                            tag_text = str(tag) if tag else ""
+                                            if not tag_text:
+                                                continue
+                                            tag_l = tag_text.strip().lower()
+                                            if not tag_l:
+                                                continue
+                                            all_tags.append(tag_l)
+                                            if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
+                                                title = tag_l.split(":", 1)[1].strip()
+
+                                    for _service_name, service_tags in tags_set.items():
+                                        if not isinstance(service_tags, dict):
+                                            continue
+                                        storage_tags = service_tags.get("storage_tags", {})
+                                        if isinstance(storage_tags, dict):
+                                            for tag_list in storage_tags.values():
+                                                _collect(tag_list)
+                                        display_tags = service_tags.get("display_tags", [])
+                                        _collect(display_tags)
+
+                                file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
+                                results.append(
+                                    {
+                                        "hash": hash_hex,
+                                        "url": file_url,
+                                        "name": title,
+                                        "title": title,
+                                        "size": size,
+                                        "size_bytes": size,
+                                        "store": self.NAME,
+                                        "tag": all_tags,
+                                        "file_id": file_id,
+                                        "mime": mime_type,
+                                        "ext": ext,
+                                    }
+                                )
+
+                        debug(f"{prefix} {len(results)} result(s)")
+                        return results[:limit]
+
+                    # If we only got hashes, fall back to the normal flow below.

                if not file_ids and not hashes:
                    debug(f"{prefix} 0 result(s)")
                    return []

                if file_ids:
-                    metadata = client.fetch_file_metadata(file_ids=file_ids)
+                    metadata = client.fetch_file_metadata(
+                        file_ids=file_ids,
+                        include_service_keys_to_tags=True,
+                        include_file_url=False,
+                        include_duration=True,
+                        include_size=True,
+                        include_mime=True,
+                    )
                    metadata_list = metadata.get("metadata", [])
                elif hashes:
-                    metadata = client.fetch_file_metadata(hashes=hashes)
+                    metadata = client.fetch_file_metadata(
+                        hashes=hashes,
+                        include_service_keys_to_tags=True,
+                        include_file_url=False,
+                        include_duration=True,
+                        include_size=True,
+                        include_mime=True,
+                    )
                    metadata_list = metadata.get("metadata", [])
                else:
                    metadata_list = []

+                # If our free-text searches produced no metadata, fall back to scanning everything.
+                if (not metadata_list) and (query_lower != "*") and (":" not in query_lower):
+                    try:
+                        search_result = client.search_files(
+                            tags=["system:everything"],
+                            return_hashes=True,
+                            return_file_ids=True,
+                        )
+                        file_ids, hashes = _extract_search_ids(search_result)
+
+                        if file_ids:
+                            metadata = client.fetch_file_metadata(
+                                file_ids=file_ids,
+                                include_service_keys_to_tags=True,
+                                include_file_url=False,
+                                include_duration=True,
+                                include_size=True,
+                                include_mime=True,
+                            )
+                            metadata_list = metadata.get("metadata", [])
+                        elif hashes:
+                            metadata = client.fetch_file_metadata(
+                                hashes=hashes,
+                                include_service_keys_to_tags=True,
+                                include_file_url=False,
+                                include_duration=True,
+                                include_size=True,
+                                include_mime=True,
+                            )
+                            metadata_list = metadata.get("metadata", [])
+                    except Exception:
+                        pass
+
            if not isinstance(metadata_list, list):
                metadata_list = []

@@ -585,10 +832,13 @@ class HydrusNetwork(Store):
                                tag_text = str(tag) if tag else ""
                                if not tag_text:
                                    continue
-                                all_tags.append(tag_text)
-                                all_tags_str += " " + tag_text.lower()
-                                if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}":
-                                    title = tag_text.split(":", 1)[1].strip()
+                                tag_l = tag_text.strip().lower()
+                                if not tag_l:
+                                    continue
+                                all_tags.append(tag_l)
+                                all_tags_str += " " + tag_l
+                                if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
+                                    title = tag_l.split(":", 1)[1].strip()

                        for _service_name, service_tags in tags_set.items():
                            if not isinstance(service_tags, dict):
@@ -641,20 +891,15 @@ class HydrusNetwork(Store):
                            "ext": ext,
                        })
                    else:
-                        # Free-form search: check if search terms match the title or tags
-                        # Match if ALL search terms are found in title or tags (AND logic)
-                        # AND use whole word matching
-                        
-                        # Combine title and tags for searching
-                        searchable_text = (title + " " + all_tags_str).lower()
-                        
+                        # Free-form search: check if search terms match title or FREEFORM tags.
+                        # Do NOT implicitly match other namespace tags (except title:).
+                        freeform_tags = [t for t in all_tags if isinstance(t, str) and t and (":" not in t)]
+                        searchable_text = (title + " " + " ".join(freeform_tags)).lower()
+
                        match = True
-                        if query_lower != "*":
+                        if query_lower != "*" and search_terms:
                            for term in search_terms:
-                                # Regex for whole word: \bterm\b
-                                # Escape term to handle special chars
-                                pattern = r'\b' + re.escape(term) + r'\b'
-                                if not re.search(pattern, searchable_text):
+                                if term not in searchable_text:
                                    match = False
                                    break
                        
@@ -675,6 +920,17 @@ class HydrusNetwork(Store):
                            })
            
            debug(f"{prefix} {len(results)} result(s)")
+            if ext_filter:
+                wanted = ext_filter
+                filtered: list[dict[str, Any]] = []
+                for item in results:
+                    try:
+                        if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
+                            filtered.append(item)
+                    except Exception:
+                        continue
+                results = filtered
+
            return results[:limit]

        except Exception as exc:
@@ -903,8 +1159,8 @@ class HydrusNetwork(Store):
            
            # Extract tags from metadata
            tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
-            
-            return tags, "hydrus"
+
+            return [str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()], "hydrus"
        
        except Exception as exc:
            debug(f"{self._log_prefix()} get_tags failed: {exc}")
@@ -924,12 +1180,38 @@ class HydrusNetwork(Store):
                debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'")
                return False
            service_name = kwargs.get("service_name") or "my tags"
-            # Ensure tags is a list
-            tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
-            if not tag_list:
-                return False
-            client.add_tag(file_hash, tag_list, service_name)
-            return True
+
+            incoming_tags = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
+            if not incoming_tags:
+                return True
+
+            try:
+                existing_tags, _src = self.get_tag(file_hash)
+            except Exception:
+                existing_tags = []
+
+            from metadata import compute_namespaced_tag_overwrite
+
+            tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(existing_tags, incoming_tags)
+
+            if not tags_to_add and not tags_to_remove:
+                return True
+
+            did_any = False
+            if tags_to_remove:
+                try:
+                    client.delete_tag(file_hash, tags_to_remove, service_name)
+                    did_any = True
+                except Exception as exc:
+                    debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}")
+            if tags_to_add:
+                try:
+                    client.add_tag(file_hash, tags_to_add, service_name)
+                    did_any = True
+                except Exception as exc:
+                    debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}")
+
+            return did_any
        except Exception as exc:
            debug(f"{self._log_prefix()} add_tag failed: {exc}")
            return False
@@ -948,7 +1230,8 @@ class HydrusNetwork(Store):
                debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'")
                return False
            service_name = kwargs.get("service_name") or "my tags"
-            tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
+            raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
+            tag_list = [str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()]
            if not tag_list:
                return False
            client.delete_tag(file_hash, tag_list, service_name)
@@ -1014,6 +1297,38 @@ class HydrusNetwork(Store):
            debug(f"{self._log_prefix()} add_url failed: {exc}")
            return False

+    def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
+        """Bulk associate urls with Hydrus files.
+
+        This is a best-effort convenience wrapper used by cmdlets to batch url associations.
+        Hydrus' client API is still called per (hash,url) pair, but this consolidates the
+        cmdlet-level control flow so url association can be deferred until the end.
+
+        Args:
+            items: ``(file_hash, urls)`` pairs. Hashes are stripped and lowercased;
+                entries whose hash is not 64 characters long are skipped. A bare
+                string given as ``urls`` is treated as a single url. Blank urls
+                are ignored.
+            **kwargs: Accepted for interface compatibility; not used here.
+
+        Returns:
+            True if at least one ``associate_url`` call succeeded, otherwise False
+            (including when the client is unavailable or there was nothing to do).
+        """
+        try:
+            client = self._client
+            if client is None:
+                debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
+                return False
+
+            any_success = False
+            for file_identifier, urls in (items or []):
+                h = str(file_identifier or "").strip().lower()
+                if len(h) != 64:
+                    continue
+                # Guard against a single url passed as a plain string; iterating
+                # it directly would associate one-character "urls".
+                if isinstance(urls, str):
+                    urls = [urls]
+                for u in (urls or []):
+                    s = str(u or "").strip()
+                    if not s:
+                        continue
+                    try:
+                        client.associate_url(h, s)
+                        any_success = True
+                    except Exception as exc:
+                        # Stay best-effort, but log the failure as add_tag does.
+                        debug(f"{self._log_prefix()} add_url_bulk: associate_url failed for {h}: {exc}")
+                        continue
+            return any_success
+        except Exception as exc:
+            debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
+            return False
+
    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete one or more url from a Hydrus file.
        """
--- a/Store/_base.py
+++ b/Store/_base.py
@@ -50,6 +50,51 @@ class Store(ABC):
    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        raise NotImplementedError

+    def add_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool:
+        """Associate urls with many files; generic per-file fallback.
+
+        Backends may override this to batch writes (single transaction / request).
+        This default simply forwards each ``(file_identifier, urls)`` pair to
+        ``add_url()``. An exception raised for one pair is swallowed and the
+        remaining pairs are still processed.
+
+        Returns:
+            True if at least one ``add_url()`` call returned a truthy value.
+        """
+        outcome = False
+        for file_identifier, urls in (items or ()):
+            try:
+                succeeded = bool(self.add_url(file_identifier, urls, **kwargs))
+            except Exception:
+                succeeded = False
+            if succeeded:
+                outcome = True
+        return outcome
+
+    def delete_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool:
+        """Remove urls from many files; generic per-file fallback.
+
+        Backends may override this to batch writes (single transaction / request).
+        This default simply forwards each ``(file_identifier, urls)`` pair to
+        ``delete_url()``. An exception raised for one pair is swallowed and the
+        remaining pairs are still processed.
+
+        Returns:
+            True if at least one ``delete_url()`` call returned a truthy value.
+        """
+        outcome = False
+        for file_identifier, urls in (items or ()):
+            try:
+                succeeded = bool(self.delete_url(file_identifier, urls, **kwargs))
+            except Exception:
+                succeeded = False
+            if succeeded:
+                outcome = True
+        return outcome
+
+    def set_note_bulk(self, items: List[Tuple[str, str, str]], **kwargs: Any) -> bool:
+        """Set notes on many files; generic per-item fallback.
+
+        Backends may override this to batch writes (single transaction / request).
+        This default simply forwards each ``(file_identifier, name, text)``
+        triple to ``set_note()``. An exception raised for one triple is
+        swallowed and the remaining triples are still processed.
+
+        Returns:
+            True if at least one ``set_note()`` call returned a truthy value.
+        """
+        outcome = False
+        for file_identifier, name, text in (items or ()):
+            try:
+                succeeded = bool(self.set_note(file_identifier, name, text, **kwargs))
+            except Exception:
+                succeeded = False
+            if succeeded:
+                outcome = True
+        return outcome
+
    @abstractmethod
    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        raise NotImplementedError