f

2026-01-16 01:47:00 -08:00
parent 41e95d0360
commit 12436e5a6a
4 changed files with 492 additions and 130 deletions
@@ -217,6 +217,7 @@ class API_folder_store:
        self.connection: Optional[sqlite3.Connection] = None
        # Use the shared lock
        self._db_lock = self._shared_db_lock
        mm_debug(f"[folder-db] init: root={self.library_root} db={self.db_path}")
        self._init_db()
    @contextmanager
@@ -284,6 +285,7 @@ class API_folder_store:
        """Initialize database connection and create tables if needed."""
        with self._with_db_lock():
            try:
                mm_debug(f"[folder-db] opening sqlite db: {self.db_path}")
                # Ensure the library root exists; sqlite cannot create parent dirs.
                try:
                    # User safety: Folder store must be created in a blank folder/no files in it.
@@ -326,6 +328,7 @@ class API_folder_store:
                    timeout=20.0
                )
                self.connection.row_factory = sqlite3.Row
                mm_debug(f"[folder-db] sqlite connection opened: {self.db_path}")
                # Ensure busy_timeout is set immediately for all subsequent ops (including pragmas)
                try:
@@ -337,7 +340,14 @@ class API_folder_store:
                # 1. WAL mode for better concurrency and fewer locks
                self.connection.execute("PRAGMA journal_mode=WAL")
                # 2. auto_vacuum=FULL to automatically reclaim space from deleted rows/logs
-                self.connection.execute("PRAGMA auto_vacuum = FULL")
+                try:
                    self.connection.execute("PRAGMA auto_vacuum = FULL")
                except sqlite3.OperationalError as exc:
                    if "locked" not in str(exc).lower():
                        raise
                    logger.warning(
                        "Database locked; skipping PRAGMA auto_vacuum setup for this session."
                    )
                # 3. Increase page size for modern file systems
                self.connection.execute("PRAGMA page_size = 4096")
                # 4. Memory and Sync optimizations
@@ -2657,12 +2667,30 @@ class DatabaseAPI:
    def __init__(self, search_dir: Path):
        """Resolve the search directory and open the folder store rooted there.

        Args:
            search_dir: Directory to search; it is passed through
                ``expand_path`` and then resolved before use.
        """
        resolved_root = expand_path(search_dir).resolve()
        self.search_dir = resolved_root
        self.db = API_folder_store(resolved_root)
        try:
            init_trace = f"[folder-db] DatabaseAPI init: root={self.search_dir} db={self.db.db_path}"
            mm_debug(init_trace)
        except Exception:
            # Tracing is best-effort; a failure here must not abort construction.
            pass
    def __enter__(self):
        """Emit a debug trace, enter the wrapped store's context, and return self."""
        try:
            enter_trace = f"[folder-db] DatabaseAPI enter: root={self.search_dir} db={self.db.db_path}"
            mm_debug(enter_trace)
        except Exception:
            # Tracing is best-effort; a failure here must not block entry.
            pass
        self.db.__enter__()
        return self
    def __exit__(self, *args):
        """Emit a debug trace, then delegate to the wrapped store's ``__exit__``.

        Returns:
            Whatever ``self.db.__exit__(*args)`` returns.
        """
        try:
            exit_trace = f"[folder-db] DatabaseAPI exit: root={self.search_dir} db={self.db.db_path}"
            mm_debug(exit_trace)
        except Exception:
            # Tracing is best-effort; a failure here must not block the exit.
            pass
        exit_result = self.db.__exit__(*args)
        return exit_result
    def get_cursor(self):
@@ -2730,6 +2758,9 @@ class DatabaseAPI:
    def get_file_hashes_with_any_url(self, limit: Optional[int] = None) -> Set[str]:
        """Get hashes of files that have any non-empty URL metadata."""
        mm_debug(
            f"[folder-db] get_file_hashes_with_any_url start: limit={limit or 10000}"
        )
        cursor = self.get_cursor()
        cursor.execute(
            """
@@ -2744,8 +2775,11 @@ class DatabaseAPI:
            (limit or 10000,
             ),
        )
-        return {row[0]
+        rows = cursor.fetchall()
-                for row in cursor.fetchall()}
+        mm_debug(
            f"[folder-db] get_file_hashes_with_any_url done: {len(rows)} row(s)"
        )
        return {row[0] for row in rows}
    def get_file_hashes_by_url_like(
        self,
@@ -2753,6 +2787,9 @@ class DatabaseAPI:
        limit: Optional[int] = None
    ) -> Set[str]:
        """Get hashes of files whose URL metadata contains a substring (case-insensitive)."""
        mm_debug(
            f"[folder-db] get_file_hashes_by_url_like start: pattern={like_pattern} limit={limit or 10000}"
        )
        cursor = self.get_cursor()
        cursor.execute(
            """
@@ -2766,8 +2803,11 @@ class DatabaseAPI:
            (like_pattern.lower(),
             limit or 10000),
        )
-        return {row[0]
+        rows = cursor.fetchall()
-                for row in cursor.fetchall()}
+        mm_debug(
            f"[folder-db] get_file_hashes_by_url_like done: {len(rows)} row(s)"
        )
        return {row[0] for row in rows}
    def get_file_hashes_by_ext(self,
                               ext_value: str,
@@ -2847,14 +2887,18 @@ class DatabaseAPI:
    def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
        """Get files that have any non-empty URL metadata.
-        Returns (hash, file_path, size, ext) tuples.
+        Returns (hash, file_path, size, ext, url) tuples.
        """
        mm_debug(
            f"[folder-db] get_files_with_any_url start: limit={limit or 10000}"
        )
        cursor = self.get_cursor()
        cursor.execute(
            """
-            SELECT f.hash, f.file_path,
+                 SELECT f.hash, f.file_path,
-                   COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
+                     COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
-                   COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
+                     COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext,
                     COALESCE(m.url, '') as url
                 FROM file f
            JOIN metadata m ON f.hash = m.hash
            WHERE m.url IS NOT NULL
@@ -2866,21 +2910,29 @@ class DatabaseAPI:
            (limit or 10000,
             ),
        )
-        return cursor.fetchall()
+        rows = cursor.fetchall()
        mm_debug(
            f"[folder-db] get_files_with_any_url done: {len(rows)} row(s)"
        )
        return rows
    def get_files_by_url_like(self,
                              like_pattern: str,
                              limit: Optional[int] = None) -> List[tuple]:
        """Get files whose URL metadata contains a substring (case-insensitive).
-        Returns (hash, file_path, size, ext) tuples.
+        Returns (hash, file_path, size, ext, url) tuples.
        """
        mm_debug(
            f"[folder-db] get_files_by_url_like start: pattern={like_pattern} limit={limit or 10000}"
        )
        cursor = self.get_cursor()
        cursor.execute(
            """
-            SELECT f.hash, f.file_path,
+                 SELECT f.hash, f.file_path,
-                   COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
+                     COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
-                   COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
+                     COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext,
                     COALESCE(m.url, '') as url
                        FROM file f
            JOIN metadata m ON f.hash = m.hash
            WHERE m.url IS NOT NULL
@@ -2891,7 +2943,11 @@ class DatabaseAPI:
            (like_pattern.lower(),
             limit or 10000),
        )
-        return cursor.fetchall()
+        rows = cursor.fetchall()
        mm_debug(
            f"[folder-db] get_files_by_url_like done: {len(rows)} row(s)"
        )
        return rows
    def get_file_metadata(self,
                          file_hashes: Set[str],
@@ -2899,6 +2955,9 @@ class DatabaseAPI:
        """Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
        if not file_hashes:
            return []
        mm_debug(
            f"[folder-db] get_file_metadata start: hashes={len(file_hashes)} limit={limit or len(file_hashes)}"
        )
        cursor = self.get_cursor()
        placeholders = ",".join(["?"] * len(file_hashes))
        fetch_sql = f"""
@@ -2911,7 +2970,11 @@ class DatabaseAPI:
        LIMIT ?
        """
        cursor.execute(fetch_sql, (*file_hashes, limit or len(file_hashes)))
-        return cursor.fetchall()
+        rows = cursor.fetchall()
        mm_debug(
            f"[folder-db] get_file_metadata done: {len(rows)} row(s)"
        )
        return rows
    def get_all_files(self, limit: Optional[int] = None) -> List[tuple]:
        """Get all files in database. Returns (hash, file_path, size, ext) tuples."""
@@ -2932,11 +2995,18 @@ class DatabaseAPI:
    def get_tags_for_file(self, file_hash: str) -> List[str]:
        """Get all tags for a file given its hash."""
        mm_debug(
            f"[folder-db] get_tags_for_file start: hash={file_hash}"
        )
        cursor = self.get_cursor()
        cursor.execute("SELECT tag FROM tag WHERE hash = ?",
                       (file_hash,
                        ))
-        return [row[0] for row in cursor.fetchall()]
+        rows = cursor.fetchall()
        mm_debug(
            f"[folder-db] get_tags_for_file done: {len(rows)} row(s)"
        )
        return [row[0] for row in rows]
    def get_tags_by_namespace_and_file(self,
                                       file_hash: str,
@@ -4,7 +4,7 @@ import json
 import re
 import shutil
 import sys
-from fnmatch import translate
+from fnmatch import fnmatch, translate
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
@@ -30,6 +30,28 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
    return _normalize_hash(file_path.stem)
 def _normalize_url_for_search(url: str) -> str:
    value = str(url or "").strip()
    value = re.sub(r"^[a-z][a-z0-9+.-]*://", "", value, flags=re.IGNORECASE)
    value = re.sub(r"^www\.", "", value, flags=re.IGNORECASE)
    return value.lower()
 def _match_url_pattern(url: str, pattern: str) -> bool:
    """Report whether *url* matches *pattern* after both are normalized.

    An empty normalized pattern never matches. A pattern containing ``*`` or
    ``?`` is matched with ``fnmatch``. Otherwise the pattern matches when it
    equals the URL ignoring trailing slashes, or when it is a substring of
    the URL.
    """
    needle = _normalize_url_for_search(pattern)
    haystack = _normalize_url_for_search(url)
    if not needle:
        return False

    if "*" in needle or "?" in needle:
        return fnmatch(haystack, needle)

    # Equality ignoring trailing slashes catches "host/" against "host".
    trimmed_needle = needle.rstrip("/")
    if trimmed_needle and trimmed_needle == haystack.rstrip("/"):
        return True
    return needle in haystack
 class Folder(Store):
    """"""
@@ -690,6 +712,12 @@ class Folder(Store):
        match_all = query == "*" or (not query and bool(ext_filter))
        results = []
        search_dir = expand_path(self._location)
        backend_label = str(
            getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder"
        )
        debug(
            f"[folder:{backend_label}] search start: query={query} limit={limit} root={search_dir}"
        )
        def _url_like_pattern(value: str) -> str:
            # Interpret user patterns as substring matches (with optional glob wildcards).
@@ -1002,7 +1030,7 @@ class Folder(Store):
                        namespace, pattern = query.split(":", 1)
                        namespace = namespace.strip().lower()
                        pattern = pattern.strip().lower()
-                        debug(f"Performing namespace search: {namespace}:{pattern}")
+                        debug(f"[folder:{backend_label}] namespace search: {namespace}:{pattern}")
                        if namespace == "hash":
                            normalized_hash = _normalize_hash(pattern)
@@ -1041,14 +1069,50 @@ class Folder(Store):
                            return results
                        if namespace == "url":
                            pattern_hint = kwargs.get("pattern_hint")
                            def _parse_url_value(raw: Any) -> list[str]:
                                if raw is None:
                                    return []
                                if isinstance(raw, list):
                                    return [str(u).strip() for u in raw if str(u).strip()]
                                if isinstance(raw, str):
                                    text = raw.strip()
                                    if not text:
                                        return []
                                    try:
                                        parsed = json.loads(text)
                                        if isinstance(parsed, list):
                                            return [
                                                str(u).strip()
                                                for u in parsed
                                                if str(u).strip()
                                            ]
                                    except Exception:
                                        pass
                                    return [text]
                                return []
                            def _matches_pattern(url_list: list[str]) -> bool:
                                """Report whether any URL in *url_list* matches ``pattern_hint``.

                                When no ``pattern_hint`` is set, every list is accepted.
                                """
                                if pattern_hint:
                                    return any(
                                        _match_url_pattern(candidate, pattern_hint)
                                        for candidate in url_list
                                    )
                                return True
                            if not pattern or pattern == "*":
                                debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
                                rows = api.get_files_with_any_url(limit)
                            else:
                                debug(
                                    f"[folder:{backend_label}] url search: like={pattern} (limit={limit})"
                                )
                                rows = api.get_files_by_url_like(
                                    _url_like_pattern(pattern),
                                    limit
                                )
-                            for file_hash, file_path_str, size_bytes, ext in rows:
+                            for file_hash, file_path_str, size_bytes, ext, url_raw in rows:
                                if not file_path_str:
                                    continue
                                file_path = search_dir / str(file_path_str)
@@ -1059,6 +1123,9 @@ class Folder(Store):
                                        size_bytes = file_path.stat().st_size
                                    except OSError:
                                        size_bytes = None
                                urls = _parse_url_value(url_raw)
                                if not urls or not _matches_pattern(urls):
                                    continue
                                tags = api.get_tags_for_file(file_hash)
                                entry = _create_entry(
                                    file_path,
@@ -1066,6 +1133,7 @@ class Folder(Store):
                                    size_bytes,
                                    file_hash
                                )
                                entry["urls"] = urls
                                results.append(entry)
                                if limit is not None and len(results) >= limit:
                                    return results
@@ -466,7 +466,9 @@ class HydrusNetwork(Store):
            def _extract_urls(meta_obj: Any) -> list[str]:
                if not isinstance(meta_obj, dict):
                    return []
-                raw = meta_obj.get("url")
+                raw = meta_obj.get("known_urls")
                if raw is None:
                    raw = meta_obj.get("url")
                if raw is None:
                    raw = meta_obj.get("urls")
                if isinstance(raw, str):
@@ -483,100 +485,178 @@ class HydrusNetwork(Store):
                    return out
                return []
            def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
                if not isinstance(payload, dict):
                    return [], []
                raw_ids = payload.get("file_ids", [])
                raw_hashes = payload.get("hashes", [])
                ids_out: list[int] = []
                hashes_out: list[str] = []
                if isinstance(raw_ids, list):
                    for item in raw_ids:
                        try:
                            if isinstance(item, (int, float)):
                                ids_out.append(int(item))
                                continue
                            if isinstance(item, str) and item.strip().isdigit():
                                ids_out.append(int(item.strip()))
                        except Exception:
                            continue
                if isinstance(raw_hashes, list):
                    for item in raw_hashes:
                        try:
                            candidate = str(item or "").strip().lower()
                            if candidate:
                                hashes_out.append(candidate)
                        except Exception:
                            continue
                return ids_out, hashes_out
            def _iter_url_filtered_metadata(
                url_value: str | None,
                want_any: bool,
-                fetch_limit: int
+                fetch_limit: int,
-            ) -> list[dict[str,
+                scan_limit: int | None = None
-                           Any]]:
+            ) -> list[dict[str, Any]]:
                """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
                # First try a fast system predicate if Hydrus supports it.
                candidate_file_ids: list[int] = []
-                try:
+                candidate_hashes: list[str] = []
-                    if want_any:
+                seen_file_ids: set[int] = set()
                seen_hashes: set[str] = set()
                def _add_candidates(ids: list[int], hashes: list[str]) -> None:
                    """Append not-yet-seen ids and hashes to the candidate lists.

                    The ``seen_*`` sets are updated alongside, so repeated values
                    are appended only once and first-seen order is kept.
                    """
                    for file_id in ids:
                        if file_id not in seen_file_ids:
                            seen_file_ids.add(file_id)
                            candidate_file_ids.append(file_id)
                    for file_hash in hashes:
                        if file_hash not in seen_hashes:
                            seen_hashes.add(file_hash)
                            candidate_hashes.append(file_hash)
                predicate_supported = getattr(self, "_has_url_predicate", None)
                if predicate_supported is not False:
                    try:
                        predicate = "system:has url"
                        url_search = client.search_files(
                            tags=[predicate],
-                            return_hashes=False,
+                            return_hashes=True,
-                            return_file_ids=True,
+                            return_file_ids=False,
                            return_file_count=False,
                        )
-                        ids = url_search.get("file_ids",
+                        ids, hashes = _extract_search_ids(url_search)
-                                             []) if isinstance(url_search,
+                        _add_candidates(ids, hashes)
-                                                               dict) else []
+                        self._has_url_predicate = True
-                        if isinstance(ids, list):
+                    except Exception as exc:
-                            candidate_file_ids = [
+                        try:
-                                int(x) for x in ids
+                            from API.HydrusNetwork import HydrusRequestError
                                if isinstance(x, (int, float,
                                                  str)) and str(x).strip().isdigit()
                            ]
                except Exception:
                    candidate_file_ids = []
-                if not candidate_file_ids:
+                            if isinstance(exc, HydrusRequestError) and getattr(exc, "status", None) == 400:
-                    # Fallback: scan from system:everything and filter by URL substring.
+                                self._has_url_predicate = False
                        except Exception:
                            pass
                if not candidate_file_ids and not candidate_hashes:
                    everything = client.search_files(
                        tags=["system:everything"],
-                        return_hashes=False,
+                        return_hashes=True,
-                        return_file_ids=True,
+                        return_file_ids=False,
                        return_file_count=False,
                    )
-                    ids = everything.get("file_ids",
+                    ids, hashes = _extract_search_ids(everything)
-                                         []) if isinstance(everything,
+                    _add_candidates(ids, hashes)
                                                           dict) else []
                    if isinstance(ids, list):
                        candidate_file_ids = [
                            int(x) for x in ids if isinstance(x, (int, float))
                        ]
-                if not candidate_file_ids:
+                if not candidate_file_ids and not candidate_hashes:
                    return []
                needle = (url_value or "").strip().lower()
                chunk_size = 200
                out: list[dict[str, Any]] = []
                if scan_limit is None:
                    try:
                        if not want_any and url_value:
                            scan_limit = max(200, min(int(fetch_limit), 400))
                        else:
                            scan_limit = max(int(fetch_limit) * 5, 1000)
                    except Exception:
                        scan_limit = 400 if (not want_any and url_value) else 1000
                if scan_limit is not None:
                    scan_limit = min(int(scan_limit), 10000)
                scanned = 0
-                for start in range(0, len(candidate_file_ids), chunk_size):
+                def _process_source(items: list[Any], kind: str) -> None:
                    nonlocal scanned
                    for start in range(0, len(items), chunk_size):
                        if len(out) >= fetch_limit:
                            return
                        if scan_limit is not None and scanned >= scan_limit:
                            return
                        chunk = items[start:start + chunk_size]
                        if scan_limit is not None:
                            remaining = scan_limit - scanned
                            if remaining <= 0:
                                return
                            if len(chunk) > remaining:
                                chunk = chunk[:remaining]
                        scanned += len(chunk)
                        try:
                            if kind == "hashes":
                                payload = client.fetch_file_metadata(
                                    hashes=chunk,
                                    include_file_url=True,
                                    include_service_keys_to_tags=True,
                                    include_duration=True,
                                    include_size=True,
                                    include_mime=True,
                                )
                            else:
                                payload = client.fetch_file_metadata(
                                    file_ids=chunk,
                                    include_file_url=True,
                                    include_service_keys_to_tags=True,
                                    include_duration=True,
                                    include_size=True,
                                    include_mime=True,
                                )
                        except Exception:
                            continue
                        metas = payload.get("metadata",
                                            []) if isinstance(payload,
                                                              dict) else []
                        if not isinstance(metas, list):
                            continue
                        for meta in metas:
                            if len(out) >= fetch_limit:
                                break
                            if not isinstance(meta, dict):
                                continue
                            urls = _extract_urls(meta)
                            if not urls:
                                continue
                            if want_any:
                                out.append(meta)
                                continue
                            if not needle:
                                continue
                            if any(needle in u.lower() for u in urls):
                                out.append(meta)
                                continue
                sources: list[tuple[str, list[Any]]] = []
                if candidate_hashes:
                    sources.append(("hashes", candidate_hashes))
                elif candidate_file_ids:
                    sources.append(("file_ids", candidate_file_ids))
                for kind, items in sources:
                    if len(out) >= fetch_limit:
                        break
-                    chunk = candidate_file_ids[start:start + chunk_size]
+                    _process_source(items, kind)
                    try:
                        payload = client.fetch_file_metadata(
                            file_ids=chunk,
                            include_file_url=True,
                            include_service_keys_to_tags=True,
                            include_duration=True,
                            include_size=True,
                            include_mime=True,
                        )
                    except Exception:
                        continue
                    metas = payload.get("metadata",
                                        []) if isinstance(payload,
                                                          dict) else []
                    if not isinstance(metas, list):
                        continue
                    for meta in metas:
                        if not isinstance(meta, dict):
                            continue
                        urls = _extract_urls(meta)
                        if not urls:
                            continue
                        if want_any:
                            out.append(meta)
                            if len(out) >= fetch_limit:
                                break
                            continue
                        if not needle:
                            continue
                        if any(needle in u.lower() for u in urls):
                            out.append(meta)
                            if len(out) >= fetch_limit:
                                break
                return out
@@ -618,6 +698,7 @@ class HydrusNetwork(Store):
            # Special case: url:* and url:<value>
            metadata_list: list[dict[str, Any]] | None = None
            pattern_hint = str(kwargs.get("pattern_hint") or "").strip().lower()
            if ":" in query_lower and not query_lower.startswith(":"):
                namespace, pattern = query_lower.split(":", 1)
                namespace = namespace.strip().lower()
@@ -630,6 +711,12 @@ class HydrusNetwork(Store):
                            fetch_limit=int(limit) if limit else 100
                        )
                    else:
                        def _clean_url_search_token(value: str | None) -> str:
                            token = str(value or "").strip().lower()
                            if not token:
                                return ""
                            return token.replace("*", "").replace("?", "")
                        # Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
                        try:
                            if pattern.startswith("http://") or pattern.startswith(
@@ -706,10 +793,20 @@ class HydrusNetwork(Store):
                        # Fallback: substring scan
                        if metadata_list is None:
                            search_token = _clean_url_search_token(pattern_hint or pattern)
                            scan_limit_override: int | None = None
                            if search_token:
                                is_domain_only = ("://" not in search_token and "/" not in search_token)
                                if is_domain_only:
                                    try:
                                        scan_limit_override = max(int(limit or 100) * 20, 2000)
                                    except Exception:
                                        scan_limit_override = 2000
                            metadata_list = _iter_url_filtered_metadata(
-                                pattern,
+                                search_token,
                                want_any=False,
-                                fetch_limit=int(limit) if limit else 100
+                                fetch_limit=int(limit) if limit else 100,
                                scan_limit=scan_limit_override,
                            )
            # Parse the query into tags
@@ -742,26 +839,6 @@ class HydrusNetwork(Store):
            # Search files with the tags (unless url: search already produced metadata)
            results = []
            def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
                if not isinstance(payload, dict):
                    return [], []
                raw_ids = payload.get("file_ids", [])
                raw_hashes = payload.get("hashes", [])
                ids_out: list[int] = []
                hashes_out: list[str] = []
                if isinstance(raw_ids, list):
                    for item in raw_ids:
                        try:
                            ids_out.append(int(item))
                        except (TypeError, ValueError):
                            continue
                if isinstance(raw_hashes, list):
                    hashes_out = [
                        str(h).strip() for h in raw_hashes
                        if isinstance(h, str) and str(h).strip()
                    ]
                return ids_out, hashes_out
            if metadata_list is None:
                file_ids: list[int] = []
                hashes: list[str] = []
@@ -1,5 +1,7 @@
 from __future__ import annotations
 from queue import SimpleQueue
 from threading import Thread
 from dataclasses import dataclass
 from typing import Any, Dict, List, Sequence, Optional, Set, Tuple
 import sys
@@ -34,6 +36,8 @@ class UrlItem:
 class Get_Url(Cmdlet):
    """Get url associated with files via hash+store, or search urls by pattern."""
    STORE_SEARCH_TIMEOUT_SECONDS = 6.0
    def __init__(self) -> None:
        super().__init__(
            name="get-url",
@@ -81,8 +85,56 @@ class Get_Url(Cmdlet):
        normalized_url = Get_Url._normalize_url_for_search(url)
        normalized_pattern = Get_Url._normalize_url_for_search(pattern)
-        # Use fnmatch for wildcard matching (* and ?)
+        has_wildcards = any(ch in normalized_pattern for ch in ("*", "?"))
-        return fnmatch(normalized_url, normalized_pattern)
+        if has_wildcards:
            return fnmatch(normalized_url, normalized_pattern)
        normalized_url_no_slash = normalized_url.rstrip("/")
        normalized_pattern_no_slash = normalized_pattern.rstrip("/")
        if normalized_pattern_no_slash and normalized_pattern_no_slash == normalized_url_no_slash:
            return True
        return normalized_pattern in normalized_url
    def _execute_search_with_timeout(
        self,
        backend: Any,
        query: str,
        limit: int,
        store_name: str,
        **kwargs: Any,
    ) -> Optional[List[Any]]:
        """Run ``backend.search`` on a daemon thread and wait a bounded time.

        Args:
            backend: Object exposing ``search(query, limit=..., **kwargs)``.
            query: Search query forwarded to the backend.
            limit: Result limit forwarded to the backend.
            store_name: Store label used only in debug messages.
            **kwargs: Extra keyword arguments forwarded to ``backend.search``.

        Returns:
            ``None`` when the search thread is still running after
            ``STORE_SEARCH_TIMEOUT_SECONDS`` (the thread is not cancelled).
            An empty list when the search raised, left no outcome, or
            returned a falsy value. Otherwise the backend's return value.
        """
        outcomes: SimpleQueue[tuple[str, Any]] = SimpleQueue()

        def _worker() -> None:
            # Success and failure are both reported through the queue so the
            # calling thread never sees a backend exception directly.
            try:
                found = backend.search(query, limit=limit, **kwargs)
                outcomes.put(("ok", found))
            except Exception as exc:
                outcomes.put(("err", exc))

        search_thread = Thread(target=_worker, daemon=True)
        search_thread.start()
        search_thread.join(timeout=self.STORE_SEARCH_TIMEOUT_SECONDS)

        if search_thread.is_alive():
            debug(
                f"Store '{store_name}' search timed out after {self.STORE_SEARCH_TIMEOUT_SECONDS}s",
                file=sys.stderr,
            )
            return None

        # The thread finished without posting anything.
        if outcomes.empty():
            return []

        status, payload = outcomes.get()
        if status != "err":
            return payload or []

        debug(
            f"Store '{store_name}' search failed: {payload}",
            file=sys.stderr,
        )
        return []
    @staticmethod
    def _extract_first_url(value: Any) -> Optional[str]:
@@ -95,6 +147,35 @@ class Get_Url(Cmdlet):
                    return item.strip()
        return None
    @staticmethod
    def _extract_urls_from_hit(hit: Any) -> List[str]:
        """Return the non-blank URL strings carried directly on a search hit.

        The fields ``known_urls``, ``urls``, ``url``, ``source_url`` and
        ``source_urls`` are probed in that order via ``get_field``; the first
        truthy value is used. Any exception raised while probing is treated
        as "no URLs on the hit".

        Returns:
            A one-element list for a non-blank string value, the stripped
            non-blank string entries for a list/tuple value, and an empty
            list for anything else.
        """
        field_order = ("known_urls", "urls", "url", "source_url", "source_urls")
        candidate = None
        try:
            for field_name in field_order:
                candidate = get_field(hit, field_name)
                if candidate:
                    break
        except Exception:
            candidate = None

        if isinstance(candidate, str):
            stripped = candidate.strip()
            return [stripped] if stripped else []

        if isinstance(candidate, (list, tuple)):
            # Non-string entries and blank strings are dropped.
            return [
                entry.strip()
                for entry in candidate
                if isinstance(entry, str) and entry.strip()
            ]

        return []
    @staticmethod
    def _extract_title_from_result(result: Any) -> Optional[str]:
        # Prefer explicit title field.
@@ -219,6 +300,7 @@ class Get_Url(Cmdlet):
        """
        items: List[UrlItem] = []
        found_stores: Set[str] = set()
        MAX_RESULTS = 256
        try:
            storage = Store(config)
@@ -230,6 +312,8 @@ class Get_Url(Cmdlet):
                return items, list(found_stores)
            for store_name in store_names:
                if len(items) >= MAX_RESULTS:
                    break
                try:
                    backend = storage[store_name]
@@ -243,9 +327,12 @@ class Get_Url(Cmdlet):
                        has_wildcards = any(ch in raw_pattern for ch in ("*", "?"))
                        # If this is a Hydrus backend and the pattern is a single URL,
-                        # normalize it through the official API.
+                        # normalize it through the official API. Skip for bare domains.
                        normalized_url = None
-                        if not has_wildcards and hasattr(backend, "get_url_info"):
+                        looks_like_url = (
                            "://" in raw_pattern or raw_pattern.startswith("magnet:")
                        )
                        if not has_wildcards and looks_like_url and hasattr(backend, "get_url_info"):
                            try:
                                info = backend.get_url_info(raw_pattern)  # type: ignore[attr-defined]
                                if isinstance(info, dict):
@@ -255,13 +342,39 @@ class Get_Url(Cmdlet):
                            except Exception:
                                normalized_url = None
-                        search_query = "url:*" if has_wildcards else f"url:{normalized_url or raw_pattern}"
+                        target_pattern = normalized_url or raw_pattern
-                        try:
+                        if has_wildcards or not target_pattern:
-                            search_results = backend.search(search_query, limit=1000)
+                            search_query = "url:*"
-                        except Exception:
+                        else:
-                            search_results = []
+                            wrapped_pattern = f"*{target_pattern}*"
                            search_query = f"url:{wrapped_pattern}"
                        search_limit = max(1, min(MAX_RESULTS, 1000))
                        search_results = self._execute_search_with_timeout(
                            backend,
                            search_query,
                            search_limit,
                            store_name,
                            pattern_hint=target_pattern,
                        )
                        if search_results is None:
                            continue
                        search_results = search_results or []
                        if not search_results and target_pattern and not has_wildcards:
                            fallback_results = self._execute_search_with_timeout(
                                backend,
                                "url:*",
                                search_limit,
                                store_name,
                                pattern_hint=target_pattern,
                            )
                            if fallback_results is None:
                                continue
                            search_results = fallback_results or []
                        for hit in (search_results or []):
                            if len(items) >= MAX_RESULTS:
                                break
                            file_hash = None
                            if isinstance(hit, dict):
                                file_hash = hit.get("hash") or hit.get("file_hash")
@@ -271,25 +384,57 @@ class Get_Url(Cmdlet):
                            file_hash = str(file_hash)
                            title = title_cache.get(file_hash, "")
                            if not title:
                                try:
                                    title = (
                                        get_field(hit, "title")
                                        or get_field(hit, "name")
                                        or get_field(hit, "file_title")
                                        or ""
                                    )
                                except Exception:
                                    title = ""
                            if not title:
                                title = self._resolve_title_for_hash(backend, file_hash, hit)
-                                title_cache[file_hash] = title
+                            title_cache[file_hash] = title
                            size, ext = meta_cache.get(file_hash, (None, ""))
                            if size is None and not ext:
-                                size, ext = self._resolve_size_ext_for_hash(backend, file_hash, hit)
+                                try:
-                                meta_cache[file_hash] = (size, ext)
+                                    size = get_field(hit, "size")
                                    if size is None:
                                        size = get_field(hit, "size_bytes")
                                    if size is None:
                                        size = get_field(hit, "file_size")
                                    if size is None:
                                        size = get_field(hit, "filesize")
                                    size = int(size) if isinstance(size, (int, float)) else None
                                except Exception:
                                    size = None
-                            try:
+                                try:
-                                urls = backend.get_url(file_hash)
+                                    ext = get_field(hit, "ext") or get_field(hit, "extension")
-                            except Exception:
+                                    ext = str(ext).strip().lstrip(".") if isinstance(ext, str) else ""
-                                urls = []
+                                except Exception:
                                    ext = ""
                            if size is None and not ext:
                                size, ext = self._resolve_size_ext_for_hash(backend, file_hash, hit)
                            meta_cache[file_hash] = (size, ext)
                            urls = self._extract_urls_from_hit(hit)
                            if not urls:
                                try:
                                    urls = backend.get_url(file_hash)
                                except Exception:
                                    urls = []
                            for url in (urls or []):
                                if len(items) >= MAX_RESULTS:
                                    break
                                if not self._match_url_pattern(str(url), raw_pattern):
                                    continue
                                # Double-check it looks like a URL to avoid data leakage from dirty DBs
                                from SYS.metadata import normalize_urls
                                valid = normalize_urls([str(url)])
                                if not valid:
@@ -306,6 +451,8 @@ class Get_Url(Cmdlet):
                                    )
                                )
                            found_stores.add(str(store_name))
                            if len(items) >= MAX_RESULTS:
                                break
                    except Exception as exc:
                        debug(
                            f"Error searching store '{store_name}': {exc}",