commit a03eb0d1be (parent 52a79b0086)
Author: nose
Date:   2025-12-14 00:53:52 -08:00

24 changed files with 2785 additions and 1868 deletions

View File

@@ -30,6 +30,8 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
return _normalize_hash(file_path.stem)
class Folder(Store):
""""""
# Track which locations have already been migrated to avoid repeated migrations
@@ -359,6 +361,17 @@ class Folder(Store):
else:
shutil.copy2(str(file_path), str(save_file))
debug(f"Local copy: {save_file}", file=sys.stderr)
# Best-effort: capture duration for media
duration_value: float | None = None
try:
from SYS.utils import ffprobe
probe = ffprobe(str(save_file))
duration = probe.get("duration")
if isinstance(duration, (int, float)) and duration > 0:
duration_value = float(duration)
except Exception:
duration_value = None
# Save to database
with API_folder_store(Path(self._location)) as db:
@@ -368,7 +381,8 @@ class Folder(Store):
db.save_metadata(save_file, {
'hash': file_hash,
'ext': ext_clean,
'size': file_path.stat().st_size
'size': file_path.stat().st_size,
'duration': duration_value,
})
# Add tags if provided
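The duration capture above relies on a `SYS.utils.ffprobe` helper returning a dict with a numeric `duration`. As a rough sketch of the contract being assumed (the project's actual helper may differ and return more fields), an equivalent could be built on the ffprobe CLI's JSON output:

# Illustrative sketch only -- not part of this commit. Assumes the ffprobe CLI is on PATH;
# the real SYS.utils.ffprobe may behave differently.
import json
import subprocess

def ffprobe_sketch(path: str) -> dict:
    """Return a small metadata dict with a float 'duration' when ffprobe can determine one."""
    proc = subprocess.run(
        ["ffprobe", "-v", "error", "-print_format", "json", "-show_format", path],
        capture_output=True, text=True, check=True,
    )
    fmt = json.loads(proc.stdout).get("format", {}) or {}
    out: dict = {}
    try:
        # ffprobe reports duration as a string, e.g. "12.345"
        out["duration"] = float(fmt["duration"])
    except (KeyError, TypeError, ValueError):
        pass
    return out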
@@ -405,6 +419,21 @@ class Folder(Store):
results = []
search_dir = Path(self._location).expanduser()
def _url_like_pattern(value: str) -> str:
# Interpret user patterns as substring matches (with optional glob wildcards).
v = (value or "").strip().lower()
if not v or v == "*":
return "%"
v = v.replace("%", "\\%").replace("_", "\\_")
v = v.replace("*", "%").replace("?", "_")
if "%" not in v and "_" not in v:
return f"%{v}%"
if not v.startswith("%"):
v = "%" + v
if not v.endswith("%"):
v = v + "%"
return v
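For reference, the helper above maps user input to SQL LIKE patterns roughly as follows (a behavior sketch, assuming the backing query pairs the escaped `%`/`_` with a matching ESCAPE clause):

# Expected translations (illustrative, not part of the commit):
#   _url_like_pattern("")        -> "%"          (empty / "*" matches any URL)
#   _url_like_pattern("*")       -> "%"
#   _url_like_pattern("youtube") -> "%youtube%"  (plain text becomes a substring match)
#   _url_like_pattern("*.mp4")   -> "%.mp4%"     (glob wildcards become LIKE wildcards)
#   _url_like_pattern("a_b")     -> "%a\_b%"     (literal '_' escaped so LIKE treats it literally)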
tokens = [t.strip() for t in query.split(',') if t.strip()]
if not match_all and len(tokens) == 1 and _normalize_hash(query):
@@ -453,6 +482,8 @@ class Folder(Store):
try:
with DatabaseAPI(search_dir) as api:
if tokens and len(tokens) > 1:
url_fetch_limit = (limit or 45) * 50
def _like_pattern(term: str) -> str:
return term.replace('*', '%').replace('?', '_')
@@ -473,6 +504,11 @@ class Folder(Store):
h = api.get_file_hash_by_hash(normalized_hash)
return {h} if h else set()
if namespace == 'url':
if not pattern or pattern == '*':
return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)
if namespace == 'store':
if pattern not in {'local', 'file', 'filesystem'}:
return set()
@@ -562,6 +598,29 @@ class Folder(Store):
if limit is not None and len(results) >= limit:
return results
return results
if namespace == "url":
if not pattern or pattern == "*":
rows = api.get_files_with_any_url(limit)
else:
rows = api.get_files_by_url_like(_url_like_pattern(pattern), limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
query_pattern = f"{namespace}:%"
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
@@ -592,126 +651,59 @@ class Folder(Store):
if limit is not None and len(results) >= limit:
return results
elif not match_all:
# Strict tag-based search only (no filename/path searching).
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
fetch_limit = (limit or 45) * 50
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit)
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
word_regex = None
if len(terms) == 1:
term = terms[0]
has_wildcard = '*' in term or '?' in term
if has_wildcard:
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
try:
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes, file_hash in rows:
if not file_path_str or file_path_str in seen_files:
continue
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
# AND semantics across terms: each term must match at least one tag.
hits: dict[str, dict[str, Any]] = {}
for term in terms:
tag_pattern = f"%{term}%"
term_rows = api.get_files_by_namespace_pattern(tag_pattern, fetch_limit)
for file_hash, file_path_str, size_bytes, ext in term_rows:
if not file_path_str:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
entry = hits.get(file_hash)
if entry:
entry["count"] += 1
if size_bytes is not None:
entry["size"] = size_bytes
else:
hits[file_hash] = {
"path": file_path_str,
"size": size_bytes,
"hash": file_hash,
"count": 1,
}
if terms:
title_hits: dict[str, dict[str, Any]] = {}
for term in terms:
title_pattern = f"title:%{term}%"
title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit)
for file_hash, file_path_str, size_bytes, ext in title_rows:
if not file_path_str:
continue
entry = title_hits.get(file_hash)
if entry:
entry["count"] += 1
if size_bytes is not None:
entry["size"] = size_bytes
else:
title_hits[file_hash] = {
"path": file_path_str,
"size": size_bytes,
"hash": file_hash,
"count": 1,
}
if title_hits:
required = len(terms)
for file_hash, info in title_hits.items():
if info.get("count") != required:
continue
file_path_str = info.get("path")
if not file_path_str or file_path_str in seen_files:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
seen_files.add(file_path_str)
size_bytes = info.get("size")
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
results.append(entry)
if limit is not None and len(results) >= limit:
return results
query_pattern = f"%{query_lower}%"
tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit)
for file_hash, file_path_str, size_bytes, ext in tag_rows:
required = len(terms)
seen_files: set[str] = set()
for file_hash, info in hits.items():
if info.get("count") != required:
continue
file_path_str = info.get("path")
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
if not file_path.exists():
continue
seen_files.add(file_path_str)
size_bytes = info.get("size")
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry_obj = _create_entry(file_path, tags, size_bytes, info.get("hash"))
results.append(entry_obj)
if limit is not None and len(results) >= limit:
break
else:
rows = api.get_all_files(limit)
@@ -726,10 +718,8 @@ class Folder(Store):
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if results:
debug(f"Returning {len(results)} results from DB")
else:
debug("No results found in DB")
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
debug(f"[folder:{backend_label}] {len(results)} result(s)")
return results
except Exception as e:
@@ -938,9 +928,11 @@ class Folder(Store):
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
return list(meta.get("url") or [])
urls = normalize_urls(meta.get("url"))
return urls
except Exception as exc:
debug(f"Local DB get_metadata failed: {exc}")
return []
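The calls above treat `metadata.normalize_urls` as accepting `None`, a single string, or a list and returning a clean list of URL strings. A minimal sketch of that assumed behavior (the real helper in `metadata` may normalize more aggressively):

# Illustrative sketch of the behavior relied on above -- not the actual metadata.normalize_urls.
from typing import Any

def normalize_urls_sketch(value: Any) -> list[str]:
    """Coerce None / str / iterable of str into a de-duplicated list of stripped URLs."""
    if value is None:
        return []
    if isinstance(value, str):
        value = [value]
    out: list[str] = []
    seen: set[str] = set()
    for item in value:
        if not isinstance(item, str):
            continue
        s = item.strip()
        if s and s not in seen:
            seen.add(s)
            out.append(s)
    return out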
@@ -955,11 +947,13 @@ class Folder(Store):
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = list(meta.get("url") or [])
existing_urls = normalize_urls(meta.get("url"))
incoming_urls = normalize_urls(url)
changed = False
for u in list(url or []):
for u in list(incoming_urls or []):
if not u:
continue
if u not in existing_urls:
@@ -982,10 +976,11 @@ class Folder(Store):
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = list(meta.get("url") or [])
remove_set = {u for u in (url or []) if u}
existing_urls = normalize_urls(meta.get("url"))
remove_set = {u for u in normalize_urls(url) if u}
if not remove_set:
return False
new_urls = [u for u in existing_urls if u not in remove_set]

View File

@@ -264,6 +264,170 @@ class HydrusNetwork(Store):
debug(f"Searching Hydrus for: {query}")
def _extract_urls(meta_obj: Any) -> list[str]:
if not isinstance(meta_obj, dict):
return []
raw = meta_obj.get("url")
if raw is None:
raw = meta_obj.get("urls")
if isinstance(raw, str):
val = raw.strip()
return [val] if val else []
if isinstance(raw, list):
out: list[str] = []
for item in raw:
if not isinstance(item, str):
continue
s = item.strip()
if s:
out.append(s)
return out
return []
def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
# First try a fast system predicate if Hydrus supports it.
candidate_file_ids: list[int] = []
try:
if want_any:
predicate = "system:has url"
url_search = client.search_files(
tags=[predicate],
return_hashes=False,
return_file_ids=True,
return_file_count=False,
)
ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
if isinstance(ids, list):
candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float, str)) and str(x).strip().isdigit()]
except Exception:
candidate_file_ids = []
if not candidate_file_ids:
# Fallback: scan from system:everything and filter by URL substring.
everything = client.search_files(
tags=["system:everything"],
return_hashes=False,
return_file_ids=True,
return_file_count=False,
)
ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
if isinstance(ids, list):
candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float))]
if not candidate_file_ids:
return []
needle = (url_value or "").strip().lower()
chunk_size = 200
out: list[dict[str, Any]] = []
for start in range(0, len(candidate_file_ids), chunk_size):
if len(out) >= fetch_limit:
break
chunk = candidate_file_ids[start : start + chunk_size]
try:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_file_url=True,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception:
continue
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
if not isinstance(metas, list):
continue
for meta in metas:
if not isinstance(meta, dict):
continue
urls = _extract_urls(meta)
if not urls:
continue
if want_any:
out.append(meta)
if len(out) >= fetch_limit:
break
continue
if not needle:
continue
if any(needle in u.lower() for u in urls):
out.append(meta)
if len(out) >= fetch_limit:
break
return out
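The two modes of the scanner above correspond to the query forms handled below; as a sketch of how it ends up being called (local names here are just for illustration):

# url:*       -> any file that has at least one known URL
metas_any = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=100)
# url:youtube -> files whose stored URLs contain the substring "youtube"
metas_sub = _iter_url_filtered_metadata("youtube", want_any=False, fetch_limit=100)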
query_lower = query.lower().strip()
# Special case: url:* and url:<value>
metadata_list: list[dict[str, Any]] | None = None
if ":" in query_lower and not query_lower.startswith(":"):
namespace, pattern = query_lower.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip()
if namespace == "url":
if not pattern or pattern == "*":
metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
else:
# Fast-path: exact URL via /add_url/get_url_files when a full URL is provided.
try:
if pattern.startswith("http://") or pattern.startswith("https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(method="GET", endpoint="/add_url/get_url_files", query={"url": pattern})
response = client._perform_request(spec) # type: ignore[attr-defined]
hashes: list[str] = []
file_ids: list[int] = []
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get("file_hashes")
if isinstance(raw_hashes, list):
hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
try:
file_ids.append(int(item))
except (TypeError, ValueError):
continue
if file_ids:
payload = client.fetch_file_metadata(
file_ids=file_ids,
include_file_url=True,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
elif hashes:
payload = client.fetch_file_metadata(
hashes=hashes,
include_file_url=True,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
except Exception:
metadata_list = None
# Fallback: substring scan
if metadata_list is None:
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
# Parse the query into tags
# Handle both simple tags and complex queries
# "*" means "match all" - use system:everything tag in Hydrus
@@ -271,7 +435,6 @@ class HydrusNetwork(Store):
# Use system:everything to match all files in Hydrus
tags = ["system:everything"]
else:
query_lower = query.lower().strip()
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
# If query has explicit namespace, use it as a tag search
if ':' not in query_lower:
@@ -286,30 +449,36 @@ class HydrusNetwork(Store):
debug(f"Found 0 result(s)")
return []
# Search files with the tags
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
# Extract file IDs from search result
file_ids = search_result.get("file_ids", [])
hashes = search_result.get("hashes", [])
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
# Fetch metadata for the found files
# Search files with the tags (unless url: search already produced metadata)
results = []
query_lower = query.lower().strip()
# Split by comma or space for AND logic
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
if metadata_list is None:
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else []
hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(hashes=hashes)
metadata_list = metadata.get("metadata", [])
else:
metadata_list = []
if not isinstance(metadata_list, list):
metadata_list = []
for meta in metadata_list:
if len(results) >= limit:

View File

@@ -119,6 +119,37 @@ class Store:
self._backend_errors: Dict[str, str] = {}
self._load_backends()
def _maybe_register_temp_alias(self, store_type: str, backend_name: str, kwargs: Dict[str, Any], backend: BaseStore) -> None:
"""If a folder backend points at config['temp'], also expose it as the 'temp' backend.
This keeps config compatibility (e.g. existing 'default') while presenting the temp
directory under a clearer name.
"""
try:
if _normalize_store_type(store_type) != "folder":
return
temp_value = self._config.get("temp")
if not temp_value:
return
path_value = kwargs.get("PATH") or kwargs.get("path")
if not path_value:
return
temp_path = Path(str(temp_value)).expanduser().resolve()
backend_path = Path(str(path_value)).expanduser().resolve()
if backend_path != temp_path:
return
# If the user already has a dedicated temp backend, do nothing.
if "temp" in self._backends:
return
# Keep original name working, but add an alias.
if backend_name != "temp":
self._backends["temp"] = backend
except Exception:
return
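A sketch of the config shape this alias logic expects (keys and nesting here are hypothetical, matching only the lookups visible above): a folder backend whose PATH resolves to the top-level `temp` entry also becomes reachable as the `temp` backend.

# Hypothetical config illustrating when the alias is registered -- not from the repository.
config = {
    "temp": "~/downloads/tmp",
    "store": {
        "default": {                      # folder backend named "default"
            "PATH": "~/downloads/tmp",    # resolves to the same directory as config["temp"]
        },
    },
}
# After _load_backends(), both names point at the same backend instance:
#   store["default"] is store["temp"]  ->  True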
def _load_backends(self) -> None:
store_cfg = self._config.get("store")
if not isinstance(store_cfg, dict):
@@ -161,6 +192,9 @@ class Store:
backend_name = str(kwargs.get("NAME") or instance_name)
self._backends[backend_name] = backend
# If this is the configured temp directory, also alias it as 'temp'.
self._maybe_register_temp_alias(store_type, backend_name, kwargs, backend)
except Exception as exc:
err_text = str(exc)
self._backend_errors[str(instance_name)] = err_text
@@ -177,11 +211,24 @@ class Store:
return sorted(self._backends.keys())
def list_searchable_backends(self) -> list[str]:
searchable: list[str] = []
# De-duplicate backends by instance (aliases can point at the same object).
def _rank(name: str) -> int:
n = str(name or "").strip().lower()
if n == "temp":
return 0
if n == "default":
return 2
return 1
chosen: Dict[int, str] = {}
for name, backend in self._backends.items():
if type(backend).search is not BaseStore.search:
searchable.append(name)
return sorted(searchable)
if type(backend).search is BaseStore.search:
continue
key = id(backend)
prev = chosen.get(key)
if prev is None or _rank(name) < _rank(prev):
chosen[key] = name
return sorted(chosen.values())
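The ranking means that when several names alias the same backend object, 'temp' is preferred and 'default' is the last resort; an illustrative walkthrough (names here are hypothetical):

# With self._backends == {"default": b, "temp": b, "archive": c} (b and c both searchable),
# the de-duplication keeps one name per backend instance:
#   id(b): "temp" wins over "default"  (_rank 0 < 2)
#   id(c): "archive"                   (_rank 1)
# list_searchable_backends() -> ["archive", "temp"]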
def __getitem__(self, backend_name: str) -> BaseStore:
if backend_name not in self._backends: