This commit is contained in:
nose
2025-12-14 00:53:52 -08:00
parent 52a79b0086
commit a03eb0d1be
24 changed files with 2785 additions and 1868 deletions

View File

@@ -264,6 +264,170 @@ class HydrusNetwork(Store):
debug(f"Searching Hydrus for: {query}")
def _extract_urls(meta_obj: Any) -> list[str]:
if not isinstance(meta_obj, dict):
return []
raw = meta_obj.get("url")
if raw is None:
raw = meta_obj.get("urls")
if isinstance(raw, str):
val = raw.strip()
return [val] if val else []
if isinstance(raw, list):
out: list[str] = []
for item in raw:
if not isinstance(item, str):
continue
s = item.strip()
if s:
out.append(s)
return out
return []
def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
    """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
    # Fast path first: when any URL will do, try the "system:has url"
    # predicate in case this Hydrus version supports it.
    ids: list[int] = []
    try:
        if want_any:
            resp = client.search_files(
                tags=["system:has url"],
                return_hashes=False,
                return_file_ids=True,
                return_file_count=False,
            )
            raw_ids = resp.get("file_ids", []) if isinstance(resp, dict) else []
            if isinstance(raw_ids, list):
                ids = [
                    int(x)
                    for x in raw_ids
                    if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
                ]
    except Exception:
        # Predicate unsupported or request failed — fall through to the scan.
        ids = []
    if not ids:
        # Fallback: enumerate every file and filter client-side below.
        everything = client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=True,
            return_file_count=False,
        )
        raw_ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
        if isinstance(raw_ids, list):
            ids = [int(x) for x in raw_ids if isinstance(x, (int, float))]
    if not ids:
        return []
    needle = (url_value or "").strip().lower()
    matched: list[dict[str, Any]] = []
    step = 200  # fetch metadata in batches to bound request size
    for offset in range(0, len(ids), step):
        if len(matched) >= fetch_limit:
            break
        try:
            payload = client.fetch_file_metadata(
                file_ids=ids[offset : offset + step],
                include_file_url=True,
                include_service_keys_to_tags=True,
                include_duration=True,
                include_size=True,
                include_mime=True,
            )
        except Exception:
            # Best-effort: a failed batch is skipped, not fatal.
            continue
        entries = payload.get("metadata", []) if isinstance(payload, dict) else []
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            urls = _extract_urls(entry)
            if not urls:
                continue
            if want_any:
                matched.append(entry)
            elif needle and any(needle in u.lower() for u in urls):
                # Case-insensitive substring match against every known URL.
                matched.append(entry)
            if len(matched) >= fetch_limit:
                break
    return matched
# NOTE(review): indentation was lost in this extract; code lines are kept
# byte-identical and only comments are added.
query_lower = query.lower().strip()
# Special case: url:* and url:<value>
# metadata_list stays None unless one of the url: paths below fills it in;
# the regular tag-based search later only runs while it is still None.
metadata_list: list[dict[str, Any]] | None = None
if ":" in query_lower and not query_lower.startswith(":"):
# Split only on the first ':' so URL values containing ':' stay intact.
namespace, pattern = query_lower.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip()
if namespace == "url":
if not pattern or pattern == "*":
# url:* (or a bare "url:") means "any file that has a URL at all".
metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
else:
# Fast-path: exact URL via /add_url/get_url_files when a full URL is provided.
# NOTE(review): pattern comes from query_lower, so the URL has been
# lowercased — the exact-URL lookup may miss case-sensitive URLs; verify.
try:
if pattern.startswith("http://") or pattern.startswith("https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(method="GET", endpoint="/add_url/get_url_files", query={"url": pattern})
# NOTE(review): reaches into the client's private request method —
# confirm no public wrapper exists for this endpoint.
response = client._perform_request(spec) # type: ignore[attr-defined]
hashes: list[str] = []
file_ids: list[int] = []
if isinstance(response, dict):
# Hydrus responses vary; accept either "hashes" or "file_hashes".
raw_hashes = response.get("hashes") or response.get("file_hashes")
if isinstance(raw_hashes, list):
hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
try:
file_ids.append(int(item))
except (TypeError, ValueError):
continue
# Prefer file_ids over hashes when both are present.
if file_ids:
payload = client.fetch_file_metadata(
file_ids=file_ids,
include_file_url=True,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
elif hashes:
payload = client.fetch_file_metadata(
hashes=hashes,
include_file_url=True,
include_service_keys_to_tags=True,
include_duration=True,
include_size=True,
include_mime=True,
)
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
if isinstance(metas, list):
metadata_list = [m for m in metas if isinstance(m, dict)]
except Exception:
# Best-effort fast-path: any failure resets to None so the
# substring scan below still runs.
metadata_list = None
# Fallback: substring scan
if metadata_list is None:
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
# Parse the query into tags
# Handle both simple tags and complex queries
# "*" means "match all" - use system:everything tag in Hydrus
@@ -271,7 +435,6 @@ class HydrusNetwork(Store):
# Use system:everything to match all files in Hydrus
tags = ["system:everything"]
else:
query_lower = query.lower().strip()
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
# If query has explicit namespace, use it as a tag search
if ':' not in query_lower:
@@ -286,30 +449,36 @@ class HydrusNetwork(Store):
debug(f"Found 0 result(s)")
return []
# Search files with the tags
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
# Extract file IDs from search result
file_ids = search_result.get("file_ids", [])
hashes = search_result.get("hashes", [])
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
# Fetch metadata for the found files
# Search files with the tags (unless url: search already produced metadata)
results = []
query_lower = query.lower().strip()
# Split by comma or space for AND logic
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
if metadata_list is None:
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else []
hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(hashes=hashes)
metadata_list = metadata.get("metadata", [])
else:
metadata_list = []
if not isinstance(metadata_list, list):
metadata_list = []
for meta in metadata_list:
if len(results) >= limit: