huy

2026-03-18 12:24:37 -07:00
parent b0e89ff950
commit 7c526784a8
6 changed files with 729 additions and 245 deletions
@@ -684,6 +684,30 @@ class HydrusNetwork(Store):
                            continue
                return ids_out, hashes_out

+            def _fetch_search_metadata(
+                *,
+                file_ids: Optional[Sequence[Any]] = None,
+                hashes: Optional[Sequence[Any]] = None,
+                include_tags: bool = True,
+                include_urls: bool = True,
+                include_mime: bool = True,
+            ) -> list[dict[str, Any]]:
+                """Fetch file metadata via the client, returning [] on any failure.
+
+                Thin wrapper around ``client.fetch_file_metadata`` that always
+                passes ``include_size=True`` and ``include_duration=False``.
+
+                Args:
+                    file_ids: Forwarded unchanged as ``file_ids``.
+                    hashes: Forwarded unchanged as ``hashes``.
+                    include_tags: Forwarded as ``include_service_keys_to_tags``.
+                    include_urls: Forwarded as ``include_file_url``.
+                    include_mime: Forwarded as ``include_mime``.
+
+                Returns:
+                    The ``"metadata"`` value of the response when the response
+                    is a dict and that value is a list; otherwise an empty
+                    list. Individual entries are not validated here.
+                """
+                try:
+                    payload = client.fetch_file_metadata(
+                        file_ids=file_ids,
+                        hashes=hashes,
+                        include_service_keys_to_tags=include_tags,
+                        include_file_url=include_urls,
+                        include_duration=False,
+                        include_size=True,
+                        include_mime=include_mime,
+                    )
+                except Exception:
+                    # Best-effort: any client error is reported to the caller
+                    # as "no metadata" rather than raised.
+                    return []
+
+                # Guard against unexpected response shapes (non-dict payload,
+                # non-list "metadata") so callers can always iterate the result.
+                metadata = payload.get("metadata", []) if isinstance(payload, dict) else []
+                return metadata if isinstance(metadata, list) else []
+
            def _iter_url_filtered_metadata(
                url_value: str | None,
                want_any: bool,
@@ -927,6 +951,55 @@ class HydrusNetwork(Store):

                return metas_out[:fetch_limit]

+            def _cap_metadata_candidates(
+                file_ids_in: list[int],
+                hashes_in: list[str],
+                *,
+                requested_limit: Any,
+                freeform_mode: bool = False,
+                fallback_scan: bool = False,
+            ) -> tuple[list[int], list[str]]:
+                """Cap metadata hydration to a sane subset of Hydrus hits.
+
+                Hydrus native tag search is fast, but fetching metadata for every
+                matched file can explode for broad queries. Keep the native search,
+                but only hydrate a bounded working set and let downstream filtering
+                stop once enough display rows are collected.
+
+                Args:
+                    file_ids_in: Candidate file ids; a falsy value is treated
+                        as an empty list.
+                    hashes_in: Candidate hashes; a falsy value is treated as an
+                        empty list.
+                    requested_limit: Caller's result limit. Falsy, values that
+                        ``int()`` rejects, and non-positive values all fall
+                        back to 100.
+                    freeform_mode: When true, widen the budget to
+                        ``max(limit * 4, 200)``.
+                    fallback_scan: When true, widen the (possibly already
+                        widened) budget to ``max(limit * 2, 200)``.
+
+                Returns:
+                    ``(ids, hashes)`` as new lists whose combined length does
+                    not exceed the budget (itself capped at 1000). File ids are
+                    kept first; hashes only fill whatever budget remains.
+                """
+
+                # Normalise the requested limit to a positive int, defaulting to 100.
+                try:
+                    base_limit = int(requested_limit or 100)
+                except Exception:
+                    base_limit = 100
+                if base_limit <= 0:
+                    base_limit = 100
+
+                # Widen the working set for the modes flagged by the caller,
+                # then apply the hard ceiling of 1000 candidates.
+                hydrate_limit = base_limit
+                if freeform_mode:
+                    hydrate_limit = max(hydrate_limit * 4, 200)
+                if fallback_scan:
+                    hydrate_limit = max(hydrate_limit * 2, 200)
+                hydrate_limit = min(hydrate_limit, 1000)
+
+                # Copy the inputs so the caller's lists are never mutated.
+                ids_out = list(file_ids_in or [])
+                hashes_out = list(hashes_in or [])
+                total_candidates = len(ids_out) + len(hashes_out)
+                if total_candidates <= hydrate_limit:
+                    return ids_out, hashes_out
+
+                debug(
+                    f"{prefix} limiting metadata hydration to {hydrate_limit} of {total_candidates} candidate(s)"
+                )
+
+                # File ids take priority; hashes get only the leftover budget.
+                if ids_out:
+                    ids_out = ids_out[:hydrate_limit]
+                    remaining = max(0, hydrate_limit - len(ids_out))
+                    hashes_out = hashes_out[:remaining] if remaining > 0 else []
+                else:
+                    hashes_out = hashes_out[:hydrate_limit]
+
+                return ids_out, hashes_out
+
            query_lower = query.lower().strip()

            # Support `ext:<value>` anywhere in the query. We filter results by the
@@ -1172,7 +1245,7 @@ class HydrusNetwork(Store):
                        payloads.append(
                            client.search_files(
                                tags=title_predicates,
-                                return_hashes=True,
+                                return_hashes=False,
                                return_file_ids=True,
                            )
                        )
@@ -1187,7 +1260,7 @@ class HydrusNetwork(Store):
                                payloads.append(
                                    client.search_files(
                                        tags=[f"title:{query_lower}*"],
-                                        return_hashes=True,
+                                        return_hashes=False,
                                        return_file_ids=True,
                                    )
                                )
@@ -1198,7 +1271,7 @@ class HydrusNetwork(Store):
                        payloads.append(
                            client.search_files(
                                tags=freeform_predicates,
-                                return_hashes=True,
+                                return_hashes=False,
                                return_file_ids=True,
                            )
                        )
@@ -1206,15 +1279,12 @@ class HydrusNetwork(Store):
                        pass

                    id_set: set[int] = set()
-                    hash_set: set[str] = set()
                    for payload in payloads:
-                        ids_part, hashes_part = _extract_search_ids(payload)
+                        ids_part, _ = _extract_search_ids(payload)
                        for fid in ids_part:
                            id_set.add(fid)
-                        for hh in hashes_part:
-                            hash_set.add(hh)
                    file_ids = list(id_set)
-                    hashes = list(hash_set)
+                    hashes = []
                else:
                    if not tags:
                        debug(f"{prefix} 0 result(s)")
@@ -1222,10 +1292,11 @@ class HydrusNetwork(Store):

                    search_result = client.search_files(
                        tags=tags,
-                        return_hashes=True,
+                        return_hashes=False,
                        return_file_ids=True
                    )
-                    file_ids, hashes = _extract_search_ids(search_result)
+                    file_ids, _ = _extract_search_ids(search_result)
+                    hashes = []

                # Fast path: ext-only search. Avoid fetching metadata for an unbounded
                # system:everything result set; fetch in chunks until we have enough.
@@ -1242,21 +1313,13 @@ class HydrusNetwork(Store):
                            if len(results) >= limit:
                                break
                            chunk = file_ids[start:start + chunk_size]
-                            try:
-                                payload = client.fetch_file_metadata(
-                                    file_ids=chunk,
-                                    include_service_keys_to_tags=True,
-                                    include_file_url=True,
-                                    include_duration=True,
-                                    include_size=True,
-                                    include_mime=True,
-                                )
-                            except Exception:
-                                continue
-                            metas = payload.get("metadata",
-                                                []) if isinstance(payload,
-                                                                  dict) else []
-                            if not isinstance(metas, list):
+                            metas = _fetch_search_metadata(
+                                file_ids=chunk,
+                                include_tags=True,
+                                include_urls=True,
+                                include_mime=True,
+                            )
+                            if not metas:
                                continue
                            for meta in metas:
                                if len(results) >= limit:
@@ -1312,26 +1375,27 @@ class HydrusNetwork(Store):
                    debug(f"{prefix} 0 result(s)")
                    return []

+                file_ids, hashes = _cap_metadata_candidates(
+                    file_ids,
+                    hashes,
+                    requested_limit=limit,
+                    freeform_mode=freeform_union_search,
+                )
+
                if file_ids:
-                    metadata = client.fetch_file_metadata(
+                    metadata_list = _fetch_search_metadata(
                        file_ids=file_ids,
-                        include_service_keys_to_tags=True,
-                        include_file_url=True,
-                        include_duration=True,
-                        include_size=True,
+                        include_tags=True,
+                        include_urls=True,
                        include_mime=True,
                    )
-                    metadata_list = metadata.get("metadata", [])
                elif hashes:
-                    metadata = client.fetch_file_metadata(
+                    metadata_list = _fetch_search_metadata(
                        hashes=hashes,
-                        include_service_keys_to_tags=True,
-                        include_file_url=True,
-                        include_duration=True,
-                        include_size=True,
+                        include_tags=True,
+                        include_urls=True,
                        include_mime=True,
                    )
-                    metadata_list = metadata.get("metadata", [])
                else:
                    metadata_list = []

@@ -1341,31 +1405,34 @@ class HydrusNetwork(Store):
                    try:
                        search_result = client.search_files(
                            tags=["system:everything"],
-                            return_hashes=True,
+                            return_hashes=False,
                            return_file_ids=True,
                        )
-                        file_ids, hashes = _extract_search_ids(search_result)
+                        file_ids, _ = _extract_search_ids(search_result)
+                        hashes = []
+
+                        file_ids, hashes = _cap_metadata_candidates(
+                            file_ids,
+                            hashes,
+                            requested_limit=limit,
+                            freeform_mode=True,
+                            fallback_scan=True,
+                        )

                        if file_ids:
-                            metadata = client.fetch_file_metadata(
+                            metadata_list = _fetch_search_metadata(
                                file_ids=file_ids,
-                                include_service_keys_to_tags=True,
-                                include_file_url=True,
-                                include_duration=True,
-                                include_size=True,
+                                include_tags=True,
+                                include_urls=True,
                                include_mime=True,
                            )
-                            metadata_list = metadata.get("metadata", [])
                        elif hashes:
-                            metadata = client.fetch_file_metadata(
+                            metadata_list = _fetch_search_metadata(
                                hashes=hashes,
-                                include_service_keys_to_tags=True,
-                                include_file_url=True,
-                                include_duration=True,
-                                include_size=True,
+                                include_tags=True,
+                                include_urls=True,
                                include_mime=True,
                            )
-                            metadata_list = metadata.get("metadata", [])
                    except Exception:
                        pass