This commit is contained in:
2026-03-18 12:24:37 -07:00
parent b0e89ff950
commit 7c526784a8
6 changed files with 729 additions and 245 deletions

View File

@@ -684,6 +684,30 @@ class HydrusNetwork(Store):
continue
return ids_out, hashes_out
def _fetch_search_metadata(
    *,
    file_ids: Optional[Sequence[Any]] = None,
    hashes: Optional[Sequence[Any]] = None,
    include_tags: bool = True,
    include_urls: bool = True,
    include_mime: bool = True,
) -> list[dict[str, Any]]:
    """Fetch file metadata for search results without ever raising.

    Wraps ``client.fetch_file_metadata`` (``client`` is resolved from the
    enclosing scope) so every search path requests metadata the same way.
    Duration is never requested and size is always requested; tags, URLs
    and MIME are controlled by the keyword flags.

    Args:
        file_ids: File ids forwarded as ``file_ids``.
        hashes: File hashes forwarded as ``hashes``.
        include_tags: Forwarded as ``include_service_keys_to_tags``.
        include_urls: Forwarded as ``include_file_url``.
        include_mime: Forwarded as ``include_mime``.

    Returns:
        The ``"metadata"`` list from the response payload, or ``[]`` when
        the call fails, the payload is not a dict, or ``"metadata"`` is
        not a list.
    """
    try:
        payload = client.fetch_file_metadata(
            file_ids=file_ids,
            hashes=hashes,
            include_service_keys_to_tags=include_tags,
            include_file_url=include_urls,
            include_duration=False,
            include_size=True,
            include_mime=include_mime,
        )
    except Exception as exc:
        # Best-effort by design: callers treat an empty list as "nothing to
        # show". Log the failure so it is distinguishable from a real
        # empty result instead of vanishing silently.
        debug(f"hydrus metadata fetch failed: {exc}")
        return []

    if not isinstance(payload, dict):
        return []
    metadata = payload.get("metadata", [])
    return metadata if isinstance(metadata, list) else []
def _iter_url_filtered_metadata(
url_value: str | None,
want_any: bool,
@@ -927,6 +951,55 @@ class HydrusNetwork(Store):
return metas_out[:fetch_limit]
def _cap_metadata_candidates(
file_ids_in: list[int],
hashes_in: list[str],
*,
requested_limit: Any,
freeform_mode: bool = False,
fallback_scan: bool = False,
) -> tuple[list[int], list[str]]:
"""Cap metadata hydration to a sane subset of Hydrus hits.
Hydrus native tag search is fast, but fetching metadata for every
matched file can explode for broad queries. Keep the native search,
but only hydrate a bounded working set and let downstream filtering
stop once enough display rows are collected.
"""
try:
base_limit = int(requested_limit or 100)
except Exception:
base_limit = 100
if base_limit <= 0:
base_limit = 100
hydrate_limit = base_limit
if freeform_mode:
hydrate_limit = max(hydrate_limit * 4, 200)
if fallback_scan:
hydrate_limit = max(hydrate_limit * 2, 200)
hydrate_limit = min(hydrate_limit, 1000)
ids_out = list(file_ids_in or [])
hashes_out = list(hashes_in or [])
total_candidates = len(ids_out) + len(hashes_out)
if total_candidates <= hydrate_limit:
return ids_out, hashes_out
debug(
f"{prefix} limiting metadata hydration to {hydrate_limit} of {total_candidates} candidate(s)"
)
if ids_out:
ids_out = ids_out[:hydrate_limit]
remaining = max(0, hydrate_limit - len(ids_out))
hashes_out = hashes_out[:remaining] if remaining > 0 else []
else:
hashes_out = hashes_out[:hydrate_limit]
return ids_out, hashes_out
query_lower = query.lower().strip()
# Support `ext:<value>` anywhere in the query. We filter results by the
@@ -1172,7 +1245,7 @@ class HydrusNetwork(Store):
payloads.append(
client.search_files(
tags=title_predicates,
return_hashes=True,
return_hashes=False,
return_file_ids=True,
)
)
@@ -1187,7 +1260,7 @@ class HydrusNetwork(Store):
payloads.append(
client.search_files(
tags=[f"title:{query_lower}*"],
return_hashes=True,
return_hashes=False,
return_file_ids=True,
)
)
@@ -1198,7 +1271,7 @@ class HydrusNetwork(Store):
payloads.append(
client.search_files(
tags=freeform_predicates,
return_hashes=True,
return_hashes=False,
return_file_ids=True,
)
)
@@ -1206,15 +1279,12 @@ class HydrusNetwork(Store):
pass
id_set: set[int] = set()
hash_set: set[str] = set()
for payload in payloads:
ids_part, hashes_part = _extract_search_ids(payload)
ids_part, _ = _extract_search_ids(payload)
for fid in ids_part:
id_set.add(fid)
for hh in hashes_part:
hash_set.add(hh)
file_ids = list(id_set)
hashes = list(hash_set)
hashes = []
else:
if not tags:
debug(f"{prefix} 0 result(s)")
@@ -1222,10 +1292,11 @@ class HydrusNetwork(Store):
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_hashes=False,
return_file_ids=True
)
file_ids, hashes = _extract_search_ids(search_result)
file_ids, _ = _extract_search_ids(search_result)
hashes = []
# Fast path: ext-only search. Avoid fetching metadata for an unbounded
# system:everything result set; fetch in chunks until we have enough.
@@ -1242,21 +1313,13 @@ class HydrusNetwork(Store):
if len(results) >= limit:
break
chunk = file_ids[start:start + chunk_size]
try:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_service_keys_to_tags=True,
include_file_url=True,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception:
continue
metas = payload.get("metadata",
[]) if isinstance(payload,
dict) else []
if not isinstance(metas, list):
metas = _fetch_search_metadata(
file_ids=chunk,
include_tags=True,
include_urls=True,
include_mime=True,
)
if not metas:
continue
for meta in metas:
if len(results) >= limit:
@@ -1312,26 +1375,27 @@ class HydrusNetwork(Store):
debug(f"{prefix} 0 result(s)")
return []
file_ids, hashes = _cap_metadata_candidates(
file_ids,
hashes,
requested_limit=limit,
freeform_mode=freeform_union_search,
)
if file_ids:
metadata = client.fetch_file_metadata(
metadata_list = _fetch_search_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
include_file_url=True,
include_duration=True,
include_size=True,
include_tags=True,
include_urls=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(
metadata_list = _fetch_search_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
include_file_url=True,
include_duration=True,
include_size=True,
include_tags=True,
include_urls=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
else:
metadata_list = []
@@ -1341,31 +1405,34 @@ class HydrusNetwork(Store):
try:
search_result = client.search_files(
tags=["system:everything"],
return_hashes=True,
return_hashes=False,
return_file_ids=True,
)
file_ids, hashes = _extract_search_ids(search_result)
file_ids, _ = _extract_search_ids(search_result)
hashes = []
file_ids, hashes = _cap_metadata_candidates(
file_ids,
hashes,
requested_limit=limit,
freeform_mode=True,
fallback_scan=True,
)
if file_ids:
metadata = client.fetch_file_metadata(
metadata_list = _fetch_search_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
include_file_url=True,
include_duration=True,
include_size=True,
include_tags=True,
include_urls=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(
metadata_list = _fetch_search_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
include_file_url=True,
include_duration=True,
include_size=True,
include_tags=True,
include_urls=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
except Exception:
pass