This commit is contained in:
2026-01-17 21:32:44 -08:00
parent 193fa5aec3
commit 3f874af54a
4 changed files with 329 additions and 112 deletions

View File

@@ -1071,6 +1071,25 @@ class Folder(Store):
if namespace == "url":
pattern_hint = kwargs.get("pattern_hint")
def _pattern_candidates(raw: Any) -> List[str]:
if raw is None:
return []
if isinstance(raw, (list, tuple, set)):
out: List[str] = []
for item in raw:
text = str(item or "").strip()
if text and text not in out:
out.append(text)
return out
if isinstance(raw, str):
text = raw.strip()
return [text] if text else []
return []
pattern_candidates = _pattern_candidates(pattern_hint)
if len(pattern_candidates) > 200:
pattern_candidates = pattern_candidates[:200]
def _parse_url_value(raw: Any) -> list[str]:
if raw is None:
return []
@@ -1094,16 +1113,26 @@ class Folder(Store):
return []
def _matches_pattern(url_list: list[str]) -> bool:
if not pattern_hint:
if not pattern_candidates:
return True
for candidate_url in url_list:
if _match_url_pattern(candidate_url, pattern_hint):
return True
for pat in pattern_candidates:
if _match_url_pattern(candidate_url, pat):
return True
return False
if not pattern or pattern == "*":
debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
rows = api.get_files_with_any_url(limit)
if pattern_candidates:
debug(
f"[folder:{backend_label}] url search: any-url (limit={limit}) pattern_hint={len(pattern_candidates)}"
)
rows = api.get_files_by_url_like_any(
[_url_like_pattern(p) for p in pattern_candidates],
limit,
)
else:
debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
rows = api.get_files_with_any_url(limit)
else:
debug(
f"[folder:{backend_label}] url search: like={pattern} (limit={limit})"

View File

@@ -5,7 +5,7 @@ import sys
import tempfile
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import quote
@@ -516,7 +516,8 @@ class HydrusNetwork(Store):
url_value: str | None,
want_any: bool,
fetch_limit: int,
scan_limit: int | None = None
scan_limit: int | None = None,
needles: Optional[Sequence[str]] = None,
) -> list[dict[str, Any]]:
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
@@ -572,17 +573,29 @@ class HydrusNetwork(Store):
if not candidate_file_ids and not candidate_hashes:
return []
needle = (url_value or "").strip().lower()
needle_list: list[str] = []
if isinstance(needles, (list, tuple, set)):
for item in needles:
text = str(item or "").strip().lower()
if text and text not in needle_list:
needle_list.append(text)
if not needle_list:
needle = (url_value or "").strip().lower()
if needle:
needle_list = [needle]
chunk_size = 200
out: list[dict[str, Any]] = []
if scan_limit is None:
try:
if not want_any and url_value:
scan_limit = max(200, min(int(fetch_limit), 400))
if not want_any and needle_list:
if len(needle_list) > 1:
scan_limit = max(int(fetch_limit) * 20, 2000)
else:
scan_limit = max(200, min(int(fetch_limit), 400))
else:
scan_limit = max(int(fetch_limit) * 5, 1000)
except Exception:
scan_limit = 400 if (not want_any and url_value) else 1000
scan_limit = 400 if (not want_any and needle_list) else 1000
if scan_limit is not None:
scan_limit = min(int(scan_limit), 10000)
scanned = 0
@@ -641,9 +654,9 @@ class HydrusNetwork(Store):
if want_any:
out.append(meta)
continue
if not needle:
if not needle_list:
continue
if any(needle in u.lower() for u in urls):
if any(any(n in u.lower() for n in needle_list) for u in urls):
out.append(meta)
continue
@@ -698,18 +711,37 @@ class HydrusNetwork(Store):
# Special case: url:* and url:<value>
metadata_list: list[dict[str, Any]] | None = None
pattern_hint = str(kwargs.get("pattern_hint") or "").strip().lower()
pattern_hint_raw = kwargs.get("pattern_hint")
pattern_hints: list[str] = []
if isinstance(pattern_hint_raw, (list, tuple, set)):
for item in pattern_hint_raw:
text = str(item or "").strip().lower()
if text and text not in pattern_hints:
pattern_hints.append(text)
elif isinstance(pattern_hint_raw, str):
text = pattern_hint_raw.strip().lower()
if text:
pattern_hints.append(text)
pattern_hint = pattern_hints[0] if pattern_hints else ""
if ":" in query_lower and not query_lower.startswith(":"):
namespace, pattern = query_lower.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip()
if namespace == "url":
if not pattern or pattern == "*":
metadata_list = _iter_url_filtered_metadata(
None,
want_any=True,
fetch_limit=int(limit) if limit else 100
)
if pattern_hints:
metadata_list = _iter_url_filtered_metadata(
None,
want_any=False,
fetch_limit=int(limit) if limit else 100,
needles=pattern_hints,
)
else:
metadata_list = _iter_url_filtered_metadata(
None,
want_any=True,
fetch_limit=int(limit) if limit else 100
)
else:
def _clean_url_search_token(value: str | None) -> str:
token = str(value or "").strip().lower()
@@ -807,6 +839,7 @@ class HydrusNetwork(Store):
want_any=False,
fetch_limit=int(limit) if limit else 100,
scan_limit=scan_limit_override,
needles=pattern_hints if pattern_hints else None,
)
# Parse the query into tags