f
This commit is contained in:
@@ -1071,6 +1071,25 @@ class Folder(Store):
|
||||
if namespace == "url":
|
||||
pattern_hint = kwargs.get("pattern_hint")
|
||||
|
||||
def _pattern_candidates(raw: Any) -> List[str]:
|
||||
if raw is None:
|
||||
return []
|
||||
if isinstance(raw, (list, tuple, set)):
|
||||
out: List[str] = []
|
||||
for item in raw:
|
||||
text = str(item or "").strip()
|
||||
if text and text not in out:
|
||||
out.append(text)
|
||||
return out
|
||||
if isinstance(raw, str):
|
||||
text = raw.strip()
|
||||
return [text] if text else []
|
||||
return []
|
||||
|
||||
pattern_candidates = _pattern_candidates(pattern_hint)
|
||||
if len(pattern_candidates) > 200:
|
||||
pattern_candidates = pattern_candidates[:200]
|
||||
|
||||
def _parse_url_value(raw: Any) -> list[str]:
|
||||
if raw is None:
|
||||
return []
|
||||
@@ -1094,16 +1113,26 @@ class Folder(Store):
|
||||
return []
|
||||
|
||||
def _matches_pattern(url_list: list[str]) -> bool:
    """Return True when any URL in *url_list* matches any pattern candidate.

    With no pattern candidates there is nothing to filter on, so every
    URL list is accepted.
    """
    if not pattern_candidates:
        return True
    # Only the normalized candidate list is consulted; the raw pattern_hint
    # may be a collection and must not be passed to _match_url_pattern.
    return any(
        _match_url_pattern(candidate_url, pat)
        for candidate_url in url_list
        for pat in pattern_candidates
    )
|
||||
|
||||
if not pattern or pattern == "*":
|
||||
debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
|
||||
rows = api.get_files_with_any_url(limit)
|
||||
if pattern_candidates:
|
||||
debug(
|
||||
f"[folder:{backend_label}] url search: any-url (limit={limit}) pattern_hint={len(pattern_candidates)}"
|
||||
)
|
||||
rows = api.get_files_by_url_like_any(
|
||||
[_url_like_pattern(p) for p in pattern_candidates],
|
||||
limit,
|
||||
)
|
||||
else:
|
||||
debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
|
||||
rows = api.get_files_with_any_url(limit)
|
||||
else:
|
||||
debug(
|
||||
f"[folder:{backend_label}] url search: like={pattern} (limit={limit})"
|
||||
|
||||
@@ -5,7 +5,7 @@ import sys
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -516,7 +516,8 @@ class HydrusNetwork(Store):
|
||||
url_value: str | None,
|
||||
want_any: bool,
|
||||
fetch_limit: int,
|
||||
scan_limit: int | None = None
|
||||
scan_limit: int | None = None,
|
||||
needles: Optional[Sequence[str]] = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
||||
|
||||
@@ -572,17 +573,29 @@ class HydrusNetwork(Store):
|
||||
if not candidate_file_ids and not candidate_hashes:
|
||||
return []
|
||||
|
||||
needle = (url_value or "").strip().lower()
|
||||
needle_list: list[str] = []
|
||||
if isinstance(needles, (list, tuple, set)):
|
||||
for item in needles:
|
||||
text = str(item or "").strip().lower()
|
||||
if text and text not in needle_list:
|
||||
needle_list.append(text)
|
||||
if not needle_list:
|
||||
needle = (url_value or "").strip().lower()
|
||||
if needle:
|
||||
needle_list = [needle]
|
||||
chunk_size = 200
|
||||
out: list[dict[str, Any]] = []
|
||||
if scan_limit is None:
|
||||
try:
|
||||
if not want_any and url_value:
|
||||
scan_limit = max(200, min(int(fetch_limit), 400))
|
||||
if not want_any and needle_list:
|
||||
if len(needle_list) > 1:
|
||||
scan_limit = max(int(fetch_limit) * 20, 2000)
|
||||
else:
|
||||
scan_limit = max(200, min(int(fetch_limit), 400))
|
||||
else:
|
||||
scan_limit = max(int(fetch_limit) * 5, 1000)
|
||||
except Exception:
|
||||
scan_limit = 400 if (not want_any and url_value) else 1000
|
||||
scan_limit = 400 if (not want_any and needle_list) else 1000
|
||||
if scan_limit is not None:
|
||||
scan_limit = min(int(scan_limit), 10000)
|
||||
scanned = 0
|
||||
@@ -641,9 +654,9 @@ class HydrusNetwork(Store):
|
||||
if want_any:
|
||||
out.append(meta)
|
||||
continue
|
||||
if not needle:
|
||||
if not needle_list:
|
||||
continue
|
||||
if any(needle in u.lower() for u in urls):
|
||||
if any(any(n in u.lower() for n in needle_list) for u in urls):
|
||||
out.append(meta)
|
||||
continue
|
||||
|
||||
@@ -698,18 +711,37 @@ class HydrusNetwork(Store):
|
||||
|
||||
# Special case: url:* and url:<value>
|
||||
metadata_list: list[dict[str, Any]] | None = None
|
||||
pattern_hint = str(kwargs.get("pattern_hint") or "").strip().lower()
|
||||
pattern_hint_raw = kwargs.get("pattern_hint")
|
||||
pattern_hints: list[str] = []
|
||||
if isinstance(pattern_hint_raw, (list, tuple, set)):
|
||||
for item in pattern_hint_raw:
|
||||
text = str(item or "").strip().lower()
|
||||
if text and text not in pattern_hints:
|
||||
pattern_hints.append(text)
|
||||
elif isinstance(pattern_hint_raw, str):
|
||||
text = pattern_hint_raw.strip().lower()
|
||||
if text:
|
||||
pattern_hints.append(text)
|
||||
pattern_hint = pattern_hints[0] if pattern_hints else ""
|
||||
if ":" in query_lower and not query_lower.startswith(":"):
|
||||
namespace, pattern = query_lower.split(":", 1)
|
||||
namespace = namespace.strip().lower()
|
||||
pattern = pattern.strip()
|
||||
if namespace == "url":
|
||||
if not pattern or pattern == "*":
|
||||
metadata_list = _iter_url_filtered_metadata(
|
||||
None,
|
||||
want_any=True,
|
||||
fetch_limit=int(limit) if limit else 100
|
||||
)
|
||||
if pattern_hints:
|
||||
metadata_list = _iter_url_filtered_metadata(
|
||||
None,
|
||||
want_any=False,
|
||||
fetch_limit=int(limit) if limit else 100,
|
||||
needles=pattern_hints,
|
||||
)
|
||||
else:
|
||||
metadata_list = _iter_url_filtered_metadata(
|
||||
None,
|
||||
want_any=True,
|
||||
fetch_limit=int(limit) if limit else 100
|
||||
)
|
||||
else:
|
||||
def _clean_url_search_token(value: str | None) -> str:
|
||||
token = str(value or "").strip().lower()
|
||||
@@ -807,6 +839,7 @@ class HydrusNetwork(Store):
|
||||
want_any=False,
|
||||
fetch_limit=int(limit) if limit else 100,
|
||||
scan_limit=scan_limit_override,
|
||||
needles=pattern_hints if pattern_hints else None,
|
||||
)
|
||||
|
||||
# Parse the query into tags
|
||||
|
||||
Reference in New Issue
Block a user