h
This commit is contained in:
@@ -5,7 +5,7 @@ import sys
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
|
||||
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -455,6 +455,7 @@ class HydrusNetwork(Store):
|
||||
"""
|
||||
limit = kwargs.get("limit", 100)
|
||||
minimal = bool(kwargs.get("minimal", False))
|
||||
url_only = bool(kwargs.get("url_only", False))
|
||||
|
||||
try:
|
||||
client = self._client
|
||||
@@ -676,6 +677,86 @@ class HydrusNetwork(Store):
|
||||
|
||||
return out
|
||||
|
||||
def _search_url_query_metadata(
    url_query: str,
    fetch_limit: int,
    *,
    minimal: bool = False,
) -> list[dict[str, Any]]:
    """Run a strict url:<pattern> search without falling back to system predicates.

    The query is handed to ``client.search_files`` as the only tag, and the
    metadata of the matching files is then fetched in chunks until
    ``fetch_limit`` records have been collected.

    Args:
        url_query: Query string passed verbatim as the single search tag.
        fetch_limit: Maximum number of metadata records to return.
        minimal: When true, file size is not requested with the metadata.

    Returns:
        Up to ``fetch_limit`` metadata dicts. The list is empty when the
        query is blank, the limit is not positive, the search call raises,
        or the search yields no candidates.
    """
    # Nothing can be returned for a blank query or a non-positive limit, so
    # skip the search request entirely.
    if not url_query or fetch_limit <= 0:
        return []

    try:
        payload = client.search_files(
            tags=[url_query],
            return_hashes=True,
            return_file_ids=True,
        )
    except Exception:
        # Best-effort lookup: a failed search is reported as "no matches".
        return []

    candidate_ids, candidate_hashes = _extract_search_ids(payload)
    if not candidate_ids and not candidate_hashes:
        return []

    metas_out: list[dict[str, Any]] = []
    seen_ids: set[int] = set()
    seen_hashes: set[str] = set()
    chunk_size = 200

    def _remember(meta: dict[str, Any]) -> bool:
        """Record the identifiers of *meta*; return False if already collected.

        NOTE(review): assumes metadata records expose ``file_id`` and ``hash``
        keys as described by the Hydrus Client API. Records carrying neither
        key are always treated as new.
        """
        file_id = meta.get("file_id")
        file_hash = meta.get("hash")
        has_id = isinstance(file_id, int)
        has_hash = isinstance(file_hash, str)
        if (has_id and file_id in seen_ids) or (has_hash and file_hash in seen_hashes):
            return False
        if has_id:
            seen_ids.add(file_id)
        if has_hash:
            seen_hashes.add(file_hash)
        return True

    def _fetch_chunk(kind: Literal["file_ids", "hashes"], values: list[Any]) -> None:
        """Fetch metadata for *values* chunk by chunk and append it to ``metas_out``."""
        cursor = 0
        while cursor < len(values) and len(metas_out) < fetch_limit:
            remaining = fetch_limit - len(metas_out)
            chunk = values[cursor:cursor + min(chunk_size, remaining)]
            # Advance by what was actually requested. Stepping a fixed
            # ``chunk_size`` would skip candidates whenever a shortened chunk
            # fails or returns fewer records than asked for.
            cursor += len(chunk)
            try:
                metadata = client.fetch_file_metadata(
                    **{kind: chunk},
                    include_file_url=True,
                    include_service_keys_to_tags=False,
                    include_duration=False,
                    include_size=not minimal,
                    include_mime=False,
                )
            except Exception:
                # One failing chunk must not abort the remaining ones.
                continue

            fetched = metadata.get("metadata", []) if isinstance(metadata, dict) else []
            if not isinstance(fetched, list):
                continue
            for meta in fetched:
                if len(metas_out) >= fetch_limit:
                    break
                if isinstance(meta, dict) and _remember(meta):
                    metas_out.append(meta)

    if candidate_ids:
        _fetch_chunk("file_ids", candidate_ids)

    if len(metas_out) < fetch_limit and candidate_hashes:
        # The search requested ids and hashes together, so the hash list
        # presumably names the same files. Only hashes not already resolved
        # through the id pass are fetched, which keeps this pass a fallback
        # instead of a source of duplicates.
        pending_hashes = [
            file_hash
            for file_hash in candidate_hashes
            if not (isinstance(file_hash, str) and file_hash in seen_hashes)
        ]
        _fetch_chunk("hashes", pending_hashes)

    return metas_out[:fetch_limit]
|
||||
|
||||
query_lower = query.lower().strip()
|
||||
|
||||
# Support `ext:<value>` anywhere in the query. We filter results by the
|
||||
@@ -735,12 +816,23 @@ class HydrusNetwork(Store):
|
||||
namespace = namespace.strip().lower()
|
||||
pattern = pattern.strip()
|
||||
if namespace == "url":
|
||||
try:
|
||||
fetch_limit_raw = int(limit) if limit else 100
|
||||
except Exception:
|
||||
fetch_limit_raw = 100
|
||||
if url_only:
|
||||
metadata_list = _search_url_query_metadata(
|
||||
query_lower,
|
||||
fetch_limit_raw,
|
||||
minimal=minimal,
|
||||
)
|
||||
else:
|
||||
if not pattern or pattern == "*":
|
||||
if pattern_hints:
|
||||
metadata_list = _iter_url_filtered_metadata(
|
||||
None,
|
||||
want_any=False,
|
||||
fetch_limit=int(limit) if limit else 100,
|
||||
fetch_limit=fetch_limit_raw,
|
||||
needles=pattern_hints,
|
||||
minimal=minimal,
|
||||
)
|
||||
@@ -748,7 +840,7 @@ class HydrusNetwork(Store):
|
||||
metadata_list = _iter_url_filtered_metadata(
|
||||
None,
|
||||
want_any=True,
|
||||
fetch_limit=int(limit) if limit else 100,
|
||||
fetch_limit=fetch_limit_raw,
|
||||
minimal=minimal,
|
||||
)
|
||||
else:
|
||||
@@ -840,13 +932,13 @@ class HydrusNetwork(Store):
|
||||
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
||||
if is_domain_only:
|
||||
try:
|
||||
scan_limit_override = max(int(limit or 100) * 20, 2000)
|
||||
scan_limit_override = max(fetch_limit_raw * 20, 2000)
|
||||
except Exception:
|
||||
scan_limit_override = 2000
|
||||
metadata_list = _iter_url_filtered_metadata(
|
||||
search_token,
|
||||
want_any=False,
|
||||
fetch_limit=int(limit) if limit else 100,
|
||||
fetch_limit=fetch_limit_raw,
|
||||
scan_limit=scan_limit_override,
|
||||
needles=pattern_hints if pattern_hints else None,
|
||||
minimal=minimal,
|
||||
|
||||
@@ -366,6 +366,7 @@ class Get_Url(Cmdlet):
|
||||
store_name,
|
||||
pattern_hint=target_pattern,
|
||||
minimal=True,
|
||||
url_only=True,
|
||||
)
|
||||
if search_results is None:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user