h
This commit is contained in:
@@ -5,7 +5,7 @@ import sys
|
|||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
|
||||||
|
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
@@ -455,6 +455,7 @@ class HydrusNetwork(Store):
|
|||||||
"""
|
"""
|
||||||
limit = kwargs.get("limit", 100)
|
limit = kwargs.get("limit", 100)
|
||||||
minimal = bool(kwargs.get("minimal", False))
|
minimal = bool(kwargs.get("minimal", False))
|
||||||
|
url_only = bool(kwargs.get("url_only", False))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = self._client
|
client = self._client
|
||||||
@@ -676,6 +677,86 @@ class HydrusNetwork(Store):
|
|||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def _search_url_query_metadata(
    url_query: str,
    fetch_limit: int,
    *,
    minimal: bool = False,
) -> list[dict[str, Any]]:
    """Run a strict url:<pattern> search without falling back to system predicates.

    The query string is handed to ``client.search_files`` as the only tag.
    The ids/hashes extracted from the response are then resolved to metadata
    records in batches via ``client.fetch_file_metadata``.

    Args:
        url_query: Search tag passed to the client unchanged. An empty value
            short-circuits to an empty result.
        fetch_limit: Maximum number of metadata records to return. Values
            <= 0 yield an empty list without contacting the client.
        minimal: When true, the metadata request is made with
            ``include_size=False``.

    Returns:
        At most ``fetch_limit`` metadata dicts, each file reported once.
        The lookup is best-effort: a failed search returns ``[]`` and a
        failed metadata batch is skipped rather than raised.
    """
    if not url_query or fetch_limit <= 0:
        return []

    try:
        payload = client.search_files(
            tags=[url_query],
            return_hashes=True,
            return_file_ids=True,
        )
    except Exception:
        # Best-effort lookup: an unreachable/unhappy client means "no results".
        return []

    candidate_ids, candidate_hashes = _extract_search_ids(payload)
    if not candidate_ids and not candidate_hashes:
        return []

    metas_out: list[dict[str, Any]] = []
    chunk_size = 200

    # Identifiers of records already collected. The search asks for ids AND
    # hashes, so both candidate lists presumably describe the same files
    # (TODO confirm against _extract_search_ids); without this bookkeeping the
    # hash pass would append every file a second time whenever the id pass
    # ends below fetch_limit.
    seen_ids: set[Any] = set()
    seen_hashes: set[Any] = set()

    def _remember_if_new(meta: dict[str, Any]) -> bool:
        """Return True (and record its identifiers) unless *meta* is a repeat."""
        file_id = meta.get("file_id")
        file_hash = meta.get("hash")
        try:
            if file_id is not None and file_id in seen_ids:
                return False
            if file_hash and file_hash in seen_hashes:
                return False
            if file_id is not None:
                seen_ids.add(file_id)
            if file_hash:
                seen_hashes.add(file_hash)
        except TypeError:
            # Unhashable identifier: cannot de-duplicate, so keep the record.
            return True
        return True

    def _fetch_chunk(kind: Literal["file_ids", "hashes"], values: list[Any]) -> None:
        """Resolve *values* to metadata in batches until fetch_limit is reached."""
        total = len(values)
        cursor = 0
        while cursor < total and len(metas_out) < fetch_limit:
            remaining = fetch_limit - len(metas_out)
            chunk_start = cursor
            chunk = values[chunk_start:chunk_start + min(chunk_size, remaining)]
            if not chunk:
                break
            # Advance by what was actually requested so that a batch yielding
            # fewer usable records than asked for does not cause the
            # candidates after it to be skipped.
            cursor = chunk_start + len(chunk)

            try:
                metadata = client.fetch_file_metadata(
                    include_file_url=True,
                    include_service_keys_to_tags=False,
                    include_duration=False,
                    include_size=not minimal,
                    include_mime=False,
                    **{kind: chunk},
                )
            except Exception:
                # Best-effort: drop this batch and jump a full stride ahead,
                # which keeps the number of attempts against a failing client
                # bounded by ceil(total / chunk_size).
                cursor = chunk_start + chunk_size
                continue

            fetched = metadata.get("metadata", []) if isinstance(metadata, dict) else []
            if not isinstance(fetched, list):
                continue

            for meta in fetched:
                if len(metas_out) >= fetch_limit:
                    break
                if isinstance(meta, dict) and _remember_if_new(meta):
                    metas_out.append(meta)

    if candidate_ids:
        _fetch_chunk("file_ids", candidate_ids)

    if candidate_hashes and len(metas_out) < fetch_limit:
        # Only ask for hashes the id pass did not already resolve; this avoids
        # a redundant round-trip in the common case.
        unresolved = [
            value
            for value in candidate_hashes
            if not (isinstance(value, str) and value in seen_hashes)
        ]
        if unresolved:
            _fetch_chunk("hashes", unresolved)

    return metas_out[:fetch_limit]
|
||||||
|
|
||||||
query_lower = query.lower().strip()
|
query_lower = query.lower().strip()
|
||||||
|
|
||||||
# Support `ext:<value>` anywhere in the query. We filter results by the
|
# Support `ext:<value>` anywhere in the query. We filter results by the
|
||||||
@@ -735,12 +816,23 @@ class HydrusNetwork(Store):
|
|||||||
namespace = namespace.strip().lower()
|
namespace = namespace.strip().lower()
|
||||||
pattern = pattern.strip()
|
pattern = pattern.strip()
|
||||||
if namespace == "url":
|
if namespace == "url":
|
||||||
|
try:
|
||||||
|
fetch_limit_raw = int(limit) if limit else 100
|
||||||
|
except Exception:
|
||||||
|
fetch_limit_raw = 100
|
||||||
|
if url_only:
|
||||||
|
metadata_list = _search_url_query_metadata(
|
||||||
|
query_lower,
|
||||||
|
fetch_limit_raw,
|
||||||
|
minimal=minimal,
|
||||||
|
)
|
||||||
|
else:
|
||||||
if not pattern or pattern == "*":
|
if not pattern or pattern == "*":
|
||||||
if pattern_hints:
|
if pattern_hints:
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
None,
|
None,
|
||||||
want_any=False,
|
want_any=False,
|
||||||
fetch_limit=int(limit) if limit else 100,
|
fetch_limit=fetch_limit_raw,
|
||||||
needles=pattern_hints,
|
needles=pattern_hints,
|
||||||
minimal=minimal,
|
minimal=minimal,
|
||||||
)
|
)
|
||||||
@@ -748,7 +840,7 @@ class HydrusNetwork(Store):
|
|||||||
metadata_list = _iter_url_filtered_metadata(
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
None,
|
None,
|
||||||
want_any=True,
|
want_any=True,
|
||||||
fetch_limit=int(limit) if limit else 100,
|
fetch_limit=fetch_limit_raw,
|
||||||
minimal=minimal,
|
minimal=minimal,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -840,13 +932,13 @@ class HydrusNetwork(Store):
|
|||||||
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
||||||
if is_domain_only:
|
if is_domain_only:
|
||||||
try:
|
try:
|
||||||
scan_limit_override = max(int(limit or 100) * 20, 2000)
|
scan_limit_override = max(fetch_limit_raw * 20, 2000)
|
||||||
except Exception:
|
except Exception:
|
||||||
scan_limit_override = 2000
|
scan_limit_override = 2000
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
search_token,
|
search_token,
|
||||||
want_any=False,
|
want_any=False,
|
||||||
fetch_limit=int(limit) if limit else 100,
|
fetch_limit=fetch_limit_raw,
|
||||||
scan_limit=scan_limit_override,
|
scan_limit=scan_limit_override,
|
||||||
needles=pattern_hints if pattern_hints else None,
|
needles=pattern_hints if pattern_hints else None,
|
||||||
minimal=minimal,
|
minimal=minimal,
|
||||||
|
|||||||
@@ -366,6 +366,7 @@ class Get_Url(Cmdlet):
|
|||||||
store_name,
|
store_name,
|
||||||
pattern_hint=target_pattern,
|
pattern_hint=target_pattern,
|
||||||
minimal=True,
|
minimal=True,
|
||||||
|
url_only=True,
|
||||||
)
|
)
|
||||||
if search_results is None:
|
if search_results is None:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user