h
This commit is contained in:
@@ -5,7 +5,7 @@ import sys
|
|||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
|
||||||
|
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
@@ -455,6 +455,7 @@ class HydrusNetwork(Store):
|
|||||||
"""
|
"""
|
||||||
limit = kwargs.get("limit", 100)
|
limit = kwargs.get("limit", 100)
|
||||||
minimal = bool(kwargs.get("minimal", False))
|
minimal = bool(kwargs.get("minimal", False))
|
||||||
|
url_only = bool(kwargs.get("url_only", False))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = self._client
|
client = self._client
|
||||||
@@ -676,6 +677,86 @@ class HydrusNetwork(Store):
|
|||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def _search_url_query_metadata(
    url_query: str,
    fetch_limit: int,
    *,
    minimal: bool = False,
) -> list[dict[str, Any]]:
    """Run a strict url:<pattern> search without falling back to system predicates.

    The query is handed to ``client.search_files`` as a single tag, and the
    returned file IDs / hashes are resolved to metadata in chunks.

    Args:
        url_query: Search tag passed verbatim to ``client.search_files``.
            An empty value short-circuits to ``[]``.
        fetch_limit: Maximum number of metadata dicts to return. A value
            of zero or less returns ``[]`` without contacting the client.
        minimal: When true, file size is not requested (``include_size`` is
            ``not minimal``). Tags, duration and mime are never requested.

    Returns:
        At most ``fetch_limit`` metadata dicts, without duplicates. Search or
        fetch failures are swallowed (best effort), so the result may be
        shorter than the number of matches, or empty.
    """

    if not url_query or fetch_limit <= 0:
        return []

    try:
        payload = client.search_files(
            tags=[url_query],
            return_hashes=True,
            return_file_ids=True,
        )
    except Exception:
        # Best-effort lookup: a failed search is reported as "no matches".
        return []

    candidate_ids, candidate_hashes = _extract_search_ids(payload)
    if not candidate_ids and not candidate_hashes:
        return []

    chunk_size = 200
    metas_out: list[dict[str, Any]] = []
    # Identifiers of files already collected. The search asks for IDs *and*
    # hashes, so both candidate lists normally describe the same files; these
    # sets stop the hash pass from returning a file the ID pass already found.
    # NOTE(review): assumes each metadata dict carries "file_id" and "hash"
    # keys (as in the Hydrus file_metadata response) - confirm for this client.
    seen_ids: set[int] = set()
    seen_hashes: set[str] = set()

    def _normalize_hash(value: Any) -> str:
        return str(value).strip().lower()

    def _is_duplicate(meta: dict[str, Any]) -> bool:
        """Report whether *meta* was already collected, recording it if not."""
        file_id = meta.get("file_id")
        file_hash = meta.get("hash")
        id_key = file_id if isinstance(file_id, int) else None
        hash_key = _normalize_hash(file_hash) if isinstance(file_hash, str) else None
        if id_key is not None and id_key in seen_ids:
            return True
        if hash_key and hash_key in seen_hashes:
            return True
        if id_key is not None:
            seen_ids.add(id_key)
        if hash_key:
            seen_hashes.add(hash_key)
        return False

    def _fetch_chunk(kind: Literal["file_ids", "hashes"], values: list[Any]) -> None:
        """Fetch metadata for *values* in chunks until the limit is reached."""
        cursor = 0
        while cursor < len(values) and len(metas_out) < fetch_limit:
            # Never request more than is still needed, and advance by what was
            # actually requested so no candidate is skipped between chunks.
            step = min(chunk_size, fetch_limit - len(metas_out))
            chunk = values[cursor:cursor + step]
            cursor += len(chunk)
            try:
                metadata = client.fetch_file_metadata(
                    **{kind: chunk},
                    include_file_url=True,
                    include_service_keys_to_tags=False,
                    include_duration=False,
                    include_size=not minimal,
                    include_mime=False,
                )
            except Exception:
                # A failing chunk must not abort the whole lookup.
                continue

            fetched = metadata.get("metadata", []) if isinstance(metadata, dict) else []
            if not isinstance(fetched, list):
                continue
            for meta in fetched:
                if len(metas_out) >= fetch_limit:
                    break
                if not isinstance(meta, dict) or _is_duplicate(meta):
                    continue
                metas_out.append(meta)

    if candidate_ids:
        _fetch_chunk("file_ids", list(candidate_ids))
    if candidate_hashes and len(metas_out) < fetch_limit:
        # Only hashes not already resolved through the ID pass are requested.
        pending_hashes = [
            value for value in candidate_hashes
            if _normalize_hash(value) not in seen_hashes
        ]
        _fetch_chunk("hashes", pending_hashes)

    return metas_out[:fetch_limit]
|
||||||
|
|
||||||
query_lower = query.lower().strip()
|
query_lower = query.lower().strip()
|
||||||
|
|
||||||
# Support `ext:<value>` anywhere in the query. We filter results by the
|
# Support `ext:<value>` anywhere in the query. We filter results by the
|
||||||
@@ -735,122 +816,133 @@ class HydrusNetwork(Store):
|
|||||||
namespace = namespace.strip().lower()
|
namespace = namespace.strip().lower()
|
||||||
pattern = pattern.strip()
|
pattern = pattern.strip()
|
||||||
if namespace == "url":
|
if namespace == "url":
|
||||||
if not pattern or pattern == "*":
|
try:
|
||||||
if pattern_hints:
|
fetch_limit_raw = int(limit) if limit else 100
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
except Exception:
|
||||||
None,
|
fetch_limit_raw = 100
|
||||||
want_any=False,
|
if url_only:
|
||||||
fetch_limit=int(limit) if limit else 100,
|
metadata_list = _search_url_query_metadata(
|
||||||
needles=pattern_hints,
|
query_lower,
|
||||||
minimal=minimal,
|
fetch_limit_raw,
|
||||||
)
|
minimal=minimal,
|
||||||
else:
|
)
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
|
||||||
None,
|
|
||||||
want_any=True,
|
|
||||||
fetch_limit=int(limit) if limit else 100,
|
|
||||||
minimal=minimal,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
def _clean_url_search_token(value: str | None) -> str:
|
if not pattern or pattern == "*":
|
||||||
token = str(value or "").strip().lower()
|
if pattern_hints:
|
||||||
if not token:
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
return ""
|
None,
|
||||||
return token.replace("*", "").replace("?", "")
|
want_any=False,
|
||||||
|
fetch_limit=fetch_limit_raw,
|
||||||
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
needles=pattern_hints,
|
||||||
try:
|
minimal=minimal,
|
||||||
if pattern.startswith("http://") or pattern.startswith(
|
|
||||||
"https://"):
|
|
||||||
from API.HydrusNetwork import HydrusRequestSpec
|
|
||||||
|
|
||||||
spec = HydrusRequestSpec(
|
|
||||||
method="GET",
|
|
||||||
endpoint="/add_urls/get_url_files",
|
|
||||||
query={
|
|
||||||
"url": pattern
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
response = client._perform_request(
|
else:
|
||||||
spec
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
) # type: ignore[attr-defined]
|
None,
|
||||||
hashes = []
|
want_any=True,
|
||||||
file_ids = []
|
fetch_limit=fetch_limit_raw,
|
||||||
if isinstance(response, dict):
|
minimal=minimal,
|
||||||
raw_hashes = response.get("hashes") or response.get(
|
)
|
||||||
"file_hashes"
|
else:
|
||||||
)
|
def _clean_url_search_token(value: str | None) -> str:
|
||||||
if isinstance(raw_hashes, list):
|
token = str(value or "").strip().lower()
|
||||||
hashes = [
|
if not token:
|
||||||
str(h).strip() for h in raw_hashes
|
return ""
|
||||||
if isinstance(h, str) and str(h).strip()
|
return token.replace("*", "").replace("?", "")
|
||||||
]
|
|
||||||
raw_ids = response.get("file_ids")
|
|
||||||
if isinstance(raw_ids, list):
|
|
||||||
for item in raw_ids:
|
|
||||||
try:
|
|
||||||
file_ids.append(int(item))
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if file_ids:
|
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
||||||
payload = client.fetch_file_metadata(
|
try:
|
||||||
file_ids=file_ids,
|
if pattern.startswith("http://") or pattern.startswith(
|
||||||
include_file_url=True,
|
"https://"):
|
||||||
include_service_keys_to_tags=not minimal,
|
from API.HydrusNetwork import HydrusRequestSpec
|
||||||
include_duration=not minimal,
|
|
||||||
include_size=not minimal,
|
|
||||||
include_mime=not minimal,
|
|
||||||
)
|
|
||||||
metas = (
|
|
||||||
payload.get("metadata",
|
|
||||||
[]) if isinstance(payload,
|
|
||||||
dict) else []
|
|
||||||
)
|
|
||||||
if isinstance(metas, list):
|
|
||||||
metadata_list = [
|
|
||||||
m for m in metas if isinstance(m, dict)
|
|
||||||
]
|
|
||||||
elif hashes:
|
|
||||||
payload = client.fetch_file_metadata(
|
|
||||||
hashes=hashes,
|
|
||||||
include_file_url=True,
|
|
||||||
include_service_keys_to_tags=not minimal,
|
|
||||||
include_duration=not minimal,
|
|
||||||
include_size=not minimal,
|
|
||||||
include_mime=not minimal,
|
|
||||||
)
|
|
||||||
metas = (
|
|
||||||
payload.get("metadata",
|
|
||||||
[]) if isinstance(payload,
|
|
||||||
dict) else []
|
|
||||||
)
|
|
||||||
if isinstance(metas, list):
|
|
||||||
metadata_list = [
|
|
||||||
m for m in metas if isinstance(m, dict)
|
|
||||||
]
|
|
||||||
except Exception:
|
|
||||||
metadata_list = None
|
|
||||||
|
|
||||||
# Fallback: substring scan
|
spec = HydrusRequestSpec(
|
||||||
if metadata_list is None:
|
method="GET",
|
||||||
search_token = _clean_url_search_token(pattern_hint or pattern)
|
endpoint="/add_urls/get_url_files",
|
||||||
scan_limit_override: int | None = None
|
query={
|
||||||
if search_token:
|
"url": pattern
|
||||||
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
},
|
||||||
if is_domain_only:
|
)
|
||||||
try:
|
response = client._perform_request(
|
||||||
scan_limit_override = max(int(limit or 100) * 20, 2000)
|
spec
|
||||||
except Exception:
|
) # type: ignore[attr-defined]
|
||||||
scan_limit_override = 2000
|
hashes = []
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
file_ids = []
|
||||||
search_token,
|
if isinstance(response, dict):
|
||||||
want_any=False,
|
raw_hashes = response.get("hashes") or response.get(
|
||||||
fetch_limit=int(limit) if limit else 100,
|
"file_hashes"
|
||||||
scan_limit=scan_limit_override,
|
)
|
||||||
needles=pattern_hints if pattern_hints else None,
|
if isinstance(raw_hashes, list):
|
||||||
minimal=minimal,
|
hashes = [
|
||||||
)
|
str(h).strip() for h in raw_hashes
|
||||||
|
if isinstance(h, str) and str(h).strip()
|
||||||
|
]
|
||||||
|
raw_ids = response.get("file_ids")
|
||||||
|
if isinstance(raw_ids, list):
|
||||||
|
for item in raw_ids:
|
||||||
|
try:
|
||||||
|
file_ids.append(int(item))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if file_ids:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
file_ids=file_ids,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=not minimal,
|
||||||
|
include_duration=not minimal,
|
||||||
|
include_size=not minimal,
|
||||||
|
include_mime=not minimal,
|
||||||
|
)
|
||||||
|
metas = (
|
||||||
|
payload.get("metadata",
|
||||||
|
[]) if isinstance(payload,
|
||||||
|
dict) else []
|
||||||
|
)
|
||||||
|
if isinstance(metas, list):
|
||||||
|
metadata_list = [
|
||||||
|
m for m in metas if isinstance(m, dict)
|
||||||
|
]
|
||||||
|
elif hashes:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
hashes=hashes,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=not minimal,
|
||||||
|
include_duration=not minimal,
|
||||||
|
include_size=not minimal,
|
||||||
|
include_mime=not minimal,
|
||||||
|
)
|
||||||
|
metas = (
|
||||||
|
payload.get("metadata",
|
||||||
|
[]) if isinstance(payload,
|
||||||
|
dict) else []
|
||||||
|
)
|
||||||
|
if isinstance(metas, list):
|
||||||
|
metadata_list = [
|
||||||
|
m for m in metas if isinstance(m, dict)
|
||||||
|
]
|
||||||
|
except Exception:
|
||||||
|
metadata_list = None
|
||||||
|
|
||||||
|
# Fallback: substring scan
|
||||||
|
if metadata_list is None:
|
||||||
|
search_token = _clean_url_search_token(pattern_hint or pattern)
|
||||||
|
scan_limit_override: int | None = None
|
||||||
|
if search_token:
|
||||||
|
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
||||||
|
if is_domain_only:
|
||||||
|
try:
|
||||||
|
scan_limit_override = max(fetch_limit_raw * 20, 2000)
|
||||||
|
except Exception:
|
||||||
|
scan_limit_override = 2000
|
||||||
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
|
search_token,
|
||||||
|
want_any=False,
|
||||||
|
fetch_limit=fetch_limit_raw,
|
||||||
|
scan_limit=scan_limit_override,
|
||||||
|
needles=pattern_hints if pattern_hints else None,
|
||||||
|
minimal=minimal,
|
||||||
|
)
|
||||||
elif namespace == "system":
|
elif namespace == "system":
|
||||||
normalized_system_predicate = pattern.strip()
|
normalized_system_predicate = pattern.strip()
|
||||||
if normalized_system_predicate == "has url":
|
if normalized_system_predicate == "has url":
|
||||||
|
|||||||
@@ -366,6 +366,7 @@ class Get_Url(Cmdlet):
|
|||||||
store_name,
|
store_name,
|
||||||
pattern_hint=target_pattern,
|
pattern_hint=target_pattern,
|
||||||
minimal=True,
|
minimal=True,
|
||||||
|
url_only=True,
|
||||||
)
|
)
|
||||||
if search_results is None:
|
if search_results is None:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user