kl
This commit is contained in:
@@ -4,6 +4,7 @@ import re
|
||||
import sys
|
||||
import tempfile
|
||||
import shutil
|
||||
from collections import deque
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
|
||||
|
||||
@@ -1094,78 +1095,13 @@ class HydrusNetwork(Store):
|
||||
return token.replace("*", "").replace("?", "")
|
||||
|
||||
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
||||
exact_url_attempted = False
|
||||
try:
|
||||
if pattern.startswith("http://") or pattern.startswith(
|
||||
"https://"):
|
||||
from API.HydrusNetwork import HydrusRequestSpec
|
||||
|
||||
spec = HydrusRequestSpec(
|
||||
method="GET",
|
||||
endpoint="/add_urls/get_url_files",
|
||||
query={
|
||||
"url": pattern
|
||||
},
|
||||
)
|
||||
response = client._perform_request(
|
||||
spec
|
||||
) # type: ignore[attr-defined]
|
||||
hashes = []
|
||||
file_ids = []
|
||||
if isinstance(response, dict):
|
||||
raw_hashes = response.get("hashes") or response.get(
|
||||
"file_hashes"
|
||||
)
|
||||
if isinstance(raw_hashes, list):
|
||||
hashes = [
|
||||
str(h).strip() for h in raw_hashes
|
||||
if isinstance(h, str) and str(h).strip()
|
||||
]
|
||||
raw_ids = response.get("file_ids")
|
||||
if isinstance(raw_ids, list):
|
||||
for item in raw_ids:
|
||||
try:
|
||||
file_ids.append(int(item))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
|
||||
if file_ids:
|
||||
payload = client.fetch_file_metadata(
|
||||
file_ids=file_ids,
|
||||
include_file_url=True,
|
||||
include_service_keys_to_tags=not minimal,
|
||||
include_duration=not minimal,
|
||||
include_size=not minimal,
|
||||
include_mime=not minimal,
|
||||
)
|
||||
metas = (
|
||||
payload.get("metadata",
|
||||
[]) if isinstance(payload,
|
||||
dict) else []
|
||||
)
|
||||
if isinstance(metas, list):
|
||||
metadata_list = [
|
||||
m for m in metas if isinstance(m, dict)
|
||||
]
|
||||
elif hashes:
|
||||
payload = client.fetch_file_metadata(
|
||||
hashes=hashes,
|
||||
include_file_url=True,
|
||||
include_service_keys_to_tags=not minimal,
|
||||
include_duration=not minimal,
|
||||
include_size=not minimal,
|
||||
include_mime=not minimal,
|
||||
)
|
||||
metas = (
|
||||
payload.get("metadata",
|
||||
[]) if isinstance(payload,
|
||||
dict) else []
|
||||
)
|
||||
if isinstance(metas, list):
|
||||
metadata_list = [
|
||||
m for m in metas if isinstance(m, dict)
|
||||
]
|
||||
if pattern.startswith("http://") or pattern.startswith("https://"):
|
||||
exact_url_attempted = True
|
||||
metadata_list = self.lookup_url_metadata(pattern, minimal=minimal)
|
||||
except Exception:
|
||||
metadata_list = None
|
||||
metadata_list = [] if exact_url_attempted else None
|
||||
|
||||
# Fallback: substring scan
|
||||
if metadata_list is None:
|
||||
@@ -2108,6 +2044,115 @@ class HydrusNetwork(Store):
|
||||
debug(f"{self._log_prefix()} get_url failed: {exc}")
|
||||
return []
|
||||
|
||||
def lookup_url_metadata(self, url_value: str, *, minimal: bool = False) -> List[Dict[str, Any]]:
    """Resolve an exact URL to Hydrus metadata using /add_urls/get_url_files variants.

    Walks the URL and every variant Hydrus knows about (plus any
    normalized/redirect URLs reported back), collects the file ids and
    hashes it finds, then fetches metadata for them in one call.
    Returns a list of metadata dicts; any failure yields [].
    """
    target = str(url_value or "").strip()
    if not target:
        return []

    client = self._client
    if client is None:
        return []

    # Best-effort import: without the request helpers there is nothing to do.
    try:
        from API.HydrusNetwork import HydrusRequestSpec, _generate_hydrus_url_variants
    except Exception:
        return []

    queue: deque[str] = deque(_generate_hydrus_url_variants(target) or [target])
    visited: set[str] = set()
    collected_ids: List[int] = []
    collected_hashes: List[str] = []
    known_ids: set[int] = set()
    known_hashes: set[str] = set()

    while queue:
        url = str(queue.popleft() or "").strip()
        if not url or url in visited:
            continue
        visited.add(url)

        # One variant failing must not abort the whole walk.
        try:
            spec = HydrusRequestSpec(
                method="GET",
                endpoint="/add_urls/get_url_files",
                query={"url": url},
            )
            response = client._perform_request(spec)
        except Exception:
            continue
        if not isinstance(response, dict):
            continue

        hash_payload = response.get("hashes") or response.get("file_hashes")
        for raw in hash_payload if isinstance(hash_payload, list) else []:
            try:
                digest = str(raw or "").strip().lower()
            except Exception:
                continue
            if digest and digest not in known_hashes:
                known_hashes.add(digest)
                collected_hashes.append(digest)

        id_payload = response.get("file_ids") or response.get("file_id")
        if isinstance(id_payload, list):
            candidates = id_payload
        elif id_payload is not None:
            candidates = [id_payload]
        else:
            candidates = []
        for raw in candidates:
            try:
                numeric = int(raw)
            except (TypeError, ValueError):
                continue
            if numeric not in known_ids:
                known_ids.add(numeric)
                collected_ids.append(numeric)

        # Chase whatever canonical/redirect URLs Hydrus reports.
        for key in ("normalized_url", "redirect_url", "url"):
            follow = response.get(key)
            if isinstance(follow, str):
                follow = follow.strip()
                if follow and follow not in visited:
                    queue.append(follow)

    if not collected_ids and not collected_hashes:
        return []

    try:
        payload = client.fetch_file_metadata(
            file_ids=collected_ids or None,
            hashes=collected_hashes or None,
            include_file_url=True,
            include_service_keys_to_tags=not minimal,
            include_duration=not minimal,
            include_size=not minimal,
            include_mime=not minimal,
        )
    except Exception:
        return []

    records = payload.get("metadata") if isinstance(payload, dict) else None
    if not isinstance(records, list):
        return []
    return [record for record in records if isinstance(record, dict)]
|
||||
|
||||
def find_hashes_by_url(self, url_value: str) -> List[str]:
    """Return the unique SHA-256 hex digests Hydrus associates with *url_value*.

    Thin wrapper over lookup_url_metadata(minimal=True): pulls the first
    populated hash field from each entry, lowercases it, and keeps only
    well-formed 64-character digests, preserving first-seen order.
    """
    results: List[str] = []
    known: set[str] = set()
    entries = self.lookup_url_metadata(url_value, minimal=True)
    for entry in entries:
        candidate = entry.get("hash") or entry.get("hash_hex") or entry.get("file_hash")
        try:
            digest = str(candidate or "").strip().lower()
        except Exception:
            continue
        # Only accept full SHA-256 hex strings, once each.
        if len(digest) == 64 and digest not in known:
            known.add(digest)
            results.append(digest)
    return results
|
||||
|
||||
def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
|
||||
"""Return Hydrus URL info for a single URL (Hydrus-only helper).
|
||||
|
||||
|
||||
Reference in New Issue
Block a user