This commit is contained in:
2026-03-31 23:30:57 -07:00
parent 6ef5b645a8
commit 57b595c1a4
7 changed files with 381 additions and 111 deletions

View File

@@ -4,6 +4,7 @@ import re
import sys
import tempfile
import shutil
from collections import deque
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
@@ -1094,78 +1095,13 @@ class HydrusNetwork(Store):
return token.replace("*", "").replace("?", "")
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
exact_url_attempted = False
try:
if pattern.startswith("http://") or pattern.startswith(
"https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={
"url": pattern
},
)
response = client._perform_request(
spec
) # type: ignore[attr-defined]
hashes = []
file_ids = []
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get(
"file_hashes"
)
if isinstance(raw_hashes, list):
hashes = [
str(h).strip() for h in raw_hashes
if isinstance(h, str) and str(h).strip()
]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
try:
file_ids.append(int(item))
except (TypeError, ValueError):
continue
if file_ids:
payload = client.fetch_file_metadata(
file_ids=file_ids,
include_file_url=True,
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
)
metas = (
payload.get("metadata",
[]) if isinstance(payload,
dict) else []
)
if isinstance(metas, list):
metadata_list = [
m for m in metas if isinstance(m, dict)
]
elif hashes:
payload = client.fetch_file_metadata(
hashes=hashes,
include_file_url=True,
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
)
metas = (
payload.get("metadata",
[]) if isinstance(payload,
dict) else []
)
if isinstance(metas, list):
metadata_list = [
m for m in metas if isinstance(m, dict)
]
if pattern.startswith("http://") or pattern.startswith("https://"):
exact_url_attempted = True
metadata_list = self.lookup_url_metadata(pattern, minimal=minimal)
except Exception:
metadata_list = None
metadata_list = [] if exact_url_attempted else None
# Fallback: substring scan
if metadata_list is None:
@@ -2108,6 +2044,115 @@ class HydrusNetwork(Store):
debug(f"{self._log_prefix()} get_url failed: {exc}")
return []
def lookup_url_metadata(self, url_value: str, *, minimal: bool = False) -> List[Dict[str, Any]]:
    """Resolve an exact URL to Hydrus metadata using /add_urls/get_url_files variants.

    Queries the endpoint for the URL plus every Hydrus-generated variant,
    following any normalized/redirect URLs the server reports, then fetches
    metadata for all file ids and hashes discovered along the way.

    Args:
        url_value: URL to resolve; blank or None yields an empty result.
        minimal: When True, omit tags/duration/size/mime from the metadata.

    Returns:
        A list of metadata dicts (possibly empty). Never raises: lookup and
        fetch failures degrade to an empty result.
    """
    target = str(url_value or "").strip()
    if not target:
        return []
    client = self._client
    if client is None:
        return []
    try:
        from API.HydrusNetwork import HydrusRequestSpec, _generate_hydrus_url_variants
    except Exception:
        return []

    queue: deque[str] = deque(_generate_hydrus_url_variants(target) or [target])
    visited: set[str] = set()
    found_ids: List[int] = []
    found_hashes: List[str] = []
    known_ids: set[int] = set()
    known_hashes: set[str] = set()

    while queue:
        url = str(queue.popleft() or "").strip()
        if not url or url in visited:
            continue
        visited.add(url)
        spec = HydrusRequestSpec(
            method="GET",
            endpoint="/add_urls/get_url_files",
            query={"url": url},
        )
        try:
            response = client._perform_request(spec)
        except Exception:
            # Best-effort: one failed variant must not abort the walk.
            continue
        if not isinstance(response, dict):
            continue

        # Collect any hex hashes the endpoint reported, first-seen order.
        hash_field = response.get("hashes") or response.get("file_hashes")
        if isinstance(hash_field, list):
            for raw in hash_field:
                try:
                    hex_hash = str(raw or "").strip().lower()
                except Exception:
                    continue
                if hex_hash and hex_hash not in known_hashes:
                    known_hashes.add(hex_hash)
                    found_hashes.append(hex_hash)

        # Collect file ids; the API may return a list or a single value.
        id_field = response.get("file_ids") or response.get("file_id")
        if not isinstance(id_field, list):
            id_field = [] if id_field is None else [id_field]
        for raw in id_field:
            try:
                numeric_id = int(raw)
            except (TypeError, ValueError):
                continue
            if numeric_id not in known_ids:
                known_ids.add(numeric_id)
                found_ids.append(numeric_id)

        # Follow alternate URL spellings surfaced by the server.
        for field in ("normalized_url", "redirect_url", "url"):
            candidate = response.get(field)
            if isinstance(candidate, str):
                follow = candidate.strip()
                if follow and follow not in visited:
                    queue.append(follow)

    if not found_ids and not found_hashes:
        return []
    try:
        payload = client.fetch_file_metadata(
            file_ids=found_ids or None,
            hashes=found_hashes or None,
            include_file_url=True,
            include_service_keys_to_tags=not minimal,
            include_duration=not minimal,
            include_size=not minimal,
            include_mime=not minimal,
        )
    except Exception:
        return []
    metadata = payload.get("metadata") if isinstance(payload, dict) else None
    if not isinstance(metadata, list):
        return []
    return [entry for entry in metadata if isinstance(entry, dict)]
def find_hashes_by_url(self, url_value: str) -> List[str]:
    """Return unique SHA-256 hex hashes for files matching *url_value*.

    Thin wrapper over ``lookup_url_metadata(minimal=True)`` that keeps only
    well-formed 64-character lowercase hashes, preserving first-seen order.
    """
    unique: List[str] = []
    known: set[str] = set()
    for meta in self.lookup_url_metadata(url_value, minimal=True):
        candidate = meta.get("hash") or meta.get("hash_hex") or meta.get("file_hash")
        try:
            normalized = str(candidate or "").strip().lower()
        except Exception:
            continue
        if len(normalized) == 64 and normalized not in known:
            known.add(normalized)
            unique.append(normalized)
    return unique
def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
"""Return Hydrus URL info for a single URL (Hydrus-only helper).