df
This commit is contained in:
@@ -11,6 +11,7 @@ import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from collections import deque
|
||||
|
||||
from SYS.logger import log
|
||||
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
|
||||
@@ -18,8 +19,8 @@ import tempfile
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional, Sequence, Type, TypeVar, Union, cast
|
||||
from urllib.parse import urlsplit, urlencode, quote
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, TypeVar, Union, cast
|
||||
from urllib.parse import urlsplit, urlencode, quote, urlunsplit, unquote
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -1828,3 +1829,742 @@ def download_hydrus_file(
|
||||
print_final_progress(filename, file_size, elapsed)
|
||||
|
||||
return downloaded
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Hydrus metadata helpers (moved from SYS.metadata)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _normalize_hash(value: Any) -> str:
|
||||
candidate = str(value or "").strip().lower()
|
||||
if not candidate:
|
||||
raise ValueError("Hydrus hash is required")
|
||||
if len(candidate) != 64 or any(ch not in "0123456789abcdef" for ch in candidate):
|
||||
raise ValueError("Hydrus hash must be a 64-character hex string")
|
||||
return candidate
|
||||
|
||||
|
||||
def _normalize_tag(tag: Any) -> Optional[str]:
|
||||
if tag is None:
|
||||
return None
|
||||
if isinstance(tag, str):
|
||||
candidate = tag.strip()
|
||||
else:
|
||||
candidate = str(tag).strip()
|
||||
return candidate or None
|
||||
|
||||
|
||||
def _dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
|
||||
if not tags:
|
||||
return []
|
||||
|
||||
namespace_to_tags: Dict[Optional[str], List[Tuple[int, str]]] = {}
|
||||
first_appearance: Dict[Optional[str], int] = {}
|
||||
|
||||
for idx, tag in enumerate(tags):
|
||||
namespace: Optional[str] = tag.split(":", 1)[0] if ":" in tag else None
|
||||
if namespace not in first_appearance:
|
||||
first_appearance[namespace] = idx
|
||||
if namespace not in namespace_to_tags:
|
||||
namespace_to_tags[namespace] = []
|
||||
namespace_to_tags[namespace].append((idx, tag))
|
||||
|
||||
result: List[Tuple[int, str]] = []
|
||||
for namespace, tag_list in namespace_to_tags.items():
|
||||
chosen_tag = tag_list[0][1] if keep_first else tag_list[-1][1]
|
||||
result.append((first_appearance[namespace], chosen_tag))
|
||||
|
||||
result.sort(key=lambda x: x[0])
|
||||
return [tag for _, tag in result]
|
||||
|
||||
|
||||
def _extract_tag_services(entry: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Collect per-service tag lists from a Hydrus metadata *entry*.

    Returns a list of records shaped {"service_key", "service_name", "tags"}.
    Handles both the flat "service_keys_to_statuses_to_tags" layout and
    per-service sub-dicts found directly under entry["tags"]. Each record's
    tag list is finally deduplicated to one tag per namespace.
    """
    tags_section = entry.get("tags")
    services: List[Dict[str, Any]] = []
    if not isinstance(tags_section, dict):
        return services
    names_map = tags_section.get("service_keys_to_names")
    if not isinstance(names_map, dict):
        names_map = {}

    def get_record(service_key: Optional[str], service_name: Optional[str]) -> Dict[str, Any]:
        # Find (or create) the record for this service, matching by key or
        # name case-insensitively; backfill whichever identifier was missing.
        key_lower = service_key.lower() if isinstance(service_key, str) else None
        name_lower = service_name.lower() if isinstance(service_name, str) else None
        for record in services:
            existing_key = record.get("service_key")
            if key_lower and isinstance(existing_key, str) and existing_key.lower() == key_lower:
                if service_name and not record.get("service_name"):
                    record["service_name"] = service_name
                return record
            existing_name = record.get("service_name")
            if name_lower and isinstance(existing_name, str) and existing_name.lower() == name_lower:
                if service_key and not record.get("service_key"):
                    record["service_key"] = service_key
                return record
        record = {
            "service_key": service_key,
            "service_name": service_name,
            "tags": [],
        }
        services.append(record)
        return record

    def _iter_current_status_lists(container: Any) -> Iterable[List[Any]]:
        # Yield only the "current" (status key "0") tag lists; a bare list is
        # treated as already being the current tags.
        if isinstance(container, dict):
            for status_key, tags_list in container.items():
                if str(status_key) != "0":
                    continue
                if isinstance(tags_list, list):
                    yield tags_list
        elif isinstance(container, list):
            yield container

    # Pass 1: the canonical statuses map, keyed by service key.
    statuses_map = tags_section.get("service_keys_to_statuses_to_tags")
    if isinstance(statuses_map, dict):
        for service_key, status_map in statuses_map.items():
            record = get_record(service_key if isinstance(service_key, str) else None, names_map.get(service_key))
            for tags_list in _iter_current_status_lists(status_map):
                for tag in tags_list:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)

    # Keys that describe the tag section itself rather than a tag service.
    ignored_keys = {
        "service_keys_to_statuses_to_tags",
        "service_keys_to_statuses_to_display_tags",
        "service_keys_to_display_friendly_tags",
        "service_keys_to_names",
        "tag_display_types_to_namespaces",
        "namespace_display_string_lookup",
        "tag_display_decoration_colour_lookup",
    }

    # Pass 2: any remaining per-service sub-dicts under entry["tags"].
    for key, service in tags_section.items():
        if key in ignored_keys:
            continue
        if isinstance(service, dict):
            service_key = service.get("service_key") or (key if isinstance(key, str) else None)
            service_name = service.get("service_name") or service.get("name") or names_map.get(service_key)
            record = get_record(service_key if isinstance(service_key, str) else None, service_name)
            # Several historical payload shapes are accepted here.
            storage = service.get("storage_tags") or service.get("statuses_to_tags") or service.get("tags")
            if isinstance(storage, dict):
                for tags_list in _iter_current_status_lists(storage):
                    for tag in tags_list:
                        normalized = _normalize_tag(tag)
                        if normalized:
                            record["tags"].append(normalized)
            elif isinstance(storage, list):
                for tag in storage:
                    normalized = _normalize_tag(tag)
                    if normalized:
                        record["tags"].append(normalized)

    # Keep only the first tag seen per namespace in each service.
    for record in services:
        record["tags"] = _dedup_tags_by_namespace(record["tags"], keep_first=True)
    return services
|
||||
|
||||
|
||||
def _select_primary_tags(
|
||||
services: List[Dict[str, Any]],
|
||||
aggregated: List[str],
|
||||
prefer_service: Optional[str]
|
||||
) -> Tuple[Optional[str], List[str]]:
|
||||
prefer_lower = prefer_service.lower() if isinstance(prefer_service, str) else None
|
||||
if prefer_lower:
|
||||
for record in services:
|
||||
name = record.get("service_name")
|
||||
if isinstance(name, str) and name.lower() == prefer_lower and record["tags"]:
|
||||
return record.get("service_key"), record["tags"]
|
||||
for record in services:
|
||||
if record["tags"]:
|
||||
return record.get("service_key"), record["tags"]
|
||||
return None, aggregated
|
||||
|
||||
|
||||
def _derive_title(
|
||||
tags_primary: List[str],
|
||||
tags_aggregated: List[str],
|
||||
entry: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
for source in (tags_primary, tags_aggregated):
|
||||
for tag in source:
|
||||
namespace, sep, value = tag.partition(":")
|
||||
if sep and namespace and namespace.lower() == "title":
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
for key in (
|
||||
"title",
|
||||
"display_name",
|
||||
"pretty_name",
|
||||
"original_display_filename",
|
||||
"original_filename",
|
||||
):
|
||||
value = entry.get(key)
|
||||
if isinstance(value, str):
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
return None
|
||||
|
||||
|
||||
def _derive_clip_time(
|
||||
tags_primary: List[str],
|
||||
tags_aggregated: List[str],
|
||||
entry: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
namespaces = {"clip", "clip_time", "cliptime"}
|
||||
for source in (tags_primary, tags_aggregated):
|
||||
for tag in source:
|
||||
namespace, sep, value = tag.partition(":")
|
||||
if sep and namespace and namespace.lower() in namespaces:
|
||||
cleaned = value.strip()
|
||||
if cleaned:
|
||||
return cleaned
|
||||
clip_value = entry.get("clip_time")
|
||||
if isinstance(clip_value, str):
|
||||
cleaned_clip = clip_value.strip()
|
||||
if cleaned_clip:
|
||||
return cleaned_clip
|
||||
return None
|
||||
|
||||
|
||||
def _summarize_hydrus_entry(
    entry: Dict[str, Any],
    prefer_service: Optional[str]
) -> Tuple[Dict[str, Any], List[str], Optional[str], Optional[str], Optional[str]]:
    """Build an enriched summary of one Hydrus metadata entry.

    Returns (summary, primary_tags, tag_service_key, title, clip_time).
    The summary is a shallow copy of *entry* with derived title/clip_time
    filled in (only when not already present), plus "tag_service_key",
    "has_current_file_service" and "is_local" convenience fields.
    """
    services = _extract_tag_services(entry)
    # Aggregate all service tags, first-seen order, without duplicates.
    aggregated: List[str] = []
    seen: Set[str] = set()
    for record in services:
        for tag in record["tags"]:
            if tag not in seen:
                seen.add(tag)
                aggregated.append(tag)
    service_key, primary_tags = _select_primary_tags(services, aggregated, prefer_service)
    title = _derive_title(primary_tags, aggregated, entry)
    clip_time = _derive_clip_time(primary_tags, aggregated, entry)
    summary = dict(entry)
    # Derived values never overwrite what the entry already carries.
    if title and not summary.get("title"):
        summary["title"] = title
    if clip_time and not summary.get("clip_time"):
        summary["clip_time"] = clip_time
    summary["tag_service_key"] = service_key
    summary["has_current_file_service"] = _has_current_file_service(entry)
    if "is_local" not in summary:
        summary["is_local"] = bool(entry.get("is_local"))
    return summary, primary_tags, service_key, title, clip_time
|
||||
|
||||
|
||||
def _looks_like_hash(value: Any) -> bool:
|
||||
if not isinstance(value, str):
|
||||
return False
|
||||
candidate = value.strip().lower()
|
||||
return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate)
|
||||
|
||||
|
||||
def _collect_relationship_hashes(payload: Any, accumulator: Set[str]) -> None:
|
||||
if isinstance(payload, dict):
|
||||
for value in payload.values():
|
||||
_collect_relationship_hashes(value, accumulator)
|
||||
elif isinstance(payload, (list, tuple, set)):
|
||||
for value in payload:
|
||||
_collect_relationship_hashes(value, accumulator)
|
||||
elif isinstance(payload, str) and _looks_like_hash(payload):
|
||||
accumulator.add(payload)
|
||||
|
||||
|
||||
def _generate_hydrus_url_variants(url: str) -> List[str]:
|
||||
seen: Set[str] = set()
|
||||
variants: List[str] = []
|
||||
|
||||
def push(candidate: Optional[str]) -> None:
|
||||
if not candidate:
|
||||
return
|
||||
text = candidate.strip()
|
||||
if not text or text in seen:
|
||||
return
|
||||
seen.add(text)
|
||||
variants.append(text)
|
||||
|
||||
push(url)
|
||||
try:
|
||||
parsed = urlsplit(url)
|
||||
except Exception:
|
||||
return variants
|
||||
|
||||
if parsed.scheme in {"http", "https"}:
|
||||
alternate_scheme = "https" if parsed.scheme == "http" else "http"
|
||||
push(urlunsplit((alternate_scheme, parsed.netloc, parsed.path, parsed.query, parsed.fragment)))
|
||||
|
||||
normalised_netloc = parsed.netloc.lower()
|
||||
if normalised_netloc and normalised_netloc != parsed.netloc:
|
||||
push(urlunsplit((parsed.scheme, normalised_netloc, parsed.path, parsed.query, parsed.fragment)))
|
||||
|
||||
if parsed.path:
|
||||
trimmed_path = parsed.path.rstrip("/")
|
||||
if trimmed_path != parsed.path:
|
||||
push(urlunsplit((parsed.scheme, parsed.netloc, trimmed_path, parsed.query, parsed.fragment)))
|
||||
else:
|
||||
push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path + "/", parsed.query, parsed.fragment)))
|
||||
unquoted_path = unquote(parsed.path)
|
||||
if unquoted_path != parsed.path:
|
||||
push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, parsed.query, parsed.fragment)))
|
||||
|
||||
if parsed.query or parsed.fragment:
|
||||
push(urlunsplit((parsed.scheme, parsed.netloc, parsed.path, "", "")))
|
||||
if parsed.path:
|
||||
unquoted_path = unquote(parsed.path)
|
||||
push(urlunsplit((parsed.scheme, parsed.netloc, unquoted_path, "", "")))
|
||||
|
||||
return variants
|
||||
|
||||
|
||||
def _build_hydrus_query(
|
||||
hashes: Optional[Sequence[str]],
|
||||
file_ids: Optional[Sequence[int]],
|
||||
include_relationships: bool,
|
||||
minimal: bool,
|
||||
) -> Dict[str, str]:
|
||||
query: Dict[str, str] = {}
|
||||
if hashes:
|
||||
query["hashes"] = json.dumps([_normalize_hash(h) for h in hashes])
|
||||
if file_ids:
|
||||
query["file_ids"] = json.dumps([int(fid) for fid in file_ids])
|
||||
if not query:
|
||||
raise ValueError("hashes or file_ids must be provided")
|
||||
query["include_service_keys_to_tags"] = json.dumps(True)
|
||||
query["include_tag_services"] = json.dumps(True)
|
||||
query["include_file_services"] = json.dumps(True)
|
||||
if include_relationships:
|
||||
query["include_file_relationships"] = json.dumps(True)
|
||||
if not minimal:
|
||||
extras = (
|
||||
"include_url",
|
||||
"include_size",
|
||||
"include_width",
|
||||
"include_height",
|
||||
"include_duration",
|
||||
"include_mime",
|
||||
"include_has_audio",
|
||||
"include_is_trashed",
|
||||
)
|
||||
for key in extras:
|
||||
query[key] = json.dumps(True)
|
||||
return query
|
||||
|
||||
|
||||
def _fetch_hydrus_entries(
    client: "HydrusNetwork",
    hashes: Optional[Sequence[str]],
    file_ids: Optional[Sequence[int]],
    include_relationships: bool,
    minimal: bool,
) -> List[Dict[str, Any]]:
    """Fetch file metadata entries from Hydrus for the given hashes/file ids.

    Returns only dict-shaped entries from the response "metadata" list;
    returns an empty list when no identifiers are given or the response
    is not in the expected shape.
    """
    if not hashes and not file_ids:
        return []
    request = HydrusRequestSpec(
        method="GET",
        endpoint="/get_files/file_metadata",
        query=_build_hydrus_query(hashes, file_ids, include_relationships, minimal),
    )
    response = client._perform_request(request)
    if not isinstance(response, dict):
        return []
    metadata = response.get("metadata")
    if not isinstance(metadata, list):
        return []
    return [item for item in metadata if isinstance(item, dict)]
|
||||
|
||||
|
||||
def _has_current_file_service(entry: Dict[str, Any]) -> bool:
|
||||
services = entry.get("file_services")
|
||||
if not isinstance(services, dict):
|
||||
return False
|
||||
current = services.get("current")
|
||||
if isinstance(current, dict):
|
||||
for value in current.values():
|
||||
if value:
|
||||
return True
|
||||
return False
|
||||
if isinstance(current, list):
|
||||
return len(current) > 0
|
||||
return False
|
||||
|
||||
|
||||
def _compute_file_flags(entry: Dict[str, Any]) -> Tuple[bool, bool, bool]:
|
||||
mime = entry.get("mime")
|
||||
mime_lower = mime.lower() if isinstance(mime, str) else ""
|
||||
is_video = mime_lower.startswith("video/")
|
||||
is_audio = mime_lower.startswith("audio/")
|
||||
is_deleted = bool(entry.get("is_trashed"))
|
||||
file_services = entry.get("file_services")
|
||||
if not is_deleted and isinstance(file_services, dict):
|
||||
deleted = file_services.get("deleted")
|
||||
if isinstance(deleted, dict) and deleted:
|
||||
is_deleted = True
|
||||
return is_video, is_audio, is_deleted
|
||||
|
||||
|
||||
def fetch_hydrus_metadata(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Fetch and summarise metadata for one Hydrus file.

    *payload* carries "api_url" (required), "access_key", a file identifier
    ("hash" and/or "file_ids"/"file_id"), and an optional "options" dict with
    "prefer_service_name", "include_relationships", "minimal" and "timeout".
    Returns a result dict with the summary, tags and derived flags; on no
    match, a stub with "error": "not_found". Raises ValueError for missing
    inputs and RuntimeError on Hydrus request failure.
    """
    hash_hex = None
    raw_hash_value = payload.get("hash")
    if raw_hash_value is not None:
        hash_hex = _normalize_hash(raw_hash_value)
    # Collect candidate file ids from "file_ids" (scalar or iterable) and
    # "file_id", skipping anything that does not coerce to int.
    file_ids: List[int] = []
    raw_file_ids = payload.get("file_ids")
    if isinstance(raw_file_ids, (list, tuple, set)):
        for value in raw_file_ids:
            try:
                file_ids.append(int(value))
            except (TypeError, ValueError):
                continue
    elif raw_file_ids is not None:
        try:
            file_ids.append(int(raw_file_ids))
        except (TypeError, ValueError):
            file_ids = []
    raw_file_id = payload.get("file_id")
    if raw_file_id is not None:
        try:
            coerced = int(raw_file_id)
        except (TypeError, ValueError):
            coerced = None
        if coerced is not None and coerced not in file_ids:
            file_ids.append(coerced)
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    prefer_service = options.get("prefer_service_name")
    if isinstance(prefer_service, str):
        prefer_service = prefer_service.strip()
    else:
        prefer_service = None
    include_relationships = bool(options.get("include_relationships"))
    minimal = bool(options.get("minimal"))
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    if hash_hex:
        hashes = [hash_hex]
    if not hashes and not file_ids:
        raise ValueError("Hydrus hash or file id is required")
    try:
        entries = _fetch_hydrus_entries(
            client,
            hashes,
            file_ids or None,
            include_relationships,
            minimal
        )
    except HydrusRequestError as exc:
        raise RuntimeError(str(exc))
    if not entries:
        # No match: return a structured not-found stub rather than raising.
        response: Dict[str, Any] = {
            "hash": hash_hex,
            "metadata": {},
            "tags": [],
            "warnings": [f"No Hydrus metadata for {hash_hex or file_ids}"],
            "error": "not_found",
        }
        if file_ids:
            response["file_id"] = file_ids[0]
        return response
    # Only the first entry is summarised even if several matched.
    entry = entries[0]
    if not hash_hex:
        # Backfill the hash from the entry when lookup was done by file id.
        entry_hash = entry.get("hash")
        if isinstance(entry_hash, str) and entry_hash:
            hash_hex = entry_hash
            hashes = [hash_hex]
    summary, primary_tags, service_key, title, clip_time = _summarize_hydrus_entry(entry, prefer_service)
    is_video, is_audio, is_deleted = _compute_file_flags(entry)
    has_current_file_service = _has_current_file_service(entry)
    is_local = bool(entry.get("is_local"))
    size_bytes = entry.get("size") or entry.get("file_size")
    filesize_mb = None
    if isinstance(size_bytes, (int, float)) and size_bytes > 0:
        filesize_mb = float(size_bytes) / (1024.0 * 1024.0)
    duration = entry.get("duration")
    if duration is None and isinstance(entry.get("duration_ms"), (int, float)):
        # NOTE(review): "duration" appears to be seconds, "duration_ms"
        # milliseconds — conversion below assumes so; confirm against the API.
        duration = float(entry["duration_ms"]) / 1000.0
    warnings_list: List[str] = []
    if not primary_tags:
        warnings_list.append("No tags returned for preferred service")
    relationships = None
    relationship_metadata: Dict[str, Dict[str, Any]] = {}
    if include_relationships and hash_hex:
        # Relationship failures are reported as warnings, never fatal.
        try:
            rel_spec = HydrusRequestSpec(
                method="GET",
                endpoint="/manage_file_relationships/get_file_relationships",
                query={"hash": hash_hex},
            )
            relationships = client._perform_request(rel_spec)
        except HydrusRequestError as exc:
            warnings_list.append(f"Relationship lookup failed: {exc}")
            relationships = None
        if isinstance(relationships, dict):
            related_hashes: Set[str] = set()
            _collect_relationship_hashes(relationships, related_hashes)
            related_hashes.discard(hash_hex)
            if related_hashes:
                try:
                    # Minimal, relationship-free fetch for each related file.
                    related_entries = _fetch_hydrus_entries(
                        client,
                        sorted(related_hashes),
                        None,
                        False,
                        True
                    )
                except HydrusRequestError as exc:
                    warnings_list.append(f"Relationship metadata fetch failed: {exc}")
                else:
                    for rel_entry in related_entries:
                        rel_hash = rel_entry.get("hash")
                        if not isinstance(rel_hash, str):
                            continue
                        rel_summary, rel_tags, _, rel_title, rel_clip = _summarize_hydrus_entry(rel_entry, prefer_service)
                        rel_summary["tags"] = rel_tags
                        if rel_title:
                            rel_summary["title"] = rel_title
                        if rel_clip:
                            rel_summary["clip_time"] = rel_clip
                        relationship_metadata[rel_hash] = rel_summary
    result: Dict[str, Any] = {
        "hash": entry.get("hash") or hash_hex,
        "metadata": summary,
        "tags": primary_tags,
        "tag_service_key": service_key,
        "title": title,
        "clip_time": clip_time,
        "duration": duration,
        "filesize_mb": filesize_mb,
        "is_video": is_video,
        "is_audio": is_audio,
        "is_deleted": is_deleted,
        "is_local": is_local,
        "has_current_file_service": has_current_file_service,
        "matched_hash": entry.get("hash") or hash_hex,
        "swap_recommended": False,
    }
    file_id_value = entry.get("file_id")
    if isinstance(file_id_value, (int, float)):
        result["file_id"] = int(file_id_value)
    if relationships is not None:
        result["relationships"] = relationships
    if relationship_metadata:
        result["relationship_metadata"] = relationship_metadata
    if warnings_list:
        result["warnings"] = warnings_list
    return result
|
||||
|
||||
|
||||
def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve a URL to a Hydrus file and fetch its metadata.

    Tries URL variants (from _generate_hydrus_url_variants) against
    /add_urls/get_url_files breadth-first, also following any
    normalised/redirect URLs the server reports, until a hash or file id is
    found; then delegates to fetch_hydrus_metadata. Adds "matched_url",
    "normalised_url" and "tried_urls" to the result. Raises ValueError for
    missing inputs and RuntimeError when no variant matches or a request fails.
    """
    raw_url = payload.get("url") or payload.get("source_url")
    url = str(raw_url or "").strip()
    if not url:
        raise ValueError("URL is required to fetch Hydrus metadata by URL")
    base_url = str(payload.get("api_url") or "").strip()
    if not base_url:
        raise ValueError("Hydrus api_url is required")
    access_key = str(payload.get("access_key") or "").strip()
    options_raw = payload.get("options")
    options = options_raw if isinstance(options_raw, dict) else {}
    timeout = float(options.get("timeout") or 60.0)
    client = HydrusNetwork(base_url, access_key, timeout)
    hashes: Optional[List[str]] = None
    file_ids: Optional[List[int]] = None
    matched_url = None
    normalised_reported = None
    seen: Set[str] = set()
    queue = deque()
    for variant in _generate_hydrus_url_variants(url):
        queue.append(variant)
    if not queue:
        queue.append(url)
    tried_variants: List[str] = []
    while queue:
        candidate = queue.popleft()
        candidate = str(candidate or "").strip()
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)
        tried_variants.append(candidate)
        spec = HydrusRequestSpec(
            method="GET",
            endpoint="/add_urls/get_url_files",
            query={"url": candidate},
        )
        try:
            response = client._perform_request(spec)
        except HydrusRequestError as exc:
            raise RuntimeError(str(exc))
        response_hashes_list: List[str] = []
        response_file_ids_list: List[int] = []
        if isinstance(response, dict):
            # Enqueue server-normalised and redirect URLs as further candidates.
            normalised_value = response.get("normalised_url")
            if isinstance(normalised_value, str):
                trimmed = normalised_value.strip()
                if trimmed:
                    normalised_reported = normalised_reported or trimmed
                    if trimmed not in seen:
                        queue.append(trimmed)
            for redirect_key in ("redirect_url", "url"):
                redirect_value = response.get(redirect_key)
                if isinstance(redirect_value, str):
                    redirect_trimmed = redirect_value.strip()
                    if redirect_trimmed and redirect_trimmed not in seen:
                        queue.append(redirect_trimmed)
            # Hashes may arrive under "hashes"/"file_hashes" ...
            raw_hashes = response.get("hashes") or response.get("file_hashes")
            if isinstance(raw_hashes, list):
                for item in raw_hashes:
                    try:
                        normalized = _normalize_hash(item)
                    except ValueError:
                        continue
                    if normalized:
                        response_hashes_list.append(normalized)
            # ... and ids under "file_ids"/"file_id" (scalar or list).
            raw_ids = response.get("file_ids") or response.get("file_id")
            if isinstance(raw_ids, list):
                for item in raw_ids:
                    try:
                        response_file_ids_list.append(int(item))
                    except (TypeError, ValueError):
                        continue
            elif raw_ids is not None:
                try:
                    response_file_ids_list.append(int(raw_ids))
                except (TypeError, ValueError):
                    pass
            # "url_file_statuses" entries can also carry hashes and ids.
            statuses = response.get("url_file_statuses")
            if isinstance(statuses, list):
                for entry in statuses:
                    if not isinstance(entry, dict):
                        continue
                    status_hash = entry.get("hash") or entry.get("file_hash")
                    if status_hash:
                        try:
                            normalized = _normalize_hash(status_hash)
                        except ValueError:
                            normalized = None
                        if normalized:
                            response_hashes_list.append(normalized)
                    status_id = entry.get("file_id") or entry.get("fileid")
                    if status_id is not None:
                        try:
                            response_file_ids_list.append(int(status_id))
                        except (TypeError, ValueError):
                            pass
        if not hashes and response_hashes_list:
            hashes = response_hashes_list
        if not file_ids and response_file_ids_list:
            file_ids = response_file_ids_list
        if hashes or file_ids:
            # First variant that resolves wins; stop probing.
            matched_url = candidate
            break
    if not hashes and not file_ids:
        raise RuntimeError(
            "No Hydrus matches for URL variants: "
            + ", ".join(tried_variants)
        )
    followup_payload = {
        "api_url": base_url,
        "access_key": access_key,
        "hash": hashes[0] if hashes else None,
        "file_ids": file_ids,
        "options": {"timeout": timeout, "minimal": True},
    }
    result = fetch_hydrus_metadata(followup_payload)
    result["matched_url"] = matched_url or url
    result["normalised_url"] = normalised_reported or matched_url or url
    result["tried_urls"] = tried_variants
    return result
|
||||
|
||||
|
||||
def _build_hydrus_context(payload: Dict[str, Any]) -> Tuple["HydrusNetwork", str, str, float, Optional[str]]:
    """Parse shared connection fields from *payload* and build a client.

    Returns (client, api_url, access_key, timeout, prefer_service_name).
    Raises ValueError when api_url is missing.
    """
    api_url = str(payload.get("api_url") or "").strip()
    if not api_url:
        raise ValueError("Hydrus api_url is required")
    key = str(payload.get("access_key") or "").strip()
    raw_options = payload.get("options")
    opts = raw_options if isinstance(raw_options, dict) else {}
    # Timeout may live on the options dict or directly on the payload.
    timeout = float(opts.get("timeout") or payload.get("timeout") or 60.0)
    preferred = payload.get("prefer_service_name") or opts.get("prefer_service_name")
    if isinstance(preferred, str):
        preferred = preferred.strip() or None
    else:
        preferred = None
    return HydrusNetwork(api_url, key, timeout), api_url, key, timeout, preferred
|
||||
|
||||
|
||||
def _refetch_hydrus_summary(
    base_url: str,
    access_key: str,
    hash_hex: str,
    timeout: float,
    prefer_service: Optional[str]
) -> Dict[str, Any]:
    """Fetch a minimal, relationship-free metadata summary for one hash."""
    opts: Dict[str, Any] = {
        "minimal": True,
        "include_relationships": False,
        "timeout": timeout,
    }
    if prefer_service:
        opts["prefer_service_name"] = prefer_service
    return fetch_hydrus_metadata({
        "hash": hash_hex,
        "api_url": base_url,
        "access_key": access_key,
        "options": opts,
    })
|
||||
|
||||
|
||||
def apply_hydrus_tag_mutation(
    payload: Dict[str, Any],
    add: Iterable[Any],
    remove: Iterable[Any]
) -> Dict[str, Any]:
    """Add and/or remove tags on one Hydrus file, then return a fresh summary.

    *payload* must carry "api_url" and "hash" (plus optional "access_key",
    "service_key"/"tag_service_key" and "options"). When no service key is
    supplied, one is discovered via a metadata fetch. Returns the refreshed
    summary augmented with "added_tags"/"removed_tags".

    Raises ValueError on missing/empty input and RuntimeError when the
    service key cannot be determined or the Hydrus request fails.
    """
    client, base_url, access_key, timeout, prefer_service = _build_hydrus_context(payload)
    hash_hex = _normalize_hash(payload.get("hash"))

    def _clean(tags: Iterable[Any]) -> List[str]:
        # Normalize each candidate exactly once (the original comprehension
        # called _normalize_tag twice per tag), dropping empty results.
        cleaned: List[str] = []
        for tag in tags:
            normalized = _normalize_tag(tag)
            if normalized:
                cleaned.append(normalized)
        return cleaned

    add_list = _clean(add)
    remove_list = _clean(remove)
    if not add_list and not remove_list:
        raise ValueError("No tag changes supplied")
    service_key = payload.get("service_key") or payload.get("tag_service_key")
    if not service_key:
        # Fall back to the tag service key reported by a fresh metadata fetch.
        summary = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
        service_key = summary.get("tag_service_key")
    if not isinstance(service_key, str) or not service_key:
        raise RuntimeError("Unable to determine Hydrus tag service key")
    # Hydrus action codes: "0" = add, "1" = delete.
    actions: Dict[str, List[str]] = {}
    if add_list:
        actions["0"] = add_list
    if remove_list:
        actions["1"] = remove_list
    if not actions:
        raise ValueError("Tag mutation produced no actionable changes")
    request_payload = {
        "hashes": [hash_hex],
        "service_keys_to_actions_to_tags": {
            service_key: actions,
        },
    }
    try:
        tag_spec = HydrusRequestSpec(
            method="POST",
            endpoint="/add_tags/add_tags",
            data=request_payload,
        )
        client._perform_request(tag_spec)
    except HydrusRequestError as exc:
        # Preserve the original cause for debugging while keeping the
        # RuntimeError contract callers rely on.
        raise RuntimeError(str(exc)) from exc
    summary_after = _refetch_hydrus_summary(base_url, access_key, hash_hex, timeout, prefer_service)
    result = dict(summary_after)
    result["added_tags"] = actions.get("0", [])
    result["removed_tags"] = actions.get("1", [])
    result["tag_service_key"] = summary_after.get("tag_service_key")
    return result
|
||||
|
||||
Reference in New Issue
Block a user