1831 lines
64 KiB
Python
1831 lines
64 KiB
Python
"""Hydrus API helpers and export utilities."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import http.client
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
from SYS.logger import log
|
|
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS
|
|
import tempfile
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Iterable, Optional, Sequence, Type, TypeVar, Union, cast
|
|
from urllib.parse import urlsplit, urlencode, quote
|
|
import httpx
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
from SYS.utils import (
|
|
decode_cbor,
|
|
jsonify,
|
|
ensure_directory,
|
|
unique_path,
|
|
)
|
|
from .HTTP import HTTPClient
|
|
|
|
|
|
class HydrusRequestError(RuntimeError):
    """Raised when the Hydrus Client API returns an error response.

    The HTTP status and the decoded response payload are kept on the
    instance so callers can inspect them programmatically.
    """

    def __init__(self, status: int, message: str, payload: Any | None = None) -> None:
        self.status = status
        self.payload = payload
        super().__init__(f"Hydrus request failed ({status}): {message}")
|
|
|
|
|
|
class HydrusConnectionError(HydrusRequestError):
    """Raised when the Hydrus service cannot be reached at all
    (connection refused, timeout, DNS failure, ...).

    This is an expected condition whenever Hydrus simply is not running,
    so callers should log it without a full traceback.
    """

    def __init__(self, message: str) -> None:
        # Status 0 is the sentinel for "no HTTP response was received".
        super().__init__(0, message, None)
        self.is_connection_error = True
|
|
|
|
|
|
@dataclass(slots=True)
class HydrusRequestSpec:
    """Declarative description of one Hydrus Client API request."""

    method: str  # HTTP verb, e.g. "GET" or "POST"
    endpoint: str  # API path such as "/get_services" (leading slash optional)
    query: dict[str, Any] | None = None  # query-string parameters
    data: Any | None = None  # JSON-serializable body, or raw bytes
    file_path: Path | None = None  # when set, stream this file as the request body
    content_type: str | None = None  # overrides Content-Type for file uploads
    accept: str | None = "application/cbor"  # preferred response encoding
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class HydrusNetwork:
|
|
"""Thin wrapper around the Hydrus Client API."""
|
|
|
|
url: str
|
|
access_key: str = ""
|
|
timeout: float = 9.0
|
|
instance_name: str = "" # Optional store name (e.g., 'home') for namespaced logs
|
|
|
|
scheme: str = field(init=False)
|
|
hostname: str = field(init=False)
|
|
port: int = field(init=False)
|
|
base_path: str = field(init=False)
|
|
_session_key: str = field(init=False, default="", repr=False) # Cached session key
|
|
|
|
def __post_init__(self) -> None:
|
|
if not self.url:
|
|
raise ValueError("Hydrus base URL is required")
|
|
self.url = self.url.rstrip("/")
|
|
parsed = urlsplit(self.url)
|
|
if parsed.scheme not in {"http",
|
|
"https"}:
|
|
raise ValueError("Hydrus base URL must use http or https")
|
|
self.scheme = parsed.scheme
|
|
self.hostname = parsed.hostname or "localhost"
|
|
self.port = parsed.port or (443 if self.scheme == "https" else 80)
|
|
self.base_path = parsed.path.rstrip("/")
|
|
self.access_key = self.access_key or ""
|
|
self.instance_name = str(self.instance_name or "").strip()
|
|
|
|
def _log_prefix(self) -> str:
|
|
if self.instance_name:
|
|
return f"[hydrusnetwork:{self.instance_name}]"
|
|
return f"[hydrusnetwork:{self.hostname}:{self.port}]"
|
|
|
|
# ------------------------------------------------------------------
|
|
# low-level helpers
|
|
# ------------------------------------------------------------------
|
|
|
|
def _build_path(self, endpoint: str, query: dict[str, Any] | None = None) -> str:
|
|
path = endpoint if endpoint.startswith("/") else f"/{endpoint}"
|
|
if self.base_path:
|
|
path = f"{self.base_path}{path}"
|
|
if query:
|
|
encoded = urlencode(query, doseq=True)
|
|
if encoded:
|
|
path = f"{path}?{encoded}"
|
|
return path
|
|
|
|
    def _perform_request(self, spec: HydrusRequestSpec) -> Any:
        """Execute *spec* against the Hydrus API and return the decoded payload.

        Handles authentication (session key preferred over access key),
        streamed file uploads with a stderr progress bar, JSON/CBOR response
        decoding, a quiet {} result for 404s on /get_files/file_path, and a
        single automatic retry when a 419 indicates an expired session key.

        Raises HydrusConnectionError when the service is unreachable and
        HydrusRequestError for HTTP-level failures.
        """
        headers: dict[str, str] = {}

        # Use session key if available, otherwise use access key
        if self._session_key:
            headers["Hydrus-Client-API-Session-Key"] = self._session_key
        elif self.access_key:
            headers["Hydrus-Client-API-Access-Key"] = self.access_key
        if spec.accept:
            headers["Accept"] = spec.accept

        path = self._build_path(spec.endpoint, spec.query)
        url = f"{self.scheme}://{self.hostname}:{self.port}{path}"

        # Log request details
        logger.debug(
            f"{self._log_prefix()} {spec.method} {spec.endpoint} (auth: {'session_key' if self._session_key else 'access_key' if self.access_key else 'none'})"
        )

        status = 0
        reason = ""
        body = b""
        content_type = ""

        try:
            with HTTPClient(timeout=self.timeout,
                            headers=headers,
                            verify_ssl=False) as client:
                response = None

                if spec.file_path is not None:
                    # File-upload mode: stream the file as the raw request body.
                    file_path = Path(spec.file_path)
                    if not file_path.is_file():
                        error_msg = f"Upload file not found: {file_path}"
                        logger.error(f"{self._log_prefix()} {error_msg}")
                        raise FileNotFoundError(error_msg)

                    file_size = file_path.stat().st_size
                    headers["Content-Type"] = spec.content_type or "application/octet-stream"
                    # Do not set Content-Length when streaming an iterator body.
                    # If the file size changes between stat() and read() (or the source is truncated),
                    # h11 will raise: "Too little data for declared Content-Length".
                    # Let httpx choose chunked transfer encoding for safety.
                    headers.pop("Content-Length", None)

                    logger.debug(
                        f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)"
                    )

                    # Stream upload body with a stderr progress bar (pipeline-safe).
                    from SYS.models import ProgressBar

                    bar = ProgressBar()
                    # Keep the PipelineLiveProgress transfer line clean: show the file name.
                    # (The hydrus instance/service is already visible in the logs above.)
                    label = str(getattr(file_path, "name", None) or "upload")
                    start_t = time.time()
                    # Mutable single-element lists so the nested closures can update them.
                    last_render_t = [start_t]
                    sent = [0]

                    def _render_progress(final: bool = False) -> None:
                        # Throttled render of the upload progress bar (at most ~4/s).
                        if file_size <= 0:
                            return
                        now = time.time()
                        if not final and (now - float(last_render_t[0])) < 0.25:
                            return
                        last_render_t[0] = now
                        bar.update(
                            downloaded=int(sent[0]),
                            total=int(file_size),
                            label=str(label),
                            file=sys.stderr,
                        )
                        if final:
                            bar.finish()

                    def file_gen():
                        # Yield the file in 256 KiB chunks, updating the bar as we go.
                        try:
                            with file_path.open("rb") as handle:
                                while True:
                                    chunk = handle.read(256 * 1024)
                                    if not chunk:
                                        break
                                    sent[0] += len(chunk)
                                    _render_progress(final=False)
                                    yield chunk
                        finally:
                            # Always finish the bar, even if the upload aborts.
                            _render_progress(final=True)

                    response = client.request(
                        spec.method,
                        url,
                        content=file_gen(),
                        headers=headers,
                        raise_for_status=False,
                        log_http_errors=False,
                    )
                else:
                    # Plain request mode: raw bytes or a JSON-serialized body.
                    content = None
                    json_data = None
                    if spec.data is not None:
                        if isinstance(spec.data, (bytes, bytearray)):
                            content = spec.data
                        else:
                            json_data = spec.data
                            # Hydrus expects JSON bodies to be sent with Content-Type: application/json.
                            # httpx will usually set this automatically, but we set it explicitly to
                            # match the Hydrus API docs and avoid edge cases.
                            headers.setdefault("Content-Type", "application/json")
                    logger.debug(
                        f"{self._log_prefix()} Request body size: {len(content) if content else 'json'}"
                    )

                    response = client.request(
                        spec.method,
                        url,
                        content=content,
                        json=json_data,
                        headers=headers,
                        raise_for_status=False,
                        log_http_errors=False,
                    )

                status = response.status_code
                reason = response.reason_phrase
                body = response.content
                content_type = response.headers.get("Content-Type", "") or ""

                logger.debug(
                    f"{self._log_prefix()} Response {status} {reason} ({len(body)} bytes)"
                )

        except (httpx.ConnectError, httpx.TimeoutException, httpx.NetworkError) as exc:
            # Expected when Hydrus is simply not running: warn, don't traceback.
            msg = f"Hydrus unavailable: {exc}"
            logger.warning(f"{self._log_prefix()} {msg}")
            raise HydrusConnectionError(msg) from exc
        except Exception as exc:
            logger.error(f"{self._log_prefix()} Connection error: {exc}", exc_info=True)
            raise

        # Decode the body based on the response Content-Type; fall back to raw bytes.
        payload: Any
        payload = {}
        if body:
            content_main = content_type.split(";", 1)[0].strip().lower()
            if "json" in content_main:
                try:
                    payload = json.loads(body.decode("utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    payload = body.decode("utf-8", "replace")
            elif "cbor" in content_main:
                try:
                    payload = decode_cbor(body)
                except Exception:
                    payload = body
            else:
                payload = body

        if status >= 400:
            # Extract the most useful error message available from the payload.
            message = ""
            if isinstance(payload, dict):
                message = str(payload.get("message") or payload.get("error") or payload)
            elif isinstance(payload, str):
                message = payload
            else:
                message = reason or "HTTP error"

            # Some endpoints are naturally "missing" sometimes and should not spam logs.
            if status == 404 and spec.endpoint.rstrip("/") == "/get_files/file_path":
                return {}

            logger.error(f"{self._log_prefix()} HTTP {status}: {message}")

            # Handle expired session key (419) by clearing cache and retrying once
            if status == 419 and self._session_key and "session" in message.lower():
                logger.warning(
                    f"{self._log_prefix()} Session key expired, acquiring new one and retrying..."
                )
                self._session_key = ""  # Clear expired session key
                try:
                    self._acquire_session_key()
                    # Retry the request with new session key
                    return self._perform_request(spec)
                except Exception as retry_error:
                    logger.error(
                        f"{self._log_prefix()} Retry failed: {retry_error}",
                        exc_info=True
                    )
                    # If retry fails, raise the original error
                    raise HydrusRequestError(status, message, payload) from retry_error

            raise HydrusRequestError(status, message, payload)

        return payload
|
|
|
|
def _acquire_session_key(self) -> str:
|
|
"""Acquire a session key from the Hydrus API using the access key.
|
|
|
|
Session keys are temporary authentication tokens that expire after 24 hours
|
|
of inactivity, client restart, or if the access key is deleted. They are
|
|
more secure than passing access keys in every request.
|
|
|
|
Returns the session key string.
|
|
Raises HydrusRequestError if the request fails.
|
|
"""
|
|
if not self.access_key:
|
|
raise HydrusRequestError(
|
|
401,
|
|
"Cannot acquire session key: no access key configured"
|
|
)
|
|
|
|
# Temporarily use access key to get session key
|
|
original_session_key = self._session_key
|
|
try:
|
|
self._session_key = "" # Clear session key to use access key for this request
|
|
|
|
result = self._get("/session_key")
|
|
session_key = result.get("session_key")
|
|
|
|
if not session_key:
|
|
raise HydrusRequestError(
|
|
500,
|
|
"Session key response missing 'session_key' field",
|
|
result
|
|
)
|
|
|
|
self._session_key = session_key
|
|
return session_key
|
|
except HydrusRequestError:
|
|
self._session_key = original_session_key
|
|
raise
|
|
except Exception as e:
|
|
self._session_key = original_session_key
|
|
raise HydrusRequestError(500, f"Failed to acquire session key: {e}")
|
|
|
|
def ensure_session_key(self) -> str:
|
|
"""Ensure a valid session key exists, acquiring one if needed.
|
|
|
|
Returns the session key. If one is already cached, returns it.
|
|
Otherwise acquires a new session key from the API.
|
|
"""
|
|
if self._session_key:
|
|
return self._session_key
|
|
return self._acquire_session_key()
|
|
|
|
def _get(self,
|
|
endpoint: str,
|
|
*,
|
|
query: dict[str,
|
|
Any] | None = None) -> dict[str,
|
|
Any]:
|
|
spec = HydrusRequestSpec("GET", endpoint, query=query)
|
|
return cast(dict[str, Any], self._perform_request(spec))
|
|
|
|
def _post(
|
|
self,
|
|
endpoint: str,
|
|
*,
|
|
data: dict[str,
|
|
Any] | None = None,
|
|
file_path: Path | None = None,
|
|
content_type: str | None = None,
|
|
) -> dict[str,
|
|
Any]:
|
|
spec = HydrusRequestSpec(
|
|
"POST",
|
|
endpoint,
|
|
data=data,
|
|
file_path=file_path,
|
|
content_type=content_type
|
|
)
|
|
return cast(dict[str, Any], self._perform_request(spec))
|
|
|
|
def _ensure_hashes(self, hash: Union[str, Iterable[str]]) -> list[str]:
|
|
if isinstance(hash, str):
|
|
return [hash]
|
|
return list(hash)
|
|
|
|
def _append_access_key(self, url: str) -> str:
|
|
if not self.access_key:
|
|
return url
|
|
separator = "&" if "?" in url else "?"
|
|
# Use the correct parameter name for Hydrus API compatibility
|
|
return f"{url}{separator}access_key={quote(self.access_key)}"
|
|
|
|
def add_file(self, path: Union[str, Path]) -> dict[str, Any]:
|
|
"""Add a file to Hydrus using the octet-stream upload mode.
|
|
|
|
This mirrors the Hydrus API POST /add_files/add_file behavior when sending
|
|
the file bytes as the POST body. The method accepts either a filesystem
|
|
`Path` or a string path and will raise FileNotFoundError if the target
|
|
path is not a readable file.
|
|
"""
|
|
# Accept both Path and str for convenience
|
|
file_path = Path(path) if not isinstance(path, Path) else path
|
|
if not file_path.is_file():
|
|
raise FileNotFoundError(f"Upload file not found: {file_path}")
|
|
|
|
# Forward as file_path so the request body is streamed as application/octet-stream
|
|
return self._post("/add_files/add_file", file_path=file_path)
|
|
|
|
def undelete_files(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]:
|
|
"""Restore files from Hydrus trash back into 'my files'.
|
|
|
|
Hydrus Client API: POST /add_files/undelete_files
|
|
Required JSON args: {"hashes": [<sha256 hex>, ...]}
|
|
"""
|
|
hash_list = self._ensure_hashes(hashes)
|
|
body = {
|
|
"hashes": hash_list
|
|
}
|
|
return self._post("/add_files/undelete_files", data=body)
|
|
|
|
def delete_files(
|
|
self,
|
|
hashes: Union[str,
|
|
Iterable[str]],
|
|
*,
|
|
reason: str | None = None
|
|
) -> dict[str,
|
|
Any]:
|
|
"""Delete files in Hydrus.
|
|
|
|
Hydrus Client API: POST /add_files/delete_files
|
|
Required JSON args: {"hashes": [<sha256 hex>, ...]}
|
|
Optional JSON args: {"reason": "..."}
|
|
"""
|
|
hash_list = self._ensure_hashes(hashes)
|
|
body: dict[str,
|
|
Any] = {
|
|
"hashes": hash_list
|
|
}
|
|
if isinstance(reason, str) and reason.strip():
|
|
body["reason"] = reason.strip()
|
|
return self._post("/add_files/delete_files", data=body)
|
|
|
|
def clear_file_deletion_record(self,
|
|
hashes: Union[str,
|
|
Iterable[str]]) -> dict[str,
|
|
Any]:
|
|
"""Clear Hydrus's file deletion record for the provided hashes.
|
|
|
|
Hydrus Client API: POST /add_files/clear_file_deletion_record
|
|
Required JSON args: {"hashes": [<sha256 hex>, ...]}
|
|
"""
|
|
hash_list = self._ensure_hashes(hashes)
|
|
body = {
|
|
"hashes": hash_list
|
|
}
|
|
return self._post("/add_files/clear_file_deletion_record", data=body)
|
|
|
|
def add_tag(
|
|
self,
|
|
hash: Union[str,
|
|
Iterable[str]],
|
|
tags: Iterable[str],
|
|
service_name: str
|
|
) -> dict[str,
|
|
Any]:
|
|
hash = self._ensure_hashes(hash)
|
|
body = {
|
|
"hashes": hash,
|
|
"service_names_to_tags": {
|
|
service_name: list(tags)
|
|
}
|
|
}
|
|
return self._post("/add_tags/add_tags", data=body)
|
|
|
|
def delete_tag(
|
|
self,
|
|
file_hashes: Union[str,
|
|
Iterable[str]],
|
|
tags: Iterable[str],
|
|
service_name: str,
|
|
*,
|
|
action: int = 1,
|
|
) -> dict[str,
|
|
Any]:
|
|
hashes = self._ensure_hashes(file_hashes)
|
|
body = {
|
|
"hashes": hashes,
|
|
"service_names_to_actions_to_tags": {
|
|
service_name: {
|
|
action: list(tags)
|
|
}
|
|
},
|
|
}
|
|
return self._post("/add_tags/add_tags", data=body)
|
|
|
|
def add_tags_by_key(
|
|
self,
|
|
hash: Union[str,
|
|
Iterable[str]],
|
|
tags: Iterable[str],
|
|
service_key: str
|
|
) -> dict[str,
|
|
Any]:
|
|
hash = self._ensure_hashes(hash)
|
|
body = {
|
|
"hashes": hash,
|
|
"service_keys_to_tags": {
|
|
service_key: list(tags)
|
|
}
|
|
}
|
|
return self._post("/add_tags/add_tags", data=body)
|
|
|
|
def delete_tags_by_key(
|
|
self,
|
|
file_hashes: Union[str,
|
|
Iterable[str]],
|
|
tags: Iterable[str],
|
|
service_key: str,
|
|
*,
|
|
action: int = 1,
|
|
) -> dict[str,
|
|
Any]:
|
|
hashes = self._ensure_hashes(file_hashes)
|
|
body = {
|
|
"hashes": hashes,
|
|
"service_keys_to_actions_to_tags": {
|
|
service_key: {
|
|
action: list(tags)
|
|
}
|
|
},
|
|
}
|
|
return self._post("/add_tags/add_tags", data=body)
|
|
|
|
def mutate_tags_by_key(
|
|
self,
|
|
hash: Union[str,
|
|
Iterable[str]],
|
|
service_key: str,
|
|
*,
|
|
add_tags: Optional[Iterable[str]] = None,
|
|
remove_tags: Optional[Iterable[str]] = None,
|
|
) -> dict[str,
|
|
Any]:
|
|
"""Add or remove tags with a single /add_tags/add_tags call.
|
|
|
|
Hydrus Client API: POST /add_tags/add_tags
|
|
Use `service_keys_to_actions_to_tags` so the client can apply additions
|
|
and removals in a single request (action '0' = add, '1' = remove).
|
|
"""
|
|
hash_list = self._ensure_hashes(hash)
|
|
def _clean(tags: Optional[Iterable[str]]) -> list[str]:
|
|
if not tags:
|
|
return []
|
|
clean_list: list[str] = []
|
|
for tag in tags:
|
|
if not isinstance(tag, str):
|
|
continue
|
|
text = tag.strip()
|
|
if not text:
|
|
continue
|
|
clean_list.append(text)
|
|
return clean_list
|
|
|
|
actions: dict[str, list[str]] = {}
|
|
adds = _clean(add_tags)
|
|
removes = _clean(remove_tags)
|
|
if adds:
|
|
actions["0"] = adds
|
|
if removes:
|
|
actions["1"] = removes
|
|
if not actions:
|
|
return {}
|
|
body = {
|
|
"hashes": hash_list,
|
|
"service_keys_to_actions_to_tags": {
|
|
str(service_key): actions
|
|
},
|
|
}
|
|
return self._post("/add_tags/add_tags", data=body)
|
|
|
|
def associate_url(self,
|
|
file_hashes: Union[str,
|
|
Iterable[str]],
|
|
url: str) -> dict[str,
|
|
Any]:
|
|
hashes = self._ensure_hashes(file_hashes)
|
|
if len(hashes) == 1:
|
|
body = {
|
|
"hash": hashes[0],
|
|
"url_to_add": url
|
|
}
|
|
return self._post("/add_urls/associate_url", data=body)
|
|
|
|
results: dict[str,
|
|
Any] = {}
|
|
for file_hash in hashes:
|
|
body = {
|
|
"hash": file_hash,
|
|
"url_to_add": url
|
|
}
|
|
results[file_hash] = self._post("/add_urls/associate_url", data=body)
|
|
return {
|
|
"batched": results
|
|
}
|
|
|
|
def get_url_info(self, url: str) -> dict[str, Any]:
|
|
"""Get information about a URL.
|
|
|
|
Hydrus Client API: GET /add_urls/get_url_info
|
|
Docs: https://hydrusnetwork.github.io/hydrus/developer_api.html#add_urls_get_url_info
|
|
"""
|
|
url = str(url or "").strip()
|
|
if not url:
|
|
raise ValueError("url must not be empty")
|
|
|
|
spec = HydrusRequestSpec(
|
|
method="GET",
|
|
endpoint="/add_urls/get_url_info",
|
|
query={
|
|
"url": url
|
|
},
|
|
)
|
|
return cast(dict[str, Any], self._perform_request(spec))
|
|
|
|
def delete_url(self,
|
|
file_hashes: Union[str,
|
|
Iterable[str]],
|
|
url: str) -> dict[str,
|
|
Any]:
|
|
hashes = self._ensure_hashes(file_hashes)
|
|
if len(hashes) == 1:
|
|
body = {
|
|
"hash": hashes[0],
|
|
"url_to_delete": url
|
|
}
|
|
return self._post("/add_urls/associate_url", data=body)
|
|
|
|
results: dict[str,
|
|
Any] = {}
|
|
for file_hash in hashes:
|
|
body = {
|
|
"hash": file_hash,
|
|
"url_to_delete": url
|
|
}
|
|
results[file_hash] = self._post("/add_urls/associate_url", data=body)
|
|
return {
|
|
"batched": results
|
|
}
|
|
|
|
def set_notes(
|
|
self,
|
|
file_hash: str,
|
|
notes: dict[str,
|
|
str],
|
|
*,
|
|
merge_cleverly: bool = False,
|
|
extend_existing_note_if_possible: bool = True,
|
|
conflict_resolution: int = 3,
|
|
) -> dict[str,
|
|
Any]:
|
|
"""Add or update notes associated with a file.
|
|
|
|
Hydrus Client API: POST /add_notes/set_notes
|
|
Required JSON args: {"hash": <sha256 hex>, "notes": {name: text}}
|
|
"""
|
|
if not notes:
|
|
raise ValueError("notes mapping must not be empty")
|
|
|
|
file_hash = str(file_hash or "").strip().lower()
|
|
if not file_hash:
|
|
raise ValueError("file_hash must not be empty")
|
|
|
|
body: dict[str,
|
|
Any] = {
|
|
"hash": file_hash,
|
|
"notes": notes
|
|
}
|
|
|
|
if merge_cleverly:
|
|
body["merge_cleverly"] = True
|
|
body["extend_existing_note_if_possible"] = bool(
|
|
extend_existing_note_if_possible
|
|
)
|
|
body["conflict_resolution"] = int(conflict_resolution)
|
|
return self._post("/add_notes/set_notes", data=body)
|
|
|
|
def delete_notes(
|
|
self,
|
|
file_hash: str,
|
|
note_names: Sequence[str],
|
|
) -> dict[str,
|
|
Any]:
|
|
"""Delete notes associated with a file.
|
|
|
|
Hydrus Client API: POST /add_notes/delete_notes
|
|
Required JSON args: {"hash": <sha256 hex>, "note_names": [..]}
|
|
"""
|
|
names = [str(name) for name in note_names if str(name or "").strip()]
|
|
if not names:
|
|
raise ValueError("note_names must not be empty")
|
|
|
|
file_hash = str(file_hash or "").strip().lower()
|
|
if not file_hash:
|
|
raise ValueError("file_hash must not be empty")
|
|
|
|
body = {
|
|
"hash": file_hash,
|
|
"note_names": names
|
|
}
|
|
return self._post("/add_notes/delete_notes", data=body)
|
|
|
|
def get_file_relationships(self, file_hash: str) -> dict[str, Any]:
|
|
query = {
|
|
"hash": file_hash
|
|
}
|
|
return self._get(
|
|
"/manage_file_relationships/get_file_relationships",
|
|
query=query
|
|
)
|
|
|
|
def set_relationship(
|
|
self,
|
|
hash_a: str,
|
|
hash_b: str,
|
|
relationship: Union[str,
|
|
int],
|
|
do_default_content_merge: bool = False,
|
|
) -> dict[str,
|
|
Any]:
|
|
"""Set a relationship between two files in Hydrus.
|
|
|
|
This wraps Hydrus Client API: POST /manage_file_relationships/set_file_relationships.
|
|
|
|
Hydrus relationship enum (per Hydrus developer API docs):
|
|
- 0: set as potential duplicates
|
|
- 1: set as false positives
|
|
- 2: set as same quality (duplicates)
|
|
- 3: set as alternates
|
|
- 4: set A as better (duplicates)
|
|
|
|
Args:
|
|
hash_a: First file SHA256 hex
|
|
hash_b: Second file SHA256 hex
|
|
relationship: Relationship type as string or integer enum (0-4)
|
|
do_default_content_merge: Whether to perform default duplicate content merge
|
|
|
|
Returns:
|
|
Response from Hydrus API
|
|
"""
|
|
# Convert string relationship types to integers
|
|
if isinstance(relationship, str):
|
|
rel_map = {
|
|
# Potential duplicates
|
|
"potential": 0,
|
|
"potentials": 0,
|
|
"potential duplicate": 0,
|
|
"potential duplicates": 0,
|
|
# False positives
|
|
"false positive": 1,
|
|
"false_positive": 1,
|
|
"false positives": 1,
|
|
"false_positives": 1,
|
|
"not related": 1,
|
|
"not_related": 1,
|
|
# Duplicates (same quality)
|
|
"duplicate": 2,
|
|
"duplicates": 2,
|
|
"same quality": 2,
|
|
"same_quality": 2,
|
|
"equal": 2,
|
|
# Alternates
|
|
"alt": 3,
|
|
"alternate": 3,
|
|
"alternates": 3,
|
|
"alternative": 3,
|
|
"related": 3,
|
|
# Better/worse (duplicates)
|
|
"better": 4,
|
|
"a better": 4,
|
|
"a_better": 4,
|
|
# Back-compat: some older call sites used 'king' for primary.
|
|
# Hydrus does not accept 'king' as a relationship; this maps to 'A is better'.
|
|
"king": 4,
|
|
}
|
|
relationship = rel_map.get(
|
|
relationship.lower().strip(),
|
|
3
|
|
) # Default to alternates
|
|
|
|
body = {
|
|
"relationships": [
|
|
{
|
|
"hash_a": hash_a,
|
|
"hash_b": hash_b,
|
|
"relationship": relationship,
|
|
"do_default_content_merge": do_default_content_merge,
|
|
}
|
|
]
|
|
}
|
|
return self._post(
|
|
"/manage_file_relationships/set_file_relationships",
|
|
data=body
|
|
)
|
|
|
|
def get_services(self) -> dict[str, Any]:
|
|
return self._get("/get_services")
|
|
|
|
def search_files(
|
|
self,
|
|
tags: Sequence[Any],
|
|
*,
|
|
file_service_name: str | None = None,
|
|
return_hashes: bool = False,
|
|
return_file_ids: bool = True,
|
|
return_file_count: bool = False,
|
|
include_current_tags: bool | None = None,
|
|
include_pending_tags: bool | None = None,
|
|
file_sort_type: int | None = None,
|
|
file_sort_asc: bool | None = None,
|
|
file_sort_key: str | None = None,
|
|
) -> dict[str,
|
|
Any]:
|
|
if not tags:
|
|
raise ValueError("tags must not be empty")
|
|
|
|
query: dict[str,
|
|
Any] = {}
|
|
query_fields = [
|
|
("tags",
|
|
tags, lambda v: json.dumps(list(v))),
|
|
("file_service_name",
|
|
file_service_name, lambda v: v),
|
|
("return_hashes",
|
|
return_hashes, lambda v: "true" if v else None),
|
|
("return_file_ids",
|
|
return_file_ids, lambda v: "true" if v else None),
|
|
("return_file_count",
|
|
return_file_count, lambda v: "true" if v else None),
|
|
(
|
|
"include_current_tags",
|
|
include_current_tags,
|
|
lambda v: "true" if v else "false" if v is not None else None,
|
|
),
|
|
(
|
|
"include_pending_tags",
|
|
include_pending_tags,
|
|
lambda v: "true" if v else "false" if v is not None else None,
|
|
),
|
|
(
|
|
"file_sort_type",
|
|
file_sort_type, lambda v: str(v) if v is not None else None
|
|
),
|
|
(
|
|
"file_sort_asc",
|
|
file_sort_asc,
|
|
lambda v: "true" if v else "false" if v is not None else None,
|
|
),
|
|
("file_sort_key",
|
|
file_sort_key, lambda v: v),
|
|
]
|
|
|
|
for key, value, formatter in query_fields:
|
|
if value is None or value == []:
|
|
continue
|
|
formatted = formatter(value)
|
|
if formatted is not None:
|
|
query[key] = formatted
|
|
|
|
return self._get("/get_files/search_files", query=query)
|
|
|
|
def fetch_file_metadata(
|
|
self,
|
|
*,
|
|
file_ids: Sequence[int] | None = None,
|
|
hashes: Sequence[str] | None = None,
|
|
include_service_keys_to_tags: bool = True,
|
|
include_file_url: bool = False,
|
|
include_duration: bool = True,
|
|
include_size: bool = True,
|
|
include_mime: bool = False,
|
|
include_notes: bool = False,
|
|
) -> dict[str,
|
|
Any]:
|
|
if not file_ids and not hashes:
|
|
raise ValueError("Either file_ids or hashes must be provided")
|
|
|
|
query: dict[str,
|
|
Any] = {}
|
|
query_fields = [
|
|
("file_ids",
|
|
file_ids, lambda v: json.dumps(list(v))),
|
|
("hashes",
|
|
hashes, lambda v: json.dumps(list(v))),
|
|
(
|
|
"include_service_keys_to_tags",
|
|
include_service_keys_to_tags,
|
|
lambda v: "true" if v else None,
|
|
),
|
|
("include_file_url",
|
|
include_file_url, lambda v: "true" if v else None),
|
|
("include_duration",
|
|
include_duration, lambda v: "true" if v else None),
|
|
("include_size",
|
|
include_size, lambda v: "true" if v else None),
|
|
("include_mime",
|
|
include_mime, lambda v: "true" if v else None),
|
|
("include_notes",
|
|
include_notes, lambda v: "true" if v else None),
|
|
]
|
|
|
|
for key, value, formatter in query_fields:
|
|
if not value:
|
|
continue
|
|
formatted = formatter(value)
|
|
if formatted is not None:
|
|
query[key] = formatted
|
|
|
|
return self._get("/get_files/file_metadata", query=query)
|
|
|
|
def get_file_path(self, file_hash: str) -> dict[str, Any]:
|
|
"""Get the local file system path for a given file hash."""
|
|
query = {
|
|
"hash": file_hash
|
|
}
|
|
return self._get("/get_files/file_path", query=query)
|
|
|
|
def file_url(self, file_hash: str) -> str:
|
|
hash_param = quote(file_hash)
|
|
# Don't append access_key parameter for file downloads - use header instead
|
|
url = f"{self.url}/get_files/file?hash={hash_param}"
|
|
return url
|
|
|
|
def thumbnail_url(self, file_hash: str) -> str:
|
|
hash_param = quote(file_hash)
|
|
# Don't append access_key parameter for file downloads - use header instead
|
|
url = f"{self.url}/get_files/thumbnail?hash={hash_param}"
|
|
return url
|
|
|
|
|
|
HydrusCliOptionsT = TypeVar("HydrusCliOptionsT", bound="HydrusCliOptions")
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class HydrusCliOptions:
|
|
url: str
|
|
method: str
|
|
access_key: str
|
|
accept: str
|
|
timeout: float
|
|
content_type: str | None
|
|
body_bytes: bytes | None = None
|
|
body_path: Path | None = None
|
|
debug: bool = False
|
|
|
|
@classmethod
|
|
def from_namespace(
|
|
cls: Type[HydrusCliOptionsT],
|
|
namespace: Any
|
|
) -> HydrusCliOptionsT:
|
|
accept_header = namespace.accept or "application/cbor"
|
|
body_bytes: bytes | None = None
|
|
body_path: Path | None = None
|
|
if namespace.body_file:
|
|
body_path = Path(namespace.body_file)
|
|
elif namespace.body is not None:
|
|
body_bytes = namespace.body.encode("utf-8")
|
|
return cls(
|
|
url=namespace.url,
|
|
method=namespace.method.upper(),
|
|
access_key=namespace.access_key or "",
|
|
accept=accept_header,
|
|
timeout=namespace.timeout,
|
|
content_type=namespace.content_type,
|
|
body_bytes=body_bytes,
|
|
body_path=body_path,
|
|
debug=bool(os.environ.get("DOWNLOW_DEBUG")),
|
|
)
|
|
|
|
|
|
def hydrus_request(args, parser) -> int:
    """CLI entry point: perform one raw HTTP request against a Hydrus API.

    Builds headers (access key, accept, basic auth from URL userinfo),
    sends either a streamed file body or in-memory bytes via http.client,
    decodes the response as CBOR or JSON, and prints the result as JSON.

    Returns 0 for 2xx/3xx responses, 1 otherwise. Uses parser.error()
    (which exits) for invalid argument combinations.
    """
    if args.body and args.body_file:
        parser.error("Only one of --body or --body-file may be supplied")

    options = HydrusCliOptions.from_namespace(args)

    parsed = urlsplit(options.url)
    if parsed.scheme not in ("http", "https"):
        parser.error("Only http and https url are supported")
    if not parsed.hostname:
        parser.error("Invalid Hydrus URL")

    headers: dict[str, str] = {}
    if options.access_key:
        headers["Hydrus-Client-API-Access-Key"] = options.access_key
    if options.accept:
        headers["Accept"] = options.accept

    # Body handling: a file path streams from disk; inline bytes are sent
    # directly; otherwise only an explicit Content-Type (if any) is set.
    request_body_bytes: bytes | None = None
    body_path: Path | None = None
    if options.body_path is not None:
        body_path = options.body_path
        if not body_path.is_file():
            parser.error(f"File not found: {body_path}")
        headers.setdefault(
            "Content-Type",
            options.content_type or "application/octet-stream"
        )
        headers["Content-Length"] = str(body_path.stat().st_size)
    elif options.body_bytes is not None:
        request_body_bytes = options.body_bytes
        headers["Content-Type"] = options.content_type or "application/json"
        assert request_body_bytes is not None
        headers["Content-Length"] = str(len(request_body_bytes))
    elif options.content_type:
        headers["Content-Type"] = options.content_type

    # Translate URL userinfo (user:pass@host) into a Basic auth header.
    if parsed.username or parsed.password:
        userinfo = f"{parsed.username or ''}:{parsed.password or ''}".encode("utf-8")
        headers["Authorization"] = "Basic " + base64.b64encode(userinfo).decode("ascii")

    path = parsed.path or "/"
    if parsed.query:
        path += "?" + parsed.query

    port = parsed.port
    if port is None:
        port = 443 if parsed.scheme == "https" else 80

    connection_cls = (
        http.client.HTTPSConnection
        if parsed.scheme == "https" else http.client.HTTPConnection
    )
    host = parsed.hostname or "localhost"
    connection = connection_cls(host, port, timeout=options.timeout)

    if options.debug:
        log(
            f"Hydrus connecting to {parsed.scheme}://{host}:{port}{path}",
            file=sys.stderr
        )
    response_bytes: bytes = b""
    content_type = ""
    status = 0
    try:
        if body_path is not None:
            # Manual putrequest/putheader flow so the file can be streamed
            # in chunks instead of being loaded into memory.
            with body_path.open("rb") as handle:
                if options.debug:
                    size_hint = headers.get("Content-Length", "unknown")
                    log(
                        f"Hydrus sending file body ({size_hint} bytes)",
                        file=sys.stderr
                    )
                connection.putrequest(options.method, path)
                # Only include the port in the Host header when non-default.
                host_header = host
                if (parsed.scheme == "http"
                        and port not in (80, None)) or (parsed.scheme == "https"
                                                        and port not in (443, None)):
                    host_header = f"{host}:{port}"
                connection.putheader("Host", host_header)
                for key, value in headers.items():
                    if value:
                        connection.putheader(key, value)
                connection.endheaders()
                while True:
                    chunk = handle.read(65536)
                    if not chunk:
                        break
                    connection.send(chunk)
                if options.debug:
                    log(
                        "[downlow.py] Hydrus upload complete; awaiting response",
                        file=sys.stderr
                    )
        else:
            if options.debug:
                size_hint = "none" if request_body_bytes is None else str(
                    len(request_body_bytes)
                )
                log(f"Hydrus sending request body bytes={size_hint}", file=sys.stderr)
            # Drop empty header values before handing off to http.client.
            sanitized_headers = {
                k: v
                for k, v in headers.items() if v
            }
            connection.request(
                options.method,
                path,
                body=request_body_bytes,
                headers=sanitized_headers
            )
        response = connection.getresponse()
        status = response.status
        response_bytes = response.read()
        if options.debug:
            log(
                f"Hydrus response received ({len(response_bytes)} bytes)",
                file=sys.stderr
            )
        content_type = response.getheader("Content-Type", "")
    except (OSError, http.client.HTTPException) as exc:
        log(f"HTTP error: {exc}", file=sys.stderr)
        return 1
    finally:
        connection.close()
    # Decode the response: CBOR when the content type or Accept header asks
    # for it, otherwise JSON with a plain-text fallback.
    content_type_lower = (content_type or "").split(";", 1)[0].strip().lower()
    accept_value = options.accept or ""
    expect_cbor = "cbor" in (content_type_lower or "") or "cbor" in accept_value.lower()
    payload = None
    decode_error: Exception | None = None
    if response_bytes:
        if expect_cbor:
            try:
                payload = decode_cbor(response_bytes)
            except Exception as exc:  # pragma: no cover - library errors surfaced
                decode_error = exc
        if payload is None and not expect_cbor:
            try:
                payload = json.loads(response_bytes.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                payload = response_bytes.decode("utf-8", "replace")
        elif payload is None and expect_cbor and decode_error is not None:
            log(
                f"Expected CBOR response but decoding failed: {decode_error}",
                file=sys.stderr
            )
            return 1
    # Emit the decoded payload as JSON on stdout; scalars are wrapped in
    # {"value": ...} so the output is always a JSON document.
    json_ready = jsonify(payload) if isinstance(payload, (dict, list)) else payload
    if options.debug:
        log(f"Hydrus {options.method} {options.url} -> {status}", file=sys.stderr)
    if isinstance(json_ready, (dict, list)):
        log(json.dumps(json_ready, ensure_ascii=False))
    elif json_ready is None:
        log("{}")
    else:
        log(json.dumps({
            "value": json_ready
        },
            ensure_ascii=False))
    return 0 if 200 <= status < 400 else 1
|
|
|
|
|
|
def hydrus_export(args, _parser) -> int:
    """Export a single file from the Hydrus Client API to ``args.output``.

    The file is downloaded to a temp file first, then either moved into place
    unchanged (``args.format == "copy"``) or transcoded with ffmpeg.  A JSON
    result payload is emitted via ``log``; all errors are logged to stderr and
    reported through the exit code instead of raising.

    Expected ``args`` attributes (from the CLI parser): ``output`` (Path),
    ``file_url``, ``access_key``, ``timeout``, ``format``, ``max_width``,
    ``tmp_dir``, and optionally ``metadata_json``, ``source_ext``,
    ``hydrus_url``, ``file_hash``.  ``_parser`` is unused.

    Returns:
        0 on success, 1 on any failure.
    """
    from SYS.metadata import apply_mutagen_metadata, build_ffmpeg_command, prepare_ffmpeg_metadata

    output_path: Path = args.output
    original_suffix = output_path.suffix
    target_dir = output_path.parent

    # Optional caller-supplied metadata: must decode to a JSON object.
    metadata_payload: Optional[dict[str, Any]] = None
    metadata_raw = getattr(args, "metadata_json", None)
    if metadata_raw:
        try:
            parsed = json.loads(metadata_raw)
        except json.JSONDecodeError as exc:
            log(f"Invalid metadata JSON: {exc}", file=sys.stderr)
            return 1
        if isinstance(parsed, dict):
            metadata_payload = parsed
        else:
            log("[downlow.py] Metadata JSON must decode to an object", file=sys.stderr)
            return 1
    ffmpeg_metadata = prepare_ffmpeg_metadata(metadata_payload)

    def _normalise_ext(value: Optional[str]) -> Optional[str]:
        # Normalise "mp4" / " .mp4 " style inputs to ".mp4"; None/blank -> None.
        if not value:
            return None
        cleaned = value.strip()
        if not cleaned:
            return None
        if not cleaned.startswith("."):  # tolerate inputs like "mp4"
            cleaned = "." + cleaned.lstrip(".")
        return cleaned

    def _extension_from_mime(mime: Optional[str]) -> Optional[str]:
        # Best-effort mapping from a MIME type reported by Hydrus to an extension.
        if not mime:
            return None
        mime_map = {
            # Images / bitmaps
            "image/jpeg": ".jpg",
            "image/jpg": ".jpg",
            "image/png": ".png",
            "image/gif": ".gif",
            "image/webp": ".webp",
            "image/avif": ".avif",
            "image/jxl": ".jxl",  # JPEG XL
            "image/bmp": ".bmp",
            "image/heic": ".heic",
            "image/heif": ".heif",
            "image/x-icon": ".ico",
            "image/vnd.microsoft.icon": ".ico",
            "image/qoi": ".qoi",  # Quite OK Image
            "image/tiff": ".tiff",
            "image/svg+xml": ".svg",
            "image/vnd.adobe.photoshop": ".psd",
            # Animation / sequence variants
            "image/apng": ".apng",
            "image/avif-sequence": ".avifs",
            "image/heic-sequence": ".heics",
            "image/heif-sequence": ".heifs",
            # Video
            "video/mp4": ".mp4",
            "video/webm": ".webm",
            "video/quicktime": ".mov",
            "video/ogg": ".ogv",
            "video/mpeg": ".mpeg",
            "video/x-msvideo": ".avi",
            "video/x-flv": ".flv",
            "video/x-matroska": ".mkv",
            "video/x-ms-wmv": ".wmv",
            "video/vnd.rn-realvideo": ".rv",
            # Audio
            "audio/mpeg": ".mp3",
            "audio/mp4": ".m4a",
            "audio/ogg": ".ogg",
            "audio/flac": ".flac",
            "audio/wav": ".wav",
            "audio/x-wav": ".wav",
            "audio/x-ms-wma": ".wma",
            "audio/x-tta": ".tta",
            "audio/vnd.wave": ".wav",
            "audio/x-wavpack": ".wv",
            # Documents / office
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/vnd.djvu": ".djvu",
            "application/rtf": ".rtf",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
            "application/msword": ".doc",
            "application/vnd.ms-excel": ".xls",
            "application/vnd.ms-powerpoint": ".ppt",
            # Archive / comicbook / zip-like
            "application/zip": ".zip",
            "application/x-7z-compressed": ".7z",
            "application/x-rar-compressed": ".rar",
            "application/gzip": ".gz",
            "application/x-tar": ".tar",
            "application/x-cbz": ".cbz",  # often just ZIP with images; CBZ is not an official mime type but used as mapping
            # App / project / other
            "application/clip": ".clip",  # Clip Studio
            "application/x-krita": ".kra",
            "application/x-procreate": ".procreate",
            "application/x-shockwave-flash": ".swf",
        }

        return mime_map.get(mime.lower())

    def _extract_hash(file_url: str) -> Optional[str]:
        # Pull the file hash out of a Hydrus file URL's "?hash=..." query part.
        match = re.search(r"[?&]hash=([0-9a-fA-F]+)", file_url)
        return match.group(1) if match else None

    # Ensure output and temp directories exist using global helper
    # NOTE(review): direct args.tmp_dir access here, but getattr(args, "tmp_dir", None)
    # is used further down — confirm the parser always defines tmp_dir.
    for dir_path in [target_dir, Path(args.tmp_dir) if args.tmp_dir else target_dir]:
        try:
            ensure_directory(dir_path)
        except RuntimeError as exc:
            log(f"{exc}", file=sys.stderr)
            return 1

    # A ".bin" source extension carries no information; treat it as unknown.
    source_suffix = _normalise_ext(getattr(args, "source_ext", None))
    if source_suffix and source_suffix.lower() == ".bin":
        source_suffix = None

    if source_suffix is None:
        # No usable extension supplied: ask Hydrus for the file's ext/mime.
        hydrus_url = getattr(args, "hydrus_url", None)
        if not hydrus_url:
            try:
                from SYS.config import load_config, get_hydrus_url

                hydrus_url = get_hydrus_url(load_config())
            except Exception as exc:
                hydrus_url = None
                if os.environ.get("DOWNLOW_DEBUG"):
                    log(
                        f"hydrus-export could not load Hydrus URL: {exc}",
                        file=sys.stderr
                    )
            if hydrus_url:
                # Remember the discovered URL on args for later consumers.
                try:
                    setattr(args, "hydrus_url", hydrus_url)
                except Exception:
                    pass
        resolved_suffix: Optional[str] = None
        file_hash = getattr(args, "file_hash", None) or _extract_hash(args.file_url)
        if hydrus_url and file_hash:
            try:
                client = HydrusNetwork(
                    url=hydrus_url,
                    access_key=args.access_key,
                    timeout=args.timeout
                )
                meta_response = client.fetch_file_metadata(
                    hashes=[file_hash],
                    include_mime=True
                )
                entries = meta_response.get("metadata") if isinstance(
                    meta_response,
                    dict
                ) else None
                if isinstance(entries, list) and entries:
                    entry = entries[0]
                    # Prefer the explicit "ext" field; fall back to mapping "mime".
                    ext_value = _normalise_ext(
                        entry.get("ext") if isinstance(entry, dict) else None
                    )
                    if ext_value:
                        resolved_suffix = ext_value
                    else:
                        mime_value = entry.get("mime") if isinstance(entry, dict) else None
                        resolved_suffix = _extension_from_mime(mime_value)
            except Exception as exc:  # pragma: no cover - defensive
                if os.environ.get("DOWNLOW_DEBUG"):
                    log(f"hydrus metadata fetch failed: {exc}", file=sys.stderr)
        if not resolved_suffix:
            # Last resort: reuse the requested output suffix, else ".hydrus".
            fallback_suffix = _normalise_ext(original_suffix)
            if fallback_suffix and fallback_suffix.lower() == ".bin":
                fallback_suffix = None
            resolved_suffix = fallback_suffix or ".hydrus"
        source_suffix = resolved_suffix

    suffix = source_suffix or ".hydrus"
    # Only rewrite the output suffix when the caller gave none (or ".bin").
    if suffix and output_path.suffix.lower() in {"", ".bin"}:
        if output_path.suffix.lower() != suffix.lower():
            output_path = output_path.with_suffix(suffix)
            target_dir = output_path.parent
    # Determine temp directory (prefer provided tmp_dir, fallback to output location)
    temp_dir = Path(getattr(args, "tmp_dir", None) or target_dir)
    try:
        ensure_directory(temp_dir)
    except RuntimeError:
        temp_dir = target_dir
    # Reserve a uniquely named temp file; close the handle immediately and let
    # the downloader write to the path instead.
    temp_file = tempfile.NamedTemporaryFile(
        delete=False,
        suffix=suffix,
        dir=str(temp_dir)
    )
    temp_path = Path(temp_file.name)
    temp_file.close()
    downloaded_bytes = 0
    headers = {
        "Hydrus-Client-API-Access-Key": args.access_key,
    }
    try:
        downloaded_bytes = download_hydrus_file(
            args.file_url,
            headers,
            temp_path,
            args.timeout
        )
        if os.environ.get("DOWNLOW_DEBUG"):
            log(f"hydrus-export downloaded {downloaded_bytes} bytes", file=sys.stderr)
    except httpx.RequestError as exc:
        if temp_path.exists():
            temp_path.unlink()
        log(f"hydrus-export download failed: {exc}", file=sys.stderr)
        return 1
    except Exception as exc:  # pragma: no cover - unexpected
        if temp_path.exists():
            temp_path.unlink()
        log(f"hydrus-export error: {exc}", file=sys.stderr)
        return 1
    ffmpeg_log: Optional[str] = None
    converted_tmp: Optional[Path] = None
    try:
        final_target = unique_path(output_path)
        if args.format == "copy":
            # Plain export: move the downloaded bytes into place untouched.
            shutil.move(str(temp_path), str(final_target))
            result_path = final_target
        else:
            # Transcode via ffmpeg into a ".part" file, then move into place.
            ffmpeg_path = shutil.which("ffmpeg")
            if not ffmpeg_path:
                raise RuntimeError("ffmpeg executable not found in PATH")
            converted_tmp = final_target.with_suffix(final_target.suffix + ".part")
            if converted_tmp.exists():
                converted_tmp.unlink()
            max_width = args.max_width if args.max_width and args.max_width > 0 else 0
            cmd = build_ffmpeg_command(
                ffmpeg_path,
                temp_path,
                converted_tmp,
                args.format,
                max_width,
                metadata=ffmpeg_metadata if ffmpeg_metadata else None,
            )
            if os.environ.get("DOWNLOW_DEBUG"):
                log(f"ffmpeg command: {' '.join(cmd)}", file=sys.stderr)
            completed = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=False,
                text=True,
            )
            ffmpeg_log = (completed.stderr or "").strip()
            if completed.returncode != 0:
                error_details = ffmpeg_log or (completed.stdout or "").strip()
                raise RuntimeError(
                    f"ffmpeg failed with exit code {completed.returncode}" +
                    (f": {error_details}" if error_details else "")
                )
            shutil.move(str(converted_tmp), str(final_target))
            result_path = final_target
        # Best-effort tag write via mutagen; presumably a no-op for formats it
        # does not handle (e.g. "copy") — TODO confirm against SYS.metadata.
        apply_mutagen_metadata(result_path, ffmpeg_metadata, args.format)
        result_size = result_path.stat().st_size if result_path.exists() else None
        payload: dict[str, object] = {
            "output": str(result_path)
        }
        if downloaded_bytes:
            payload["source_bytes"] = downloaded_bytes
        if result_size is not None:
            payload["size_bytes"] = result_size
        if metadata_payload:
            payload["metadata_keys"] = sorted(ffmpeg_metadata.keys()) if ffmpeg_metadata else []
        log(json.dumps(payload, ensure_ascii=False))
        if ffmpeg_log:
            log(ffmpeg_log, file=sys.stderr)
        return 0
    except Exception as exc:
        log(f"hydrus-export failed: {exc}", file=sys.stderr)
        return 1
    finally:
        # Always clean up scratch files, success or failure.
        if temp_path.exists():
            try:
                temp_path.unlink()
            except OSError:
                pass
        if converted_tmp and converted_tmp.exists():
            try:
                converted_tmp.unlink()
            except OSError:
                pass
|
|
|
|
|
|
# ============================================================================
|
|
# Hydrus Wrapper Functions - Utilities for client initialization and config
|
|
# ============================================================================
|
|
# This section consolidates functions formerly in hydrus_wrapper.py
|
|
# Provides: supported filetypes, client initialization, caching, service resolution
|
|
|
|
# Official Hydrus supported filetypes
|
|
# Source: https://hydrusnetwork.github.io/hydrus/filetypes.html
|
|
SUPPORTED_FILETYPES = {
    # Images
    "image": {
        ".jpeg": "image/jpeg",
        ".jpg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".avif": "image/avif",
        ".jxl": "image/jxl",
        ".bmp": "image/bmp",
        ".heic": "image/heic",
        ".heif": "image/heif",
        ".ico": "image/x-icon",
        ".qoi": "image/qoi",
        ".tiff": "image/tiff",
    },
    # Animated Images
    "animation": {
        ".apng": "image/apng",
        ".avifs": "image/avif-sequence",
        ".heics": "image/heic-sequence",
        ".heifs": "image/heif-sequence",
    },
    # Video
    "video": {
        ".mp4": "video/mp4",
        ".webm": "video/webm",
        ".mkv": "video/x-matroska",
        ".avi": "video/x-msvideo",
        ".flv": "video/x-flv",
        ".mov": "video/quicktime",
        ".mpeg": "video/mpeg",
        ".ogv": "video/ogg",
        ".rm": "video/vnd.rn-realvideo",
        ".wmv": "video/x-ms-wmv",
    },
    # Audio
    "audio": {
        ".mp3": "audio/mp3",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".m4a": "audio/mp4",
        ".mka": "audio/x-matroska",
        ".mkv": "audio/x-matroska",
        ".mp4": "audio/mp4",
        ".ra": "audio/vnd.rn-realaudio",
        ".tta": "audio/x-tta",
        ".wav": "audio/x-wav",
        ".wv": "audio/wavpack",
        ".wma": "audio/x-ms-wma",
    },
    # Applications & Documents
    "application": {
        ".swf": "application/x-shockwave-flash",
        ".pdf": "application/pdf",
        ".epub": "application/epub+zip",
        ".djvu": "image/vnd.djvu",
        ".docx":
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ".pptx":
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ".doc": "application/msword",
        ".xls": "application/vnd.ms-excel",
        ".ppt": "application/vnd.ms-powerpoint",
        ".rtf": "application/rtf",
    },
    # Image Project Files
    # Fixed: these three values previously had a stray trailing "1"
    # ("application/clip1", "application/x-procreate1", "application/sai21"),
    # which did not match the mime->ext map used by hydrus_export() in this
    # file nor the Hydrus filetypes documentation.
    "project": {
        ".clip": "application/clip",
        ".kra": "application/x-krita",
        ".procreate": "application/x-procreate",
        ".psd": "image/vnd.adobe.photoshop",
        ".sai2": "application/sai2",
        ".svg": "image/svg+xml",
        ".xcf": "application/x-xcf",
    },
    # Archives
    "archive": {
        ".cbz": "application/vnd.comicbook+zip",
        ".7z": "application/x-7z-compressed",
        ".gz": "application/gzip",
        ".rar": "application/vnd.rar",
        ".zip": "application/zip",
    },
}
|
|
|
|
# Copy of the shared extension whitelist (imported from SYS.utils_constant),
# kept under the old local name so existing importers of this module keep
# working.  Note: this is a set() copy, not a flatten — SUPPORTED_FILETYPES
# above is not consulted here.
ALL_SUPPORTED_EXTENSIONS = set(GLOBAL_SUPPORTED_EXTENSIONS)


# Global Hydrus client cache to reuse session keys.
# Keyed by "url#access_key" (see get_client()).
_hydrus_client_cache: dict[str, Any] = {}


# Session-wide memo of the last Hydrus availability probe.
# None means "not probed yet"; cleared by reset_cache().
_HYDRUS_AVAILABLE: Optional[bool] = None
_HYDRUS_UNAVAILABLE_REASON: Optional[str] = None
|
|
|
|
|
|
def reset_cache() -> None:
    """Forget the cached availability probe so the next check re-probes.

    Primarily intended for tests that need a clean slate.
    """
    global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
    _HYDRUS_UNAVAILABLE_REASON = None
    _HYDRUS_AVAILABLE = None
|
|
|
|
|
|
def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool, Optional[str]]:
    """Check if Hydrus is available and accessible.

    Performs a lightweight probe to verify:
    - Hydrus URL is configured
    - Can connect to Hydrus URL/port

    Results are cached per session unless use_cache=False.

    Args:
        config: Configuration dict with Hydrus settings
        use_cache: If True, use cached result from previous probe

    Returns:
        Tuple of (is_available: bool, reason: Optional[str])
        reason is None if available, or an error message if not
    """
    global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON

    if use_cache and _HYDRUS_AVAILABLE is not None:
        return _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON

    from SYS.config import get_hydrus_url, get_hydrus_access_key

    def _remember(ok: bool, reason: Optional[str]) -> tuple[bool, Optional[str]]:
        # Record the probe outcome in the session cache and echo it back.
        global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
        _HYDRUS_AVAILABLE = ok
        _HYDRUS_UNAVAILABLE_REASON = reason
        return ok, reason

    url = (get_hydrus_url(config, "home") or "").strip()
    if not url:
        return _remember(
            False,
            "Hydrus URL not configured (check config.conf store.hydrusnetwork.home.URL)",
        )

    if not (get_hydrus_access_key(config, "home") or ""):
        return _remember(False, "Hydrus access key not configured")

    raw_timeout = config.get("HydrusNetwork_Request_Timeout")
    try:
        probe_timeout = float(raw_timeout) if raw_timeout is not None else 5.0
    except (TypeError, ValueError):
        probe_timeout = 5.0

    try:
        # Cheap TCP connect to the configured host/port; no API call is made.
        import socket
        from urllib.parse import urlparse

        parts = urlparse(url)
        host = parts.hostname or "localhost"
        port = parts.port or (443 if parts.scheme == "https" else 80)

        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        probe.settimeout(probe_timeout)
        try:
            if probe.connect_ex((host, port)) == 0:
                return _remember(True, None)
            return _remember(False, f"Cannot connect to {host}:{port}")
        finally:
            probe.close()

    except Exception as exc:
        return _remember(False, str(exc))
|
|
|
|
|
|
def is_hydrus_available(config: dict[str, Any]) -> bool:
    """Boolean convenience wrapper around :func:`is_available`.

    Args:
        config: Configuration dict

    Returns:
        True if Hydrus is reachable, False otherwise (never raises).
    """
    ok, _reason = is_available(config)
    return ok
|
|
|
|
|
|
def get_client(config: dict[str, Any]) -> HydrusNetwork:
    """Create (or reuse) a Hydrus client for the configured "home" instance.

    Uses access-key authentication by default (no session key acquisition).
    A session key may still be acquired explicitly by calling
    `HydrusNetwork.ensure_session_key()`.  Clients are cached per
    (url, access key) pair so repeated calls share one instance.

    Args:
        config: Configuration dict with Hydrus settings

    Returns:
        HydrusClient instance

    Raises:
        RuntimeError: If Hydrus is not configured or unavailable
    """
    # Fail fast if the availability probe says Hydrus cannot be reached.
    ok, reason = is_available(config)
    if not ok:
        raise RuntimeError(f"Hydrus is unavailable: {reason}")

    from SYS.config import get_hydrus_url, get_hydrus_access_key

    base_url = (get_hydrus_url(config, "home") or "").strip()
    if not base_url:
        raise RuntimeError(
            "Hydrus URL is not configured (check config.conf store.hydrusnetwork.home.URL)"
        )

    key = get_hydrus_access_key(config, "home") or ""

    raw_timeout = config.get("HydrusNetwork_Request_Timeout")
    try:
        timeout = float(raw_timeout) if raw_timeout is not None else 60.0
    except (TypeError, ValueError):
        timeout = 60.0

    # Reuse a previously built client for this exact URL + key combination.
    cache_key = f"{base_url}#{key}"
    cached = _hydrus_client_cache.get(cache_key)
    if cached is not None:
        return cached

    client = HydrusNetwork(base_url, key, timeout)
    _hydrus_client_cache[cache_key] = client
    return client
|
|
|
|
|
|
def get_tag_service_name(config: dict[str, Any]) -> str:
    """Return the tag service name used for tagging operations.

    Hard-coded to "my tags" so tag writes always target the local service
    and never hit remote services.

    Args:
        config: Configuration dict (accepted for interface symmetry; unused)

    Returns:
        The literal service name "my tags".
    """
    return "my tags"
|
|
|
|
|
|
def get_tag_service_key(client: HydrusNetwork,
                        fallback_name: str = "my tags") -> Optional[str]:
    """Resolve the service key of a named tag service via the client.

    Queries ``client.get_services()`` and scans every service group for an
    entry whose name matches *fallback_name* (case-insensitively).

    Args:
        client: HydrusClient instance
        fallback_name: Name of the service to find (e.g., "my tags")

    Returns:
        Service key string if found, None otherwise (including on any
        client/service error).
    """
    try:
        services = client.get_services()
    except Exception:
        return None

    if not isinstance(services, dict):
        return None

    wanted = fallback_name.lower()

    # Hydrus groups services by type; walk every list and compare names.
    for group in services.values():
        if not isinstance(group, list):
            continue
        for entry in group:
            if not isinstance(entry, dict):
                continue
            label = str(entry.get("name") or "").strip().lower()
            service_key = entry.get("service_key") or entry.get("key")
            if label == wanted and service_key:
                return str(service_key)

    return None
|
|
|
|
|
|
def is_request_error(exc: Exception) -> bool:
    """Tell whether *exc* came from a failed Hydrus API request.

    Args:
        exc: Exception to check

    Returns:
        True exactly when *exc* is a HydrusRequestError (or subclass, such
        as HydrusConnectionError).
    """
    return isinstance(exc, HydrusRequestError)
|
|
|
|
|
|
# Streaming download chunk size: 1 MiB per read.
CHUNK_SIZE = 1 << 20
|
|
|
|
|
|
def download_hydrus_file(
    file_url: str,
    headers: dict[str, str],
    destination: Path,
    timeout: float
) -> int:
    """Download *file_url* into *destination* returning the byte count with progress bar."""
    from SYS.progress import print_progress, print_final_progress

    written = 0
    started = time.time()
    last_tick = started

    # Total size is only known when the server sends Content-Length.
    total = None
    with HTTPClient(timeout=timeout, headers=headers) as client:
        response = client.get(file_url)
        response.raise_for_status()

        try:
            total = int(response.headers.get("content-length", 0))
        except (ValueError, TypeError):
            total = None

        name = destination.name

        with destination.open("wb") as sink:
            for chunk in response.iter_bytes(CHUNK_SIZE):
                if not chunk:
                    break
                sink.write(chunk)
                written += len(chunk)

                # Throttled progress output (every 0.5 s) when size is known.
                if total:
                    now = time.time()
                    if now - last_tick >= 0.5:
                        elapsed = now - started
                        rate = written / elapsed if elapsed > 0 else 0
                        print_progress(name, written, total, rate)
                        last_tick = now

        # Emit the closing progress line when we were tracking a total.
        if total:
            print_final_progress(name, total, time.time() - started)

    return written
|