"""Hydrus API helpers and export utilities.""" from __future__ import annotations import base64 import http.client import json import os import re import shutil import subprocess import sys import time from SYS.logger import log from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS as GLOBAL_SUPPORTED_EXTENSIONS import tempfile import logging from dataclasses import dataclass, field from pathlib import Path from typing import Any, Iterable, Optional, Sequence, Type, TypeVar, Union, cast from urllib.parse import urlsplit, urlencode, quote import httpx logger = logging.getLogger(__name__) from SYS.utils import ( decode_cbor, jsonify, ensure_directory, unique_path, ) from .HTTP import HTTPClient class HydrusRequestError(RuntimeError): """Raised when the Hydrus Client API returns an error response.""" def __init__(self, status: int, message: str, payload: Any | None = None) -> None: super().__init__(f"Hydrus request failed ({status}): {message}") self.status = status self.payload = payload class HydrusConnectionError(HydrusRequestError): """Raised when Hydrus service is unavailable (connection refused, timeout, etc.). This is an expected error when Hydrus is not running and should not include a full traceback in logs. 
""" def __init__(self, message: str) -> None: super().__init__(0, message, None) # status 0 indicates connection error self.is_connection_error = True @dataclass(slots=True) class HydrusRequestSpec: method: str endpoint: str query: dict[str, Any] | None = None data: Any | None = None file_path: Path | None = None content_type: str | None = None accept: str | None = "application/cbor" @dataclass(slots=True) class HydrusNetwork: """Thin wrapper around the Hydrus Client API.""" url: str access_key: str = "" timeout: float = 9.0 instance_name: str = "" # Optional store name (e.g., 'home') for namespaced logs scheme: str = field(init=False) hostname: str = field(init=False) port: int = field(init=False) base_path: str = field(init=False) _session_key: str = field(init=False, default="", repr=False) # Cached session key def __post_init__(self) -> None: if not self.url: raise ValueError("Hydrus base URL is required") self.url = self.url.rstrip("/") parsed = urlsplit(self.url) if parsed.scheme not in {"http", "https"}: raise ValueError("Hydrus base URL must use http or https") self.scheme = parsed.scheme self.hostname = parsed.hostname or "localhost" self.port = parsed.port or (443 if self.scheme == "https" else 80) self.base_path = parsed.path.rstrip("/") self.access_key = self.access_key or "" self.instance_name = str(self.instance_name or "").strip() def _log_prefix(self) -> str: if self.instance_name: return f"[hydrusnetwork:{self.instance_name}]" return f"[hydrusnetwork:{self.hostname}:{self.port}]" # ------------------------------------------------------------------ # low-level helpers # ------------------------------------------------------------------ def _build_path(self, endpoint: str, query: dict[str, Any] | None = None) -> str: path = endpoint if endpoint.startswith("/") else f"/{endpoint}" if self.base_path: path = f"{self.base_path}{path}" if query: encoded = urlencode(query, doseq=True) if encoded: path = f"{path}?{encoded}" return path def 
_perform_request(self, spec: HydrusRequestSpec) -> Any: headers: dict[str, str] = {} # Use session key if available, otherwise use access key if self._session_key: headers["Hydrus-Client-API-Session-Key"] = self._session_key elif self.access_key: headers["Hydrus-Client-API-Access-Key"] = self.access_key if spec.accept: headers["Accept"] = spec.accept path = self._build_path(spec.endpoint, spec.query) url = f"{self.scheme}://{self.hostname}:{self.port}{path}" # Log request details logger.debug( f"{self._log_prefix()} {spec.method} {spec.endpoint} (auth: {'session_key' if self._session_key else 'access_key' if self.access_key else 'none'})" ) status = 0 reason = "" body = b"" content_type = "" try: with HTTPClient(timeout=self.timeout, headers=headers, verify_ssl=False) as client: response = None if spec.file_path is not None: file_path = Path(spec.file_path) if not file_path.is_file(): error_msg = f"Upload file not found: {file_path}" logger.error(f"{self._log_prefix()} {error_msg}") raise FileNotFoundError(error_msg) file_size = file_path.stat().st_size headers["Content-Type" ] = spec.content_type or "application/octet-stream" # Do not set Content-Length when streaming an iterator body. # If the file size changes between stat() and read() (or the source is truncated), # h11 will raise: "Too little data for declared Content-Length". # Let httpx choose chunked transfer encoding for safety. headers.pop("Content-Length", None) logger.debug( f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)" ) # Stream upload body with a stderr progress bar (pipeline-safe). from SYS.models import ProgressBar bar = ProgressBar() # Keep the PipelineLiveProgress transfer line clean: show the file name. # (The hydrus instance/service is already visible in the logs above.) 
label = str(getattr(file_path, "name", None) or "upload") start_t = time.time() last_render_t = [start_t] sent = [0] def _render_progress(final: bool = False) -> None: if file_size <= 0: return now = time.time() if not final and (now - float(last_render_t[0])) < 0.25: return last_render_t[0] = now bar.update( downloaded=int(sent[0]), total=int(file_size), label=str(label), file=sys.stderr, ) if final: bar.finish() def file_gen(): try: with file_path.open("rb") as handle: while True: chunk = handle.read(256 * 1024) if not chunk: break sent[0] += len(chunk) _render_progress(final=False) yield chunk finally: _render_progress(final=True) response = client.request( spec.method, url, content=file_gen(), headers=headers, raise_for_status=False, log_http_errors=False, ) else: content = None json_data = None if spec.data is not None: if isinstance(spec.data, (bytes, bytearray)): content = spec.data else: json_data = spec.data # Hydrus expects JSON bodies to be sent with Content-Type: application/json. # httpx will usually set this automatically, but we set it explicitly to # match the Hydrus API docs and avoid edge cases. 
headers.setdefault("Content-Type", "application/json") logger.debug( f"{self._log_prefix()} Request body size: {len(content) if content else 'json'}" ) response = client.request( spec.method, url, content=content, json=json_data, headers=headers, raise_for_status=False, log_http_errors=False, ) status = response.status_code reason = response.reason_phrase body = response.content content_type = response.headers.get("Content-Type", "") or "" logger.debug( f"{self._log_prefix()} Response {status} {reason} ({len(body)} bytes)" ) except (httpx.ConnectError, httpx.TimeoutException, httpx.NetworkError) as exc: msg = f"Hydrus unavailable: {exc}" logger.warning(f"{self._log_prefix()} {msg}") raise HydrusConnectionError(msg) from exc except Exception as exc: logger.error(f"{self._log_prefix()} Connection error: {exc}", exc_info=True) raise payload: Any payload = {} if body: content_main = content_type.split(";", 1)[0].strip().lower() if "json" in content_main: try: payload = json.loads(body.decode("utf-8")) except (json.JSONDecodeError, UnicodeDecodeError): payload = body.decode("utf-8", "replace") elif "cbor" in content_main: try: payload = decode_cbor(body) except Exception: payload = body else: payload = body if status >= 400: message = "" if isinstance(payload, dict): message = str(payload.get("message") or payload.get("error") or payload) elif isinstance(payload, str): message = payload else: message = reason or "HTTP error" # Some endpoints are naturally "missing" sometimes and should not spam logs. if status == 404 and spec.endpoint.rstrip("/") == "/get_files/file_path": return {} logger.error(f"{self._log_prefix()} HTTP {status}: {message}") # Handle expired session key (419) by clearing cache and retrying once if status == 419 and self._session_key and "session" in message.lower(): logger.warning( f"{self._log_prefix()} Session key expired, acquiring new one and retrying..." 
) self._session_key = "" # Clear expired session key try: self._acquire_session_key() # Retry the request with new session key return self._perform_request(spec) except Exception as retry_error: logger.error( f"{self._log_prefix()} Retry failed: {retry_error}", exc_info=True ) # If retry fails, raise the original error raise HydrusRequestError(status, message, payload) from retry_error raise HydrusRequestError(status, message, payload) return payload def _acquire_session_key(self) -> str: """Acquire a session key from the Hydrus API using the access key. Session keys are temporary authentication tokens that expire after 24 hours of inactivity, client restart, or if the access key is deleted. They are more secure than passing access keys in every request. Returns the session key string. Raises HydrusRequestError if the request fails. """ if not self.access_key: raise HydrusRequestError( 401, "Cannot acquire session key: no access key configured" ) # Temporarily use access key to get session key original_session_key = self._session_key try: self._session_key = "" # Clear session key to use access key for this request result = self._get("/session_key") session_key = result.get("session_key") if not session_key: raise HydrusRequestError( 500, "Session key response missing 'session_key' field", result ) self._session_key = session_key return session_key except HydrusRequestError: self._session_key = original_session_key raise except Exception as e: self._session_key = original_session_key raise HydrusRequestError(500, f"Failed to acquire session key: {e}") def ensure_session_key(self) -> str: """Ensure a valid session key exists, acquiring one if needed. Returns the session key. If one is already cached, returns it. Otherwise acquires a new session key from the API. 
""" if self._session_key: return self._session_key return self._acquire_session_key() def _get(self, endpoint: str, *, query: dict[str, Any] | None = None) -> dict[str, Any]: spec = HydrusRequestSpec("GET", endpoint, query=query) return cast(dict[str, Any], self._perform_request(spec)) def _post( self, endpoint: str, *, data: dict[str, Any] | None = None, file_path: Path | None = None, content_type: str | None = None, ) -> dict[str, Any]: spec = HydrusRequestSpec( "POST", endpoint, data=data, file_path=file_path, content_type=content_type ) return cast(dict[str, Any], self._perform_request(spec)) def _ensure_hashes(self, hash: Union[str, Iterable[str]]) -> list[str]: if isinstance(hash, str): return [hash] return list(hash) def _append_access_key(self, url: str) -> str: if not self.access_key: return url separator = "&" if "?" in url else "?" # Use the correct parameter name for Hydrus API compatibility return f"{url}{separator}access_key={quote(self.access_key)}" def add_file(self, path: Union[str, Path]) -> dict[str, Any]: """Add a file to Hydrus using the octet-stream upload mode. This mirrors the Hydrus API POST /add_files/add_file behavior when sending the file bytes as the POST body. The method accepts either a filesystem `Path` or a string path and will raise FileNotFoundError if the target path is not a readable file. """ # Accept both Path and str for convenience file_path = Path(path) if not isinstance(path, Path) else path if not file_path.is_file(): raise FileNotFoundError(f"Upload file not found: {file_path}") # Forward as file_path so the request body is streamed as application/octet-stream return self._post("/add_files/add_file", file_path=file_path) def undelete_files(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]: """Restore files from Hydrus trash back into 'my files'. 
Hydrus Client API: POST /add_files/undelete_files Required JSON args: {"hashes": [, ...]} """ hash_list = self._ensure_hashes(hashes) body = { "hashes": hash_list } return self._post("/add_files/undelete_files", data=body) def delete_files( self, hashes: Union[str, Iterable[str]], *, reason: str | None = None ) -> dict[str, Any]: """Delete files in Hydrus. Hydrus Client API: POST /add_files/delete_files Required JSON args: {"hashes": [, ...]} Optional JSON args: {"reason": "..."} """ hash_list = self._ensure_hashes(hashes) body: dict[str, Any] = { "hashes": hash_list } if isinstance(reason, str) and reason.strip(): body["reason"] = reason.strip() return self._post("/add_files/delete_files", data=body) def clear_file_deletion_record(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]: """Clear Hydrus's file deletion record for the provided hashes. Hydrus Client API: POST /add_files/clear_file_deletion_record Required JSON args: {"hashes": [, ...]} """ hash_list = self._ensure_hashes(hashes) body = { "hashes": hash_list } return self._post("/add_files/clear_file_deletion_record", data=body) def add_tag( self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str ) -> dict[str, Any]: hash = self._ensure_hashes(hash) body = { "hashes": hash, "service_names_to_tags": { service_name: list(tags) } } return self._post("/add_tags/add_tags", data=body) def delete_tag( self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], service_name: str, *, action: int = 1, ) -> dict[str, Any]: hashes = self._ensure_hashes(file_hashes) body = { "hashes": hashes, "service_names_to_actions_to_tags": { service_name: { action: list(tags) } }, } return self._post("/add_tags/add_tags", data=body) def add_tags_by_key( self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_key: str ) -> dict[str, Any]: hash = self._ensure_hashes(hash) body = { "hashes": hash, "service_keys_to_tags": { service_key: list(tags) } } return 
self._post("/add_tags/add_tags", data=body) def delete_tags_by_key( self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], service_key: str, *, action: int = 1, ) -> dict[str, Any]: hashes = self._ensure_hashes(file_hashes) body = { "hashes": hashes, "service_keys_to_actions_to_tags": { service_key: { action: list(tags) } }, } return self._post("/add_tags/add_tags", data=body) def associate_url(self, file_hashes: Union[str, Iterable[str]], url: str) -> dict[str, Any]: hashes = self._ensure_hashes(file_hashes) if len(hashes) == 1: body = { "hash": hashes[0], "url_to_add": url } return self._post("/add_urls/associate_url", data=body) results: dict[str, Any] = {} for file_hash in hashes: body = { "hash": file_hash, "url_to_add": url } results[file_hash] = self._post("/add_urls/associate_url", data=body) return { "batched": results } def get_url_info(self, url: str) -> dict[str, Any]: """Get information about a URL. Hydrus Client API: GET /add_urls/get_url_info Docs: https://hydrusnetwork.github.io/hydrus/developer_api.html#add_urls_get_url_info """ url = str(url or "").strip() if not url: raise ValueError("url must not be empty") spec = HydrusRequestSpec( method="GET", endpoint="/add_urls/get_url_info", query={ "url": url }, ) return cast(dict[str, Any], self._perform_request(spec)) def delete_url(self, file_hashes: Union[str, Iterable[str]], url: str) -> dict[str, Any]: hashes = self._ensure_hashes(file_hashes) if len(hashes) == 1: body = { "hash": hashes[0], "url_to_delete": url } return self._post("/add_urls/associate_url", data=body) results: dict[str, Any] = {} for file_hash in hashes: body = { "hash": file_hash, "url_to_delete": url } results[file_hash] = self._post("/add_urls/associate_url", data=body) return { "batched": results } def set_notes( self, file_hash: str, notes: dict[str, str], *, merge_cleverly: bool = False, extend_existing_note_if_possible: bool = True, conflict_resolution: int = 3, ) -> dict[str, Any]: """Add or update notes 
associated with a file. Hydrus Client API: POST /add_notes/set_notes Required JSON args: {"hash": , "notes": {name: text}} """ if not notes: raise ValueError("notes mapping must not be empty") file_hash = str(file_hash or "").strip().lower() if not file_hash: raise ValueError("file_hash must not be empty") body: dict[str, Any] = { "hash": file_hash, "notes": notes } if merge_cleverly: body["merge_cleverly"] = True body["extend_existing_note_if_possible"] = bool( extend_existing_note_if_possible ) body["conflict_resolution"] = int(conflict_resolution) return self._post("/add_notes/set_notes", data=body) def delete_notes( self, file_hash: str, note_names: Sequence[str], ) -> dict[str, Any]: """Delete notes associated with a file. Hydrus Client API: POST /add_notes/delete_notes Required JSON args: {"hash": , "note_names": [..]} """ names = [str(name) for name in note_names if str(name or "").strip()] if not names: raise ValueError("note_names must not be empty") file_hash = str(file_hash or "").strip().lower() if not file_hash: raise ValueError("file_hash must not be empty") body = { "hash": file_hash, "note_names": names } return self._post("/add_notes/delete_notes", data=body) def get_file_relationships(self, file_hash: str) -> dict[str, Any]: query = { "hash": file_hash } return self._get( "/manage_file_relationships/get_file_relationships", query=query ) def set_relationship( self, hash_a: str, hash_b: str, relationship: Union[str, int], do_default_content_merge: bool = False, ) -> dict[str, Any]: """Set a relationship between two files in Hydrus. This wraps Hydrus Client API: POST /manage_file_relationships/set_file_relationships. 
Hydrus relationship enum (per Hydrus developer API docs): - 0: set as potential duplicates - 1: set as false positives - 2: set as same quality (duplicates) - 3: set as alternates - 4: set A as better (duplicates) Args: hash_a: First file SHA256 hex hash_b: Second file SHA256 hex relationship: Relationship type as string or integer enum (0-4) do_default_content_merge: Whether to perform default duplicate content merge Returns: Response from Hydrus API """ # Convert string relationship types to integers if isinstance(relationship, str): rel_map = { # Potential duplicates "potential": 0, "potentials": 0, "potential duplicate": 0, "potential duplicates": 0, # False positives "false positive": 1, "false_positive": 1, "false positives": 1, "false_positives": 1, "not related": 1, "not_related": 1, # Duplicates (same quality) "duplicate": 2, "duplicates": 2, "same quality": 2, "same_quality": 2, "equal": 2, # Alternates "alt": 3, "alternate": 3, "alternates": 3, "alternative": 3, "related": 3, # Better/worse (duplicates) "better": 4, "a better": 4, "a_better": 4, # Back-compat: some older call sites used 'king' for primary. # Hydrus does not accept 'king' as a relationship; this maps to 'A is better'. 
"king": 4, } relationship = rel_map.get( relationship.lower().strip(), 3 ) # Default to alternates body = { "relationships": [ { "hash_a": hash_a, "hash_b": hash_b, "relationship": relationship, "do_default_content_merge": do_default_content_merge, } ] } return self._post( "/manage_file_relationships/set_file_relationships", data=body ) def get_services(self) -> dict[str, Any]: return self._get("/get_services") def search_files( self, tags: Sequence[Any], *, file_service_name: str | None = None, return_hashes: bool = False, return_file_ids: bool = True, return_file_count: bool = False, include_current_tags: bool | None = None, include_pending_tags: bool | None = None, file_sort_type: int | None = None, file_sort_asc: bool | None = None, file_sort_key: str | None = None, ) -> dict[str, Any]: if not tags: raise ValueError("tags must not be empty") query: dict[str, Any] = {} query_fields = [ ("tags", tags, lambda v: json.dumps(list(v))), ("file_service_name", file_service_name, lambda v: v), ("return_hashes", return_hashes, lambda v: "true" if v else None), ("return_file_ids", return_file_ids, lambda v: "true" if v else None), ("return_file_count", return_file_count, lambda v: "true" if v else None), ( "include_current_tags", include_current_tags, lambda v: "true" if v else "false" if v is not None else None, ), ( "include_pending_tags", include_pending_tags, lambda v: "true" if v else "false" if v is not None else None, ), ( "file_sort_type", file_sort_type, lambda v: str(v) if v is not None else None ), ( "file_sort_asc", file_sort_asc, lambda v: "true" if v else "false" if v is not None else None, ), ("file_sort_key", file_sort_key, lambda v: v), ] for key, value, formatter in query_fields: if value is None or value == []: continue formatted = formatter(value) if formatted is not None: query[key] = formatted return self._get("/get_files/search_files", query=query) def fetch_file_metadata( self, *, file_ids: Sequence[int] | None = None, hashes: Sequence[str] | None 
= None, include_service_keys_to_tags: bool = True, include_file_url: bool = False, include_duration: bool = True, include_size: bool = True, include_mime: bool = False, include_notes: bool = False, ) -> dict[str, Any]: if not file_ids and not hashes: raise ValueError("Either file_ids or hashes must be provided") query: dict[str, Any] = {} query_fields = [ ("file_ids", file_ids, lambda v: json.dumps(list(v))), ("hashes", hashes, lambda v: json.dumps(list(v))), ( "include_service_keys_to_tags", include_service_keys_to_tags, lambda v: "true" if v else None, ), ("include_file_url", include_file_url, lambda v: "true" if v else None), ("include_duration", include_duration, lambda v: "true" if v else None), ("include_size", include_size, lambda v: "true" if v else None), ("include_mime", include_mime, lambda v: "true" if v else None), ("include_notes", include_notes, lambda v: "true" if v else None), ] for key, value, formatter in query_fields: if not value: continue formatted = formatter(value) if formatted is not None: query[key] = formatted return self._get("/get_files/file_metadata", query=query) def get_file_path(self, file_hash: str) -> dict[str, Any]: """Get the local file system path for a given file hash.""" query = { "hash": file_hash } return self._get("/get_files/file_path", query=query) def file_url(self, file_hash: str) -> str: hash_param = quote(file_hash) # Don't append access_key parameter for file downloads - use header instead url = f"{self.url}/get_files/file?hash={hash_param}" return url def thumbnail_url(self, file_hash: str) -> str: hash_param = quote(file_hash) # Don't append access_key parameter for file downloads - use header instead url = f"{self.url}/get_files/thumbnail?hash={hash_param}" return url HydrusCliOptionsT = TypeVar("HydrusCliOptionsT", bound="HydrusCliOptions") @dataclass(slots=True) class HydrusCliOptions: url: str method: str access_key: str accept: str timeout: float content_type: str | None body_bytes: bytes | None = None 
body_path: Path | None = None debug: bool = False @classmethod def from_namespace( cls: Type[HydrusCliOptionsT], namespace: Any ) -> HydrusCliOptionsT: accept_header = namespace.accept or "application/cbor" body_bytes: bytes | None = None body_path: Path | None = None if namespace.body_file: body_path = Path(namespace.body_file) elif namespace.body is not None: body_bytes = namespace.body.encode("utf-8") return cls( url=namespace.url, method=namespace.method.upper(), access_key=namespace.access_key or "", accept=accept_header, timeout=namespace.timeout, content_type=namespace.content_type, body_bytes=body_bytes, body_path=body_path, debug=bool(os.environ.get("DOWNLOW_DEBUG")), ) def hydrus_request(args, parser) -> int: if args.body and args.body_file: parser.error("Only one of --body or --body-file may be supplied") options = HydrusCliOptions.from_namespace(args) parsed = urlsplit(options.url) if parsed.scheme not in ("http", "https"): parser.error("Only http and https url are supported") if not parsed.hostname: parser.error("Invalid Hydrus URL") headers: dict[str, str] = {} if options.access_key: headers["Hydrus-Client-API-Access-Key"] = options.access_key if options.accept: headers["Accept"] = options.accept request_body_bytes: bytes | None = None body_path: Path | None = None if options.body_path is not None: body_path = options.body_path if not body_path.is_file(): parser.error(f"File not found: {body_path}") headers.setdefault( "Content-Type", options.content_type or "application/octet-stream" ) headers["Content-Length"] = str(body_path.stat().st_size) elif options.body_bytes is not None: request_body_bytes = options.body_bytes headers["Content-Type"] = options.content_type or "application/json" assert request_body_bytes is not None headers["Content-Length"] = str(len(request_body_bytes)) elif options.content_type: headers["Content-Type"] = options.content_type if parsed.username or parsed.password: userinfo = f"{parsed.username or ''}:{parsed.password or 
''}".encode("utf-8") headers["Authorization"] = "Basic " + base64.b64encode(userinfo).decode("ascii") path = parsed.path or "/" if parsed.query: path += "?" + parsed.query port = parsed.port if port is None: port = 443 if parsed.scheme == "https" else 80 connection_cls = ( http.client.HTTPSConnection if parsed.scheme == "https" else http.client.HTTPConnection ) host = parsed.hostname or "localhost" connection = connection_cls(host, port, timeout=options.timeout) if options.debug: log( f"Hydrus connecting to {parsed.scheme}://{host}:{port}{path}", file=sys.stderr ) response_bytes: bytes = b"" content_type = "" status = 0 try: if body_path is not None: with body_path.open("rb") as handle: if options.debug: size_hint = headers.get("Content-Length", "unknown") log( f"Hydrus sending file body ({size_hint} bytes)", file=sys.stderr ) connection.putrequest(options.method, path) host_header = host if (parsed.scheme == "http" and port not in (80, None)) or (parsed.scheme == "https" and port not in (443, None)): host_header = f"{host}:{port}" connection.putheader("Host", host_header) for key, value in headers.items(): if value: connection.putheader(key, value) connection.endheaders() while True: chunk = handle.read(65536) if not chunk: break connection.send(chunk) if options.debug: log( "[downlow.py] Hydrus upload complete; awaiting response", file=sys.stderr ) else: if options.debug: size_hint = "none" if request_body_bytes is None else str( len(request_body_bytes) ) log(f"Hydrus sending request body bytes={size_hint}", file=sys.stderr) sanitized_headers = { k: v for k, v in headers.items() if v } connection.request( options.method, path, body=request_body_bytes, headers=sanitized_headers ) response = connection.getresponse() status = response.status response_bytes = response.read() if options.debug: log( f"Hydrus response received ({len(response_bytes)} bytes)", file=sys.stderr ) content_type = response.getheader("Content-Type", "") except (OSError, 
def hydrus_export(args, _parser) -> int:
    """Download one file from Hydrus and write it to ``args.output``.

    The file at ``args.file_url`` is fetched into a temporary file. It is then
    either moved into place unchanged (``args.format == "copy"``) or
    transcoded with ffmpeg and tagged via ``apply_mutagen_metadata``. On
    success a JSON summary (``output``, and when known ``source_bytes``,
    ``size_bytes`` and ``metadata_keys``) is emitted through ``log``.

    Attributes read from *args*: ``output`` (a ``Path``), ``file_url``,
    ``access_key``, ``timeout``, ``format`` and, when transcoding,
    ``max_width``. ``tmp_dir``, ``metadata_json``, ``source_ext``,
    ``hydrus_url`` and ``file_hash`` are optional.

    Args:
        args: Parsed command-line namespace (see the attributes above).
        _parser: Unused by this function.

    Returns:
        ``0`` on success, ``1`` on failure (the reason is logged to stderr).
    """
    from SYS.metadata import apply_mutagen_metadata, build_ffmpeg_command, prepare_ffmpeg_metadata

    debug = bool(os.environ.get("DOWNLOW_DEBUG"))

    output_path: Path = args.output
    original_suffix = output_path.suffix
    target_dir = output_path.parent

    # Optional user metadata: must be a JSON object when supplied.
    metadata_payload: Optional[dict[str, Any]] = None
    metadata_raw = getattr(args, "metadata_json", None)
    if metadata_raw:
        try:
            parsed = json.loads(metadata_raw)
        except json.JSONDecodeError as exc:
            log(f"Invalid metadata JSON: {exc}", file=sys.stderr)
            return 1
        if not isinstance(parsed, dict):
            log("[downlow.py] Metadata JSON must decode to an object", file=sys.stderr)
            return 1
        metadata_payload = parsed
    ffmpeg_metadata = prepare_ffmpeg_metadata(metadata_payload)

    def _normalise_ext(value: Optional[str]) -> Optional[str]:
        """Return *value* as a dotted extension, or None when it is unusable."""
        if not value or not isinstance(value, str):
            return None
        cleaned = value.strip()
        # A bare "." (or only dots) is not an extension; Path.with_suffix
        # would reject it later.
        if not cleaned.strip("."):
            return None
        if not cleaned.startswith("."):  # tolerate inputs like "mp4"
            cleaned = "." + cleaned
        return cleaned

    def _extension_from_mime(mime: Optional[str]) -> Optional[str]:
        """Map a MIME type to a file extension, or None when it is unknown."""
        if not mime or not isinstance(mime, str):
            return None
        mime_map = {
            # Images / bitmaps
            "image/jpeg": ".jpg",
            "image/jpg": ".jpg",
            "image/png": ".png",
            "image/gif": ".gif",
            "image/webp": ".webp",
            "image/avif": ".avif",
            "image/jxl": ".jxl",  # JPEG XL
            "image/bmp": ".bmp",
            "image/heic": ".heic",
            "image/heif": ".heif",
            "image/x-icon": ".ico",
            "image/vnd.microsoft.icon": ".ico",
            "image/qoi": ".qoi",  # Quite OK Image
            "image/tiff": ".tiff",
            "image/svg+xml": ".svg",
            "image/vnd.adobe.photoshop": ".psd",
            # Animation / sequence variants
            "image/apng": ".apng",
            "image/avif-sequence": ".avifs",
            "image/heic-sequence": ".heics",
            "image/heif-sequence": ".heifs",
            # Video
            "video/mp4": ".mp4",
            "video/webm": ".webm",
            "video/quicktime": ".mov",
            "video/ogg": ".ogv",
            "video/mpeg": ".mpeg",
            "video/x-msvideo": ".avi",
            "video/x-flv": ".flv",
            "video/x-matroska": ".mkv",
            "video/x-ms-wmv": ".wmv",
            "video/vnd.rn-realvideo": ".rv",
            # Audio
            "audio/mpeg": ".mp3",
            "audio/mp3": ".mp3",
            "audio/mp4": ".m4a",
            "audio/ogg": ".ogg",
            "audio/flac": ".flac",
            "audio/wav": ".wav",
            "audio/x-wav": ".wav",
            "audio/x-ms-wma": ".wma",
            "audio/x-tta": ".tta",
            "audio/vnd.wave": ".wav",
            "audio/x-wavpack": ".wv",
            "audio/wavpack": ".wv",
            "audio/x-matroska": ".mka",
            "audio/vnd.rn-realaudio": ".ra",
            # Documents / office
            "application/pdf": ".pdf",
            "application/epub+zip": ".epub",
            "application/vnd.djvu": ".djvu",
            "image/vnd.djvu": ".djvu",
            "application/rtf": ".rtf",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
            "application/msword": ".doc",
            "application/vnd.ms-excel": ".xls",
            "application/vnd.ms-powerpoint": ".ppt",
            # Archive / comicbook / zip-like
            "application/zip": ".zip",
            "application/x-7z-compressed": ".7z",
            "application/x-rar-compressed": ".rar",
            "application/vnd.rar": ".rar",
            "application/gzip": ".gz",
            "application/x-tar": ".tar",
            # often just ZIP with images; CBZ is not an official mime type but used as mapping
            "application/x-cbz": ".cbz",
            "application/vnd.comicbook+zip": ".cbz",
            # App / project / other
            "application/clip": ".clip",  # Clip Studio
            "application/x-krita": ".kra",
            "application/x-procreate": ".procreate",
            "application/x-xcf": ".xcf",
            "application/x-shockwave-flash": ".swf",
        }
        # Drop any parameters ("; charset=...") before the lookup.
        return mime_map.get(mime.split(";", 1)[0].strip().lower())

    def _extract_hash(file_url: str) -> Optional[str]:
        """Pull the hex ``hash`` query parameter out of a file URL."""
        match = re.search(r"[?&]hash=([0-9a-fA-F]+)", file_url)
        return match.group(1) if match else None

    # Resolve the temp directory once. tmp_dir is optional, so it is always
    # read with getattr (a namespace without it must not raise).
    tmp_dir_arg = getattr(args, "tmp_dir", None)
    temp_dir = Path(tmp_dir_arg) if tmp_dir_arg else target_dir

    # Ensure output and temp directories exist using global helper.
    for dir_path in dict.fromkeys((target_dir, temp_dir)):
        try:
            ensure_directory(dir_path)
        except RuntimeError as exc:
            log(f"{exc}", file=sys.stderr)
            return 1

    # Pick the suffix of the downloaded source: explicit --source-ext first,
    # then Hydrus metadata (ext, then mime), then the output path's suffix.
    source_suffix = _normalise_ext(getattr(args, "source_ext", None))
    if source_suffix and source_suffix.lower() == ".bin":
        source_suffix = None

    if source_suffix is None:
        hydrus_url = getattr(args, "hydrus_url", None)
        if not hydrus_url:
            try:
                from SYS.config import load_config, get_hydrus_url

                hydrus_url = get_hydrus_url(load_config())
            except Exception as exc:
                hydrus_url = None
                if debug:
                    log(
                        f"hydrus-export could not load Hydrus URL: {exc}",
                        file=sys.stderr
                    )
        if hydrus_url:
            # Best effort: remember the resolved URL on the namespace.
            try:
                setattr(args, "hydrus_url", hydrus_url)
            except Exception:
                pass

        resolved_suffix: Optional[str] = None
        file_hash = getattr(args, "file_hash", None) or _extract_hash(args.file_url)
        if hydrus_url and file_hash:
            try:
                client = HydrusNetwork(
                    url=hydrus_url,
                    access_key=args.access_key,
                    timeout=args.timeout
                )
                meta_response = client.fetch_file_metadata(
                    hashes=[file_hash],
                    include_mime=True
                )
                entries = (
                    meta_response.get("metadata")
                    if isinstance(meta_response, dict) else None
                )
                if isinstance(entries, list) and entries:
                    entry = entries[0]
                    if isinstance(entry, dict):
                        resolved_suffix = (
                            _normalise_ext(entry.get("ext"))
                            or _extension_from_mime(entry.get("mime"))
                        )
            except Exception as exc:  # pragma: no cover - defensive
                # The metadata lookup only refines the suffix; never fatal.
                if debug:
                    log(f"hydrus metadata fetch failed: {exc}", file=sys.stderr)

        if not resolved_suffix:
            fallback_suffix = _normalise_ext(original_suffix)
            if fallback_suffix and fallback_suffix.lower() == ".bin":
                fallback_suffix = None
            resolved_suffix = fallback_suffix or ".hydrus"
        source_suffix = resolved_suffix

    suffix = source_suffix or ".hydrus"
    # Only replace a missing or placeholder (.bin) output suffix.
    current_suffix = output_path.suffix.lower()
    if current_suffix in {"", ".bin"} and current_suffix != suffix.lower():
        output_path = output_path.with_suffix(suffix)

    # Reserve a temp path; the file itself is written by the download below.
    with tempfile.NamedTemporaryFile(
        delete=False,
        suffix=suffix,
        dir=str(temp_dir)
    ) as temp_file:
        temp_path = Path(temp_file.name)

    downloaded_bytes = 0
    headers = {
        "Hydrus-Client-API-Access-Key": args.access_key,
    }
    try:
        downloaded_bytes = download_hydrus_file(
            args.file_url,
            headers,
            temp_path,
            args.timeout
        )
        if debug:
            log(f"hydrus-export downloaded {downloaded_bytes} bytes", file=sys.stderr)
    except httpx.RequestError as exc:
        temp_path.unlink(missing_ok=True)
        log(f"hydrus-export download failed: {exc}", file=sys.stderr)
        return 1
    except Exception as exc:  # pragma: no cover - unexpected
        temp_path.unlink(missing_ok=True)
        log(f"hydrus-export error: {exc}", file=sys.stderr)
        return 1

    ffmpeg_log: Optional[str] = None
    converted_tmp: Optional[Path] = None
    try:
        final_target = unique_path(output_path)
        if args.format == "copy":
            shutil.move(str(temp_path), str(final_target))
            result_path = final_target
        else:
            ffmpeg_path = shutil.which("ffmpeg")
            if not ffmpeg_path:
                raise RuntimeError("ffmpeg executable not found in PATH")
            # Convert into a ".part" sibling so a failed run never leaves a
            # half-written file at the final name.
            converted_tmp = final_target.with_suffix(final_target.suffix + ".part")
            if converted_tmp.exists():
                converted_tmp.unlink()
            max_width = args.max_width if args.max_width and args.max_width > 0 else 0
            cmd = build_ffmpeg_command(
                ffmpeg_path,
                temp_path,
                converted_tmp,
                args.format,
                max_width,
                metadata=ffmpeg_metadata if ffmpeg_metadata else None,
            )
            if debug:
                log(f"ffmpeg command: {' '.join(cmd)}", file=sys.stderr)
            completed = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=False,
                text=True,
            )
            ffmpeg_log = (completed.stderr or "").strip()
            if completed.returncode != 0:
                error_details = ffmpeg_log or (completed.stdout or "").strip()
                raise RuntimeError(
                    f"ffmpeg failed with exit code {completed.returncode}"
                    + (f": {error_details}" if error_details else "")
                )
            shutil.move(str(converted_tmp), str(final_target))
            result_path = final_target
            apply_mutagen_metadata(result_path, ffmpeg_metadata, args.format)

        result_size = result_path.stat().st_size if result_path.exists() else None
        payload: dict[str, object] = {
            "output": str(result_path)
        }
        if downloaded_bytes:
            payload["source_bytes"] = downloaded_bytes
        if result_size is not None:
            payload["size_bytes"] = result_size
        if metadata_payload:
            payload["metadata_keys"] = (
                sorted(ffmpeg_metadata.keys()) if ffmpeg_metadata else []
            )
        log(json.dumps(payload, ensure_ascii=False))
        if ffmpeg_log:
            log(ffmpeg_log, file=sys.stderr)
        return 0
    except Exception as exc:
        log(f"hydrus-export failed: {exc}", file=sys.stderr)
        return 1
    finally:
        # Remove whatever intermediates are still around (a successful move
        # has already consumed them).
        try:
            temp_path.unlink(missing_ok=True)
        except OSError:
            pass
        if converted_tmp is not None:
            try:
                converted_tmp.unlink(missing_ok=True)
            except OSError:
                pass


# ============================================================================
# Hydrus Wrapper Functions - Utilities for client initialization and config
# ============================================================================
# This section
# consolidates functions formerly in hydrus_wrapper.py
# Provides: supported filetypes, client initialization, caching, service resolution

# Official Hydrus supported filetypes
# Source: https://hydrusnetwork.github.io/hydrus/filetypes.html
#
# Layout: category name -> {file extension (with leading dot) -> MIME type}.
# The MIME strings for .clip, .procreate and .sai2 previously ended in a
# stray footnote digit ("application/clip1", ...); it is removed here so they
# agree with the MIME spellings used elsewhere in this module.
SUPPORTED_FILETYPES: dict[str, dict[str, str]] = {
    # Images
    "image": {
        ".jpeg": "image/jpeg",
        ".jpg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".avif": "image/avif",
        ".jxl": "image/jxl",
        ".bmp": "image/bmp",
        ".heic": "image/heic",
        ".heif": "image/heif",
        ".ico": "image/x-icon",
        ".qoi": "image/qoi",
        ".tiff": "image/tiff",
    },
    # Animated Images
    "animation": {
        ".apng": "image/apng",
        ".avifs": "image/avif-sequence",
        ".heics": "image/heic-sequence",
        ".heifs": "image/heif-sequence",
    },
    # Video
    "video": {
        ".mp4": "video/mp4",
        ".webm": "video/webm",
        ".mkv": "video/x-matroska",
        ".avi": "video/x-msvideo",
        ".flv": "video/x-flv",
        ".mov": "video/quicktime",
        ".mpeg": "video/mpeg",
        ".ogv": "video/ogg",
        ".rm": "video/vnd.rn-realvideo",
        ".wmv": "video/x-ms-wmv",
    },
    # Audio
    "audio": {
        ".mp3": "audio/mp3",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".m4a": "audio/mp4",
        ".mka": "audio/x-matroska",
        ".mkv": "audio/x-matroska",
        ".mp4": "audio/mp4",
        ".ra": "audio/vnd.rn-realaudio",
        ".tta": "audio/x-tta",
        ".wav": "audio/x-wav",
        ".wv": "audio/wavpack",
        ".wma": "audio/x-ms-wma",
    },
    # Applications & Documents
    "application": {
        ".swf": "application/x-shockwave-flash",
        ".pdf": "application/pdf",
        ".epub": "application/epub+zip",
        ".djvu": "image/vnd.djvu",
        ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ".doc": "application/msword",
        ".xls": "application/vnd.ms-excel",
        ".ppt": "application/vnd.ms-powerpoint",
        ".rtf": "application/rtf",
    },
    # Image Project Files
    "project": {
        ".clip": "application/clip",
        ".kra": "application/x-krita",
        ".procreate": "application/x-procreate",
        ".psd": "image/vnd.adobe.photoshop",
        ".sai2": "application/sai2",
        ".svg": "image/svg+xml",
        ".xcf": "application/x-xcf",
    },
    # Archives
    "archive": {
        ".cbz": "application/vnd.comicbook+zip",
        ".7z": "application/x-7z-compressed",
        ".gz": "application/gzip",
        ".rar": "application/vnd.rar",
        ".zip": "application/zip",
    },
}
def reset_cache() -> None:
    """Reset the availability cache (useful for testing)."""
    global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON
    _HYDRUS_AVAILABLE = None
    _HYDRUS_UNAVAILABLE_REASON = None


def _probe_hydrus(config: dict[str, Any]) -> Optional[str]:
    """Run one availability probe; return None if reachable, else the reason."""
    from SYS.config import get_hydrus_url, get_hydrus_access_key

    url = (get_hydrus_url(config, "home") or "").strip()
    if not url:
        return "Hydrus URL not configured (check config.conf store.hydrusnetwork.home.URL)"

    if not (get_hydrus_access_key(config, "home") or ""):
        return "Hydrus access key not configured"

    timeout_raw = config.get("HydrusNetwork_Request_Timeout")
    try:
        timeout = float(timeout_raw) if timeout_raw is not None else 5.0
    except (TypeError, ValueError):
        timeout = 5.0

    try:
        # Simple TCP connection test to URL/port
        import socket
        from urllib.parse import urlparse

        parsed = urlparse(url)
        hostname = parsed.hostname or "localhost"
        port = parsed.port or (443 if parsed.scheme == "https" else 80)
        try:
            # create_connection resolves the host and tries every returned
            # address, so IPv6-only hosts work too (a bare AF_INET socket
            # could only ever reach IPv4 addresses).
            with socket.create_connection((hostname, port), timeout=timeout):
                return None
        except socket.gaierror as exc:
            # Name resolution failed: report the resolver's own message.
            return str(exc)
        except OSError:
            # Refused, unreachable, timed out, ...
            return f"Cannot connect to {hostname}:{port}"
    except Exception as exc:
        # e.g. an unparsable port in the URL or an out-of-range timeout.
        return str(exc)


def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool, Optional[str]]:
    """Check if Hydrus is available and accessible.

    Performs a lightweight probe to verify:
    - Hydrus URL and access key are configured
    - A TCP connection to the Hydrus URL's host/port can be opened

    The result of every probe is stored in the module-level cache; a cached
    result is returned without probing unless ``use_cache`` is False.

    Args:
        config: Configuration dict with Hydrus settings
        use_cache: If True, use cached result from previous probe

    Returns:
        Tuple of (is_available: bool, reason: Optional[str])
        reason is None if available, or an error message if not
    """
    global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON

    if use_cache and _HYDRUS_AVAILABLE is not None:
        return _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON

    reason = _probe_hydrus(config)
    _HYDRUS_AVAILABLE = reason is None
    _HYDRUS_UNAVAILABLE_REASON = reason
    return _HYDRUS_AVAILABLE, reason


def is_hydrus_available(config: dict[str, Any]) -> bool:
    """Check if Hydrus is available, discarding the failure reason.

    Args:
        config: Configuration dict

    Returns:
        True if Hydrus is available, False otherwise
    """
    return is_available(config)[0]
def get_client(config: dict[str, Any]) -> HydrusNetwork:
    """Return a (possibly cached) Hydrus client for the configured "home" store.

    Clients authenticate with the access key; no session key is requested
    here. Callers that want one can call
    `HydrusNetwork.ensure_session_key()` on the returned client.

    One client is kept per (URL, access key) pair in the module-level client
    cache and handed back on later calls.

    Args:
        config: Configuration dict with Hydrus settings

    Returns:
        HydrusNetwork instance

    Raises:
        RuntimeError: If Hydrus is not configured or unavailable
    """
    # Fail fast when the availability probe says Hydrus cannot be reached.
    reachable, why_not = is_available(config)
    if not reachable:
        raise RuntimeError(f"Hydrus is unavailable: {why_not}")

    from SYS.config import get_hydrus_url, get_hydrus_access_key

    base_url = (get_hydrus_url(config, "home") or "").strip()
    if not base_url:
        raise RuntimeError(
            "Hydrus URL is not configured (check config.conf store.hydrusnetwork.home.URL)"
        )
    key = get_hydrus_access_key(config, "home") or ""

    # Request timeout: 60 seconds unless the config holds a usable number.
    request_timeout = 60.0
    configured = config.get("HydrusNetwork_Request_Timeout")
    if configured is not None:
        try:
            request_timeout = float(configured)
        except (TypeError, ValueError):
            request_timeout = 60.0

    # The timeout is not part of the cache identity.
    cache_id = f"{base_url}#{key}"
    if cache_id not in _hydrus_client_cache:
        _hydrus_client_cache[cache_id] = HydrusNetwork(base_url, key, request_timeout)
    return _hydrus_client_cache[cache_id]


def get_tag_service_name(config: dict[str, Any]) -> str:
    """Return the tag service name used for tagging operations.

    The answer is fixed at "my tags" to avoid remote service errors.

    Args:
        config: Configuration dict (not currently used)

    Returns:
        The service name string "my tags"
    """
    return "my tags"
def get_tag_service_key(client: HydrusNetwork, fallback_name: str = "my tags") -> Optional[str]:
    """Get the service key for a named tag service.

    Asks the client for its services and returns the key of the first
    service whose name matches *fallback_name*. Both names are compared
    case-insensitively with surrounding whitespace ignored.

    Args:
        client: Object exposing ``get_services()`` (a HydrusNetwork client)
        fallback_name: Name of the service to find (e.g., "my tags")

    Returns:
        Service key string if found, None otherwise (including when the name
        is empty, the request fails, or the response is not a dict)
    """
    # Normalise the wanted name exactly like the service names below.
    wanted = str(fallback_name or "").strip().lower()
    if not wanted:
        # An empty name would otherwise "match" any unnamed service.
        return None

    try:
        services = client.get_services()
    except Exception:
        # Deliberate best effort: callers treat None as "not found".
        return None

    if not isinstance(services, dict):
        return None

    for group in services.values():
        if isinstance(group, list):
            # Services grouped by type: a list of dicts carrying their own key.
            candidates = [(None, item) for item in group]
        elif isinstance(group, dict):
            # NOTE(review): newer Hydrus API responses also appear to include
            # a "services" mapping of service key -> info; confirm against
            # the Client API docs for the versions in use.
            candidates = list(group.items())
        else:
            continue  # scalars such as version numbers

        for mapped_key, item in candidates:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip().lower()
            key = item.get("service_key") or item.get("key") or mapped_key
            if name == wanted and key:
                return str(key)

    return None


def is_request_error(exc: Exception) -> bool:
    """Check if an exception is a Hydrus request error.

    Args:
        exc: Exception to check

    Returns:
        True if this is a HydrusRequestError (or a subclass of it)
    """
    return isinstance(exc, HydrusRequestError)
CHUNK_SIZE = 1024 * 1024  # 1 MiB


def download_hydrus_file(
    file_url: str,
    headers: dict[str, str],
    destination: Path,
    timeout: float
) -> int:
    """Download *file_url* into *destination*, reporting progress as it goes.

    Progress is printed at most every 0.5 seconds, and only when the response
    carries a usable, non-zero ``content-length`` header; a final progress
    line is printed in that case too.

    Args:
        file_url: URL to fetch.
        headers: Request headers passed to the HTTP client.
        destination: File to (over)write with the response body.
        timeout: Timeout passed to the HTTP client.

    Returns:
        Number of bytes written to *destination*.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        plus any error from the HTTP client or from writing the file.
    """
    from SYS.progress import print_progress, print_final_progress

    downloaded = 0
    # Monotonic clock: elapsed time and speed must not jump (or go negative)
    # when the wall clock is adjusted during a download.
    start_time = time.monotonic()
    last_update = start_time
    filename = destination.name

    with HTTPClient(timeout=timeout, headers=headers) as client:
        # NOTE(review): if HTTPClient.get() returns a fully-read response,
        # the whole file is held in memory before being written out -
        # confirm, and switch to a streaming request if one is available.
        response = client.get(file_url)
        response.raise_for_status()

        # Total size is optional; without it no progress is printed.
        try:
            file_size = int(response.headers.get("content-length", 0))
        except (ValueError, TypeError):
            file_size = None

        with destination.open("wb") as handle:
            for chunk in response.iter_bytes(CHUNK_SIZE):
                if not chunk:
                    break
                handle.write(chunk)
                downloaded += len(chunk)

                # Update progress every 0.5 seconds if we know total size
                if file_size:
                    now = time.monotonic()
                    if now - last_update >= 0.5:
                        elapsed = now - start_time
                        speed = downloaded / elapsed if elapsed > 0 else 0
                        print_progress(filename, downloaded, file_size, speed)
                        last_update = now

    # Print final progress line if we tracked it
    if file_size:
        print_final_progress(filename, file_size, time.monotonic() - start_time)

    return downloaded