This commit is contained in:
nose
2025-12-16 23:23:43 -08:00
parent 9873280f0e
commit 86918f2ae2
46 changed files with 2277 additions and 1347 deletions

View File

@@ -6,6 +6,7 @@ import sys
import shutil
import tempfile
import re
from urllib.parse import urlsplit, parse_qs
import models
import pipeline as ctx
@@ -13,12 +14,20 @@ from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
extract_tag_from_result, extract_title_from_result, extract_url_from_result,
merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object
)
from ._shared import collapse_namespace_tag
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata
@@ -181,7 +190,7 @@ class Add_File(Cmdlet):
downloaded_path = Path(downloaded)
if downloaded_path.exists() and downloaded_path.is_dir():
log(
"[add-file] OpenLibrary download produced a directory (missing img2pdf?). Cannot ingest.",
"[add-file] OpenLibrary download produced a directory (PDF conversion failed). Cannot ingest.",
file=sys.stderr,
)
failures += 1
@@ -195,12 +204,32 @@ class Add_File(Cmdlet):
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
if code == 0:
successes += 1
else:
failures += 1
continue
# Hydrus file URLs are direct file downloads and may require Hydrus auth headers.
# If the user provided a destination (-provider or -store), download now and continue.
if (provider_name or location) and isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://")):
downloaded = self._try_download_hydrus_file_url(
file_url=str(media_path_or_url),
pipe_obj=pipe_obj,
config=config,
)
if downloaded is not None:
downloaded_path, downloaded_temp_dir = downloaded
temp_dir_to_cleanup = downloaded_temp_dir
media_path_or_url = str(downloaded_path)
pipe_obj.path = str(downloaded_path)
pipe_obj.is_temp = True
delete_after_item = True
# If it's still a URL target, fall back to the legacy delegate.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
if code == 0:
successes += 1
else:
failures += 1
continue
media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url
@@ -767,6 +796,134 @@ class Add_File(Cmdlet):
return True
return False
@staticmethod
def _sanitize_filename(value: str) -> str:
    """Return *value* reduced to a filename component that is safe on Windows.

    Steps, in order:
      * characters listed in ``invalid`` below and control codes (< 0x20)
        are replaced with ``_``;
      * whitespace runs collapse to a single space;
      * leading/trailing spaces and dots are stripped;
      * a name whose stem (the text before the first dot) is a reserved DOS
        device name (``CON``, ``PRN``, ``AUX``, ``NUL``, ``COM0``-``COM9``,
        ``LPT0``-``LPT9``, case-insensitive) is prefixed with ``_``.

    Returns ``"file"`` when nothing usable remains.
    """
    text = str(value or "").strip()
    if not text:
        return "file"

    invalid = '<>:"/\\|?*'
    text = "".join("_" if (ch in invalid or ord(ch) < 32) else ch for ch in text)
    text = re.sub(r"\s+", " ", text).strip(" .")
    if not text:
        return "file"

    # Windows resolves reserved device names even when an extension follows
    # ("NUL.txt" still refers to the NUL device), so inspect only the stem.
    stem = text.split(".", 1)[0].rstrip(" ")
    if re.fullmatch(r"(?:con|prn|aux|nul|com[0-9]|lpt[0-9])", stem, re.IGNORECASE):
        text = f"_{text}"
    return text
@staticmethod
def _parse_hydrus_file_url(file_url: str) -> Optional[str]:
    """Return the sha256 hash from a Hydrus ``/get_files/file`` URL, or None.

    A URL qualifies only when all of the following hold:
      * the scheme is ``http`` or ``https``;
      * the path *ends with* ``/get_files/file`` (a trailing slash and any
        leading path prefix are tolerated), so sibling endpoints such as
        ``/get_files/file_metadata`` are rejected;
      * the query carries a ``hash`` parameter whose first value is exactly
        64 hexadecimal characters.

    The returned hash is lower-cased. Malformed URLs yield None rather than
    raising.
    """
    try:
        split = urlsplit(str(file_url))
        params = parse_qs(split.query or "")
    except ValueError:
        # urlsplit rejects malformed netlocs, e.g. an unterminated IPv6 literal.
        return None

    if split.scheme.lower() not in {"http", "https"}:
        return None

    # Match the endpoint as a whole path suffix; a substring test would also
    # accept "/get_files/file_metadata", "/get_files/file_path", etc.
    endpoint = (split.path or "").lower().rstrip("/")
    if not endpoint.endswith("/get_files/file"):
        return None

    values = params.get("hash")
    if not values or not values[0]:
        return None

    hash_val = str(values[0]).strip().lower()
    if not re.fullmatch(r"[0-9a-f]{64}", hash_val):
        return None
    return hash_val
def _try_download_hydrus_file_url(
    self,
    *,
    file_url: str,
    pipe_obj: models.PipeObject,
    config: Dict[str, Any],
) -> Optional[tuple[Path, Path]]:
    """If *file_url* is a Hydrus file URL, download it to temp and return (path, temp_dir).

    The Hydrus backend that supplies the API key is chosen by, in order:
      1. the store named on ``pipe_obj.store`` (text after an optional
         ``prefix:``), when it is available and is a HydrusNetwork backend;
      2. the first HydrusNetwork backend whose ``URL`` equals the part of
         *file_url* before ``/get_files/file``, or is a prefix of it on a
         ``/`` boundary.

    Returns:
        ``(downloaded_file, temp_dir)`` on success. The caller owns
        ``temp_dir`` and is expected to remove it. ``None`` when the URL is
        not a Hydrus file URL, no backend / API key could be resolved, or
        the download failed; in those cases any temp directory created here
        has already been removed.
    """
    file_hash = self._parse_hydrus_file_url(file_url)
    if not file_hash:
        return None

    # Resolve Hydrus backend for auth.
    store_name = str(getattr(pipe_obj, "store", "") or "").strip()
    if ":" in store_name:
        store_name = store_name.split(":", 1)[-1].strip()

    def _is_hydrus(candidate: Any) -> bool:
        return type(candidate).__name__.lower() == "hydrusnetwork"

    # Build the registry once and share it between both lookup strategies.
    try:
        store_registry = Store(config)
    except Exception:
        store_registry = None

    backend = None
    if store_registry is not None and store_name:
        # NOTE(review): a backend selected by name is trusted without checking
        # that file_url actually points at that backend's URL - confirm intended.
        try:
            if store_registry.is_available(store_name):
                candidate = store_registry[store_name]
                if _is_hydrus(candidate):
                    backend = candidate
        except Exception:
            backend = None

    if backend is None and store_registry is not None:
        url_text = str(file_url)
        marker = url_text.lower().find("/get_files/file")
        prefix_text = url_text[:marker] if marker >= 0 else url_text
        target_prefix = prefix_text.rstrip("/").lower()
        try:
            for backend_name in store_registry.list_backends():
                candidate = store_registry[backend_name]
                if not _is_hydrus(candidate):
                    continue
                base_url = str(getattr(candidate, "URL", "") or "").rstrip("/").lower()
                if not base_url:
                    continue
                # Require equality or a "/" boundary: a raw startswith() would let
                # "http://host:4586" match "http://host:45869" and send the API
                # key to a different server.
                if target_prefix == base_url or target_prefix.startswith(base_url + "/"):
                    backend = candidate
                    break
        except Exception:
            backend = None

    if backend is None:
        debug("[add-file] Hydrus file URL detected but no Hydrus backend matched for auth")
        return None

    api_key = str(getattr(backend, "API", "") or "").strip()
    if not api_key:
        debug(f"[add-file] Hydrus backend '{getattr(backend, 'NAME', '') or store_name}' missing API key")
        return None

    # Best-effort filename from title + ext.
    ext = ""
    try:
        if isinstance(pipe_obj.extra, dict):
            ext = str(pipe_obj.extra.get("ext") or "").strip().lstrip(".")
    except Exception:
        ext = ""
    # The extension comes from piped metadata; drop anything that is not a
    # plain extension character so it cannot inject path separators.
    ext = re.sub(r"[^0-9A-Za-z.]+", "", ext).strip(".")
    if not ext:
        ext = "bin"

    title_hint = str(getattr(pipe_obj, "title", "") or "").strip()
    base_name = self._sanitize_filename(title_hint) if title_hint else f"hydrus_{file_hash[:12]}"

    temp_dir = Path(tempfile.mkdtemp(prefix="medios_hydrus_"))
    destination = unique_path(temp_dir / f"{base_name}.{ext}")

    headers = {"Hydrus-Client-API-Access-Key": api_key}

    # Reuse the backend client's timeout when it exposes one.
    timeout = 60.0
    try:
        client = getattr(backend, "_client", None)
        timeout_val = getattr(client, "timeout", None)
        if timeout_val is not None:
            timeout = float(timeout_val)
    except Exception:
        timeout = 60.0

    try:
        log(
            f"[add-file] Downloading Hydrus file via API ({getattr(backend, 'NAME', '') or store_name})",
            file=sys.stderr,
        )
        downloaded_bytes = hydrus_wrapper.download_hydrus_file(str(file_url), headers, destination, timeout)
        if downloaded_bytes <= 0 and not destination.exists():
            # Nothing was written: remove the now-useless temp directory
            # instead of leaking it.
            shutil.rmtree(temp_dir, ignore_errors=True)
            return None
        return destination, temp_dir
    except Exception as exc:
        log(f"[add-file] Hydrus download failed: {exc}", file=sys.stderr)
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None
def _delegate_to_download_data(
self,
result: Any,
@@ -883,6 +1040,61 @@ class Add_File(Cmdlet):
except Exception:
return None
@staticmethod
def _get_note_text(result: Any, pipe_obj: models.PipeObject, note_name: str) -> Optional[str]:
    """Look up the text of the note called *note_name* on a piped item.

    Sources are consulted in this order; the first one that *contains* the
    key decides the outcome, even if its value normalizes to nothing:
      1. ``pipe_obj.extra["notes"][note_name]``
      2. ``pipe_obj.extra[note_name]``
      3. ``result["notes"][note_name]`` (dict results only)
      4. ``result[note_name]`` (dict results only)

    Returns the stripped text, or None when the note is absent or blank.
    """
    wanted = str(note_name or "").strip()
    if not wanted:
        return None

    def _as_text(raw: Any) -> Optional[str]:
        # Convert bytes / arbitrary objects to stripped text; blank becomes None.
        if raw is None:
            return None
        if isinstance(raw, bytes):
            try:
                raw = raw.decode("utf-8", errors="ignore")
            except Exception:
                raw = str(raw)
        if isinstance(raw, str):
            return raw.strip() or None
        try:
            return str(raw).strip() or None
        except Exception:
            return None

    def _probe(container: dict):
        # Returns (found, text): the nested "notes" dict wins over a top-level key.
        nested = container.get("notes")
        if isinstance(nested, dict) and wanted in nested:
            return True, _as_text(nested.get(wanted))
        if wanted in container:
            return True, _as_text(container.get(wanted))
        return False, None

    # PipeObject.extra takes precedence (common for cmdlet-emitted dicts).
    try:
        extra = pipe_obj.extra
        if isinstance(extra, dict):
            found, text = _probe(extra)
            if found:
                return text
    except Exception:
        pass

    # Otherwise consult the raw result when it is a dict.
    if isinstance(result, dict):
        try:
            found, text = _probe(result)
            if found:
                return text
        except Exception:
            pass

    return None
@staticmethod
def _update_pipe_object_destination(
pipe_obj: models.PipeObject,
@@ -1451,6 +1663,26 @@ class Add_File(Cmdlet):
except Exception:
pass
# If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
# persist it automatically like add-note would.
sub_note = Add_File._get_note_text(result, pipe_obj, "sub")
if sub_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "sub", sub_note)
except Exception:
pass
chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters")
if chapters_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "chapters", chapters_note)
except Exception:
pass
meta: Dict[str, Any] = {}
try:
meta = backend.get_metadata(resolved_hash) or {}