@@ -373,6 +373,29 @@ class HydrusNetwork:
         body = {"hashes": hash_list}
         return self._post("/add_files/undelete_files", data=body)
 
+    def delete_files(self, hashes: Union[str, Iterable[str]], *, reason: str | None = None) -> dict[str, Any]:
+        """Delete files in Hydrus.
+
+        Hydrus Client API: POST /add_files/delete_files
+        Required JSON args: {"hashes": [<sha256 hex>, ...]}
+        Optional JSON args: {"reason": "..."}
+        """
+        hash_list = self._ensure_hashes(hashes)
+        body: dict[str, Any] = {"hashes": hash_list}
+        if isinstance(reason, str) and reason.strip():
+            body["reason"] = reason.strip()
+        return self._post("/add_files/delete_files", data=body)
+
+    def clear_file_deletion_record(self, hashes: Union[str, Iterable[str]]) -> dict[str, Any]:
+        """Clear Hydrus's file deletion record for the provided hashes.
+
+        Hydrus Client API: POST /add_files/clear_file_deletion_record
+        Required JSON args: {"hashes": [<sha256 hex>, ...]}
+        """
+        hash_list = self._ensure_hashes(hashes)
+        body = {"hashes": hash_list}
+        return self._post("/add_files/clear_file_deletion_record", data=body)
+
     def add_tag(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]:
         hash = self._ensure_hashes(hash)
         body = {"hashes": hash, "service_names_to_tags": {service_name: list(tags)}}
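Note: a minimal usage sketch for the pair of new endpoints, assuming a reachable
Hydrus client API (the URL, access key, and hash below are placeholders, not
values from this commit):

    from API.HydrusNetwork import HydrusNetwork

    client = HydrusNetwork(url="http://127.0.0.1:45869", access_key="<key>", timeout=60.0)
    hashes = ["0123abcd" * 8]  # one sha256 hex digest (placeholder)
    # Soft-delete with an audit reason, then forget the deletion record so the
    # same hash can be re-imported later without tripping the "previously
    # deleted" check.
    client.delete_files(hashes, reason="cleanup: duplicate rip")
    client.clear_file_deletion_record(hashes)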
CLI.py
@@ -1012,6 +1012,14 @@ class CmdletExecutor:
         ensure_registry_loaded()
 
+        # REPL guard: stage-local selection tables should not leak across independent
+        # commands. @ selection can always re-seed from the last result table.
+        try:
+            if hasattr(ctx, "set_current_stage_table"):
+                ctx.set_current_stage_table(None)
+        except Exception:
+            pass
+
         cmd_fn = REGISTRY.get(cmd_name)
         if not cmd_fn:
             # Lazy-import module and register its CMDLET.
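Note: the same defensive reset is added in four places in this commit. A sketch
of a shared helper these sites could call (the name _reset_stage_table is
hypothetical, not part of the diff):

    def _reset_stage_table(ctx) -> None:
        """Best-effort: drop the stage-local selection table so the next REPL
        command cannot bind @N selections to a stale table."""
        try:
            if hasattr(ctx, "set_current_stage_table"):
                ctx.set_current_stage_table(None)
        except Exception:
            pass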
@@ -1451,6 +1459,13 @@ class CmdletExecutor:
                 ctx.set_live_progress(None)
             except Exception:
                 pass
+            # Do not keep stage tables around after a single command; it can cause
+            # later @ selections to bind to stale tables (e.g. old add-file scans).
+            try:
+                if hasattr(ctx, "set_current_stage_table"):
+                    ctx.set_current_stage_table(None)
+            except Exception:
+                pass
             try:
                 if hasattr(ctx, "clear_current_cmdlet_name"):
                     ctx.clear_current_cmdlet_name()
@@ -2027,6 +2042,9 @@ class PipelineExecutor:
             elif table_type == "bandcamp":
                 print("Auto-running Bandcamp selection via download-media")
                 stages.append(["download-media"])
+            elif table_type == "internetarchive":
+                print("Auto-loading Internet Archive item via download-data")
+                stages.append(["download-data"])
             elif table_type in {"soulseek", "openlibrary", "libgen"}:
                 print("Auto-piping selection to download-file")
                 stages.append(["download-file"])
@@ -2056,6 +2074,16 @@ class PipelineExecutor:
             ):
                 print("Auto-inserting download-media after Bandcamp selection")
                 stages.insert(0, ["download-media"])
+            if table_type == "internetarchive" and first_cmd not in (
+                "download-data",
+                "download_data",
+                "download-file",
+                "download-media",
+                "download_media",
+                ".pipe",
+            ):
+                debug("Auto-inserting download-data after Internet Archive selection")
+                stages.insert(0, ["download-data"])
             if table_type == "libgen" and first_cmd not in (
                 "download-file",
                 "download-media",
@@ -2166,6 +2194,14 @@ class PipelineExecutor:
         try:
             self._try_clear_pipeline_stop(ctx)
 
+            # REPL guard: stage-local tables should not persist across independent
+            # commands. Selection stages can always seed from last/display tables.
+            try:
+                if hasattr(ctx, "set_current_stage_table"):
+                    ctx.set_current_stage_table(None)
+            except Exception:
+                pass
+
             # Preflight (URL-duplicate prompts, etc.) should be cached within a single
             # pipeline run, not across independent pipelines.
             try:
@@ -2615,11 +2651,13 @@ class PipelineExecutor:
             if (
                 (not stage_is_last)
                 and (not emits)
-                and cmd_name in {"download-media", "download_media"}
+                and cmd_name in {"download-media", "download_media", "download-data", "download_data"}
                 and stage_table is not None
                 and (
                     stage_table_type in {"ytdlp.formatlist", "download-media", "download_media", "bandcamp", "youtube"}
                     or stage_table_source in {"download-media", "download_media"}
+                    or stage_table_type in {"internetarchive.formats"}
+                    or stage_table_source in {"download-file"}
                 )
             ):
                 try:
@@ -2812,6 +2850,13 @@ class PipelineExecutor:
                 _pipeline_ctx.set_live_progress(None)
             except Exception:
                 pass
+            # End-of-command cleanup: avoid leaking current stage tables into
+            # the next REPL command (causes stale @ selection sources).
+            try:
+                if hasattr(ctx, "set_current_stage_table"):
+                    ctx.set_current_stage_table(None)
+            except Exception:
+                pass
             if pipeline_session:
                 pipeline_session.close(status=pipeline_status, error_msg=pipeline_error)
         except Exception as exc:
@@ -220,7 +220,6 @@ class InternetArchive(Provider):
             "mediatype",
             "creator",
             "date",
-            "downloads",
             "collection",
         ]
 
@@ -243,7 +242,11 @@ class InternetArchive(Provider):
 
         title = str(row.get("title") or identifier).strip() or identifier
         mediatype = str(row.get("mediatype") or "").strip()
-        creator = str(row.get("creator") or "").strip()
+        creator_raw = row.get("creator")
+        if isinstance(creator_raw, list):
+            creator = ", ".join(str(x) for x in creator_raw if x)
+        else:
+            creator = str(creator_raw or "").strip()
         date = str(row.get("date") or "").strip()
 
         annotations: List[str] = []
@@ -272,9 +275,10 @@ class InternetArchive(Provider):
             size_bytes=None,
             tag=set(),
             columns=[
-                ("identifier", identifier),
+                ("title", title),
                 ("mediatype", mediatype),
                 ("date", date),
+                ("creator", creator),
             ],
             full_metadata=dict(row),
         )
@@ -15,6 +15,115 @@ from ProviderCore.base import Provider
 _MATRIX_INIT_CHECK_CACHE: Dict[str, Tuple[bool, Optional[str]]] = {}
 
 
+def _sniff_mime_from_header(path: Path) -> Optional[str]:
+    """Best-effort MIME sniffing from file headers.
+
+    Used when the file has no/unknown extension (common for exported/temp files).
+    Keeps dependencies to stdlib only.
+    """
+    try:
+        if not path.exists() or not path.is_file():
+            return None
+        with open(path, "rb") as handle:
+            header = handle.read(512)
+        if not header:
+            return None
+
+        # Images
+        if header.startswith(b"\xFF\xD8\xFF"):
+            return "image/jpeg"
+        if header.startswith(b"\x89PNG\r\n\x1a\n"):
+            return "image/png"
+        if header.startswith(b"GIF87a") or header.startswith(b"GIF89a"):
+            return "image/gif"
+        if header.startswith(b"BM"):
+            return "image/bmp"
+        if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WEBP":
+            return "image/webp"
+
+        # Audio
+        if header.startswith(b"fLaC"):
+            return "audio/flac"
+        if header.startswith(b"OggS"):
+            # Could be audio or video; treat as audio unless extension suggests video.
+            return "audio/ogg"
+        if header.startswith(b"ID3"):
+            return "audio/mpeg"
+        if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
+            return "audio/mpeg"
+        if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WAVE":
+            return "audio/wav"
+
+        # Video
+        if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
+            return "video/x-msvideo"
+        if header.startswith(b"\x1A\x45\xDF\xA3"):
+            # EBML container: Matroska/WebM.
+            return "video/x-matroska"
+        if len(header) >= 12 and header[4:8] == b"ftyp":
+            # ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
+            return "video/mp4"
+        # MPEG-TS / M2TS (sync byte every 188 bytes)
+        try:
+            if path.stat().st_size >= 188 * 2 and header[0] == 0x47:
+                with open(path, "rb") as handle:
+                    handle.seek(188)
+                    b = handle.read(1)
+                    if b == b"\x47":
+                        return "video/mp2t"
+        except Exception:
+            pass
+
+        return None
+    except Exception:
+        return None
+
+
+def _classify_matrix_upload(path: Path, *, explicit_mime_type: Optional[str] = None) -> Tuple[str, str]:
+    """Return (mime_type, msgtype) for Matrix uploads."""
+    mime_type = str(explicit_mime_type or "").strip() or None
+
+    if not mime_type:
+        # `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
+        mime_type, _ = mimetypes.guess_type(str(path))
+
+    if not mime_type:
+        mime_type = _sniff_mime_from_header(path)
+
+    # Refinements based on extension for ambiguous containers.
+    ext = path.suffix.lower()
+    if ext in {".m4a", ".aac"}:
+        mime_type = mime_type or "audio/mp4"
+    if ext in {".mkv", ".webm"}:
+        mime_type = mime_type or "video/x-matroska"
+    if ext in {".ogv"}:
+        mime_type = mime_type or "video/ogg"
+
+    msgtype = "m.file"
+    if mime_type:
+        mt = mime_type.casefold()
+        if mt.startswith("image/"):
+            msgtype = "m.image"
+        elif mt.startswith("audio/"):
+            msgtype = "m.audio"
+        elif mt.startswith("video/"):
+            msgtype = "m.video"
+
+    # Final fallback for unknown MIME types.
+    if msgtype == "m.file":
+        audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
+        video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv", ".m2ts", ".mts", ".3gp", ".ogv"}
+        image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
+        if ext in audio_exts:
+            msgtype = "m.audio"
+        elif ext in video_exts:
+            msgtype = "m.video"
+        elif ext in image_exts:
+            msgtype = "m.image"
+
+    return (mime_type or "application/octet-stream"), msgtype
+
+
 def _normalize_homeserver(value: str) -> str:
     text = str(value or "").strip()
     if not text:
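Note: expected classifier behavior in rough strokes (a sketch; the sample paths
are invented). With an extension, mimetypes or the extension refinement answers;
without one, the header magic decides; if everything fails, safe defaults apply:

    from pathlib import Path

    _classify_matrix_upload(Path("clip.mkv"))
    # -> ("video/x-matroska", "m.video")  via the .mkv extension refinement

    _classify_matrix_upload(Path("/tmp/export_0001"))
    # -> ("image/png", "m.image")  if the file starts with b"\x89PNG\r\n\x1a\n"

    # Nothing recognized at all:
    # -> ("application/octet-stream", "m.file")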
@@ -189,9 +298,8 @@ class Matrix(Provider):
             "Content-Type": "application/octet-stream",
         }
 
-        mime_type, _ = mimetypes.guess_type(path)
-        if mime_type:
-            headers["Content-Type"] = mime_type
+        mime_type, msgtype = _classify_matrix_upload(path, explicit_mime_type=kwargs.get("mime_type"))
+        headers["Content-Type"] = mime_type
 
         filename = path.name
 
@@ -222,19 +330,6 @@ class Matrix(Provider):
         except Exception:
             download_url_for_store = ""
 
-        # Determine message type
-        msgtype = "m.file"
-        ext = path.suffix.lower()
-        audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"}
-        video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv"}
-        image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
-        if ext in audio_exts:
-            msgtype = "m.audio"
-        elif ext in video_exts:
-            msgtype = "m.video"
-        elif ext in image_exts:
-            msgtype = "m.image"
-
         info = {"mimetype": mime_type, "size": path.stat().st_size}
         payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info}
 
@@ -955,6 +955,39 @@ class HydrusNetwork(Store):
         debug(f"{self._log_prefix()} get_file: url={browser_url}")
         return browser_url
 
+    def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
+        """Delete a file from Hydrus, then clear the deletion record.
+
+        This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store.
+        """
+        try:
+            client = self._client
+            if client is None:
+                debug(f"{self._log_prefix()} delete_file: client unavailable")
+                return False
+
+            file_hash = str(file_identifier or "").strip().lower()
+            if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
+                debug(f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'")
+                return False
+
+            reason = kwargs.get("reason")
+            reason_text = str(reason).strip() if isinstance(reason, str) and reason.strip() else None
+
+            # 1) Delete file
+            client.delete_files([file_hash], reason=reason_text)
+
+            # 2) Clear deletion record (best-effort)
+            try:
+                client.clear_file_deletion_record([file_hash])
+            except Exception as exc:
+                debug(f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}")
+
+            return True
+        except Exception as exc:
+            debug(f"{self._log_prefix()} delete_file failed: {exc}")
+            return False
+
     def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
         """Get metadata for a file from Hydrus by hash.
 
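Note: how the cmdlet layer is expected to drive this (a sketch; the store name
"home" and the hash are placeholders, and the Store lookup mirrors the
delete-file change later in this commit):

    from Store import Store

    registry = Store(config)   # config: the app's parsed config mapping
    backend = registry["home"]
    ok = backend.delete_file(
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        reason="duplicate",
    )
    # ok is False for a malformed hash or a missing client; clearing the
    # deletion record is best-effort and never fails the call.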
@@ -47,6 +47,13 @@ def _normalize_title_for_extract(text: str) -> str:
     s = s.replace("\u2011", "-")  # non-breaking hyphen
     s = s.replace("\u2012", "-")  # figure dash
     s = s.replace("\u2015", "-")  # horizontal bar
+
+    # Collapse any whitespace runs (including newlines/tabs) to a single space.
+    # Some sources wrap the artist name or title across lines.
+    try:
+        s = re.sub(r"\s+", " ", s).strip()
+    except Exception:
+        s = " ".join(s.split())
     return s
 
 
@@ -70,7 +77,7 @@ def _literal_to_title_pattern_regex(literal: str) -> str:
         if ch.isspace():
             while i < len(literal) and literal[i].isspace():
                 i += 1
-            out.append(r"\\s*")
+            out.append(r"\s*")
             continue
         out.append(re.escape(ch))
         i += 1
@@ -95,7 +102,7 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
         raise ValueError("extract template must contain at least one (field)")
 
     field_names: List[str] = []
-    parts: List[str] = [r"^\\s*"]
+    parts: List[str] = [r"^\s*"]
     last_end = 0
 
     for idx, m in enumerate(matches):
@@ -108,18 +115,24 @@ def _compile_extract_template(template: str) -> tuple[re.Pattern[str], List[str]]:
             raise ValueError(f"invalid field name '{raw_name}' (use A-Z, 0-9, underscore)")
         field_names.append(raw_name)
+
+        name_lower = raw_name.lower()
         is_last = idx == (len(matches) - 1)
         if is_last:
             parts.append(fr"(?P<{raw_name}>.+)")
         else:
-            parts.append(fr"(?P<{raw_name}>.+?)")
+            # Heuristic: common numeric fields should capture full digit runs.
+            # This avoids ambiguous splits like track='2', title='3 ...'.
+            if name_lower in {"disk", "disc", "cd", "track", "trk", "episode", "ep", "season", "year"}:
+                parts.append(fr"(?P<{raw_name}>\d+)")
+            else:
+                parts.append(fr"(?P<{raw_name}>.+?)")
+
         last_end = m.end()
 
     tail = tpl[last_end:]
     if tail:
         parts.append(_literal_to_title_pattern_regex(tail))
-    parts.append(r"\\s*$")
+    parts.append(r"\s*$")
 
     rx = "".join(parts)
     return re.compile(rx, flags=re.IGNORECASE), field_names
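Note: why the digit-run heuristic matters, shown on the compiled patterns for a
two-field template like "(track) (title)" (illustrative input):

    import re

    title = "12 Ghosts II"

    # Old: both fields lazy -> track grabs a single digit.
    old = re.compile(r"^\s*(?P<track>.+?)\s*(?P<title>.+)\s*$")
    old.match(title).groupdict()   # {'track': '1', 'title': '2 Ghosts II'}

    # New: a recognized numeric field captures the full digit run.
    new = re.compile(r"^\s*(?P<track>\d+)\s*(?P<title>.+)\s*$")
    new.match(title).groupdict()   # {'track': '12', 'title': 'Ghosts II'}

The r"\\s*" -> r"\s*" changes in this file are real bug fixes: inside a raw
string, "\\s*" is a literal backslash followed by "s*", which never matches
whitespace in a title.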
cmdlet/archive_file.py (new file)
@@ -0,0 +1,426 @@
+"""Create a single .tar.zst archive from piped file selections."""
+
+from __future__ import annotations
+
+import re
+import sys
+import tarfile
+import tempfile
+import time
+import uuid
+from pathlib import Path
+from typing import Any, Dict, List, Sequence, Set
+from urllib.parse import parse_qs, urlparse
+
+from SYS.logger import log
+
+import pipeline as ctx
+from config import resolve_output_dir
+from . import _shared as sh
+
+Cmdlet = sh.Cmdlet
+CmdletArg = sh.CmdletArg
+SharedArgs = sh.SharedArgs
+coerce_to_pipe_object = sh.coerce_to_pipe_object
+create_pipe_object_result = sh.create_pipe_object_result
+parse_cmdlet_args = sh.parse_cmdlet_args
+should_show_help = sh.should_show_help
+
+
+_SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")
+
+
+def _extract_sha256_hex(item: Any) -> str:
+    try:
+        if isinstance(item, dict):
+            h = item.get("hash")
+        else:
+            h = getattr(item, "hash", None)
+        if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
+            return h.strip().lower()
+    except Exception:
+        pass
+    return ""
+
+
+def _extract_store_name(item: Any) -> str:
+    try:
+        if isinstance(item, dict):
+            s = item.get("store")
+        else:
+            s = getattr(item, "store", None)
+        return str(s or "").strip()
+    except Exception:
+        return ""
+
+
+def _extract_url(item: Any) -> str:
+    try:
+        u = sh.get_field(item, "url") or sh.get_field(item, "target")
+        if isinstance(u, str) and u.strip().lower().startswith(("http://", "https://")):
+            return u.strip()
+    except Exception:
+        pass
+    return ""
+
+
+def _extract_hash_from_hydrus_file_url(url: str) -> str:
+    try:
+        parsed = urlparse(str(url))
+        if not (parsed.path or "").endswith("/get_files/file"):
+            return ""
+        qs = parse_qs(parsed.query or "")
+        h = (qs.get("hash") or [""])[0]
+        if isinstance(h, str) and _SHA256_RE.fullmatch(h.strip()):
+            return h.strip().lower()
+    except Exception:
+        pass
+    return ""
+
+
+def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
+    instances: Set[str] = set()
+    try:
+        store_cfg = config.get("store") if isinstance(config, dict) else None
+        if isinstance(store_cfg, dict):
+            hydrus_cfg = store_cfg.get("hydrusnetwork")
+            if isinstance(hydrus_cfg, dict):
+                instances = {str(k).strip().lower() for k in hydrus_cfg.keys() if str(k).strip()}
+    except Exception:
+        instances = set()
+    return instances
+
+
+def _maybe_download_hydrus_item(item: Any, config: Dict[str, Any], output_dir: Path) -> Path | None:
+    """Download a Hydrus-backed item to a local temp path (best-effort).
+
+    This is intentionally side-effect free except for writing the local temp file.
+    """
+    try:
+        from config import get_hydrus_access_key, get_hydrus_url
+        from API.HydrusNetwork import HydrusNetwork as HydrusClient, download_hydrus_file
+    except Exception:
+        return None
+
+    store_name = _extract_store_name(item)
+    store_lower = store_name.lower()
+    hydrus_instances = _hydrus_instance_names(config)
+    store_hint = store_lower in {"hydrus", "hydrusnetwork"} or (store_lower in hydrus_instances)
+
+    url = _extract_url(item)
+    file_hash = _extract_sha256_hex(item) or (_extract_hash_from_hydrus_file_url(url) if url else "")
+    if not file_hash:
+        return None
+
+    # Only treat it as Hydrus when we have an explicit Hydrus file URL OR the store suggests it.
+    is_hydrus_url = False
+    if url:
+        try:
+            parsed = urlparse(url)
+            is_hydrus_url = (parsed.path or "").endswith("/get_files/file") and _extract_hash_from_hydrus_file_url(url) == file_hash
+        except Exception:
+            is_hydrus_url = False
+    if not (is_hydrus_url or store_hint):
+        return None
+
+    # Prefer store name as instance key; fall back to "home".
+    access_key = None
+    hydrus_url = None
+    for inst in [s for s in [store_lower, "home"] if s]:
+        try:
+            access_key = (get_hydrus_access_key(config, inst) or "").strip() or None
+            hydrus_url = (get_hydrus_url(config, inst) or "").strip() or None
+            if access_key and hydrus_url:
+                break
+        except Exception:
+            access_key = None
+            hydrus_url = None
+
+    if not access_key or not hydrus_url:
+        return None
+
+    client = HydrusClient(url=hydrus_url, access_key=access_key, timeout=60.0)
+    file_url = url if (url and is_hydrus_url) else client.file_url(file_hash)
+
+    # Best-effort extension from Hydrus metadata.
+    suffix = ".hydrus"
+    try:
+        meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
+        entries = meta_response.get("metadata") if isinstance(meta_response, dict) else None
+        if isinstance(entries, list) and entries:
+            entry = entries[0]
+            if isinstance(entry, dict):
+                ext = entry.get("ext")
+                if isinstance(ext, str) and ext.strip():
+                    cleaned = ext.strip()
+                    if not cleaned.startswith("."):
+                        cleaned = "." + cleaned.lstrip(".")
+                    if len(cleaned) <= 12:
+                        suffix = cleaned
+    except Exception:
+        pass
+
+    try:
+        output_dir.mkdir(parents=True, exist_ok=True)
+    except Exception:
+        pass
+
+    dest = output_dir / f"{file_hash}{suffix}"
+    if dest.exists():
+        dest = output_dir / f"{file_hash}_{uuid.uuid4().hex[:10]}{suffix}"
+
+    headers = {"Hydrus-Client-API-Access-Key": access_key}
+    download_hydrus_file(file_url, headers, dest, timeout=60.0)
+    try:
+        if dest.exists() and dest.is_file():
+            return dest
+    except Exception:
+        return None
+    return None
+
+
+def _resolve_existing_or_fetch_path(item: Any, config: Dict[str, Any]) -> tuple[Path | None, Path | None]:
+    """Return (path, temp_path) where temp_path is non-None only for files we downloaded."""
+    # 1) Direct local path
+    try:
+        po = coerce_to_pipe_object(item, None)
+        raw_path = getattr(po, "path", None) or getattr(po, "target", None) or sh.get_pipe_object_path(item)
+        if raw_path:
+            p = Path(str(raw_path)).expanduser()
+            if p.exists():
+                return p, None
+    except Exception:
+        pass
+
+    # 2) Store-backed path
+    file_hash = _extract_sha256_hex(item)
+    store_name = _extract_store_name(item)
+    if file_hash and store_name:
+        try:
+            from Store import Store
+
+            store = Store(config)
+            backend = store[store_name]
+            src = backend.get_file(file_hash)
+            if isinstance(src, Path):
+                if src.exists():
+                    return src, None
+            elif isinstance(src, str) and src.strip():
+                cand = Path(src).expanduser()
+                if cand.exists():
+                    return cand, None
+                # If the backend returns a URL (HydrusNetwork), download it.
+                if src.strip().lower().startswith(("http://", "https://")):
+                    tmp_base = None
+                    try:
+                        tmp_base = config.get("temp") if isinstance(config, dict) else None
+                    except Exception:
+                        tmp_base = None
+                    out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
+                    out_dir = out_dir / "archive" / "hydrus"
+                    downloaded = _maybe_download_hydrus_item({"hash": file_hash, "store": store_name, "url": src.strip()}, config, out_dir)
+                    if downloaded is not None:
+                        return downloaded, downloaded
+        except Exception:
+            pass
+
+    # 3) Hydrus-backed items without backend.get_file path.
+    try:
+        tmp_base = config.get("temp") if isinstance(config, dict) else None
+    except Exception:
+        tmp_base = None
+    out_dir = Path(str(tmp_base)).expanduser() if tmp_base else (Path(tempfile.gettempdir()) / "Medios-Macina")
+    out_dir = out_dir / "archive" / "hydrus"
+    downloaded = _maybe_download_hydrus_item(item, config, out_dir)
+    if downloaded is not None:
+        return downloaded, downloaded
+
+    return None, None
+
+
+def _unique_arcname(name: str, seen: Set[str]) -> str:
+    base = str(name or "").replace("\\", "/")
+    base = base.lstrip("/")
+    if not base:
+        base = "file"
+    if base not in seen:
+        seen.add(base)
+        return base
+
+    stem = base
+    suffix = ""
+    if "/" not in base:
+        p = Path(base)
+        stem = p.stem
+        suffix = p.suffix
+
+    n = 2
+    while True:
+        candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
+        if candidate not in seen:
+            seen.add(candidate)
+            return candidate
+        n += 1
+
+
+def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
+    if should_show_help(args):
+        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
+        return 0
+
+    parsed = parse_cmdlet_args(args, CMDLET)
+
+    level_raw = parsed.get("level")
+    try:
+        level = int(level_raw) if level_raw is not None else 11
+    except Exception:
+        level = 11
+    if level < 1:
+        level = 1
+    if level > 22:
+        level = 22
+
+    # Output destination is controlled by the shared -path behavior in the pipeline runner.
+    # This cmdlet always creates the archive in the configured output directory and emits it.
+
+    # Collect piped items; archive-file is a batch command (single output).
+    items: List[Any] = []
+    if isinstance(result, list):
+        items = list(result)
+    elif result is not None:
+        items = [result]
+
+    if not items:
+        log("No piped items provided to archive-file", file=sys.stderr)
+        return 1
+
+    temp_downloads: List[Path] = []
+    try:
+        paths: List[Path] = []
+        for it in items:
+            p, tmp = _resolve_existing_or_fetch_path(it, config)
+            if p is None:
+                continue
+            paths.append(p)
+            if tmp is not None:
+                temp_downloads.append(tmp)
+
+        # Keep stable order, remove duplicates.
+        uniq: List[Path] = []
+        seen_paths: Set[str] = set()
+        for p in paths:
+            key = str(p.resolve()) if p.exists() else str(p)
+            if key in seen_paths:
+                continue
+            seen_paths.add(key)
+            uniq.append(p)
+        paths = uniq
+
+        if not paths:
+            log("No existing file paths found in piped items", file=sys.stderr)
+            return 1
+
+        out_dir = resolve_output_dir(config)
+        try:
+            out_dir.mkdir(parents=True, exist_ok=True)
+        except Exception:
+            pass
+        stamp = time.strftime("%Y%m%d_%H%M%S")
+        out_path = out_dir / f"archive_{stamp}.tar.zst"
+        try:
+            out_path = sh._unique_destination_path(out_path)  # type: ignore[attr-defined]
+        except Exception:
+            pass
+
+        try:
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+        except Exception as exc:
+            log(f"Failed to create output directory: {out_path.parent} ({exc})", file=sys.stderr)
+            return 1
+
+        # Import zstandard lazily so the rest of the CLI still runs without it.
+        try:
+            import zstandard as zstd  # type: ignore
+        except Exception:
+            log("Missing dependency: zstandard (pip install zstandard)", file=sys.stderr)
+            return 1
+
+        # Write tar stream into zstd stream.
+        try:
+            with open(out_path, "wb") as out_handle:
+                cctx = zstd.ZstdCompressor(level=level)
+                with cctx.stream_writer(out_handle) as compressor:
+                    with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
+                        seen_names: Set[str] = set()
+                        for p in paths:
+                            arcname = _unique_arcname(p.name, seen_names)
+                            # For directories, tarfile will include contents when recursive=True.
+                            try:
+                                tf.add(str(p), arcname=arcname, recursive=True)
+                            except Exception as exc:
+                                log(f"Failed to add to archive: {p} ({exc})", file=sys.stderr)
+        except Exception as exc:
+            log(f"Archive creation failed: {exc}", file=sys.stderr)
+            return 1
+
+        # Emit a single artifact downstream.
+        hash_value = None
+        try:
+            from SYS.utils import sha256_file
+
+            hash_value = sha256_file(out_path)
+        except Exception:
+            hash_value = None
+
+        pipe_obj = create_pipe_object_result(
+            source="archive",
+            identifier=out_path.stem,
+            file_path=str(out_path),
+            cmdlet_name="archive-file",
+            title=out_path.name,
+            hash_value=hash_value,
+            is_temp=True,
+            store="PATH",
+            extra={
+                "target": str(out_path),
+                "archive_format": "tar.zst",
+                "compression": "zstd",
+                "level": level,
+                "source_count": len(paths),
+                "source_paths": [str(p) for p in paths],
+            },
+        )
+        ctx.emit(pipe_obj)
+        return 0
+    finally:
+        # Best-effort cleanup of any temp Hydrus downloads we created.
+        for tmp in temp_downloads:
+            try:
+                tmp.unlink(missing_ok=True)  # type: ignore[arg-type]
+            except TypeError:
+                try:
+                    if tmp.exists():
+                        tmp.unlink()
+                except Exception:
+                    pass
+            except Exception:
+                pass
+
+
+CMDLET = Cmdlet(
+    name="archive-file",
+    summary="Archive piped files into a single .tar.zst.",
+    usage="@N | archive-file [-level <1-22>] [-path <path>]",
+    arg=[
+        CmdletArg("-level", type="integer", description="Zstandard compression level (default: 11)."),
+        SharedArgs.PATH,
+    ],
+    detail=[
+        "- Example: @1-5 | archive-file",
+        "- Default zstd level is 11.",
+        "- Emits one output item (the archive) for downstream piping.",
+    ],
+)
+
+CMDLET.exec = _run
+CMDLET.register()
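Note: the core compression path in isolation, a sketch under the same
assumptions as the cmdlet (zstandard installed; paths exist). stream_writer
wraps the output file so tarfile writes straight into the compressor and no
intermediate .tar is ever materialized:

    import tarfile
    from pathlib import Path

    import zstandard as zstd

    def write_tar_zst(paths: list[Path], out_path: Path, level: int = 11) -> None:
        """Stream files into a single .tar.zst without a temporary .tar."""
        cctx = zstd.ZstdCompressor(level=level)
        with open(out_path, "wb") as out_handle:
            with cctx.stream_writer(out_handle) as compressor:
                # mode="w|" forces pure streaming (no seeking back into the
                # archive), which a compressing writer cannot support; PAX
                # handles long names and large files.
                with tarfile.open(fileobj=compressor, mode="w|", format=tarfile.PAX_FORMAT) as tf:
                    for p in paths:
                        tf.add(str(p), arcname=p.name, recursive=True)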
@@ -297,51 +297,81 @@ class Delete_File(sh.Cmdlet):
             should_try_hydrus = False
 
         if should_try_hydrus and hash_hex:
-            client = None
-            if store:
-                # Store specified: do not fall back to a global/default Hydrus client.
-                try:
-                    registry = Store(config)
-                    backend = registry[str(store)]
-                    candidate = getattr(backend, "_client", None)
-                    if candidate is not None and hasattr(candidate, "_post"):
-                        client = candidate
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
-                        return False
-            else:
-                # No store context; use default Hydrus client.
-                try:
-                    client = hydrus_wrapper.get_client(config)
-                except Exception as exc:
-                    if not local_deleted:
-                        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
-                        return False
-                if client is None:
-                    if not local_deleted:
-                        log("Hydrus client unavailable", file=sys.stderr)
-                        return False
-
-            payload: Dict[str, Any] = {"hashes": [hash_hex]}
-            if reason:
-                payload["reason"] = reason
+            # Prefer deleting via the resolved store backend when it is a HydrusNetwork store.
+            # This ensures store-specific post-delete hooks run (e.g., clearing Hydrus deletion records).
+            did_backend_delete = False
             try:
-                client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                if backend is not None:
+                    deleter = getattr(backend, "delete_file", None)
+                    if callable(deleter):
+                        did_backend_delete = bool(deleter(hash_hex, reason=reason))
+            except Exception:
+                did_backend_delete = False
+
+            if did_backend_delete:
                 hydrus_deleted = True
                 title_str = str(title_val).strip() if title_val else ""
                 if title_str:
                     debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
                 else:
                     debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
-            except Exception:
-                # If it's not in Hydrus (e.g. 404 or similar), that's fine
-                if not local_deleted:
-                    return []
+            else:
+                # Fallback to direct client calls.
+                client = None
+                if store:
+                    # Store specified: do not fall back to a global/default Hydrus client.
+                    try:
+                        registry = Store(config)
+                        backend = registry[str(store)]
+                        candidate = getattr(backend, "_client", None)
+                        if candidate is not None and hasattr(candidate, "_post"):
+                            client = candidate
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}': {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable for store '{store}'", file=sys.stderr)
+                            return False
+                else:
+                    # No store context; use default Hydrus client.
+                    try:
+                        client = hydrus_wrapper.get_client(config)
+                    except Exception as exc:
+                        if not local_deleted:
+                            log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
+                            return False
+                    if client is None:
+                        if not local_deleted:
+                            log("Hydrus client unavailable", file=sys.stderr)
+                            return False
+
+                payload: Dict[str, Any] = {"hashes": [hash_hex]}
+                if reason:
+                    payload["reason"] = reason
+                try:
+                    client._post("/add_files/delete_files", data=payload)  # type: ignore[attr-defined]
+                    # Best-effort clear deletion record if supported by this client.
+                    try:
+                        clearer = getattr(client, "clear_file_deletion_record", None)
+                        if callable(clearer):
+                            clearer([hash_hex])
+                        else:
+                            client._post("/add_files/clear_file_deletion_record", data={"hashes": [hash_hex]})  # type: ignore[attr-defined]
+                    except Exception:
+                        pass
+
+                    hydrus_deleted = True
+                    title_str = str(title_val).strip() if title_val else ""
+                    if title_str:
+                        debug(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}", file=sys.stderr)
+                    else:
+                        debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
+                except Exception:
+                    # If it's not in Hydrus (e.g. 404 or similar), that's fine
+                    if not local_deleted:
+                        return []
 
         if hydrus_deleted and hash_hex:
             size_hint = None
cmdlet/download_data.py (new file)
@@ -0,0 +1,267 @@
+"""Smart downloader front-door.
+
+Currently focused on Internet Archive item pages:
+- Takes a piped InternetArchive search-provider row (table=internetarchive) or an archive.org details URL
+- Displays a selectable table of available files/formats (PDF/ZIP/OCR/etc)
+- Selecting a row via @N expands to download-file <direct-url>
+
+This enables:
+    search-provider -provider internetarchive "..."
+    @3                 # shows formats table
+    @2 | add-file ...  # downloads selected file then pipes to add-file
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+from typing import Any, Dict, List, Sequence, cast
+from urllib.parse import quote
+
+from SYS.logger import log, debug
+import pipeline as pipeline_context
+from result_table import ResultTable
+
+from . import _shared as sh
+
+Cmdlet = sh.Cmdlet
+SharedArgs = sh.SharedArgs
+parse_cmdlet_args = sh.parse_cmdlet_args
+get_field = sh.get_field
+
+
+def _extract_ia_identifier(text: str) -> str:
+    s = str(text or "").strip()
+    if not s:
+        return ""
+
+    # https://archive.org/details/<identifier>
+    m = re.search(r"archive\.org/(?:details|download)/([^/?#\s]+)", s, flags=re.IGNORECASE)
+    if m:
+        return str(m.group(1) or "").strip()
+
+    # internetarchive:<identifier>
+    if s.lower().startswith("internetarchive:"):
+        return s.split(":", 1)[-1].strip()
+
+    return ""
+
+
+class Download_Data(Cmdlet):
+    def __init__(self) -> None:
+        super().__init__(
+            name="download-data",
+            summary="List downloadable files/formats for provider items (e.g., Internet Archive)",
+            usage="download-data <url> OR @N | download-data (provider item), then select a file with @N",
+            alias=[],
+            arg=[SharedArgs.URL],
+            detail=[
+                "For Internet Archive item pages, shows a selectable list of available files (PDF/ZIP/OCR/etc).",
+                "Select a file row with @N to run download-file on that direct URL.",
+            ],
+            exec=self.run,
+        )
+        self.register()
+
+    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
+        try:
+            # parse_cmdlet_args typing varies across cmdlets; keep runtime behavior.
+            parsed = parse_cmdlet_args(args, cast(Any, self))
+        except Exception:
+            parsed = {}
+
+        raw_urls = parsed.get("url", [])
+        if isinstance(raw_urls, str):
+            raw_urls = [raw_urls]
+        url_arg = str(raw_urls[0]).strip() if raw_urls else ""
+
+        piped_items: List[Any] = []
+        if isinstance(result, list):
+            piped_items = list(result)
+        elif result is not None:
+            piped_items = [result]
+
+        # Prefer piped item target if present.
+        target = ""
+        if piped_items:
+            target = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "").strip()
+        if not target:
+            target = url_arg
+
+        table_name = ""
+        try:
+            table_name = str(get_field(piped_items[0], "table") or "").strip().lower() if piped_items else ""
+        except Exception:
+            table_name = ""
+
+        identifier = ""
+        if piped_items:
+            md = get_field(piped_items[0], "full_metadata")
+            if isinstance(md, dict):
+                identifier = str(md.get("identifier") or "").strip()
+        if not identifier:
+            identifier = _extract_ia_identifier(target)
+
+        if table_name == "internetarchive" or ("archive.org" in target.lower() and identifier):
+            return self._run_internetarchive(piped_items[0] if piped_items else None, identifier=identifier)
+
+        log("download-data: unsupported target (currently only Internet Archive item pages are supported)", file=sys.stderr)
+        return 1
+
+    @staticmethod
+    def _run_internetarchive(item: Any, *, identifier: str) -> int:
+        try:
+            from Provider.internetarchive import _ia as _ia_loader
+        except Exception as exc:
+            log(f"download-data: Internet Archive provider unavailable: {exc}", file=sys.stderr)
+            return 1
+
+        def _is_ia_metadata_file(f: Dict[str, Any]) -> bool:
+            try:
+                source = str(f.get("source") or "").strip().lower()
+                fmt = str(f.get("format") or "").strip().lower()
+            except Exception:
+                source = ""
+                fmt = ""
+
+            if source == "metadata":
+                return True
+            if fmt in {"metadata", "archive bittorrent"}:
+                return True
+            if fmt.startswith("thumbnail"):
+                return True
+            return False
+
+        ia = None
+        try:
+            ia = _ia_loader()
+        except Exception as exc:
+            log(f"download-data: Internet Archive module unavailable: {exc}", file=sys.stderr)
+            return 1
+
+        try:
+            get_item = getattr(ia, "get_item", None)
+            if not callable(get_item):
+                raise Exception("internetarchive.get_item is not available")
+            ia_item = cast(Any, get_item(str(identifier)))
+        except Exception as exc:
+            log(f"download-data: Internet Archive item lookup failed: {exc}", file=sys.stderr)
+            return 1
+
+        files: List[Dict[str, Any]] = []
+        try:
+            raw_files = getattr(ia_item, "files", None)
+            if isinstance(raw_files, list):
+                for f in raw_files:
+                    if isinstance(f, dict):
+                        files.append(f)
+        except Exception:
+            files = []
+
+        if not files:
+            try:
+                for f in ia_item.get_files():
+                    name = getattr(f, "name", None)
+                    if not name and isinstance(f, dict):
+                        name = f.get("name")
+                    if not name:
+                        continue
+                    files.append(
+                        {
+                            "name": str(name),
+                            "size": getattr(f, "size", None),
+                            "format": getattr(f, "format", None),
+                            "source": getattr(f, "source", None),
+                        }
+                    )
+            except Exception:
+                files = []
+
+        if not files:
+            log("download-data: Internet Archive item has no files", file=sys.stderr)
+            return 1
+
+        # Prefer non-metadata files for the picker.
+        candidates = [f for f in files if not _is_ia_metadata_file(f)]
+        if not candidates:
+            candidates = list(files)
+
+        def _key(f: Dict[str, Any]) -> tuple[str, str]:
+            fmt = str(f.get("format") or "").strip().lower()
+            name = str(f.get("name") or "").strip().lower()
+            return (fmt, name)
+
+        candidates.sort(key=_key)
+
+        title = ""
+        try:
+            title = str(get_field(item, "title") or "").strip()
+        except Exception:
+            title = ""
+
+        table_title = f"Internet Archive: {title}".strip().rstrip(":")
+        if not title:
+            table_title = f"Internet Archive: {identifier}".strip().rstrip(":")
+
+        table = ResultTable(table_title).set_preserve_order(True)
+        table.set_table("internetarchive.formats")
+        # Selecting a row should expand to `download-file <direct-url>`.
+        table.set_source_command("download-file", [])
+
+        rows: List[Dict[str, Any]] = []
+        for f in candidates:
+            name = str(f.get("name") or "").strip()
+            if not name:
+                continue
+
+            fmt = str(f.get("format") or "").strip()
+            src = str(f.get("source") or "").strip()
+
+            size_val: Any = f.get("size")
+            try:
+                size_val = int(size_val) if size_val not in (None, "") else ""
+            except Exception:
+                # Keep as-is; ResultTable will stringify.
+                pass
+
+            direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"
+
+            row_item: Dict[str, Any] = {
+                "table": "internetarchive",
+                "title": fmt or name,
+                "path": direct_url,
+                "url": direct_url,
+                "columns": [
+                    ("Format", fmt),
+                    ("Name", name),
+                    ("Size", size_val),
+                    ("Source", src),
+                ],
+                # Used by @N expansion: download-file <direct-url>
+                "_selection_args": [direct_url],
+                "full_metadata": {
+                    "identifier": identifier,
+                    "name": name,
+                    "format": fmt,
+                    "source": src,
+                    "size": f.get("size"),
+                },
+            }
+
+            rows.append(row_item)
+            table.add_result(row_item)
+
+        if not rows:
+            log("download-data: no downloadable files found for this item", file=sys.stderr)
+            return 1
+
+        try:
+            pipeline_context.set_last_result_table(table, rows, subject=item)
+            pipeline_context.set_current_stage_table(table)
+        except Exception as exc:
+            debug(f"[download-data] Failed to register result table: {exc}")
+
+        return 0
+
+
+CMDLET = Download_Data()
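Note: the selection contract in miniature (identifier and filename are
examples). Each row carries _selection_args so that "@N" expands to
"download-file <direct-url>":

    from urllib.parse import quote

    identifier = "some-ia-item"   # example Internet Archive identifier
    name = "track01.flac"         # example file within the item

    direct_url = f"https://archive.org/download/{identifier}/{quote(name, safe='')}"
    row = {"_selection_args": [direct_url]}   # what @N hands to download-file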
@@ -16,6 +16,7 @@ from . import _shared as sh
 
 Cmdlet = sh.Cmdlet
 CmdletArg = sh.CmdletArg
+SharedArgs = sh.SharedArgs
 create_pipe_object_result = sh.create_pipe_object_result
 get_field = sh.get_field
 get_pipe_object_hash = sh.get_pipe_object_hash
@@ -37,7 +38,6 @@ except ImportError:
     try:
         from metadata import (
             read_tags_from_file,
-            dedup_tags_by_namespace,
            merge_multiple_tag_lists,
        )
        HAS_METADATA_API = True
@@ -87,7 +87,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     delete_after = parsed.get("delete", False)
 
     output_override: Optional[Path] = None
-    output_arg = parsed.get("output")
+    output_arg = parsed.get("path")
     if output_arg:
         try:
             output_override = Path(str(output_arg)).expanduser()
@@ -928,10 +928,10 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
 CMDLET = Cmdlet(
     name="merge-file",
     summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
-    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
+    usage="merge-file [-delete] [-path <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
     arg=[
         CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
-        CmdletArg("-output", description="Override output file path."),
+        SharedArgs.PATH,
         CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
     ],
     detail=[
@@ -44,6 +44,7 @@ dependencies = [
     "pypdf>=3.0.0",
     "mutagen>=1.46.0",
     "cbor2>=4.0",
+    "zstandard>=0.23.0",
 
     # Image and media support
     "Pillow>=10.0.0",
@@ -15,6 +15,7 @@ internetarchive>=4.1.0
 pypdf>=3.0.0
 mutagen>=1.46.0
 cbor2>=4.0
+zstandard>=0.23.0
 
 # Image and media support
 Pillow>=10.0.0