cmdlet refactor

This commit is contained in:
2026-05-04 18:41:01 -07:00
parent 3ce339b3c1
commit 24f983473f
44 changed files with 1320 additions and 309 deletions
+3
View File
@@ -0,0 +1,3 @@
"""File action cmdlets package."""
__all__ = []
+3167
View File
File diff suppressed because it is too large Load Diff
+370
View File
@@ -0,0 +1,370 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
import sys
import re
from SYS.logger import log
from SYS import pipeline as ctx
from .. import _shared as sh
# Local aliases for the shared cmdlet helpers (imported above as `sh`) used in this module.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
QueryArg = sh.QueryArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
should_show_help = sh.should_show_help
class Add_Note(Cmdlet):
    """Cmdlet ``add-note``: attach named text notes to files held in a store.

    A note is a ``(name, text)`` pair written to the file identified by
    store + hash.  The note may come from ``-query "title:...,text:..."``
    and/or from a ``notes`` mapping carried by piped items.
    """

    # Substrings that mark a bare positional token as part of a note query,
    # so ``add-note title:foo text:bar`` works without an explicit ``-query``.
    DEFAULT_QUERY_HINTS = (
        "title:",
        "text:",
        "hash:",
        "caption:",
        "sub:",
        "subtitle:",
    )

    def __init__(self) -> None:
        """Declare the add-note arguments and register the cmdlet."""
        super().__init__(
            name="add-note",
            summary="Add file store note",
            usage=(
                'add-note (-query "title:<title>,text:<text>[,instance:<instance>][,hash:<sha256>]") [ -instance <store> | <piped> ]'
            ),
            # NOTE(review): a single empty-string alias looks unintentional; it is
            # kept as-is because alias handling lives outside this file.
            alias=[""],
            arg=[
                SharedArgs.INSTANCE,
                QueryArg(
                    "hash",
                    key="hash",
                    aliases=["sha256"],
                    type="string",
                    required=False,
                    handler=normalize_hash,
                    description=(
                        "(Optional) Specific file hash target, provided via -query as hash:<sha256>. When omitted, uses piped item hash."
                    ),
                    query_only=True,
                ),
                SharedArgs.QUERY,
            ],
            detail=[
                "- Writes a named note (title + text) to a file identified by store+hash.",
                "- The target comes from piped item(s), or from -instance <store> plus hash:<sha256> in -query.",
                "- Without title/text in -query, notes carried by piped items ('notes' or 'extra.notes') are written.",
            ],
            exec=self.run,
        )
        # Populate dynamic store choices for autocomplete
        try:
            SharedArgs.INSTANCE.choices = SharedArgs.get_store_choices(None)
        except Exception:
            pass
        self.register()

    @staticmethod
    def _commas_to_spaces_outside_quotes(text: str) -> str:
        """Replace every comma that is not inside quotes with a space.

        Single and double quotes both open a quoted region; inside one, a
        backslash escapes the following character.  Quotes and backslashes are
        kept in the output unchanged.
        """
        buf: List[str] = []
        quote: Optional[str] = None
        escaped = False
        for ch in str(text or ""):
            if escaped:
                # Character following a backslash is copied verbatim.
                buf.append(ch)
                escaped = False
                continue
            if ch == "\\" and quote is not None:
                buf.append(ch)
                escaped = True
                continue
            if ch in ('"', "'"):
                if quote is None:
                    quote = ch
                elif quote == ch:
                    quote = None
                buf.append(ch)
                continue
            if ch == "," and quote is None:
                buf.append(" ")
                continue
            buf.append(ch)
        return "".join(buf)

    @staticmethod
    def _parse_note_query(query: str) -> Tuple[Optional[str], Optional[str]]:
        """Parse note payload from -query.

        Expected:
            title:<title>,text:<text>

        Commas are treated as separators when not inside quotes.

        Returns:
            ``(title, text)``; either element is ``None`` when missing or blank.
        """
        raw = str(query or "").strip()
        if not raw:
            return None, None

        try:
            from SYS.cli_syntax import parse_query, get_field
        except Exception:
            parse_query = None  # type: ignore
            get_field = None  # type: ignore

        normalized = Add_Note._commas_to_spaces_outside_quotes(raw)

        if callable(parse_query) and callable(get_field):
            parsed = parse_query(normalized)
            name_s = str(get_field(parsed, "title") or "").strip()
            text_s = str(get_field(parsed, "text") or "").strip()
            return (name_s or None, text_s or None)

        # Fallback: best-effort regex.
        name_match = re.search(
            r"\btitle\s*:\s*([^,\s]+)",
            normalized,
            flags=re.IGNORECASE,
        )
        text_match = re.search(r"\btext\s*:\s*(.+)$", normalized, flags=re.IGNORECASE)
        note_name = name_match.group(1).strip() if name_match else ""
        note_text = text_match.group(1).strip() if text_match else ""
        return (note_name or None, note_text or None)

    @classmethod
    def _looks_like_note_query_token(cls, token: Any) -> bool:
        """Return True when *token* contains any of ``DEFAULT_QUERY_HINTS``."""
        text = str(token or "").strip().lower()
        if not text:
            return False
        return any(hint in text for hint in cls.DEFAULT_QUERY_HINTS)

    @classmethod
    def _default_query_args(cls, args: Sequence[str]) -> List[str]:
        """Insert an implicit ``-query`` in front of bare note-query tokens.

        If ``-query``/``--query`` is already present the arguments are returned
        unchanged.  Otherwise the first run of consecutive non-flag tokens that
        look like note-query fields is joined with spaces into one value and
        prefixed with ``-query``.
        """
        tokens: List[str] = list(args or [])
        lower_tokens = {str(tok).lower() for tok in tokens if tok is not None}
        if "-query" in lower_tokens or "--query" in lower_tokens:
            return tokens

        for idx, tok in enumerate(tokens):
            token_text = str(tok or "")
            if not token_text or token_text.startswith("-"):
                continue
            if not cls._looks_like_note_query_token(token_text):
                continue

            combined_parts = [token_text]
            end = idx + 1
            while end < len(tokens):
                next_text = str(tokens[end] or "")
                if not next_text or next_text.startswith("-"):
                    break
                if not cls._looks_like_note_query_token(next_text):
                    break
                combined_parts.append(next_text)
                end += 1

            tokens[idx:end] = [" ".join(combined_parts)]
            tokens.insert(idx, "-query")
            return tokens

        return tokens

    @staticmethod
    def _pipeline_notes(item: Dict[str, Any]) -> Dict[Any, Any]:
        """Return the notes mapping carried by a piped item, or an empty dict.

        ``item["notes"]`` is used when it is a dict; otherwise
        ``item["extra"]["notes"]`` is tried.
        """
        notes = item.get("notes")
        if not isinstance(notes, dict):
            extra = item.get("extra")
            notes = extra.get("notes") if isinstance(extra, dict) else None
        return notes if isinstance(notes, dict) else {}

    def _write_explicit_target(
        self,
        store_name: str,
        file_hash: str,
        note_name: str,
        note_text: str,
        passthrough: List[Any],
        config: Dict[str, Any],
    ) -> int:
        """Write one note to an explicit store+hash and re-emit piped items.

        Returns 0 on success and 1 on any failure (unknown store, store without
        note support, failed write, or an exception).
        """
        try:
            backend, _store_registry, lookup_error = sh.get_store_backend(
                config,
                store_name,
            )
            if backend is None:
                raise lookup_error or KeyError(store_name)
            if not bool(getattr(backend, "supports_note_association", False)):
                log(
                    f"[add_note] Error: Store '{store_name}' does not support notes",
                    file=sys.stderr,
                )
                return 1
            ok = bool(
                backend.set_note(
                    file_hash,
                    note_name,
                    note_text,
                    config=config,
                )
            )
            if not ok:
                log(
                    "[add_note] Warning: Note write reported failure",
                    file=sys.stderr,
                )
                return 1
            ctx.print_if_visible(
                f"✓ add-note: 1 item in '{store_name}'",
                file=sys.stderr,
            )
            log("[add_note] Updated 1/1 item(s)", file=sys.stderr)
            # Piped items are unrelated to the explicit target; pass them on.
            for res in passthrough:
                ctx.emit(res)
            return 0
        except Exception as exc:
            log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
            return 1

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Execute add-note.

        Modes:
          * explicit target (``hash:`` plus a store) with piped input: write the
            note once to that target and pass the piped items through;
          * otherwise: for every piped dict item, queue the explicit note (if
            any) plus the notes carried by the item, then write them in
            per-store batches.  Every piped item is re-emitted.

        Returns:
            0 on completion, 1 on argument errors or a failed explicit write.
        """
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        parsed = parse_cmdlet_args(self._default_query_args(args), self)
        store_override = parsed.get("instance")
        hash_override = normalize_hash(parsed.get("hash"))
        note_name, note_text = self._parse_note_query(str(parsed.get("query") or ""))
        note_name = str(note_name or "").strip()
        note_text = str(note_text or "").strip()
        # A missing title/text is allowed here: notes may instead come from the
        # piped items.  Explicit targeting (store+hash) still demands them below.
        has_explicit_note = bool(note_name and note_text)

        if hash_override and not store_override:
            log(
                "[add_note] Error: hash:<sha256> requires instance:<instance> in -query or -instance <store>",
                file=sys.stderr,
            )
            return 1

        explicit_target = bool(hash_override and store_override)
        results = normalize_result_input(result)

        if explicit_target and not has_explicit_note:
            log(
                "[add_note] Error: Explicit target (store+hash) requires -query with title/text",
                file=sys.stderr,
            )
            return 1

        if results and explicit_target:
            # Direct targeting mode: apply note once to the explicit target and
            # pass through any piped items unchanged.
            return self._write_explicit_target(
                str(store_override),
                str(hash_override),
                note_name,
                note_text,
                results,
                config,
            )

        if not results:
            if not explicit_target:
                log(
                    '[add_note] Error: Requires piped item(s) from add-file, or explicit targeting via store/hash (e.g., -query "instance:<instance> hash:<sha256> ...")',
                    file=sys.stderr,
                )
                return 1
            # Allow standalone use (no piped input) and enable piping the target forward.
            results = [{
                "store": str(store_override),
                "hash": hash_override,
            }]

        store_registry = None
        planned_ops = 0
        # Batch write plan: store -> [(hash, name, text), ...]
        note_ops: Dict[str, List[Tuple[str, str, str]]] = {}

        for res in results:
            if not isinstance(res, dict):
                ctx.emit(res)
                continue

            notes_to_write: List[Tuple[str, str]] = []

            # 1. Explicit arguments always take precedence.
            if has_explicit_note:
                notes_to_write.append((note_name, note_text))

            # 2. Notes carried by the item itself, e.g.
            #    {'notes': {'lyric': '...', 'sub': '...'}} or under 'extra'.
            for key, value in self._pipeline_notes(res).items():
                name = str(key)
                # A pipeline note queued after the explicit note under the same
                # name would be written last and override it; skip it so the
                # explicit note really does take precedence.
                if has_explicit_note and name.strip() == note_name:
                    continue
                if value and str(value).strip():
                    notes_to_write.append((name, str(value)))

            if not notes_to_write:
                # Pass through items that have nothing to add
                ctx.emit(res)
                continue

            store_name, resolved_hash = sh.resolve_item_store_hash(
                res,
                override_store=str(store_override) if store_override else None,
                override_hash=str(hash_override) if hash_override else None,
                path_fields=("path",),
            )

            if not store_name:
                log(
                    "[add_note] Error: Missing -instance and item has no store field",
                    file=sys.stderr,
                )
                # Keep the item in the pipeline, like every other skip path.
                ctx.emit(res)
                continue

            if not resolved_hash:
                log(
                    "[add_note] Warning: Item missing usable hash; skipping",
                    file=sys.stderr,
                )
                ctx.emit(res)
                continue

            # Queue operations
            queue = note_ops.setdefault(store_name, [])
            for n_name, n_text in notes_to_write:
                queue.append((resolved_hash, n_name, n_text))
                planned_ops += 1

            ctx.emit(res)

        # Execute batch operations
        def _on_store_error(store_name: str, exc: Exception) -> None:
            log(f"[add_note] Store access failed '{store_name}': {exc}", file=sys.stderr)

        def _on_unsupported_store(store_name: str) -> None:
            log(f"[add_note] Store '{store_name}' does not support notes", file=sys.stderr)

        def _on_item_error(store_name: str, hash_value: str, note_name_value: str, exc: Exception) -> None:
            log(f"[add_note] Write failed {store_name}:{hash_value} ({note_name_value}): {exc}", file=sys.stderr)

        store_registry, success_count = sh.run_store_note_batches(
            config,
            note_ops,
            store_registry=store_registry,
            on_store_error=_on_store_error,
            on_unsupported_store=_on_unsupported_store,
            on_item_error=_on_item_error,
        )

        if planned_ops > 0:
            msg = f"✓ add-note: Updated {success_count}/{planned_ops} notes across {len(note_ops)} stores"
            ctx.print_if_visible(msg, file=sys.stderr)

        return 0
# Module-level instance; constructing it runs __init__, which calls self.register().
CMDLET = Add_Note()
File diff suppressed because it is too large Load Diff
+204
View File
@@ -0,0 +1,204 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Tuple
import sys
from SYS import pipeline as ctx
from .. import _shared as sh
from SYS.logger import log
from Store import Store
class Add_Url(sh.Cmdlet):
    """Cmdlet ``add-url``: attach URL(s) to files addressed by hash and store."""

    def __init__(self) -> None:
        """Declare the add-url arguments and register the cmdlet."""
        arguments = [
            sh.SharedArgs.QUERY,
            sh.SharedArgs.INSTANCE,
            sh.CmdletArg(
                "url",
                required=True,
                description="URL to associate",
            ),
        ]
        help_lines = [
            "- Associates URL with file identified by hash+store",
            "- Multiple url can be comma-separated",
        ]
        super().__init__(
            name="add-url",
            summary="Associate a URL with a file",
            usage="@1 | add-url <url>",
            arg=arguments,
            detail=help_lines,
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Associate the given URL(s) with the targeted file(s).

        With piped items the URLs are added per store in batches and every
        pass-through item is re-emitted with its ``url`` field merged.  Without
        piped items a single hash+store target is required.

        Returns:
            0 on success, 1 on invalid arguments or any raised error.
        """
        parsed = sh.parse_cmdlet_args(args, self)

        # `SharedArgs.QUERY` is positional in the shared parser, so a bare
        # `add-url <url>` can land in `query`.  When `url` is missing and
        # `query` is an http(s) URL, move it over.
        try:
            if not parsed.get("url"):
                positional = parsed.get("query")
                if isinstance(positional, str):
                    text = str(positional or "").strip()
                    if text.startswith(("http://", "https://")):
                        parsed["url"] = text
                        parsed.pop("query", None)
        except Exception:
            pass

        query_hash, query_ok = sh.require_single_hash_query(
            parsed.get("query"),
            "Error: -query must be of the form hash:<sha256>",
        )
        if not query_ok:
            return 1

        # Bulk input is common in pipelines; a list is treated as a batch.
        results: List[Any]
        if isinstance(result, list):
            results = result
        elif result is not None:
            results = [result]
        else:
            results = []

        if query_hash and len(results) > 1:
            log("Error: -query hash:<sha256> cannot be used with multiple piped items")
            return 1

        # Hash and store come from the arguments first, then from the result.
        file_hash = query_hash
        if not file_hash:
            file_hash = sh.get_field(result, "hash") if result is not None else None

        store_name = parsed.get("instance")
        if not store_name:
            store_name = sh.get_field(result, "store") if result is not None else None

        url_arg = parsed.get("url")
        if not url_arg:
            # No URL argument: try the first URL found on the piped result.
            try:
                found = sh.extract_url_from_result(result)
                if found:
                    first = found[0]
                    if isinstance(first, str) and first.strip():
                        url_arg = first.strip()
                        parsed["url"] = url_arg
            except Exception:
                pass

        # With piped items, hash/store are resolved per item further down.
        if not results:
            if not file_hash:
                log(
                    'Error: No file hash provided (pipe an item or use -query "hash:<sha256>")'
                )
                return 1
            if not store_name:
                log("Error: No store name provided")
                return 1

        if not url_arg:
            log("Error: No URL provided")
            return 1

        # Normalize hash (single-item mode)
        if not results and file_hash:
            file_hash = sh.normalize_hash(file_hash)
            if not file_hash:
                log("Error: Invalid hash format")
                return 1

        # Parse url (comma-separated)
        urls = []
        for piece in str(url_arg).split(","):
            cleaned = piece.strip()
            if cleaned:
                urls.append(cleaned)
        if not urls:
            log("Error: No valid url provided")
            return 1

        try:
            storage = Store(config)
            store_override = parsed.get("instance")

            if results:
                def _warn(message: str) -> None:
                    ctx.print_if_visible(f"[add-url] Warning: {message}", file=sys.stderr)

                # Build batches per store.
                batch, pass_through = sh.collect_store_hash_value_batch(
                    results,
                    store_registry=storage,
                    value_resolver=lambda _item: list(urls),
                    override_hash=query_hash,
                    override_store=store_override,
                    on_warning=_warn,
                )

                # Keep only stores that exist and can hold URL associations.
                supported_batch: Dict[str, List[Tuple[str, Sequence[str]]]] = {}
                for store_text, store_pairs in batch.items():
                    backend, storage, _exc = sh.get_store_backend(
                        config,
                        store_text,
                        store_registry=storage,
                    )
                    if backend is None:
                        _warn(f"Store '{store_text}' not configured; skipping")
                    elif not bool(getattr(backend, "supports_url_association", False)):
                        _warn(f"Store '{store_text}' does not support URLs; skipping")
                    else:
                        supported_batch[store_text] = store_pairs

                # Execute per-instance batches.
                storage, batch_stats = sh.run_store_hash_value_batches(
                    config,
                    supported_batch,
                    bulk_method_name="add_url_bulk",
                    single_method_name="add_url",
                    store_registry=storage,
                )
                for store_text, item_count, _value_count in batch_stats:
                    ctx.print_if_visible(
                        f"✓ add-url: {len(urls)} url(s) for {item_count} item(s) in '{store_text}'",
                        file=sys.stderr,
                    )

                # Pass items through (with the url field updated for convenience).
                for item in pass_through:
                    combined = sh.merge_urls(sh.get_field(item, "url"), list(urls))
                    sh.set_item_urls(item, combined)
                    ctx.emit(item)
                return 0

            # Single-item mode
            backend, storage, _lookup_exc = sh.get_store_backend(
                config,
                str(store_name),
                store_registry=storage,
            )
            if backend is None:
                log(f"Error: Storage backend '{store_name}' not configured")
                return 1
            if not bool(getattr(backend, "supports_url_association", False)):
                log(f"Error: Store '{store_name}' does not support URL associations")
                return 1

            backend.add_url(str(file_hash), urls, config=config)
            ctx.print_if_visible(
                f"✓ add-url: {len(urls)} url(s) added",
                file=sys.stderr,
            )
            if result is not None:
                combined = sh.merge_urls(sh.get_field(result, "url"), list(urls))
                sh.set_item_urls(result, combined)
                ctx.emit(result)
            return 0
        except Exception as exc:
            log(f"Error adding URL: {exc}", file=sys.stderr)
            return 1
# Module-level instance; constructing it runs __init__, which calls self.register().
CMDLET = Add_Url()
+394
View File
@@ -0,0 +1,394 @@
"""Create a single .tar.zst archive from piped file selections."""
from __future__ import annotations
import re
import sys
import tarfile
import tempfile
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Sequence, Set
from urllib.parse import parse_qs, urlparse
from SYS.logger import log
from ProviderCore.registry import get_plugin
from SYS.item_accessors import get_http_url, get_sha256_hex, get_store_name
from SYS.utils import extract_hydrus_hash_from_url
from SYS import pipeline as ctx
from SYS.config import resolve_output_dir
from .. import _shared as sh
# Local aliases for the shared cmdlet helpers (imported above as `sh`) used in this module.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
coerce_to_pipe_object = sh.coerce_to_pipe_object
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
should_show_help = sh.should_show_help
def _extract_sha256_hex(item: Any) -> str:
    """Return the item's ``hash`` field via ``get_sha256_hex``, or ``""`` when absent."""
    digest = get_sha256_hex(item, "hash")
    return digest if digest else ""
def _extract_store_name(item: Any) -> str:
    """Return the item's ``store`` field via ``get_store_name``, or ``""`` when absent."""
    store = get_store_name(item, "store")
    return store if store else ""
def _extract_url(item: Any) -> str:
    """Return the HTTP URL found under ``url``/``target`` via ``get_http_url``, or ``""``."""
    link = get_http_url(item, "url", "target")
    return link if link else ""
def _extract_hash_from_hydrus_file_url(url: str) -> str:
    """Return the hash embedded in a Hydrus URL, or ``""`` when none is found.

    Delegates to the shared ``extract_hydrus_hash_from_url`` utility.
    """
    found = extract_hydrus_hash_from_url(url)
    return found if found else ""
def _hydrus_instance_names(config: Dict[str, Any]) -> Set[str]:
instances: Set[str] = set()
try:
store_cfg = config.get("store") if isinstance(config, dict) else None
if isinstance(store_cfg, dict):
hydrus_cfg = store_cfg.get("hydrusnetwork")
if isinstance(hydrus_cfg, dict):
instances = {
str(k).strip().lower()
for k in hydrus_cfg.keys() if str(k).strip()
}
except Exception:
instances = set()
return instances
def _maybe_download_hydrus_item(
    item: Any,
    config: Dict[str, Any],
    output_dir: Path,
) -> Path | None:
    """Download a Hydrus-backed item into *output_dir* (best-effort).

    Returns ``None`` when the ``hydrusnetwork`` plugin is unavailable, when no
    hash can be determined, or when neither the URL nor the store name points
    at Hydrus.  Intended to have no side effects beyond writing the local
    temp file.
    """
    provider = get_plugin("hydrusnetwork", config)
    if provider is None:
        return None

    store_name = _extract_store_name(item)
    lowered_store = store_name.lower()
    configured_instances = _hydrus_instance_names(config)
    store_suggests_hydrus = (
        lowered_store in {"hydrus", "hydrusnetwork"}
        or lowered_store in configured_instances
    )

    url = _extract_url(item)
    file_hash = _extract_sha256_hex(item)
    if not file_hash and url:
        file_hash = _extract_hash_from_hydrus_file_url(url)
    if not file_hash:
        return None

    # Only treat it as Hydrus when we have an explicit Hydrus file URL whose
    # hash matches, OR the store name suggests it.
    url_is_hydrus_file = False
    if url:
        try:
            url_path = urlparse(url).path or ""
            if url_path.endswith("/get_files/file"):
                url_is_hydrus_file = (
                    _extract_hash_from_hydrus_file_url(url) == file_hash
                )
        except Exception:
            url_is_hydrus_file = False

    if not url_is_hydrus_file and not store_suggests_hydrus:
        return None

    if url_is_hydrus_file:
        return provider.download_url(url, output_dir)
    return provider.download_hash_to_temp(
        file_hash,
        store_name=store_name or None,
        temp_root=output_dir,
    )
def _hydrus_temp_dir(config: Dict[str, Any]) -> Path:
    """Return the directory used for temporary Hydrus downloads.

    Uses ``config["temp"]`` when set, otherwise ``<system temp>/Medios-Macina``,
    with ``archive/hydrus`` appended.  The directory is not created here.
    """
    try:
        tmp_base = config.get("temp") if isinstance(config, dict) else None
    except Exception:
        tmp_base = None
    root = (
        Path(str(tmp_base)).expanduser() if tmp_base else
        (Path(tempfile.gettempdir()) / "Medios-Macina")
    )
    return root / "archive" / "hydrus"


def _resolve_existing_or_fetch_path(
    item: Any,
    config: Dict[str, Any],
) -> tuple[Path | None, Path | None]:
    """Return (path, temp_path) where temp_path is non-None only for files we downloaded.

    Resolution order:
      1. a local path found on the item (``path``/``target``) that exists;
      2. the file returned by the item's store backend for its hash — a local
         path, or an http(s) URL that is downloaded through Hydrus;
      3. a direct Hydrus download of the item itself.

    Every step is best-effort; ``(None, None)`` means nothing could be resolved.
    """
    # 1) Direct local path
    try:
        po = coerce_to_pipe_object(item, None)
        raw_path = (
            getattr(po, "path", None)
            or getattr(po, "target", None)
            or sh.get_pipe_object_path(item)
        )
        if raw_path:
            p = Path(str(raw_path)).expanduser()
            if p.exists():
                return p, None
    except Exception:
        pass

    # 2) Store-backed path
    file_hash = _extract_sha256_hex(item)
    store_name = _extract_store_name(item)
    if file_hash and store_name:
        try:
            backend, _store_registry, _exc = sh.get_store_backend(config, store_name)
            if backend is None:
                return None, None
            src = backend.get_file(file_hash)
            if isinstance(src, Path):
                if src.exists():
                    return src, None
            elif isinstance(src, str) and src.strip():
                cand = Path(src).expanduser()
                if cand.exists():
                    return cand, None
                # If the backend returns a URL (HydrusNetwork), download it.
                if src.strip().lower().startswith(("http://", "https://")):
                    downloaded = _maybe_download_hydrus_item(
                        {
                            "hash": file_hash,
                            "store": store_name,
                            "url": src.strip(),
                        },
                        config,
                        _hydrus_temp_dir(config),
                    )
                    if downloaded is not None:
                        return downloaded, downloaded
        except Exception:
            pass

    # 3) Hydrus-backed items without backend.get_file path.
    # Guarded like the steps above so one failing download does not abort the
    # whole batch for the caller.
    try:
        downloaded = _maybe_download_hydrus_item(item, config, _hydrus_temp_dir(config))
    except Exception as exc:
        log(f"Hydrus download failed: {exc}", file=sys.stderr)
        return None, None
    if downloaded is not None:
        return downloaded, downloaded
    return None, None
def _unique_arcname(name: str, seen: Set[str]) -> str:
base = str(name or "").replace("\\", "/")
base = base.lstrip("/")
if not base:
base = "file"
if base not in seen:
seen.add(base)
return base
stem = base
suffix = ""
if "/" not in base:
p = Path(base)
stem = p.stem
suffix = p.suffix
n = 2
while True:
candidate = f"{stem} ({n}){suffix}" if stem else f"file ({n}){suffix}"
if candidate not in seen:
seen.add(candidate)
return candidate
n += 1
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Archive all piped files into one ``.tar.zst`` and emit it downstream.

    The compression level comes from ``-level`` (default 11, clamped to 1-22).
    Items are resolved to local paths (downloading Hydrus-backed items to temp
    files when needed), de-duplicated, and written to a timestamped archive in
    the configured output directory.

    Returns:
        0 when an archive with at least one member was produced and emitted,
        1 otherwise.  A failed or empty archive is removed again.
    """
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    parsed = parse_cmdlet_args(args, CMDLET)
    level_raw = parsed.get("level")
    try:
        level = int(level_raw) if level_raw is not None else 11
    except Exception:
        level = 11
    level = max(1, min(22, level))

    # Output destination is controlled by the shared -path behavior in the pipeline runner.
    # This cmdlet always creates the archive in the configured output directory and emits it.
    # Collect piped items; archive-file is a batch command (single output).
    items: List[Any] = sh.normalize_result_items(
        result,
        include_falsey_single=True,
    )
    if not items:
        log("No piped items provided to archive-file", file=sys.stderr)
        return 1

    def _discard(path: Path) -> None:
        """Best-effort removal of an unusable archive file."""
        try:
            path.unlink()
        except OSError:
            pass

    temp_downloads: List[Path] = []
    try:
        paths: List[Path] = []
        for it in items:
            p, tmp = _resolve_existing_or_fetch_path(it, config)
            if p is None:
                continue
            paths.append(p)
            if tmp is not None:
                temp_downloads.append(tmp)

        # Keep stable order, remove duplicates.
        uniq: List[Path] = []
        seen_paths: Set[str] = set()
        for p in paths:
            key = str(p.resolve()) if p.exists() else str(p)
            if key in seen_paths:
                continue
            seen_paths.add(key)
            uniq.append(p)
        paths = uniq

        if not paths:
            log("No existing file paths found in piped items", file=sys.stderr)
            return 1

        out_dir = resolve_output_dir(config)
        try:
            out_dir.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass

        stamp = time.strftime("%Y%m%d_%H%M%S")
        out_path = out_dir / f"archive_{stamp}.tar.zst"
        try:
            out_path = sh._unique_destination_path(
                out_path
            )  # type: ignore[attr-defined]
        except Exception:
            pass

        try:
            out_path.parent.mkdir(parents=True, exist_ok=True)
        except Exception as exc:
            log(
                f"Failed to create output directory: {out_path.parent} ({exc})",
                file=sys.stderr,
            )
            return 1

        # Import zstandard lazily so the rest of the CLI still runs without it.
        try:
            import zstandard as zstd  # type: ignore
        except Exception:
            log(
                "Missing dependency: zstandard (pip install zstandard)",
                file=sys.stderr,
            )
            return 1

        # Write tar stream into zstd stream.
        archived: List[Path] = []
        try:
            with open(out_path, "wb") as out_handle:
                cctx = zstd.ZstdCompressor(level=level)
                with cctx.stream_writer(out_handle) as compressor:
                    with tarfile.open(fileobj=compressor,
                                      mode="w|",
                                      format=tarfile.PAX_FORMAT) as tf:
                        seen_names: Set[str] = set()
                        for p in paths:
                            arcname = _unique_arcname(p.name, seen_names)
                            # For directories, tarfile will include contents when recursive=True.
                            try:
                                tf.add(str(p), arcname=arcname, recursive=True)
                            except Exception as exc:
                                log(
                                    f"Failed to add to archive: {p} ({exc})",
                                    file=sys.stderr,
                                )
                            else:
                                archived.append(p)
        except Exception as exc:
            log(f"Archive creation failed: {exc}", file=sys.stderr)
            # Do not leave a truncated archive behind in the output directory.
            _discard(out_path)
            return 1

        if not archived:
            # Every member failed to be added; an empty archive is not a result.
            log("No files could be added to the archive", file=sys.stderr)
            _discard(out_path)
            return 1

        # Emit a single artifact downstream.
        hash_value = None
        try:
            from SYS.utils import sha256_file

            hash_value = sha256_file(out_path)
        except Exception:
            hash_value = None

        pipe_obj = create_pipe_object_result(
            source="archive",
            identifier=out_path.stem,
            file_path=str(out_path),
            cmdlet_name="archive-file",
            title=out_path.name,
            hash_value=hash_value,
            is_temp=True,
            store="PATH",
            extra={
                "target": str(out_path),
                "archive_format": "tar.zst",
                "compression": "zstd",
                "level": level,
                # Report only the sources that actually made it into the archive.
                "source_count": len(archived),
                "source_paths": [str(p) for p in archived],
            },
        )
        ctx.emit(pipe_obj)
        return 0
    finally:
        # Best-effort cleanup of any temp Hydrus downloads we created.
        for tmp in temp_downloads:
            try:
                tmp.unlink(missing_ok=True)  # type: ignore[arg-type]
            except TypeError:
                # Path.unlink() without missing_ok support.
                try:
                    if tmp.exists():
                        tmp.unlink()
                except Exception:
                    pass
            except Exception:
                pass
# Cmdlet descriptor for archive-file: name, help text and accepted arguments.
CMDLET = Cmdlet(
name="archive-file",
summary="Archive piped files into a single .tar.zst.",
usage="@N | archive-file [-level <1-22>] [-path <path>]",
arg=[
CmdletArg(
"-level",
type="integer",
description="Zstandard compression level (default: 11)."
),
SharedArgs.PATH,
],
detail=[
"- Example: @1-5 | archive-file",
"- Default zstd level is 11.",
"- Emits one output item (the archive) for downstream piping.",
],
)
# Attach the handler defined above, then register the cmdlet.
CMDLET.exec = _run
CMDLET.register()
+306
View File
@@ -0,0 +1,306 @@
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
from pathlib import Path
import sys
import shutil
import subprocess
from SYS.logger import log, debug
from SYS.payload_builders import build_file_result_payload
from SYS.utils import sha256_file
from .. import _shared as sh
from SYS import pipeline as ctx
# Local aliases for the shared cmdlet helpers (imported above as `sh`) used in this module.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
QueryArg = sh.QueryArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_title_from_result = sh.extract_title_from_result
VIDEO_EXTS = {
"mp4",
"mkv",
"webm",
"mov",
"avi",
"flv",
"mpeg",
"mpg",
"m4v",
}
AUDIO_EXTS = {
"mp3",
"m4a",
"m4b",
"aac",
"flac",
"wav",
"ogg",
"opus",
"mka",
}
IMAGE_EXTS = {
"png",
"jpg",
"jpeg",
"webp",
"bmp",
"tif",
"tiff",
"gif",
}
DOC_EXTS = {
"pdf",
"mobi",
"epub",
"azw3",
"txt",
"rtf",
"html",
"htm",
"md",
"doc",
"docx",
}
def _detect_kind(ext: str) -> str:
e = ext.lower().lstrip(".")
if e in VIDEO_EXTS:
return "video"
if e in AUDIO_EXTS:
return "audio"
if e in IMAGE_EXTS:
return "image"
if e in DOC_EXTS:
return "doc"
return "unknown"
def _allowed(source_kind: str, target_kind: str, target_ext: str = "") -> bool:
if source_kind == target_kind:
return True
if source_kind == "video" and target_kind == "audio":
return True
if source_kind == "video" and target_kind == "image" and target_ext.lower().lstrip(".") == "gif":
return True
return False
def _ffmpeg_convert(
    input_path: Path,
    output_path: Path,
    target_kind: str,
    copy_metadata: bool,
) -> bool:
    """Convert *input_path* to *output_path* with ffmpeg.

    Args:
        input_path: Source media file.
        output_path: Destination; ffmpeg picks the format from its extension
            and overwrites an existing file (``-y``).
        target_kind: ``"audio"`` drops the video stream (``-vn``).
        copy_metadata: When true, copy global metadata from the input.

    Returns:
        True when ffmpeg exits with status 0; False when ffmpeg is missing,
        cannot be started, or reports an error (its stderr is logged).
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    cmd = [ffmpeg_path, "-y", "-i", str(input_path)]

    if target_kind == "audio":
        cmd.append("-vn")

    if copy_metadata:
        cmd.extend(["-map_metadata", "0"])

    cmd.append(str(output_path))

    debug(f"[convert-file] Running ffmpeg: {' '.join(cmd)}")
    try:
        proc = subprocess.run(
            cmd,
            # ffmpeg reads interactive commands from stdin by default; keep it
            # away from the CLI's own stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg output can contain bytes that are not valid in the locale
            # encoding (file names, tags); do not crash while decoding them.
            errors="replace",
        )
    except OSError as exc:
        log(f"ffmpeg error: {exc}", file=sys.stderr)
        return False
    if proc.returncode != 0:
        log(f"ffmpeg error: {proc.stderr}", file=sys.stderr)
        return False
    return True
def _doc_convert(input_path: Path, output_path: Path) -> bool:
    """Convert a document with pypandoc; the target format comes from *output_path*.

    PDF output requires the ``tectonic`` executable on PATH.

    Returns:
        True when the output file exists after conversion, False on any
        failure (missing pypandoc/tectonic, pandoc error, no output written).
    """
    try:
        import pypandoc  # type: ignore
    except Exception:
        log("pypandoc is required for document conversion; install pypandoc-binary", file=sys.stderr)
        return False

    target_fmt = output_path.suffix.lstrip(".").lower() or "pdf"

    # pandoc writer names are not always the file extension: plain text is
    # "plain", not "txt".  Translate the extensions this cmdlet accepts.
    pandoc_writers = {"txt": "plain", "md": "markdown", "htm": "html"}
    writer = pandoc_writers.get(target_fmt, target_fmt)

    extra_args = []

    if target_fmt == "pdf":
        tectonic_path = shutil.which("tectonic")
        if not tectonic_path:
            log(
                "tectonic is required for PDF output; install with `pip install tectonic`",
                file=sys.stderr,
            )
            return False
        extra_args = ["--pdf-engine=tectonic"]

    try:
        pypandoc.convert_file(
            str(input_path),
            to=writer,
            outputfile=str(output_path),
            extra_args=extra_args,
        )
    except OSError as exc:
        log(f"pandoc is missing or failed to run: {exc}", file=sys.stderr)
        return False
    except Exception as exc:
        log(f"pypandoc conversion failed: {exc}", file=sys.stderr)
        return False

    if not output_path.exists():
        log("pypandoc conversion did not produce an output file", file=sys.stderr)
        return False

    return True
# Cmdlet descriptor for convert-file: name, help text and accepted arguments.
# The handler is attached further down in this module.
CMDLET = Cmdlet(
name="convert-file",
summary="Convert files between media/container formats (video, audio, image, documents).",
usage="convert-file -to <format> [-path <file|dir>] [-delete] [-query format:<fmt>]",
arg=[
# Target format; declared with key "format" and query_only=False.
QueryArg("to", key="format", query_only=False, required=True,
description="Target format/extension (e.g., mp4, mp3, wav, jpg, pdf)."),
SharedArgs.PATH,
SharedArgs.QUERY,
SharedArgs.DELETE,
],
detail=[
"Allows video↔video, audio↔audio, image↔image, doc↔doc, video→audio, and video→gif conversions.",
"Disallows incompatible conversions (e.g., video→pdf).",
"Uses ffmpeg for media and pypandoc-binary (bundled pandoc) for document formats (mobi/epub→pdf/txt/etc); PDF output uses the tectonic LaTeX engine when available.",
],
)
def _resolve_output_path(input_path: Path, outdir: Optional[Path], target_ext: str) -> Path:
base = input_path.stem
directory = outdir if outdir is not None else input_path.parent
directory.mkdir(parents=True, exist_ok=True)
candidate = directory / f"{base}.{target_ext}"
if candidate.exists():
for i in range(1, 1000):
alt = directory / f"{base}_{i}.{target_ext}"
if not alt.exists():
candidate = alt
break
return candidate
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Convert every input file to the requested format and emit the results.

    Inputs are the piped items plus ``-path`` when it is not an existing
    directory; an existing directory given as ``-path`` is used as the output
    directory instead.  Each successful conversion emits a file payload and,
    with ``-delete``, removes the source file.

    Returns:
        0 when at least one file was converted, otherwise 1.
    """
    options = parse_cmdlet_args(args, CMDLET)

    requested = options.get("to") or options.get("format")
    if not requested:
        log("-to <format> is required", file=sys.stderr)
        return 1

    target_fmt = str(requested).lower().lstrip(".")
    target_kind = _detect_kind(target_fmt)
    if target_kind == "unknown":
        log(f"Unsupported target format: {target_fmt}", file=sys.stderr)
        return 1

    remove_source = bool(options.get("delete", False))
    pending = normalize_result_input(result)

    out_dir: Optional[Path] = None
    path_option = options.get("path")
    if path_option:
        try:
            given = Path(str(path_option)).expanduser()
            if given.exists() and given.is_dir():
                out_dir = given
            else:
                pending.append({"path": given})
        except Exception:
            pending.append({"path": path_option})

    if not pending:
        log("No input provided to convert-file", file=sys.stderr)
        return 1

    converted_count = 0
    for entry in pending:
        # Items may be dicts, objects exposing `.path`, or bare path values.
        if isinstance(entry, dict):
            raw = entry.get("path") or entry.get("target")
        elif hasattr(entry, "path"):
            raw = getattr(entry, "path")
        else:
            raw = entry

        source: Optional[Path]
        try:
            source = Path(str(raw)) if raw else None
        except Exception:
            source = None

        if source is None or not (source.exists() and source.is_file()):
            log("convert-file: input path missing or not found", file=sys.stderr)
            continue

        source_kind = _detect_kind(source.suffix.lower().lstrip("."))
        if not _allowed(source_kind, target_kind, target_fmt):
            log(
                f"Conversion from {source_kind or 'unknown'} to {target_kind} is not allowed",
                file=sys.stderr,
            )
            continue

        destination = _resolve_output_path(source, out_dir, target_fmt)

        if target_kind == "doc":
            ok = _doc_convert(source, destination)
        elif target_kind in {"video", "audio", "image"}:
            ok = _ffmpeg_convert(source, destination, target_kind, copy_metadata=True)
        else:
            log(f"No converter for target kind {target_kind}", file=sys.stderr)
            ok = False

        if not ok:
            continue

        try:
            digest = sha256_file(destination)
        except Exception:
            digest = None

        payload = build_file_result_payload(
            title=extract_title_from_result(entry) or destination.stem,
            path=str(destination),
            hash_value=digest,
            media_kind=target_kind,
            source_path=str(source),
        )
        ctx.emit(payload)

        if remove_source:
            try:
                source.unlink()
                log(f"Deleted source file: {source}", file=sys.stderr)
            except Exception as exc:
                log(f"Failed to delete source {source}: {exc}", file=sys.stderr)

        converted_count += 1

    return 0 if converted_count else 1
# Attach the handler defined above, then register the cmdlet.
CMDLET.exec = _run
CMDLET.register()
+545
View File
@@ -0,0 +1,545 @@
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence
import posixpath
import sys
from pathlib import Path
from SYS.logger import debug, log
from ProviderCore.registry import get_plugin
from Store import Store
from .. import _shared as sh
from SYS import pipeline as ctx
from SYS.result_table_helpers import add_row_columns
from SYS.result_table import Table, _format_size
from SYS.rich_display import stdout_console
class Delete_File(sh.Cmdlet):
"""Class-based delete-file cmdlet with self-registration."""
def __init__(self) -> None:
    """Declare the delete-file cmdlet (usage, arguments, help text) and register it."""
    conserve_arg = sh.CmdletArg(
        "conserve",
        description="Choose which copy to keep: 'local' or 'hydrus'.",
    )
    lib_root_arg = sh.CmdletArg(
        "lib-root",
        description="Path to local library root for database cleanup.",
    )
    reason_arg = sh.CmdletArg(
        "reason",
        description="Optional reason for deletion (free text).",
    )
    help_lines = [
        "Default removes both the local file and Hydrus file.",
        "Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.",
        "Database entries are automatically cleaned up for local files.",
        "Any remaining arguments are treated as the Hydrus reason text.",
    ]
    super().__init__(
        name="delete-file",
        summary="Delete a file locally and/or from Hydrus, including database entries.",
        usage='delete-file [-query "hash:<sha256>"] [-conserve <local|hydrus>] [-lib-root <path>] [reason]',
        alias=["del-file"],
        arg=[sh.SharedArgs.QUERY, conserve_arg, lib_root_arg, reason_arg],
        detail=help_lines,
        exec=self.run,
    )
    # Registration happens as part of construction.
    self.register()
def _process_single_item(
    self,
    item: Any,
    override_hash: str | None,
    conserve: str | None,
    lib_root: str | None,
    reason: str,
    config: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Process deletion for a single item.

    Deletion is attempted, in order, through: the originating plugin's
    ``delete_file`` (non-Hydrus items that carry a provider name), the
    configured store backend's ``delete_file`` (non-Hydrus stores), a direct
    unlink of a local path target, and finally the Hydrus provider's
    ``delete_hash``.

    Args:
        item: Result item; either a dict or an object read via ``sh.get_field``.
        override_hash: When set, it is normalized and used as the item's hash
            in place of the hash carried by the item.
        conserve: ``"local"`` skips the plugin/backend/local deletion steps,
            ``"hydrus"`` skips the Hydrus step, ``None`` attempts every
            applicable step.
        lib_root: Accepted for interface compatibility; not read by this method.
        reason: Free-text reason forwarded to the Hydrus delete call.
        config: Configuration passed to ``get_plugin`` and ``Store``.

    Returns:
        Display rows (for the final Rich table). Returning an empty list
        indicates no delete occurred.
    """
    # Handle item as either dict or object
    if isinstance(item, dict):
        hash_hex_raw = item.get("hash_hex") or item.get("hash")
        target = item.get("target") or item.get("file_path") or item.get("path")
        title_val = item.get("title") or item.get("name")
        store = item.get("store")
    else:
        hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
        target = (
            sh.get_field(item, "target")
            or sh.get_field(item, "file_path")
            or sh.get_field(item, "path")
        )
        title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
        store = sh.get_field(item, "store")

    def _get_ext_from_item() -> str:
        """Best-effort extension: item/extra ``ext`` first, then target or title suffix."""
        try:
            if isinstance(item, dict):
                getter = item.get
            else:
                getter = lambda key: sh.get_field(item, key)  # noqa: E731
            ext_val = getter("ext")
            if ext_val:
                return str(ext_val)
            extra = getter("extra")
            if isinstance(extra, dict) and extra.get("ext"):
                return str(extra.get("ext"))
        except Exception:
            pass
        # Fallback: infer from target path or title if it looks like a filename
        try:
            if isinstance(target, str) and target:
                suffix = Path(target).suffix
                if suffix:
                    return suffix.lstrip(".")
        except Exception:
            pass
        try:
            if title_val:
                suffix = Path(str(title_val)).suffix
                if suffix:
                    return suffix.lstrip(".")
        except Exception:
            pass
        return ""

    # Extract plugin/provider identity and full metadata for plugin-level dispatch
    provider_name = None
    full_metadata: Dict[str, Any] = {}
    if isinstance(item, dict):
        provider_name = item.get("provider") or item.get("table")
        raw_meta = item.get("full_metadata") or item.get("metadata")
        if isinstance(raw_meta, dict):
            full_metadata = raw_meta
    else:
        try:
            provider_name = sh.get_field(item, "provider") or sh.get_field(item, "table")
        except Exception:
            pass
        try:
            raw_meta = sh.get_field(item, "full_metadata") or sh.get_field(item, "metadata")
            if isinstance(raw_meta, dict):
                full_metadata = raw_meta
        except Exception:
            pass
    provider_name = str(provider_name or "").strip().lower() or None

    store_lower = str(store).lower() if store else ""
    hydrus_provider = get_plugin("hydrusnetwork", config)

    # Resolve the store backend up front so the Hydrus check below can inspect it.
    # (This previously tested an undefined name, which raised NameError inside the
    # try and left ``backend`` permanently None.)
    backend = None
    try:
        if store:
            registry = Store(config)
            if registry.is_available(str(store)):
                backend = registry[str(store)]
    except Exception:
        backend = None

    # Determine whether the store backend is HydrusNetwork.
    # IMPORTANT: Hydrus instances are named by the user (e.g. 'home', 'rpi'),
    # so checking only the store name is unreliable.
    is_hydrus_store = False
    try:
        if hydrus_provider is not None and backend is not None:
            is_hydrus_store = bool(hydrus_provider.is_backend(backend, str(store or "")))
    except Exception:
        is_hydrus_store = False

    # Backwards-compatible fallback heuristic (older items might only carry a name).
    if (not is_hydrus_store) and hydrus_provider is not None and bool(store_lower):
        try:
            is_hydrus_store = bool(hydrus_provider.is_store_name(store_lower))
        except Exception:
            is_hydrus_store = False

    store_label = str(store) if store else "default"
    hydrus_prefix = f"[hydrusnetwork:{store_label}]"

    # For Hydrus files, the target IS the hash
    if is_hydrus_store and not hash_hex_raw:
        hash_hex_raw = target

    hash_hex = (
        sh.normalize_hash(override_hash)
        if override_hash else sh.normalize_hash(hash_hex_raw)
    )

    local_deleted = False
    _target_str = str(target).strip().lower() if isinstance(target, str) else ""
    # A target counts as local only when it is a non-empty string that is not a URL.
    local_target = (
        isinstance(target, str) and target.strip()
        and not _target_str.startswith(("http://", "https://", "ftp://", "ftps://"))
    )
    deleted_rows: List[Dict[str, Any]] = []

    # --- Plugin-level delete dispatch ---
    # When the item originates from a plugin (e.g. FTP), and that plugin exposes
    # a delete_file() method, delegate to it instead of attempting a local unlink.
    if conserve != "local" and provider_name and not is_hydrus_store:
        try:
            candidate_plugin = get_plugin(provider_name, config)
            plugin_deleter = (
                getattr(candidate_plugin, "delete_file", None) if candidate_plugin else None
            )
            if callable(plugin_deleter):
                # Prefer ftp_path from full_metadata; fall back to the path/url field
                remote = (
                    full_metadata.get("ftp_path")
                    or full_metadata.get("selection_url")
                    or full_metadata.get("ftp_url")
                    or (str(target).strip() if isinstance(target, str) else "")
                )
                instance_hint = full_metadata.get("instance") or None
                if remote:
                    plugin_ok = bool(plugin_deleter(remote, instance=instance_hint))
                    if plugin_ok:
                        local_deleted = True
                        size_hint = (
                            full_metadata.get("size")
                            or (item.get("size_bytes") if isinstance(item, dict) else None)
                            or sh.get_field(item, "size_bytes")
                        )
                        deleted_rows.append(
                            {
                                "title": (
                                    str(title_val).strip()
                                    if title_val
                                    else posixpath.basename(str(remote).rstrip("/"))
                                ),
                                "store": instance_hint or provider_name,
                                "hash": hash_hex or "",
                                "size_bytes": size_hint,
                                "ext": _get_ext_from_item(),
                            }
                        )
                        return deleted_rows
        except Exception as exc:
            # Best-effort: fall through to the other deletion strategies.
            debug(f"Plugin delete via '{provider_name}' failed: {exc}", file=sys.stderr)

    # If this item references a configured non-Hydrus store backend, prefer deleting
    # via the backend API. This supports store items where `path`/`target` is the hash.
    if conserve != "local" and store and (not is_hydrus_store):
        try:
            # Re-use an already resolved backend when available.
            if backend is None:
                registry = Store(config)
                if registry.is_available(str(store)):
                    backend = registry[str(store)]

            if backend is not None:
                # Prefer hash when available.
                hash_candidate = sh.normalize_hash(hash_hex_raw) if hash_hex_raw else None
                if not hash_candidate and isinstance(target, str):
                    hash_candidate = sh.normalize_hash(target)

                resolved_path = None
                try:
                    if hash_candidate and hasattr(backend, "get_file"):
                        candidate_path = backend.get_file(hash_candidate)
                        resolved_path = (
                            candidate_path if isinstance(candidate_path, Path) else None
                        )
                except Exception:
                    resolved_path = None

                identifier = hash_candidate or (
                    str(target).strip() if isinstance(target, str) else ""
                )
                if identifier:
                    deleter = getattr(backend, "delete_file", None)
                    if callable(deleter) and bool(deleter(identifier)):
                        local_deleted = True

                        backend_size = None
                        try:
                            if isinstance(resolved_path, Path) and resolved_path.exists():
                                backend_size = int(resolved_path.stat().st_size)
                        except Exception:
                            backend_size = None

                        deleted_rows.append(
                            {
                                "title": (
                                    str(title_val).strip() if title_val else (
                                        resolved_path.name if resolved_path else identifier
                                    )
                                ),
                                "store": store_label,
                                "hash": hash_candidate or (hash_hex or ""),
                                "size_bytes": backend_size,
                                "ext": _get_ext_from_item() or (
                                    resolved_path.suffix.lstrip(".") if resolved_path else ""
                                ),
                            }
                        )

                        # Best-effort remove sidecars if we know the resolved path.
                        try:
                            if isinstance(resolved_path, Path):
                                for sidecar in (
                                    resolved_path.with_suffix(".tag"),
                                    resolved_path.with_suffix(".metadata"),
                                    resolved_path.with_suffix(".notes"),
                                ):
                                    try:
                                        if sidecar.exists() and sidecar.is_file():
                                            sidecar.unlink()
                                    except Exception:
                                        pass
                        except Exception:
                            pass

                        # Skip legacy local-path deletion below.
                        local_target = False
        except Exception as exc:
            # Best-effort: fall through to the legacy local-path deletion.
            debug(f"Backend delete via store '{store_label}' failed: {exc}", file=sys.stderr)

    if conserve != "local" and local_target:
        path = Path(str(target))
        local_size = None
        try:
            if path.exists() and path.is_file():
                local_size = int(path.stat().st_size)
        except Exception:
            local_size = None

        # Delete the local file directly
        try:
            if path.exists() and path.is_file():
                path.unlink()
                local_deleted = True
                deleted_rows.append(
                    {
                        "title": str(title_val).strip() if title_val else path.name,
                        "store": store_label,
                        "hash": hash_hex or sh.normalize_hash(path.stem) or "",
                        "size_bytes": local_size,
                        "ext": _get_ext_from_item() or path.suffix.lstrip("."),
                    }
                )
        except Exception as exc:
            log(f"Local delete failed: {exc}", file=sys.stderr)

        # Remove common sidecars regardless of file removal success
        for sidecar in (
            path.with_suffix(".tag"),
            path.with_suffix(".metadata"),
            path.with_suffix(".notes"),
        ):
            try:
                if sidecar.exists() and sidecar.is_file():
                    sidecar.unlink()
            except Exception:
                pass

    hydrus_deleted = False
    # If conserve is set to hydrus, definitely don't delete
    should_try_hydrus = is_hydrus_store and conserve != "hydrus"

    if should_try_hydrus and hash_hex:
        did_hydrus_delete = False
        try:
            if hydrus_provider is not None:
                did_hydrus_delete = bool(
                    hydrus_provider.delete_hash(
                        hash_hex,
                        store_name=str(store) if store else None,
                        reason=reason or None,
                    )
                )
        except Exception:
            did_hydrus_delete = False

        if did_hydrus_delete:
            hydrus_deleted = True
            title_str = str(title_val).strip() if title_val else ""
            if title_str:
                debug(
                    f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}",
                    file=sys.stderr,
                )
            else:
                debug(f"{hydrus_prefix} Deleted hash:{hash_hex}", file=sys.stderr)
        elif not local_deleted:
            # Nothing was deleted anywhere. (This branch previously tested an
            # undefined name and raised NameError instead of logging.)
            if store:
                log(f"Hydrus store unavailable for '{store}'", file=sys.stderr)
            else:
                log("Hydrus delete failed", file=sys.stderr)
            return []

    if hydrus_deleted and hash_hex:
        size_hint = None
        try:
            if isinstance(item, dict):
                size_hint = item.get("size_bytes") or item.get("size")
            else:
                size_hint = sh.get_field(item, "size_bytes") or sh.get_field(item, "size")
        except Exception:
            size_hint = None
        deleted_rows.append(
            {
                "title": str(title_val).strip() if title_val else "",
                "store": store_label,
                "hash": hash_hex,
                "size_bytes": size_hint,
                "ext": _get_ext_from_item(),
            }
        )

    if hydrus_deleted or local_deleted:
        return deleted_rows

    log("Selected result has neither Hydrus hash nor local file target")
    return []
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute delete-file command.

Reads ``-query``, ``-conserve`` and ``-lib-root`` from ``args``; tokens not
consumed by those options are joined into the free-text reason. Each item
normalized from ``result`` is passed to ``_process_single_item`` and the
rows it returns are shown in a "Deleted" table.

Returns 0 when help was shown or at least one item produced rows; returns 1
when the query is invalid, there are no items, or no item produced rows.
"""
if sh.should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
# Parse arguments
override_query: str | None = None
override_hash: str | None = None
conserve: str | None = None
lib_root: str | None = None
reason_tokens: list[str] = []
# Manual scan: an option is only recognized when a following token exists
# (the ``i + 1 < len(args)`` guard); otherwise the token is kept as reason text.
i = 0
while i < len(args):
token = args[i]
low = str(token).lower()
if low in {"-query",
"--query",
"query"} and i + 1 < len(args):
override_query = str(args[i + 1]).strip()
i += 2
continue
if low in {"-conserve",
"--conserve"} and i + 1 < len(args):
value = str(args[i + 1]).strip().lower()
if value in {"local",
"hydrus"}:
conserve = value
i += 2
continue
if low in {"-lib-root",
"--lib-root",
"lib-root"} and i + 1 < len(args):
lib_root = str(args[i + 1]).strip()
i += 2
continue
reason_tokens.append(token)
i += 1
# Validate the -query value; the helper returns the hash and a validity flag.
override_hash, query_valid = sh.require_single_hash_query(
override_query,
"Invalid -query value (expected hash:<sha256>)",
log_file=sys.stderr,
)
if not query_valid:
return 1
# Blank tokens are dropped before joining the reason text.
reason = " ".join(token for token in reason_tokens
if str(token).strip()).strip()
items = sh.normalize_result_items(result)
if not items:
log("No items to delete", file=sys.stderr)
return 1
success_count = 0
deleted_rows: List[Dict[str, Any]] = []
for item in items:
rows = self._process_single_item(
item,
override_hash,
conserve,
lib_root,
reason,
config
)
# An item counts as a success only when it produced at least one row.
if rows:
success_count += 1
deleted_rows.extend(rows)
if deleted_rows:
table = Table("Deleted")
table._interactive(True)._perseverance(True)
for row in deleted_rows:
add_row_columns(
table,
[
("Title", row.get("title", "")),
("Store", row.get("store", "")),
("Hash", row.get("hash", "")),
("Size", _format_size(row.get("size_bytes"), integer_only=False)),
("Ext", row.get("ext", "")),
],
)
# Display-only: print directly and do not affect selection/history.
try:
stdout_console().print()
stdout_console().print(table)
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
# Ensure no stale overlay/selection carries forward.
try:
ctx.set_last_result_items_only([])
ctx.set_current_stage_table(None)
except Exception:
pass
return 0 if success_count > 0 else 1
# Instantiate the cmdlet at import time; Delete_File.__init__ calls self.register().
Delete_File()
File diff suppressed because it is too large Load Diff
+491
View File
@@ -0,0 +1,491 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
from pathlib import Path
import os
import sys
import shutil
import subprocess
import tempfile
import threading
import time
import http.server
from urllib.parse import quote
import webbrowser
from urllib.parse import urljoin
from urllib.request import pathname2url
from SYS import pipeline as ctx
from .. import _shared as sh
from SYS.item_accessors import get_result_title
from SYS.logger import log, debug, debug_panel
from SYS.config import resolve_output_dir
from API.HTTP import _download_direct_file
from SYS.payload_builders import build_file_result_payload
class Get_File(sh.Cmdlet):
"""Export files to local path via hash+store."""
def __init__(self) -> None:
    """Declare the get-file cmdlet (usage, arguments, help text) and register it."""
    name_arg = sh.CmdletArg(
        "name",
        description="Output filename (default: from metadata title)",
    )
    accepted_args = [
        sh.SharedArgs.QUERY,
        sh.SharedArgs.INSTANCE,
        sh.SharedArgs.PATH,
        name_arg,
    ]
    help_lines = [
        "- Exports file from storage backend to local path",
        '- Uses selected item\'s hash, or -query "hash:<sha256>"',
        "- Preserves file extension and metadata",
    ]
    super().__init__(
        name="get-file",
        summary="Export file to local path",
        usage="@1 | get-file -path ./output",
        arg=accepted_args,
        detail=help_lines,
        exec=self.run,
    )
    # Registration happens as part of construction.
    self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Export file via hash+store backend.

The hash comes from ``-query`` or the piped ``result``; the store comes from
the parsed ``instance`` argument or the result's ``store`` field. After the
backend and the file metadata are resolved, the file is either opened in the
browser (when the backend returns an http(s) URL and no ``path`` argument was
given) or copied/downloaded into the output directory, and a result payload
is emitted.

Returns 0 on success and 1 when the query, hash, store, backend, metadata or
source file cannot be resolved.
"""
parsed = sh.parse_cmdlet_args(args, self)
# Debug panels are wrapped in try/except so a display failure never aborts the export.
try:
debug_panel(
"get-file",
[
("result_type", type(result).__name__),
("parsed_args", parsed),
],
border_style="cyan",
)
except Exception:
pass
query_hash, query_valid = sh.require_single_hash_query(
parsed.get("query"),
"Error: -query must be of the form hash:<sha256>",
)
if not query_valid:
return 1
# Extract hash and store from result or args
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("instance") or sh.get_field(result, "store")
output_path = parsed.get("path")
output_name = parsed.get("name")
if not file_hash:
log(
'Error: No file hash provided (pipe an item or use -query "hash:<sha256>")'
)
return 1
if not store_name:
log("Error: No store name provided")
return 1
# Normalize hash
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
try:
debug_panel(
"get-file selection",
[
("hash", file_hash),
("instance", store_name),
("output_path", output_path or "<default>"),
("output_name", output_name or "<auto>"),
],
border_style="blue",
)
except Exception:
pass
# Only the backend element of the returned tuple is used here.
backend, _store_registry, _exc = sh.get_preferred_store_backend(
config,
store_name,
suppress_debug=True,
)
if backend is None:
log(f"Error: Storage backend '{store_name}' not found", file=sys.stderr)
return 1
# Get file metadata to determine name and extension
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"Error: File metadata not found for hash {file_hash}")
return 1
try:
debug_panel(
"get-file backend",
[
("backend", type(backend).__name__),
("title", metadata.get("title") or ""),
("ext", metadata.get("ext") or ""),
],
border_style="green",
)
except Exception:
pass
# Helper: first non-blank title found on the piped result, then on the metadata.
def resolve_display_title() -> str:
candidates = [
get_result_title(result, "title", "name", "filename"),
get_result_title(metadata, "title", "name", "filename"),
]
for candidate in candidates:
if candidate is None:
continue
text = str(candidate).strip()
if text:
return text
return ""
# Get file from backend (may return Path or URL string depending on backend).
# We pass url=True if no explicit path was provided, which hints the backend
# (specifically Hydrus) to return a browser-friendly URL instead of a local path.
want_url = (output_path is None)
source_path = backend.get_file(file_hash, url=want_url)
download_url = None
# A string result is either an http(s) URL or a filesystem path given as text.
if isinstance(source_path, str):
if source_path.startswith("http://") or source_path.startswith("https://"):
download_url = source_path
else:
source_path = Path(source_path)
try:
debug_panel(
"get-file fetch",
[
("url_hint", want_url),
("mode", "browser-url" if download_url else "local-path"),
("source", download_url or source_path or "<missing>"),
],
border_style="magenta",
)
except Exception:
pass
if download_url and output_path is None:
# Hydrus backend returns a URL; open it only when no output path
try:
webbrowser.open(download_url)
except Exception as exc:
log(f"Error opening browser: {exc}", file=sys.stderr)
else:
try:
debug_panel(
"get-file open",
[
("action", "browser-open"),
("url", download_url),
],
file=sys.stderr,
border_style="green",
)
except Exception:
pass
ctx.emit(
build_file_result_payload(
title=resolve_display_title() or "Opened",
hash_value=file_hash,
store=store_name,
url=download_url,
)
)
return 0
# Without a URL the backend must have produced an existing local path.
if download_url is None:
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash}")
return 1
# Otherwise: export/copy to output_dir.
if output_path:
output_dir = Path(output_path).expanduser()
else:
output_dir = resolve_output_dir(config)
output_dir.mkdir(parents=True, exist_ok=True)
# Determine output filename (only when exporting)
if output_name:
filename = output_name
else:
title = (
(metadata.get("title") if isinstance(metadata,
dict) else None)
or resolve_display_title() or "export"
)
filename = self._sanitize_filename(title)
# Add extension if metadata has it
ext = metadata.get("ext")
if ext and not filename.endswith(ext):
if not ext.startswith("."):
ext = "." + ext
filename += ext
dest_path: Path
# URL sources are downloaded into output_dir; local sources are copied to a non-clobbering path.
if download_url:
downloaded = _download_direct_file(
download_url,
output_dir,
quiet=True,
suggested_filename=filename,
)
dest_path = downloaded.path
else:
dest_path = self._unique_path(output_dir / filename)
# Copy file to destination
shutil.copy2(source_path, dest_path)
try:
debug_panel(
"get-file export",
[
("mode", "download" if download_url else "copy"),
("destination", dest_path),
("filename", filename),
],
file=sys.stderr,
border_style="green",
)
except Exception:
pass
log(f"Exported: {dest_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit(
build_file_result_payload(
title=filename,
hash_value=file_hash,
store=store_name,
path=str(dest_path),
)
)
return 0
def _open_file_default(self, path: Path) -> None:
"""Open a local file in the OS default application.

Browser-based routes (localhost HTTP, image wrapper) are tried first where
applicable; otherwise the path is handed to ``os.startfile``, ``open`` or
``xdg-open`` depending on ``sys.platform``. Any exception is logged to
stderr rather than raised.
"""
try:
suffix = str(path.suffix or "").lower()
if sys.platform.startswith("win"):
# On Windows, file associations for common media types can point at
# editors (Paint/VS Code). Prefer opening a localhost URL.
if self._open_local_file_in_browser_via_http(path):
return
if suffix in {
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".tif",
".tiff",
".svg",
}:
# Use default web browser for images.
if self._open_image_in_default_browser(path):
return
# Fall back to the platform's own opener.
if sys.platform.startswith("win"):
os.startfile(str(path)) # type: ignore[attr-defined]
return
if sys.platform == "darwin":
subprocess.Popen(
["open",
str(path)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
return
# Any other platform: use xdg-open.
subprocess.Popen(
["xdg-open",
str(path)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
except Exception as exc:
log(f"Error opening file: {exc}", file=sys.stderr)
def _open_local_file_in_browser_via_http(self, file_path: Path) -> bool:
    """Serve a single local file via localhost HTTP and open it in the browser.

    This avoids Windows file-association issues (e.g., PNG -> Paint, HTML -> VS Code).
    The server is bound to 127.0.0.1 on an ephemeral port and is shut down after
    a timeout.

    Args:
        file_path: Local file to serve.

    Returns:
        True when ``webbrowser.open`` reports success; False when the path
        cannot be resolved or quoted, the server cannot be created, or the
        browser cannot be opened.
    """
    try:
        resolved = file_path.resolve()
        directory = resolved.parent
        filename = resolved.name
        # SimpleHTTPRequestHandler percent-decodes the request path and cuts it at
        # '?' / '#', so the path handed to it must always be the quoted form.
        quoted_path = "/" + quote(filename)
    except Exception:
        return False

    # Requests accepted for the one served file; anything else gets a 404.
    accepted_paths = {"/", "", "/" + filename, quoted_path}

    class OneFileHandler(http.server.SimpleHTTPRequestHandler):
        """Request handler that serves only ``filename`` from ``directory``."""

        def __init__(self, *handler_args, **handler_kwargs):
            super().__init__(
                *handler_args,
                directory=str(directory),
                **handler_kwargs
            )

        def log_message(self, format: str, *args) -> None:  # noqa: A003
            # Keep normal output clean.
            return

        def _route_to_file(self) -> bool:
            """Point the request at the served file, or answer 404 and return False."""
            if self.path in accepted_paths:
                # Always delegate with the quoted path so names containing
                # '#', '?' or '%' resolve to the intended file.
                self.path = quoted_path
                return True
            self.send_error(404)
            return False

        def do_GET(self) -> None:  # noqa: N802
            if self._route_to_file():
                super().do_GET()

        def do_HEAD(self) -> None:  # noqa: N802
            if self._route_to_file():
                super().do_HEAD()

    try:
        httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 0), OneFileHandler)
    except Exception:
        return False

    port = httpd.server_address[1]
    url = f"http://127.0.0.1:{port}{quoted_path}"

    # Run server in the background.
    server_thread = threading.Thread(
        target=httpd.serve_forever,
        kwargs={"poll_interval": 0.2},
        daemon=True,
    )
    server_thread.start()

    # Auto-shutdown after a timeout to avoid lingering servers.
    def shutdown_later() -> None:
        time.sleep(10 * 60)
        try:
            httpd.shutdown()
        except Exception:
            pass
        try:
            httpd.server_close()
        except Exception:
            pass

    threading.Thread(target=shutdown_later, daemon=True).start()

    try:
        debug(f"[get-file] Opening via localhost: {url}")
        return bool(webbrowser.open(url))
    except Exception:
        return False
def _open_image_in_default_browser(self, image_path: Path) -> bool:
    """Open an image file in the user's default web browser.

    We intentionally avoid opening the image path directly on Windows because
    file associations may point to editors/viewers (e.g., Paint). The localhost
    server route is tried first; only if that fails is a tiny HTML wrapper
    written to the temp directory and opened (HTML is typically associated
    with the default browser).

    Args:
        image_path: Local image file to display.

    Returns:
        True when a browser was opened through either route, otherwise False.
    """
    # Function-scope import: ``html`` is not among this module's top-level imports.
    import html

    try:
        resolved = image_path.resolve()
        image_url = urljoin("file:", pathname2url(str(resolved)))
    except Exception:
        return False

    # Prefer localhost server when possible (reliable on Windows). Trying it
    # first avoids writing a wrapper file that would never be used.
    if self._open_local_file_in_browser_via_http(image_path):
        return True

    # Create a stable wrapper filename to reduce temp-file spam.
    wrapper_path = Path(
        tempfile.gettempdir()
    ) / f"medeia-open-image-{resolved.stem}.html"

    # File names may contain characters such as '"', '<' or '&'; escape them so
    # they cannot break out of the attribute/element they are placed in.
    safe_name = html.escape(resolved.name, quote=True)
    safe_url = html.escape(image_url, quote=True)
    try:
        wrapper_path.write_text(
            "\n".join(
                [
                    "<!doctype html>",
                    '<meta charset="utf-8">',
                    f"<title>{safe_name}</title>",
                    "<style>html,body{margin:0;padding:0;background:#000}img{display:block;max-width:100vw;max-height:100vh;margin:auto}</style>",
                    f'<img src="{safe_url}" alt="{safe_name}">',
                ]
            ),
            encoding="utf-8",
        )
    except Exception:
        return False

    wrapper_url = wrapper_path.as_uri()
    try:
        return bool(webbrowser.open(wrapper_url))
    except Exception:
        return False
def _sanitize_filename(self, name: str) -> str:
"""Sanitize filename by removing invalid characters."""
allowed_chars = []
for ch in str(name):
if ch.isalnum() or ch in {"-",
"_",
" ",
"."}:
allowed_chars.append(ch)
else:
allowed_chars.append(" ")
# Collapse multiple spaces
sanitized = " ".join("".join(allowed_chars).split())
return sanitized or "export"
def _unique_path(self, path: Path) -> Path:
"""Generate unique path by adding (1), (2), etc. if file exists."""
if not path.exists():
return path
stem = path.stem
suffix = path.suffix
parent = path.parent
counter = 1
while True:
new_path = parent / f"{stem} ({counter}){suffix}"
if not new_path.exists():
return new_path
counter += 1
# Instantiate and register cmdlet (Get_File.__init__ calls self.register()).
# NOTE(review): the variable is named Add_File_Instance but holds a Get_File instance.
Add_File_Instance = Get_File()
+1091
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+506
View File
@@ -0,0 +1,506 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
from pathlib import Path
import sys
import subprocess
import shutil
import re
import time
from urllib.parse import urlparse
from SYS.logger import log, debug
from SYS.item_accessors import get_store_name
from SYS.utils import sha256_file
from .. import _shared as sh
# Module-level aliases for the shared cmdlet helpers used in this module.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
get_field = sh.get_field
from SYS import pipeline as ctx
# Cmdlet definition for trim-file: name, usage string, accepted arguments and help text.
# NOTE(review): no exec handler is passed here; presumably it is bound after _run
# is defined — confirm.
# NOTE(review): the detail text says tag values are inherited from the source, while
# _run carries a comment saying tags are not inherited — confirm which is intended.
CMDLET = Cmdlet(
name="trim-file",
summary="Trim a media file using ffmpeg.",
usage=
"trim-file [-path <path>] [-input <path-or-url>] -range <start-end> [-outdir <dir>] [-delete]",
arg=[
CmdletArg("-path",
description="Path to the file (optional if piped)."),
CmdletArg(
"-input",
description=
"Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL.",
),
CmdletArg(
"-range",
required=True,
description=
"Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s').",
),
CmdletArg(
"-outdir",
description=
"Output directory for the clip (defaults to source folder for local files; otherwise uses system temp).",
),
CmdletArg(
"-delete",
type="flag",
description="Delete the original file after trimming."
),
],
detail=[
"Creates a new file with 'clip_' prefix in the filename.",
"Adds the trim range to the title as: [1h3m-1h3m10s] - <title>.",
"Inherits tag values from the source file.",
"Adds a relationship to the source file (if hash is available).",
"Output can be piped to add-file.",
],
)
def _format_hms(total_seconds: float) -> str:
"""Format seconds as compact h/m/s (no colons), e.g. 1h3m10s, 3m5s, 2s."""
try:
total = int(round(float(total_seconds)))
except Exception:
total = 0
if total < 0:
total = 0
hours = total // 3600
minutes = (total % 3600) // 60
seconds = total % 60
parts: list[str] = []
if hours > 0:
parts.append(f"{hours}h")
if minutes > 0:
parts.append(f"{minutes}m")
if seconds > 0:
parts.append(f"{seconds}s")
# Ensure we always output something.
if not parts:
return "0s"
return "".join(parts)
def _is_url(value: str) -> bool:
try:
p = urlparse(str(value))
return bool(p.scheme and p.netloc)
except Exception:
return False
def _parse_time(time_str: str) -> float:
"""Convert time string into seconds.
Supports:
- HH:MM:SS(.sss)
- MM:SS(.sss)
- SS(.sss)
- 1h3m53s (also 1h3m, 3m53s, 53s)
"""
raw = str(time_str or "").strip()
if not raw:
raise ValueError("Empty time")
# h/m/s format (case-insensitive)
hms = re.fullmatch(
r"(?i)\s*(?:(?P<h>\d+(?:\.\d+)?)h)?(?:(?P<m>\d+(?:\.\d+)?)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
raw,
)
if hms and (hms.group("h") or hms.group("m") or hms.group("s")):
hours = float(hms.group("h") or 0)
minutes = float(hms.group("m") or 0)
seconds = float(hms.group("s") or 0)
total = hours * 3600 + minutes * 60 + seconds
return float(total)
# Colon-separated
parts = [p.strip() for p in raw.split(":")]
if len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
if len(parts) == 2:
return float(parts[0]) * 60 + float(parts[1])
if len(parts) == 1:
return float(parts[0])
raise ValueError(f"Invalid time format: {time_str}")
def _sanitize_filename(name: str, *, max_len: int = 140) -> str:
name = str(name or "").strip()
if not name:
return "clip"
# Windows-forbidden characters: <>:"/\\|?* plus control chars
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', "_", name)
name = re.sub(r"\s+", " ", name).strip()
name = name.rstrip(".")
if not name:
return "clip"
if len(name) > max_len:
name = name[:max_len].rstrip()
return name
def _extract_store_name(item: Any) -> Optional[str]:
    """Return what ``get_store_name`` finds for *item* under the ``store`` field."""
    store_name = get_store_name(item, "store")
    return store_name
def _persist_alt_relationship(
*,
config: Dict[str,
Any],
store_name: str,
alt_hash: str,
king_hash: str
) -> None:
"""Persist directional alt -> king relationship in the given backend."""
try:
store = Store(config)
backend: Any = store[str(store_name)]
except Exception:
return
alt_norm = str(alt_hash or "").strip().lower()
king_norm = str(king_hash or "").strip().lower()
if len(alt_norm) != 64 or len(king_norm) != 64 or alt_norm == king_norm:
return
# Hydrus-like backend
try:
client = getattr(backend, "_client", None)
if client is not None and hasattr(client, "set_relationship"):
client.set_relationship(alt_norm, king_norm, "alt")
except Exception:
return
def _trim_media(
    input_source: str,
    output_path: Path,
    start_seconds: float,
    duration_seconds: float,
) -> bool:
    """Cut a clip out of *input_source* with ffmpeg using stream copy.

    input_source may be a local path or a URL.

    Returns:
        True when ffmpeg exits with status 0. False when ffmpeg is not on PATH,
        the duration is not positive, ffmpeg reports an error, or anything
        raises while building or running the command.
    """
    executable = shutil.which("ffmpeg")
    if not executable:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    try:
        if duration_seconds <= 0:
            log(f"Invalid range: duration <= 0 ({duration_seconds})", file=sys.stderr)
            return False

        seek_from = str(float(start_seconds))
        source_arg = str(input_source)
        clip_length = str(float(duration_seconds))
        # "-ss" is placed before "-i", and "-c copy" copies streams without re-encoding.
        command = [
            executable,
            "-y",
            "-ss", seek_from,
            "-i", source_arg,
            "-t", clip_length,
            "-c", "copy",
            "-map_metadata", "0",
            str(output_path),
        ]
        debug(f"Running ffmpeg: {' '.join(command)}")

        completed = subprocess.run(command, capture_output=True, text=True)
        succeeded = completed.returncode == 0
        if not succeeded:
            log(f"ffmpeg error: {completed.stderr}", file=sys.stderr)
        return succeeded
    except Exception as e:
        log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
        return False
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Trim a media file."""
# Parse arguments
parsed = parse_cmdlet_args(args, CMDLET)
range_arg = parsed.get("range")
if not range_arg or "-" not in range_arg:
log("Error: -range argument required (format: start-end)", file=sys.stderr)
return 1
start_str, end_str = [s.strip() for s in range_arg.split("-", 1)]
if not start_str or not end_str:
log("Error: -range must be start-end", file=sys.stderr)
return 1
try:
start_seconds = _parse_time(start_str)
end_seconds = _parse_time(end_str)
except Exception as exc:
log(f"Error parsing -range: {exc}", file=sys.stderr)
return 1
duration_seconds = end_seconds - start_seconds
if duration_seconds <= 0:
log(f"Invalid range: start {start_str} >= end {end_str}", file=sys.stderr)
return 1
delete_original = parsed.get("delete", False)
path_arg = parsed.get("path")
input_override = parsed.get("input")
outdir_arg = parsed.get("outdir")
# Collect inputs
inputs = normalize_result_input(result)
# If path arg provided, add it to inputs
if path_arg:
inputs.append({
"path": path_arg
})
if not inputs:
log("No input files provided.", file=sys.stderr)
return 1
success_count = 0
for item in inputs:
store_name = _extract_store_name(item)
# Resolve file path
file_path: Optional[str] = None
if isinstance(item, dict):
file_path = item.get("path") or item.get("target")
elif hasattr(item, "path"):
file_path = item.path
elif isinstance(item, str):
file_path = item
if not file_path and not input_override:
continue
media_source = str(input_override or file_path)
is_url = _is_url(media_source)
path_obj: Optional[Path] = None
if not is_url:
try:
path_obj = Path(str(media_source))
except Exception:
path_obj = None
if not path_obj or not path_obj.exists():
log(f"File not found: {media_source}", file=sys.stderr)
continue
# Determine output directory
output_dir: Path
if outdir_arg:
output_dir = Path(str(outdir_arg)).expanduser()
elif store_name:
from SYS.config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
elif path_obj is not None:
output_dir = path_obj.parent
else:
from SYS.config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
# Determine output filename
output_ext = ""
if path_obj is not None:
output_ext = path_obj.suffix
base_name = path_obj.stem
else:
# Prefer title from metadata if present
title = extract_title_from_result(item)
if title:
base_name = _sanitize_filename(str(title))
else:
base_name = time.strftime("%Y%m%d-%H%M%S")
if base_name.lower().startswith("clip_"):
base_name = base_name[5:] or base_name
try:
p = urlparse(str(media_source))
last = (p.path or "").split("/")[-1]
if last and "." in last:
output_ext = "." + last.split(".")[-1]
except Exception:
pass
if not output_ext or len(output_ext) > 8:
output_ext = ".mkv"
new_filename = f"clip_{base_name}{output_ext}"
output_path = output_dir / new_filename
# Avoid clobbering existing files
if output_path.exists():
stem = output_path.stem
suffix = output_path.suffix
for i in range(1, 1000):
candidate = output_dir / f"{stem}_{i}{suffix}"
if not candidate.exists():
output_path = candidate
break
# Trim
source_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Trimming {source_label} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(str(media_source), output_path, start_seconds, duration_seconds):
log(f"Created clip: {output_path}", file=sys.stderr)
success_count += 1
# Prepare result for pipeline
# 1. Get source hash for relationship
source_hash = None
if isinstance(item, dict):
source_hash = item.get("hash")
elif hasattr(item, "hash"):
source_hash = item.hash
if not source_hash:
if path_obj is not None:
try:
source_hash = sha256_file(path_obj)
except Exception:
pass
# 2. Get tag values
# Do not inherit tags from the source (per UX request).
new_tags: list[str] = []
# Copy URL(s) when present.
urls: list[str] = []
try:
urls = extract_url_from_result(item) or []
except Exception:
urls = []
try:
src_u = get_field(item, "source_url")
if isinstance(src_u, str) and src_u.strip():
if src_u.strip() not in urls:
urls.append(src_u.strip())
except Exception:
pass
# 3. Get title and modify it
title = extract_title_from_result(item)
if not title:
title = path_obj.stem if path_obj is not None else base_name
range_hms = f"{_format_hms(start_seconds)}-{_format_hms(end_seconds)}"
new_title = f"[{range_hms}] - {title}"
# 4. Calculate clip hash
clip_hash = None
try:
clip_hash = sha256_file(output_path)
except Exception:
pass
# If this was a store item, ingest the clip into the same store.
stored_instance: Optional[str] = None
stored_hash: Optional[str] = None
stored_path: Optional[str] = None
if store_name:
try:
backend, _store_registry, _exc = sh.get_store_backend(
config,
store_name,
)
if backend is not None:
stored_hash = backend.add_file(
Path(str(output_path)),
title=new_title,
tag=new_tags,
url=urls,
move=False,
)
stored_store = store_name
except Exception as exc:
log(
f"Failed to add clip to store '{store_name}': {exc}",
file=sys.stderr
)
# If we stored it, persist relationship alt -> king in that store.
if stored_store and stored_hash and source_hash:
_persist_alt_relationship(
config=config,
store_name=stored_store,
alt_hash=stored_hash,
king_hash=str(source_hash),
)
if stored_hash:
clip_hash = stored_hash
# 5. Construct result
result_dict = {
"path": stored_path or str(output_path),
"title": new_title,
"tag": new_tags,
"url": urls,
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"store": stored_store,
"relationships": {
# Clip is an ALT of the source; store semantics are directional alt -> king.
# Provide both keys so downstream (e.g. add-file) can persist relationships.
"king": [source_hash] if source_hash else [],
"alt": [clip_hash] if (source_hash and clip_hash) else [],
},
}
# Emit result
ctx.emit(result_dict)
# Delete original if requested
if delete_original:
try:
if path_obj is not None:
path_obj.unlink()
log(f"Deleted original file: {path_obj}", file=sys.stderr)
# Also try to delete sidecars?
# Maybe leave that to user or cleanup cmdlet
except Exception as e:
log(f"Failed to delete original: {e}", file=sys.stderr)
else:
failed_label = path_obj.name if path_obj is not None else str(media_source)
log(f"Failed to trim {failed_label}", file=sys.stderr)
return 0 if success_count > 0 else 1
# Register cmdlet (no legacy decorator).
# `_run` is assigned as CMDLET's exec handler first and register() is called
# second, so the handler is already attached when registration happens.
# NOTE(review): what register() does with the cmdlet is defined on the Cmdlet
# class outside this section — confirm there before reordering these lines.
CMDLET.exec = _run
CMDLET.register()