Files
Medios-Macina/cmdlet/delete_url.py

203 lines
7.2 KiB
Python
Raw Normal View History

2025-11-25 20:09:33 -08:00
from __future__ import annotations
2026-01-19 03:14:30 -08:00
from typing import Any, Dict, List, Sequence, Tuple
2025-12-01 01:10:16 -08:00
import sys
2025-11-25 20:09:33 -08:00
from SYS import pipeline as ctx
2026-03-25 22:39:30 -07:00
from . import _shared as sh
2026-01-19 06:24:09 -08:00
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
parse_cmdlet_args,
get_field,
normalize_hash,
2025-12-16 23:23:43 -08:00
)
2025-12-11 19:04:02 -08:00
from SYS.logger import log
from Store import Store
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
class Delete_Url(Cmdlet):
    """Delete URL associations from files via hash+store.

    Works in two modes:
      * batch mode  - one or more piped PipeObjects; hash/store resolved per item.
      * single mode - no piped item; file identified by -query "hash:<sha256>" + -store.
    """

    def __init__(self) -> None:
        # Declare the cmdlet's CLI surface and register it with the framework.
        super().__init__(
            name="delete-url",
            summary="Remove a URL association from a file",
            usage="@1 | delete-url <url>",
            arg=[
                SharedArgs.QUERY,
                SharedArgs.STORE,
                CmdletArg(
                    "url",
                    required=False,
                    description="URL to remove (optional when piping url rows)",
                ),
            ],
            detail=[
                "- Removes URL association from file identified by hash+store",
                "- Multiple url can be comma-separated",
            ],
            exec=self.run,
        )
        # NOTE(review): register() presumably adds this instance to a global
        # cmdlet registry at construction time — confirm against Cmdlet base.
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Delete URL from file via hash+store backend.

        Args:
            result: Piped input — a single PipeObject, a list of them, or None.
            args:   Raw cmdlet arguments, parsed via parse_cmdlet_args.
            config: Application configuration passed through to Store/backends.

        Returns:
            0 on success, 1 on any validation or backend error.
        """
        parsed = parse_cmdlet_args(args, self)

        # Validate the optional -query argument; helper returns (hash_or_None, ok).
        query_hash, query_valid = sh.require_single_hash_query(
            parsed.get("query"),
            "Error: -query must be of the form hash:<sha256>",
        )
        if not query_valid:
            return 1

        # Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
        results: List[Any] = (
            result if isinstance(result,
            list) else ([result] if result is not None else [])
        )
        # A single explicit hash cannot apply to several piped items at once.
        if query_hash and len(results) > 1:
            log("Error: -query hash:<sha256> cannot be used with multiple piped items")
            return 1

        # Extract hash and store from result or args
        file_hash = query_hash or (
            get_field(result,
            "hash") if result is not None else None
        )
        store_name = parsed.get("store") or (
            get_field(result,
            "store") if result is not None else None
        )

        url_arg = parsed.get("url")

        # If we have multiple piped items, we will resolve hash/store per item below.
        # With no piped input at all, both hash and store must come from the CLI.
        if not results:
            if not file_hash:
                log(
                    'Error: No file hash provided (pipe an item or use -query "hash:<sha256>")'
                )
                return 1
            if not store_name:
                log("Error: No store name provided")
                return 1

        # Normalize hash (single-item mode)
        if not results and file_hash:
            file_hash = normalize_hash(file_hash)
            if not file_hash:
                log("Error: Invalid hash format")
                return 1

        # Local import — presumably avoids a module-level import cycle; verify.
        from SYS.metadata import normalize_urls

        def _urls_from_arg(raw: Any) -> List[str]:
            # Turn the CLI -url value into a clean list of URL strings.
            if raw is None:
                return []
            # Support comma-separated input for backwards compatibility
            if isinstance(raw, str) and "," in raw:
                return [u.strip() for u in raw.split(",") if u.strip()]
            return [u.strip() for u in normalize_urls(raw) if str(u).strip()]
        urls_from_cli = _urls_from_arg(url_arg)

        # Get backend and delete url
        try:
            storage = Store(config)
            store_override = parsed.get("store")
            if results:
                # ---- Batch mode: one or more piped items ----
                def _warn(message: str) -> None:
                    # Warnings go to stderr so they don't pollute piped stdout.
                    ctx.print_if_visible(f"[delete-url] Warning: {message}", file=sys.stderr)

                def _resolve_item_urls(item: Any) -> List[str]:
                    # CLI-supplied urls take priority; otherwise fall back to the
                    # item's own url/source_url fields (piped url rows).
                    item_urls = list(urls_from_cli)
                    if not item_urls:
                        item_urls = [
                            u.strip() for u in normalize_urls(
                                get_field(item, "url") or get_field(item, "source_url")
                            ) if str(u).strip()
                        ]
                    if not item_urls:
                        _warn("Item has no url field; skipping")
                    return item_urls
                # Group items by store/hash; pass_through receives items to re-emit.
                batch, pass_through = sh.collect_store_hash_value_batch(
                    results,
                    store_registry=storage,
                    value_resolver=_resolve_item_urls,
                    override_hash=query_hash,
                    override_store=store_override,
                    on_warning=_warn,
                )

                # Execute deletions, preferring the bulk backend method when available.
                # Helper returns the (possibly updated) registry plus per-store stats.
                storage, batch_stats = sh.run_store_hash_value_batches(
                    config,
                    batch,
                    bulk_method_name="delete_url_bulk",
                    single_method_name="delete_url",
                    store_registry=storage,
                )
                for store_text, item_count, deleted_count in batch_stats:
                    ctx.print_if_visible(
                        f"✓ delete-url: {deleted_count} url(s) for {item_count} item(s) in '{store_text}'",
                        file=sys.stderr,
                    )
                for item in pass_through:
                    existing = get_field(item, "url")
                    # In batch mode we removed the union of requested urls for the file.
                    # Using urls_from_cli (if present) matches the user's explicit intent; otherwise
                    # remove the piped url row(s).
                    remove_set = urls_from_cli
                    if not remove_set:
                        remove_set = [
                            u.strip() for u in normalize_urls(
                                get_field(item, "url") or get_field(item, "source_url")
                            ) if str(u).strip()
                        ]
                    # Mutate the item so downstream cmdlets see the post-delete url set.
                    sh.set_item_urls(item, sh.remove_urls(existing, list(remove_set)))
                    ctx.emit(item)
                return 0
            # Single-item mode
            # (reached only when nothing was piped, i.e. result is None and the
            # file was identified via -query/-store; get_field(None, ...) is
            # presumably a safe no-op — confirm against _shared.get_field)
            if not urls_from_cli:
                urls_from_cli = [
                    u.strip() for u in normalize_urls(
                        get_field(result, "url") or get_field(result, "source_url")
                    ) if str(u).strip()
                ]
            if not urls_from_cli:
                log("Error: No URL provided")
                return 1
            # Resolve the concrete storage backend for the named store.
            backend, storage, exc = sh.get_store_backend(
                config,
                str(store_name),
                store_registry=storage,
            )
            if backend is None:
                # NOTE(review): `exc` from get_store_backend is discarded here;
                # surfacing it would make the error message more actionable.
                log(f"Error: Storage backend '{store_name}' not configured")
                return 1
            backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
            ctx.print_if_visible(
                f"✓ delete-url: {len(urls_from_cli)} url(s) removed",
                file=sys.stderr
            )
            if result is not None:
                existing = get_field(result, "url")
                sh.set_item_urls(result, sh.remove_urls(existing, list(urls_from_cli)))
                ctx.emit(result)

            return 0

        except Exception as exc:
            # Boundary catch-all: report and fail the pipeline step rather than raise.
            log(f"Error deleting URL: {exc}", file=sys.stderr)
            return 1
2025-12-06 00:10:19 -08:00
2025-12-14 00:53:52 -08:00
CMDLET = Delete_Url()