This commit is contained in:
2026-03-25 22:39:30 -07:00
parent c31402c8f1
commit 562acd809c
46 changed files with 2367 additions and 1868 deletions

View File

@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Sequence, Tuple
import sys
from SYS import pipeline as ctx
from . import _shared as sh
from ._shared import (
Cmdlet,
CmdletArg,
@@ -45,9 +46,11 @@ class Delete_Url(Cmdlet):
"""Delete URL from file via hash+store backend."""
parsed = parse_cmdlet_args(args, self)
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")
query_hash, query_valid = sh.require_single_hash_query(
parsed.get("query"),
"Error: -query must be of the form hash:<sha256>",
)
if not query_valid:
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
@@ -105,77 +108,13 @@ class Delete_Url(Cmdlet):
try:
storage = Store(config)
def _remove_urls(existing: Any, remove: List[str]) -> Any:
# Preserve prior shape: keep str when 1 url, list when multiple.
current: List[str] = []
try:
if isinstance(existing, str):
current = [p.strip() for p in existing.split(",") if p.strip()]
elif isinstance(existing, (list, tuple)):
current = [str(u).strip() for u in existing if str(u).strip()]
except Exception:
current = []
remove_set = {u
for u in (remove or []) if u}
new_urls = [u for u in current if u not in remove_set]
if len(new_urls) == 1:
return new_urls[0]
return new_urls
def _set_item_url(item: Any, merged: Any) -> None:
try:
if isinstance(item, dict):
item["url"] = merged
return
if hasattr(item, "url"):
setattr(item, "url", merged)
except Exception:
return
store_override = parsed.get("store")
batch: Dict[str,
List[Tuple[str,
List[str]]]] = {}
pass_through: List[Any] = []
if results:
for item in results:
pass_through.append(item)
def _warn(message: str) -> None:
    """Send a ``[delete-url] Warning:`` line to stderr via ``ctx.print_if_visible``."""
    warning_text = f"[delete-url] Warning: {message}"
    ctx.print_if_visible(warning_text, file=sys.stderr)
raw_hash = query_hash or get_field(item, "hash")
raw_store = store_override or get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible(
"[delete-url] Warning: Item missing hash/store; skipping",
file=sys.stderr,
)
continue
normalized = normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible(
"[delete-url] Warning: Item has invalid hash; skipping",
file=sys.stderr
)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible(
"[delete-url] Warning: Item has empty store; skipping",
file=sys.stderr
)
continue
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[delete-url] Warning: Store '{store_text}' not configured; skipping",
file=sys.stderr,
)
continue
# Determine which URLs to delete.
# - If user passed an explicit <url>, apply it to all items.
# - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
def _resolve_item_urls(item: Any) -> List[str]:
item_urls = list(urls_from_cli)
if not item_urls:
item_urls = [
@@ -184,41 +123,28 @@ class Delete_Url(Cmdlet):
) if str(u).strip()
]
if not item_urls:
ctx.print_if_visible(
"[delete-url] Warning: Item has no url field; skipping",
file=sys.stderr
)
continue
_warn("Item has no url field; skipping")
return item_urls
batch.setdefault(store_text, []).append((normalized, item_urls))
batch, pass_through = sh.collect_store_hash_value_batch(
results,
store_registry=storage,
value_resolver=_resolve_item_urls,
override_hash=query_hash,
override_store=store_override,
on_warning=_warn,
)
for store_text, pairs in batch.items():
try:
backend = storage[store_text]
except Exception:
continue
merged: Dict[str,
List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in ulist or []:
if u and u not in merged[h]:
merged[h].append(u)
bulk_pairs = [(h, merged[h]) for h in merged.keys()]
bulk_fn = getattr(backend, "delete_url_bulk", None)
if callable(bulk_fn):
bulk_fn(bulk_pairs, config=config)
else:
for h, ulist in bulk_pairs:
backend.delete_url(h, ulist, config=config)
deleted_count = 0
for _h, ulist in bulk_pairs:
deleted_count += len(ulist or [])
storage, batch_stats = sh.run_store_hash_value_batches(
config,
batch,
bulk_method_name="delete_url_bulk",
single_method_name="delete_url",
store_registry=storage,
)
for store_text, item_count, deleted_count in batch_stats:
ctx.print_if_visible(
f"✓ delete-url: {deleted_count} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
f"✓ delete-url: {deleted_count} url(s) for {item_count} item(s) in '{store_text}'",
file=sys.stderr,
)
@@ -234,7 +160,7 @@ class Delete_Url(Cmdlet):
get_field(item, "url") or get_field(item, "source_url")
) if str(u).strip()
]
_set_item_url(item, _remove_urls(existing, list(remove_set)))
sh.set_item_urls(item, sh.remove_urls(existing, list(remove_set)))
ctx.emit(item)
return 0
@@ -249,7 +175,14 @@ class Delete_Url(Cmdlet):
log("Error: No URL provided")
return 1
backend = storage[str(store_name)]
backend, storage, exc = sh.get_store_backend(
config,
str(store_name),
store_registry=storage,
)
if backend is None:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
ctx.print_if_visible(
f"✓ delete-url: {len(urls_from_cli)} url(s) removed",
@@ -257,13 +190,10 @@ class Delete_Url(Cmdlet):
)
if result is not None:
existing = get_field(result, "url")
_set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
sh.set_item_urls(result, sh.remove_urls(existing, list(urls_from_cli)))
ctx.emit(result)
return 0
except KeyError:
log(f"Error: Storage backend '{store_name}' not configured")
return 1
except Exception as exc:
log(f"Error deleting URL: {exc}", file=sys.stderr)
return 1