from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence, Tuple

import sys

import pipeline as ctx

from . import _shared as sh

Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash = (
    sh.Cmdlet,
    sh.CmdletArg,
    sh.SharedArgs,
    sh.parse_cmdlet_args,
    sh.get_field,
    sh.normalize_hash,
)

from SYS.logger import log
from Store import Store


class Delete_Url(Cmdlet):
    """Delete URL associations from files via hash+store."""

    def __init__(self) -> None:
        super().__init__(
            name="delete-url",
            summary="Remove a URL association from a file",
            usage="@1 | delete-url <url>",
            arg=[
                SharedArgs.QUERY,
                SharedArgs.STORE,
                CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
            ],
            detail=[
                "- Removes a URL association from the file identified by hash+store",
                "- Multiple URLs can be comma-separated",
            ],
            exec=self.run,
        )
        self.register()
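
    # Example pipeline usage (illustrative; the "@1" row selection and the
    # get-url cmdlet are assumed from the surrounding repo, per the usage and
    # detail text above):
    #   @1 | delete-url "https://example.com/page"             # one explicit URL
    #   @1 | delete-url "https://a.example,https://b.example"  # comma-separated
    #   @1 | get-url | delete-url                              # remove piped url rows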

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Delete URL from file via hash+store backend."""
        parsed = parse_cmdlet_args(args, self)

        query_hash = sh.parse_single_hash_query(parsed.get("query"))
        if parsed.get("query") and not query_hash:
            log("Error: -query must be of the form hash:<sha256>")
            return 1

        # Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
        results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
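        # e.g. a piped list is used as-is, a single piped row becomes a
        # one-element batch, and no piped input yields an empty list.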

        if query_hash and len(results) > 1:
            log("Error: -query hash:<sha256> cannot be used with multiple piped items")
            return 1

        # Extract hash and store from result or args
        file_hash = query_hash or (get_field(result, "hash") if result is not None else None)
        store_name = parsed.get("store") or (get_field(result, "store") if result is not None else None)
        url_arg = parsed.get("url")

        # If we have multiple piped items, we will resolve hash/store per item below.
        if not results:
            if not file_hash:
                log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
                return 1
            if not store_name:
                log("Error: No store name provided")
                return 1

        # Normalize hash (single-item mode)
        if not results and file_hash:
            file_hash = normalize_hash(file_hash)
            if not file_hash:
                log("Error: Invalid hash format")
                return 1

        from metadata import normalize_urls

        def _urls_from_arg(raw: Any) -> List[str]:
            if raw is None:
                return []
            # Support comma-separated input for backwards compatibility
            if isinstance(raw, str) and "," in raw:
                return [u.strip() for u in raw.split(",") if u.strip()]
            return [u.strip() for u in normalize_urls(raw) if str(u).strip()]
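
        # Illustrative behavior of _urls_from_arg (assuming metadata.normalize_urls
        # yields cleaned URL strings):
        #   _urls_from_arg(None) -> []
        #   _urls_from_arg("https://a.example, https://b.example")
        #       -> ["https://a.example", "https://b.example"]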

        urls_from_cli = _urls_from_arg(url_arg)

        # Get backend and delete url
        try:
            storage = Store(config)

            def _remove_urls(existing: Any, remove: List[str]) -> Any:
                # Preserve prior shape: keep str when 1 url, list when multiple.
                current: List[str] = []
                try:
                    if isinstance(existing, str):
                        current = [p.strip() for p in existing.split(",") if p.strip()]
                    elif isinstance(existing, (list, tuple)):
                        current = [str(u).strip() for u in existing if str(u).strip()]
                except Exception:
                    current = []
                remove_set = {u for u in (remove or []) if u}
                new_urls = [u for u in current if u not in remove_set]
                if len(new_urls) == 1:
                    return new_urls[0]
                return new_urls
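
            # Shape-preserving examples, per the comment above:
            #   _remove_urls("https://a.example,https://b.example", ["https://b.example"])
            #       -> "https://a.example"   (a single survivor collapses to str)
            #   _remove_urls(["a", "b", "c"], ["c"]) -> ["a", "b"]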

            def _set_item_url(item: Any, merged: Any) -> None:
                try:
                    if isinstance(item, dict):
                        item["url"] = merged
                        return
                    if hasattr(item, "url"):
                        setattr(item, "url", merged)
                except Exception:
                    return
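
            # Best-effort write-back: dict rows get item["url"] replaced, objects
            # only if they already expose a url attribute; failures are ignored so
            # pass-through emission never breaks on an odd item type.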

            store_override = parsed.get("store")
            batch: Dict[str, List[Tuple[str, List[str]]]] = {}
            pass_through: List[Any] = []

            if results:
                for item in results:
                    pass_through.append(item)

                    raw_hash = query_hash or get_field(item, "hash")
                    raw_store = store_override or get_field(item, "store")
                    if not raw_hash or not raw_store:
                        ctx.print_if_visible("[delete-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
                        continue

                    normalized = normalize_hash(raw_hash)
                    if not normalized:
                        ctx.print_if_visible("[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
                        continue

                    store_text = str(raw_store).strip()
                    if not store_text:
                        ctx.print_if_visible("[delete-url] Warning: Item has empty store; skipping", file=sys.stderr)
                        continue
                    if not storage.is_available(store_text):
                        ctx.print_if_visible(
                            f"[delete-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
                        )
                        continue

                    # Determine which URLs to delete.
                    # - If user passed an explicit <url>, apply it to all items.
                    # - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
                    item_urls = list(urls_from_cli)
                    if not item_urls:
                        item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
                    if not item_urls:
                        ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
                        continue

                    batch.setdefault(store_text, []).append((normalized, item_urls))

                for store_text, pairs in batch.items():
                    try:
                        backend = storage[store_text]
                    except Exception:
                        continue

                    merged: Dict[str, List[str]] = {}
                    for h, ulist in pairs:
                        merged.setdefault(h, [])
                        for u in (ulist or []):
                            if u and u not in merged[h]:
                                merged[h].append(u)
                    bulk_pairs = list(merged.items())
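                    # e.g. pairs [("h1", ["u1"]), ("h1", ["u2"])] merge into
                    # bulk_pairs [("h1", ["u1", "u2"])], so each hash is hit once;
                    # a backend-provided bulk delete is preferred just below.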

                    bulk_fn = getattr(backend, "delete_url_bulk", None)
                    if callable(bulk_fn):
                        bulk_fn(bulk_pairs, config=config)
                    else:
                        for h, ulist in bulk_pairs:
                            backend.delete_url(h, ulist, config=config)

                    deleted_count = sum(len(ulist or []) for _h, ulist in bulk_pairs)
                    ctx.print_if_visible(
                        f"✓ delete-url: {deleted_count} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
                        file=sys.stderr,
                    )

                for item in pass_through:
                    existing = get_field(item, "url")
                    # In batch mode we removed the union of requested urls for the file.
                    # Using urls_from_cli (if present) matches the user's explicit intent;
                    # otherwise remove the piped url row(s).
                    remove_urls = urls_from_cli
                    if not remove_urls:
                        remove_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
                    _set_item_url(item, _remove_urls(existing, list(remove_urls)))
                    ctx.emit(item)

                return 0

            # Single-item mode
            if not urls_from_cli and result is not None:
                urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
            if not urls_from_cli:
                log("Error: No URL provided")
                return 1

            backend = storage[str(store_name)]
            backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
            ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
            if result is not None:
                existing = get_field(result, "url")
                _set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
                ctx.emit(result)
            return 0

        except KeyError:
            log(f"Error: Storage backend '{store_name}' not configured")
            return 1
        except Exception as exc:
            log(f"Error deleting URL: {exc}", file=sys.stderr)
            return 1


CMDLET = Delete_Url()
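
# Module-level instantiation: __init__ calls self.register(), so importing this
# module is what makes "delete-url" available to the pipeline (registry
# mechanics assumed from _shared.Cmdlet).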