from __future__ import annotations

import sys
from typing import Any, Dict, List, Sequence, Tuple

from SYS import pipeline as ctx
from SYS.logger import log
from Store import Store

from . import _shared as sh
|
|
|
class Add_Url(sh.Cmdlet):
    """Add URL associations to files via hash+store."""

    def __init__(self) -> None:
        """Declare the cmdlet's interface and register it with the shared registry."""
        super().__init__(
            name="add-url",
            summary="Associate a URL with a file",
            usage="@1 | add-url <url>",
            arg=[
                sh.SharedArgs.QUERY,
                sh.SharedArgs.STORE,
                sh.CmdletArg(
                    "url",
                    required=True,
                    description="URL to associate",
                ),
            ],
            detail=[
                "- Associates URL with file identified by hash+store",
                "- Multiple url can be comma-separated",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Add URL to file via hash+store backend.

        Args:
            result: Piped input — a single item, a list of items (batch
                mode), or None when nothing was piped.
            args: Raw cmdlet arguments; parsed with the shared parser.
            config: Runtime configuration, passed through to the store layer.

        Returns:
            0 on success, 1 on any validation or backend error.
        """
        parsed = sh.parse_cmdlet_args(args, self)

        # Compatibility/piping fix:
        # `SharedArgs.QUERY` is positional in the shared parser, so `add-url <url>`
        # (and `@N | add-url <url>`) can mistakenly parse the URL into `query`.
        # If `url` is missing and `query` looks like an http(s) URL, treat it as `url`.
        try:
            if (not parsed.get("url")) and isinstance(parsed.get("query"), str):
                q = str(parsed.get("query") or "").strip()
                if q.startswith(("http://", "https://")):
                    parsed["url"] = q
                    parsed.pop("query", None)
        except Exception:
            # Best-effort heuristic only; never let it break normal parsing.
            pass

        query_hash, query_valid = sh.require_single_hash_query(
            parsed.get("query"),
            "Error: -query must be of the form hash:<sha256>",
        )
        if not query_valid:
            return 1

        # Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
        results: List[Any] = (
            result if isinstance(result, list)
            else ([result] if result is not None else [])
        )

        if query_hash and len(results) > 1:
            log("Error: -query hash:<sha256> cannot be used with multiple piped items")
            return 1

        # Extract hash and store from result or args
        file_hash = query_hash or (
            sh.get_field(result, "hash") if result is not None else None
        )
        store_name = parsed.get("store") or (
            sh.get_field(result, "store") if result is not None else None
        )
        url_arg = parsed.get("url")
        if not url_arg:
            # Fall back to a URL carried on the piped item itself (best effort).
            try:
                inferred = sh.extract_url_from_result(result)
                if inferred:
                    candidate = inferred[0]
                    if isinstance(candidate, str) and candidate.strip():
                        url_arg = candidate.strip()
                        parsed["url"] = url_arg
            except Exception:
                pass

        # If we have multiple piped items, we will resolve hash/store per item below.
        if not results:
            if not file_hash:
                log(
                    'Error: No file hash provided (pipe an item or use -query "hash:<sha256>")'
                )
                return 1
            if not store_name:
                log("Error: No store name provided")
                return 1

        if not url_arg:
            log("Error: No URL provided")
            return 1

        # Normalize hash (single-item mode)
        if not results and file_hash:
            file_hash = sh.normalize_hash(file_hash)
            if not file_hash:
                log("Error: Invalid hash format")
                return 1

        # Parse url (comma-separated)
        urls = [u.strip() for u in str(url_arg).split(",") if u.strip()]
        if not urls:
            log("Error: No valid url provided")
            return 1

        # Get backend and add url
        try:
            storage = Store(config)

            # Build batches per store; an explicit -store overrides per-item stores.
            store_override = parsed.get("store")

            if results:
                def _warn(message: str) -> None:
                    # Warnings go to stderr so they do not pollute piped stdout.
                    ctx.print_if_visible(f"[add-url] Warning: {message}", file=sys.stderr)

                batch, pass_through = sh.collect_store_hash_value_batch(
                    results,
                    store_registry=storage,
                    value_resolver=lambda _item: list(urls),
                    override_hash=query_hash,
                    override_store=store_override,
                    on_warning=_warn,
                )

                # Execute per-store batches.
                storage, batch_stats = sh.run_store_hash_value_batches(
                    config,
                    batch,
                    bulk_method_name="add_url_bulk",
                    single_method_name="add_url",
                    store_registry=storage,
                )
                for store_text, item_count, _value_count in batch_stats:
                    ctx.print_if_visible(
                        f"✓ add-url: {len(urls)} url(s) for {item_count} item(s) in '{store_text}'",
                        file=sys.stderr,
                    )

                # Pass items through unchanged (but update url field for convenience).
                for item in pass_through:
                    existing = sh.get_field(item, "url")
                    merged = sh.merge_urls(existing, list(urls))
                    sh.set_item_urls(item, merged)
                    ctx.emit(item)
                return 0

            # Single-item mode.
            # The third tuple element (a backend-lookup error, if any) is unused
            # here; named `_backend_exc` so it cannot be confused with the `exc`
            # bound by the exception handler below.
            backend, storage, _backend_exc = sh.get_store_backend(
                config,
                str(store_name),
                store_registry=storage,
            )
            if backend is None:
                log(f"Error: Storage backend '{store_name}' not configured")
                return 1
            backend.add_url(str(file_hash), urls, config=config)
            ctx.print_if_visible(
                f"✓ add-url: {len(urls)} url(s) added",
                file=sys.stderr,
            )
            if result is not None:
                existing = sh.get_field(result, "url")
                merged = sh.merge_urls(existing, list(urls))
                sh.set_item_urls(result, merged)
                ctx.emit(result)
            return 0

        except Exception as exc:
            log(f"Error adding URL: {exc}", file=sys.stderr)
            return 1
# Module-level singleton; constructing it registers the cmdlet with the registry.
CMDLET = Add_Url()