This commit is contained in:
2026-03-25 22:39:30 -07:00
parent c31402c8f1
commit 562acd809c
46 changed files with 2367 additions and 1868 deletions

View File

@@ -11,14 +11,18 @@ import sys
import tempfile
import time
from collections.abc import Iterable as IterableABC
from functools import lru_cache
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from SYS.logger import log, debug
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
from dataclasses import dataclass, field
from SYS import models
from SYS import pipeline as pipeline_context
from SYS.item_accessors import get_field as _item_accessor_get_field
from SYS.payload_builders import build_file_result_payload, build_table_result_payload
from SYS.result_publication import publish_result_table
from SYS.result_table import Table
from SYS.rich_display import stderr_console as get_stderr_console
from rich.prompt import Confirm
@@ -944,6 +948,18 @@ def build_pipeline_preview(raw_urls: Sequence[str], piped_items: Sequence[Any])
return preview
@lru_cache(maxsize=4096)
def _normalize_hash_cached(hash_hex: str) -> Optional[str]:
text = hash_hex.strip().lower()
if not text:
return None
if len(text) != 64:
return None
if not all(ch in "0123456789abcdef" for ch in text):
return None
return text
def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
"""Normalize a hash string to lowercase, or return None if invalid.
@@ -955,14 +971,7 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
"""
if not isinstance(hash_hex, str):
return None
text = hash_hex.strip().lower()
if not text:
return None
if len(text) != 64:
return None
if not all(ch in "0123456789abcdef" for ch in text):
return None
return text
return _normalize_hash_cached(hash_hex)
def resolve_hash_for_cmdlet(
@@ -1007,6 +1016,270 @@ def resolve_hash_for_cmdlet(
return None
def resolve_item_store_hash(
    item: Any,
    *,
    override_store: Optional[str] = None,
    override_hash: Optional[str] = None,
    hash_field: str = "hash",
    store_field: str = "store",
    path_fields: Sequence[str] = ("path", "target"),
) -> Tuple[str, Optional[str]]:
    """Resolve store name and normalized hash from a result item.

    An explicit ``override_store``/``override_hash`` wins over the item's
    own fields; the first truthy path-like field acts as a fallback hash
    source for ``resolve_hash_for_cmdlet``.
    """
    store_name = str(override_store or get_field(item, store_field) or "").strip()
    raw_hash = get_field(item, hash_field)
    # First non-empty path field, or None if the item carries none.
    raw_path = next(
        (value for name in path_fields if (value := get_field(item, name))),
        None,
    )
    resolved_hash = resolve_hash_for_cmdlet(
        str(raw_hash) if raw_hash else None,
        str(raw_path) if raw_path else None,
        str(override_hash) if override_hash else None,
    )
    return store_name, resolved_hash
def get_store_backend(
    config: Optional[Dict[str, Any]],
    store_name: Optional[str],
    *,
    store_registry: Any = None,
    suppress_debug: bool = False,
) -> Tuple[Optional[Any], Any, Optional[Exception]]:
    """Resolve a store backend, optionally reusing an existing registry.

    Returns a tuple of ``(backend, store_registry, exc)`` so callers can keep
    their command-specific error messages while avoiding repeated registry setup
    and ``store[name]`` boilerplate.
    """
    registry = store_registry
    if registry is None:
        # Build a registry on demand; construction failures are returned,
        # not raised, so callers keep their own error messaging.
        try:
            from Store import Store

            registry = Store(config or {}, suppress_debug=suppress_debug)
        except Exception as exc:
            return None, None, exc
    name = str(store_name or "").strip()
    if not name:
        return None, registry, KeyError("Missing store name")
    try:
        backend = registry[name]
    except Exception as exc:
        return None, registry, exc
    return backend, registry, None
def get_preferred_store_backend(
    config: Optional[Dict[str, Any]],
    store_name: Optional[str],
    *,
    store_registry: Any = None,
    suppress_debug: bool = True,
) -> Tuple[Optional[Any], Any, Optional[Exception]]:
    """Prefer a targeted backend instance before falling back to registry lookup."""
    direct_exc: Optional[Exception] = None
    # Fast path: ask the registry module for a dedicated instance.
    try:
        from Store.registry import get_backend_instance

        direct = get_backend_instance(
            config or {},
            str(store_name or ""),
            suppress_debug=suppress_debug,
        )
    except Exception as exc:
        direct_exc = exc
    else:
        if direct is not None:
            return direct, store_registry, None
    # Slow path: full registry resolution via the shared helper.
    fallback, registry, lookup_exc = get_store_backend(
        config,
        store_name,
        store_registry=store_registry,
        suppress_debug=suppress_debug,
    )
    if fallback is not None:
        return fallback, registry, None
    # Prefer reporting the direct-path failure when both paths failed.
    return None, registry, direct_exc or lookup_exc
def coalesce_hash_value_pairs(
    pairs: Sequence[Tuple[str, Sequence[str]]],
) -> List[Tuple[str, List[str]]]:
    """Merge duplicate hash/value pairs while preserving first-seen value order.

    Args:
        pairs: ``(hash, values)`` tuples; hashes may repeat, and values may
            contain duplicates or blank/whitespace-only entries.

    Returns:
        One ``(hash, values)`` tuple per distinct non-empty (stripped) hash,
        in first-seen hash order, with values stripped and de-duplicated in
        first-seen order. Hashes whose values all filter out are dropped.
    """
    merged: Dict[str, List[str]] = {}
    # Keep one persistent seen-set per hash instead of rebuilding
    # ``set(bucket)`` on every duplicate pair (was O(len(bucket)) per pair).
    seen_by_hash: Dict[str, Set[str]] = {}
    for hash_value, values in pairs:
        normalized_hash = str(hash_value or "").strip()
        if not normalized_hash:
            continue
        bucket = merged.setdefault(normalized_hash, [])
        seen = seen_by_hash.setdefault(normalized_hash, set())
        for value in values or []:
            text = str(value or "").strip()
            if not text or text in seen:
                continue
            seen.add(text)
            bucket.append(text)
    return [(hash_value, items) for hash_value, items in merged.items() if items]
def run_store_hash_value_batches(
    config: Optional[Dict[str, Any]],
    batch: Dict[str, List[Tuple[str, Sequence[str]]]],
    *,
    bulk_method_name: str,
    single_method_name: str,
    store_registry: Any = None,
    suppress_debug: bool = False,
    pass_config_to_bulk: bool = True,
    pass_config_to_single: bool = True,
) -> Tuple[Any, List[Tuple[str, int, int]]]:
    """Dispatch grouped hash/value batches across stores.

    Returns ``(store_registry, stats)`` where ``stats`` contains
    ``(store_name, item_count, value_count)`` for each dispatched store.
    Missing stores are skipped so callers can preserve existing warning behavior.
    """
    registry = store_registry
    stats: List[Tuple[str, int, int]] = []
    for store_name, pairs in batch.items():
        # Reuse (and thread through) whatever registry we have so far.
        backend, registry, _exc = get_store_backend(
            config,
            store_name,
            store_registry=registry,
            suppress_debug=suppress_debug,
        )
        if backend is None:
            continue
        merged_pairs = coalesce_hash_value_pairs(pairs)
        if not merged_pairs:
            continue
        bulk_fn = getattr(backend, bulk_method_name, None)
        if callable(bulk_fn):
            bulk_kwargs = {"config": config} if pass_config_to_bulk else {}
            bulk_fn(merged_pairs, **bulk_kwargs)
        else:
            # No bulk method: fall back to per-hash writes. getattr without a
            # default propagates AttributeError if neither method exists.
            single_fn = getattr(backend, single_method_name)
            single_kwargs = {"config": config} if pass_config_to_single else {}
            for hash_value, values in merged_pairs:
                single_fn(hash_value, values, **single_kwargs)
        value_total = sum(len(values or []) for _hash_value, values in merged_pairs)
        stats.append((store_name, len(merged_pairs), value_total))
    return registry, stats
def run_store_note_batches(
    config: Optional[Dict[str, Any]],
    batch: Dict[str, List[Tuple[str, str, str]]],
    *,
    store_registry: Any = None,
    suppress_debug: bool = False,
    on_store_error: Optional[Callable[[str, Exception], None]] = None,
    on_unsupported_store: Optional[Callable[[str], None]] = None,
    on_item_error: Optional[Callable[[str, str, str, Exception], None]] = None,
) -> Tuple[Any, int]:
    """Dispatch grouped note writes across stores while preserving item-level errors."""
    registry = store_registry
    written = 0
    for store_name, notes in batch.items():
        backend, registry, exc = get_store_backend(
            config,
            store_name,
            store_registry=registry,
            suppress_debug=suppress_debug,
        )
        if backend is None:
            # Store could not be resolved; report once per store if asked.
            if exc is not None and on_store_error is not None:
                on_store_error(store_name, exc)
            continue
        if not hasattr(backend, "set_note"):
            if on_unsupported_store is not None:
                on_unsupported_store(store_name)
            continue
        for hash_value, note_name, note_text in notes:
            try:
                wrote = backend.set_note(hash_value, note_name, note_text, config=config)
            except Exception as item_exc:
                # Per-item failures never abort the batch.
                if on_item_error is not None:
                    on_item_error(store_name, hash_value, note_name, item_exc)
            else:
                if wrote:
                    written += 1
    return registry, written
def collect_store_hash_value_batch(
    items: Sequence[Any],
    *,
    store_registry: Any,
    value_resolver: Callable[[Any], Optional[Sequence[str]]],
    override_hash: Optional[str] = None,
    override_store: Optional[str] = None,
    on_warning: Optional[Callable[[str], None]] = None,
) -> Tuple[Dict[str, List[Tuple[str, List[str]]]], List[Any]]:
    """Collect validated store/hash/value batches while preserving passthrough items."""

    def warn(message: str) -> None:
        # Warnings are optional; skipped items still pass through.
        if on_warning is not None:
            on_warning(message)

    batch: Dict[str, List[Tuple[str, List[str]]]] = {}
    pass_through: List[Any] = []
    for item in items:
        # Every input item is forwarded downstream, valid or not.
        pass_through.append(item)
        raw_hash = override_hash or get_field(item, "hash")
        raw_store = override_store or get_field(item, "store")
        if not raw_hash or not raw_store:
            warn("Item missing hash/store; skipping")
            continue
        normalized = normalize_hash(raw_hash)
        if not normalized:
            warn("Item has invalid hash; skipping")
            continue
        store_text = str(raw_store).strip()
        if not store_text:
            warn("Item has empty store; skipping")
            continue
        try:
            available = bool(store_registry.is_available(store_text))
        except Exception:
            available = False
        if not available:
            warn(f"Store '{store_text}' not configured; skipping")
            continue
        values = [
            text
            for text in (str(value).strip() for value in (value_resolver(item) or []))
            if text
        ]
        if values:
            batch.setdefault(store_text, []).append((normalized, values))
    return batch, pass_through
def parse_hash_query(query: Optional[str]) -> List[str]:
"""Parse a unified query string for `hash:` into normalized SHA256 hashes.
@@ -1054,6 +1327,36 @@ def parse_single_hash_query(query: Optional[str]) -> Optional[str]:
return hashes[0]
def require_hash_query(
    query: Optional[str],
    error_message: str,
    *,
    log_file: Any = None,
) -> Tuple[List[str], bool]:
    """Parse a multi-hash query and log a caller-provided error on invalid input."""
    hashes = parse_hash_query(query)
    # A non-empty query that yielded no hashes is treated as caller error;
    # an empty/None query is simply "no hashes requested" and succeeds.
    if not hashes and query:
        if log_file is not None:
            log(error_message, file=log_file)
        else:
            log(error_message)
        return [], False
    return hashes, True
def require_single_hash_query(
    query: Optional[str],
    error_message: str,
    *,
    log_file: Any = None,
) -> Tuple[Optional[str], bool]:
    """Parse a single-hash query and log a caller-provided error on invalid input."""
    query_hash = parse_single_hash_query(query)
    # Only a non-empty query that failed to parse counts as an error.
    if not query_hash and query:
        if log_file is not None:
            log(error_message, file=log_file)
        else:
            log(error_message)
        return None, False
    return query_hash, True
def get_hash_for_operation(
override_hash: Optional[str],
result: Any,
@@ -1180,26 +1483,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
get_field(result, "hash") # From dict or object
get_field(result, "table", "unknown") # With default
"""
# Handle lists by accessing the first element
if isinstance(obj, list):
if not obj:
return default
obj = obj[0]
if isinstance(obj, dict):
return obj.get(field, default)
else:
# Try direct attribute access first
value = getattr(obj, field, None)
if value is not None:
return value
# For PipeObjects, also check the extra field
extra_val = getattr(obj, "extra", None)
if isinstance(extra_val, dict):
return extra_val.get(field, default)
return default
return _item_accessor_get_field(obj, field, default)
def should_show_help(args: Sequence[str]) -> bool:
@@ -1636,33 +1920,22 @@ def create_pipe_object_result(
Returns:
Dict with all PipeObject fields for emission
"""
result: Dict[str, Any] = {
"source": source,
"id": identifier,
"path": file_path,
"action": f"cmdlet:{cmdlet_name}", # Format: cmdlet:cmdlet_name
}
result = build_file_result_payload(
title=title,
path=file_path,
hash_value=hash_value,
store=source,
tag=tag,
source=source,
id=identifier,
action=f"cmdlet:{cmdlet_name}",
**extra,
)
if title:
result["title"] = title
if hash_value:
result["hash"] = hash_value
if is_temp:
result["is_temp"] = True
if parent_hash:
result["parent_hash"] = parent_hash
if tag:
result["tag"] = tag
# Canonical store field: use source for compatibility
try:
if source:
result["store"] = source
except Exception:
pass
# Add any extra fields
result.update(extra)
return result
@@ -2153,6 +2426,32 @@ def normalize_result_input(result: Any) -> List[Dict[str, Any]]:
return []
def normalize_result_items(
    result: Any,
    *,
    include_falsey_single: bool = False,
) -> List[Any]:
    """Normalize piped input to a raw item list without converting item types.

    Lists are shallow-copied; ``None`` becomes an empty list; a lone falsey
    value (``0``, ``""``, ``{}``) is dropped unless ``include_falsey_single``.
    """
    if isinstance(result, list):
        # Copy so callers can mutate the returned list freely.
        return list(result)
    if result is None:
        return []
    return [result] if (result or include_falsey_single) else []
def value_has_content(value: Any) -> bool:
    """Return True when a value should be treated as present for payload building.

    ``None``, whitespace-only strings, and empty list/tuple/set are absent;
    every other value (including 0 and False) counts as present.
    """
    if value is None:
        return False
    if isinstance(value, str):
        # Whitespace-only strings count as empty.
        return bool(value.strip())
    if isinstance(value, (list, tuple, set)):
        return bool(value)
    return True
def filter_results_by_temp(results: List[Any], include_temp: bool = False) -> List[Any]:
"""Filter results by temporary status.
@@ -2380,6 +2679,46 @@ def extract_url_from_result(result: Any) -> list[str]:
return normalize_urls(url)
def merge_urls(existing: Any, incoming: Sequence[Any]) -> list[str]:
    """Merge URL values into a normalized, de-duplicated list.

    Existing values come first, then incoming; order of first appearance
    is preserved.
    """
    from SYS.metadata import normalize_urls

    merged: list[str] = []
    seen: set = set()
    for chunk in (normalize_urls(existing), normalize_urls(list(incoming or []))):
        for value in chunk:
            if value not in seen:
                seen.add(value)
                merged.append(value)
    return merged
def remove_urls(existing: Any, remove: Sequence[Any]) -> list[str]:
    """Remove URL values from an existing URL field and return survivors."""
    from SYS.metadata import normalize_urls

    current = normalize_urls(existing)
    to_drop = {url for url in normalize_urls(list(remove or [])) if url}
    if not to_drop:
        # Nothing to remove; hand back the normalized originals unchanged.
        return current
    return [url for url in current if url not in to_drop]
def set_item_urls(item: Any, urls: Sequence[Any]) -> None:
    """Persist normalized URL values back onto a dict/object result item.

    Best-effort: items that are neither dicts nor objects with a ``url``
    attribute (or that raise on assignment) are left untouched.
    """
    normalized = merge_urls([], list(urls or []))
    # A single URL is stored as a bare string; multiples as a list.
    payload: Any = normalized[0] if len(normalized) == 1 else list(normalized)
    try:
        if isinstance(item, dict):
            item["url"] = payload
        elif hasattr(item, "url"):
            setattr(item, "url", payload)
    except Exception:
        return
def extract_relationships(result: Any) -> Optional[Dict[str, Any]]:
if isinstance(result, models.PipeObject):
relationships = result.get_relationships()
@@ -3270,14 +3609,9 @@ def check_url_exists_in_storage(
ext = extracted.get("ext") if isinstance(extracted, dict) else ""
size_val = extracted.get("size") if isinstance(extracted, dict) else None
return {
"title": str(title),
"store": str(get_field(hit, "store") or backend_name),
"hash": str(file_hash or ""),
"ext": str(ext or ""),
"size": size_val,
"url": original_url,
"columns": [
return build_table_result_payload(
title=str(title),
columns=[
("Title", str(title)),
("Store", str(get_field(hit, "store") or backend_name)),
("Hash", str(file_hash or "")),
@@ -3285,7 +3619,12 @@ def check_url_exists_in_storage(
("Size", size_val),
("URL", original_url),
],
}
store=str(get_field(hit, "store") or backend_name),
hash=str(file_hash or ""),
ext=str(ext or ""),
size=size_val,
url=original_url,
)
def _search_backend_url_hits(
backend: Any,
@@ -3443,18 +3782,18 @@ def check_url_exists_in_storage(
seen_pairs.add((original_url, str(backend_name)))
matched_urls.add(original_url)
display_row = {
"title": "(exists)",
"store": str(backend_name),
"hash": found_hash or "",
"url": original_url,
"columns": [
display_row = build_table_result_payload(
title="(exists)",
columns=[
("Title", "(exists)"),
("Store", str(backend_name)),
("Hash", found_hash or ""),
("URL", original_url),
],
}
store=str(backend_name),
hash=found_hash or "",
url=original_url,
)
match_rows.append(display_row)
continue
@@ -3700,11 +4039,7 @@ def display_and_persist_items(
setattr(table, "_rendered_by_cmdlet", True)
# Use provided subject or default to first item
if subject is None:
subject = items[0] if len(items) == 1 else list(items)
# Persist table for @N selection across command boundaries
pipeline_context.set_last_result_table(table, list(items), subject=subject)
publish_result_table(pipeline_context, table, items, subject=subject)
except Exception:
pass