fdf
This commit is contained in:
@@ -11,14 +11,18 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
from collections.abc import Iterable as IterableABC
|
||||
from functools import lru_cache
|
||||
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
||||
from dataclasses import dataclass, field
|
||||
from SYS import models
|
||||
from SYS import pipeline as pipeline_context
|
||||
from SYS.item_accessors import get_field as _item_accessor_get_field
|
||||
from SYS.payload_builders import build_file_result_payload, build_table_result_payload
|
||||
from SYS.result_publication import publish_result_table
|
||||
from SYS.result_table import Table
|
||||
from SYS.rich_display import stderr_console as get_stderr_console
|
||||
from rich.prompt import Confirm
|
||||
@@ -944,6 +948,18 @@ def build_pipeline_preview(raw_urls: Sequence[str], piped_items: Sequence[Any])
|
||||
return preview
|
||||
|
||||
|
||||
@lru_cache(maxsize=4096)
|
||||
def _normalize_hash_cached(hash_hex: str) -> Optional[str]:
|
||||
text = hash_hex.strip().lower()
|
||||
if not text:
|
||||
return None
|
||||
if len(text) != 64:
|
||||
return None
|
||||
if not all(ch in "0123456789abcdef" for ch in text):
|
||||
return None
|
||||
return text
|
||||
|
||||
|
||||
def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
|
||||
"""Normalize a hash string to lowercase, or return None if invalid.
|
||||
|
||||
@@ -955,14 +971,7 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
if not isinstance(hash_hex, str):
|
||||
return None
|
||||
text = hash_hex.strip().lower()
|
||||
if not text:
|
||||
return None
|
||||
if len(text) != 64:
|
||||
return None
|
||||
if not all(ch in "0123456789abcdef" for ch in text):
|
||||
return None
|
||||
return text
|
||||
return _normalize_hash_cached(hash_hex)
|
||||
|
||||
|
||||
def resolve_hash_for_cmdlet(
|
||||
@@ -1007,6 +1016,270 @@ def resolve_hash_for_cmdlet(
|
||||
return None
|
||||
|
||||
|
||||
def resolve_item_store_hash(
|
||||
item: Any,
|
||||
*,
|
||||
override_store: Optional[str] = None,
|
||||
override_hash: Optional[str] = None,
|
||||
hash_field: str = "hash",
|
||||
store_field: str = "store",
|
||||
path_fields: Sequence[str] = ("path", "target"),
|
||||
) -> Tuple[str, Optional[str]]:
|
||||
"""Resolve store name and normalized hash from a result item."""
|
||||
store_name = str(override_store or get_field(item, store_field) or "").strip()
|
||||
raw_hash = get_field(item, hash_field)
|
||||
|
||||
raw_path = None
|
||||
for field_name in path_fields:
|
||||
candidate = get_field(item, field_name)
|
||||
if candidate:
|
||||
raw_path = candidate
|
||||
break
|
||||
|
||||
resolved_hash = resolve_hash_for_cmdlet(
|
||||
str(raw_hash) if raw_hash else None,
|
||||
str(raw_path) if raw_path else None,
|
||||
str(override_hash) if override_hash else None,
|
||||
)
|
||||
return store_name, resolved_hash
|
||||
|
||||
|
||||
def get_store_backend(
|
||||
config: Optional[Dict[str, Any]],
|
||||
store_name: Optional[str],
|
||||
*,
|
||||
store_registry: Any = None,
|
||||
suppress_debug: bool = False,
|
||||
) -> Tuple[Optional[Any], Any, Optional[Exception]]:
|
||||
"""Resolve a store backend, optionally reusing an existing registry.
|
||||
|
||||
Returns a tuple of ``(backend, store_registry, exc)`` so callers can keep
|
||||
their command-specific error messages while avoiding repeated registry setup
|
||||
and ``store[name]`` boilerplate.
|
||||
"""
|
||||
registry = store_registry
|
||||
if registry is None:
|
||||
try:
|
||||
from Store import Store
|
||||
|
||||
registry = Store(config or {}, suppress_debug=suppress_debug)
|
||||
except Exception as exc:
|
||||
return None, None, exc
|
||||
|
||||
backend_name = str(store_name or "").strip()
|
||||
if not backend_name:
|
||||
return None, registry, KeyError("Missing store name")
|
||||
|
||||
try:
|
||||
return registry[backend_name], registry, None
|
||||
except Exception as exc:
|
||||
return None, registry, exc
|
||||
|
||||
|
||||
def get_preferred_store_backend(
|
||||
config: Optional[Dict[str, Any]],
|
||||
store_name: Optional[str],
|
||||
*,
|
||||
store_registry: Any = None,
|
||||
suppress_debug: bool = True,
|
||||
) -> Tuple[Optional[Any], Any, Optional[Exception]]:
|
||||
"""Prefer a targeted backend instance before falling back to registry lookup."""
|
||||
direct_exc: Optional[Exception] = None
|
||||
try:
|
||||
from Store.registry import get_backend_instance
|
||||
|
||||
backend = get_backend_instance(
|
||||
config or {},
|
||||
str(store_name or ""),
|
||||
suppress_debug=suppress_debug,
|
||||
)
|
||||
if backend is not None:
|
||||
return backend, store_registry, None
|
||||
except Exception as exc:
|
||||
direct_exc = exc
|
||||
|
||||
backend, registry, lookup_exc = get_store_backend(
|
||||
config,
|
||||
store_name,
|
||||
store_registry=store_registry,
|
||||
suppress_debug=suppress_debug,
|
||||
)
|
||||
if backend is not None:
|
||||
return backend, registry, None
|
||||
return None, registry, direct_exc or lookup_exc
|
||||
|
||||
|
||||
def coalesce_hash_value_pairs(
|
||||
pairs: Sequence[Tuple[str, Sequence[str]]],
|
||||
) -> List[Tuple[str, List[str]]]:
|
||||
"""Merge duplicate hash/value pairs while preserving first-seen value order."""
|
||||
merged: Dict[str, List[str]] = {}
|
||||
for hash_value, values in pairs:
|
||||
normalized_hash = str(hash_value or "").strip()
|
||||
if not normalized_hash:
|
||||
continue
|
||||
bucket = merged.setdefault(normalized_hash, [])
|
||||
seen = set(bucket)
|
||||
for value in values or []:
|
||||
text = str(value or "").strip()
|
||||
if not text or text in seen:
|
||||
continue
|
||||
seen.add(text)
|
||||
bucket.append(text)
|
||||
return [(hash_value, items) for hash_value, items in merged.items() if items]
|
||||
|
||||
|
||||
def run_store_hash_value_batches(
|
||||
config: Optional[Dict[str, Any]],
|
||||
batch: Dict[str, List[Tuple[str, Sequence[str]]]],
|
||||
*,
|
||||
bulk_method_name: str,
|
||||
single_method_name: str,
|
||||
store_registry: Any = None,
|
||||
suppress_debug: bool = False,
|
||||
pass_config_to_bulk: bool = True,
|
||||
pass_config_to_single: bool = True,
|
||||
) -> Tuple[Any, List[Tuple[str, int, int]]]:
|
||||
"""Dispatch grouped hash/value batches across stores.
|
||||
|
||||
Returns ``(store_registry, stats)`` where ``stats`` contains
|
||||
``(store_name, item_count, value_count)`` for each dispatched store.
|
||||
Missing stores are skipped so callers can preserve existing warning behavior.
|
||||
"""
|
||||
registry = store_registry
|
||||
stats: List[Tuple[str, int, int]] = []
|
||||
for store_name, pairs in batch.items():
|
||||
backend, registry, _exc = get_store_backend(
|
||||
config,
|
||||
store_name,
|
||||
store_registry=registry,
|
||||
suppress_debug=suppress_debug,
|
||||
)
|
||||
if backend is None:
|
||||
continue
|
||||
|
||||
bulk_pairs = coalesce_hash_value_pairs(pairs)
|
||||
if not bulk_pairs:
|
||||
continue
|
||||
|
||||
bulk_fn = getattr(backend, bulk_method_name, None)
|
||||
if callable(bulk_fn):
|
||||
if pass_config_to_bulk:
|
||||
bulk_fn(bulk_pairs, config=config)
|
||||
else:
|
||||
bulk_fn(bulk_pairs)
|
||||
else:
|
||||
single_fn = getattr(backend, single_method_name)
|
||||
for hash_value, values in bulk_pairs:
|
||||
if pass_config_to_single:
|
||||
single_fn(hash_value, values, config=config)
|
||||
else:
|
||||
single_fn(hash_value, values)
|
||||
|
||||
stats.append(
|
||||
(
|
||||
store_name,
|
||||
len(bulk_pairs),
|
||||
sum(len(values or []) for _hash_value, values in bulk_pairs),
|
||||
)
|
||||
)
|
||||
|
||||
return registry, stats
|
||||
|
||||
|
||||
def run_store_note_batches(
|
||||
config: Optional[Dict[str, Any]],
|
||||
batch: Dict[str, List[Tuple[str, str, str]]],
|
||||
*,
|
||||
store_registry: Any = None,
|
||||
suppress_debug: bool = False,
|
||||
on_store_error: Optional[Callable[[str, Exception], None]] = None,
|
||||
on_unsupported_store: Optional[Callable[[str], None]] = None,
|
||||
on_item_error: Optional[Callable[[str, str, str, Exception], None]] = None,
|
||||
) -> Tuple[Any, int]:
|
||||
"""Dispatch grouped note writes across stores while preserving item-level errors."""
|
||||
registry = store_registry
|
||||
success_count = 0
|
||||
for store_name, items in batch.items():
|
||||
backend, registry, exc = get_store_backend(
|
||||
config,
|
||||
store_name,
|
||||
store_registry=registry,
|
||||
suppress_debug=suppress_debug,
|
||||
)
|
||||
if backend is None:
|
||||
if on_store_error is not None and exc is not None:
|
||||
on_store_error(store_name, exc)
|
||||
continue
|
||||
if not hasattr(backend, "set_note"):
|
||||
if on_unsupported_store is not None:
|
||||
on_unsupported_store(store_name)
|
||||
continue
|
||||
|
||||
for hash_value, note_name, note_text in items:
|
||||
try:
|
||||
if backend.set_note(hash_value, note_name, note_text, config=config):
|
||||
success_count += 1
|
||||
except Exception as item_exc:
|
||||
if on_item_error is not None:
|
||||
on_item_error(store_name, hash_value, note_name, item_exc)
|
||||
|
||||
return registry, success_count
|
||||
|
||||
|
||||
def collect_store_hash_value_batch(
|
||||
items: Sequence[Any],
|
||||
*,
|
||||
store_registry: Any,
|
||||
value_resolver: Callable[[Any], Optional[Sequence[str]]],
|
||||
override_hash: Optional[str] = None,
|
||||
override_store: Optional[str] = None,
|
||||
on_warning: Optional[Callable[[str], None]] = None,
|
||||
) -> Tuple[Dict[str, List[Tuple[str, List[str]]]], List[Any]]:
|
||||
"""Collect validated store/hash/value batches while preserving passthrough items."""
|
||||
batch: Dict[str, List[Tuple[str, List[str]]]] = {}
|
||||
pass_through: List[Any] = []
|
||||
|
||||
for item in items:
|
||||
pass_through.append(item)
|
||||
|
||||
raw_hash = override_hash or get_field(item, "hash")
|
||||
raw_store = override_store or get_field(item, "store")
|
||||
if not raw_hash or not raw_store:
|
||||
if on_warning is not None:
|
||||
on_warning("Item missing hash/store; skipping")
|
||||
continue
|
||||
|
||||
normalized = normalize_hash(raw_hash)
|
||||
if not normalized:
|
||||
if on_warning is not None:
|
||||
on_warning("Item has invalid hash; skipping")
|
||||
continue
|
||||
|
||||
store_text = str(raw_store).strip()
|
||||
if not store_text:
|
||||
if on_warning is not None:
|
||||
on_warning("Item has empty store; skipping")
|
||||
continue
|
||||
|
||||
try:
|
||||
is_available = bool(store_registry.is_available(store_text))
|
||||
except Exception:
|
||||
is_available = False
|
||||
if not is_available:
|
||||
if on_warning is not None:
|
||||
on_warning(f"Store '{store_text}' not configured; skipping")
|
||||
continue
|
||||
|
||||
values = [str(value).strip() for value in (value_resolver(item) or []) if str(value).strip()]
|
||||
if not values:
|
||||
continue
|
||||
|
||||
batch.setdefault(store_text, []).append((normalized, values))
|
||||
|
||||
return batch, pass_through
|
||||
|
||||
|
||||
def parse_hash_query(query: Optional[str]) -> List[str]:
|
||||
"""Parse a unified query string for `hash:` into normalized SHA256 hashes.
|
||||
|
||||
@@ -1054,6 +1327,36 @@ def parse_single_hash_query(query: Optional[str]) -> Optional[str]:
|
||||
return hashes[0]
|
||||
|
||||
|
||||
def require_hash_query(
|
||||
query: Optional[str],
|
||||
error_message: str,
|
||||
*,
|
||||
log_file: Any = None,
|
||||
) -> Tuple[List[str], bool]:
|
||||
"""Parse a multi-hash query and log a caller-provided error on invalid input."""
|
||||
hashes = parse_hash_query(query)
|
||||
if query and not hashes:
|
||||
kwargs = {"file": log_file} if log_file is not None else {}
|
||||
log(error_message, **kwargs)
|
||||
return [], False
|
||||
return hashes, True
|
||||
|
||||
|
||||
def require_single_hash_query(
|
||||
query: Optional[str],
|
||||
error_message: str,
|
||||
*,
|
||||
log_file: Any = None,
|
||||
) -> Tuple[Optional[str], bool]:
|
||||
"""Parse a single-hash query and log a caller-provided error on invalid input."""
|
||||
query_hash = parse_single_hash_query(query)
|
||||
if query and not query_hash:
|
||||
kwargs = {"file": log_file} if log_file is not None else {}
|
||||
log(error_message, **kwargs)
|
||||
return None, False
|
||||
return query_hash, True
|
||||
|
||||
|
||||
def get_hash_for_operation(
|
||||
override_hash: Optional[str],
|
||||
result: Any,
|
||||
@@ -1180,26 +1483,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
|
||||
get_field(result, "hash") # From dict or object
|
||||
get_field(result, "table", "unknown") # With default
|
||||
"""
|
||||
# Handle lists by accessing the first element
|
||||
if isinstance(obj, list):
|
||||
if not obj:
|
||||
return default
|
||||
obj = obj[0]
|
||||
|
||||
if isinstance(obj, dict):
|
||||
return obj.get(field, default)
|
||||
else:
|
||||
# Try direct attribute access first
|
||||
value = getattr(obj, field, None)
|
||||
if value is not None:
|
||||
return value
|
||||
|
||||
# For PipeObjects, also check the extra field
|
||||
extra_val = getattr(obj, "extra", None)
|
||||
if isinstance(extra_val, dict):
|
||||
return extra_val.get(field, default)
|
||||
|
||||
return default
|
||||
return _item_accessor_get_field(obj, field, default)
|
||||
|
||||
|
||||
def should_show_help(args: Sequence[str]) -> bool:
|
||||
@@ -1636,33 +1920,22 @@ def create_pipe_object_result(
|
||||
Returns:
|
||||
Dict with all PipeObject fields for emission
|
||||
"""
|
||||
result: Dict[str, Any] = {
|
||||
"source": source,
|
||||
"id": identifier,
|
||||
"path": file_path,
|
||||
"action": f"cmdlet:{cmdlet_name}", # Format: cmdlet:cmdlet_name
|
||||
}
|
||||
result = build_file_result_payload(
|
||||
title=title,
|
||||
path=file_path,
|
||||
hash_value=hash_value,
|
||||
store=source,
|
||||
tag=tag,
|
||||
source=source,
|
||||
id=identifier,
|
||||
action=f"cmdlet:{cmdlet_name}",
|
||||
**extra,
|
||||
)
|
||||
|
||||
if title:
|
||||
result["title"] = title
|
||||
if hash_value:
|
||||
result["hash"] = hash_value
|
||||
if is_temp:
|
||||
result["is_temp"] = True
|
||||
if parent_hash:
|
||||
result["parent_hash"] = parent_hash
|
||||
if tag:
|
||||
result["tag"] = tag
|
||||
|
||||
# Canonical store field: use source for compatibility
|
||||
try:
|
||||
if source:
|
||||
result["store"] = source
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Add any extra fields
|
||||
result.update(extra)
|
||||
|
||||
return result
|
||||
|
||||
@@ -2153,6 +2426,32 @@ def normalize_result_input(result: Any) -> List[Dict[str, Any]]:
|
||||
return []
|
||||
|
||||
|
||||
def normalize_result_items(
|
||||
result: Any,
|
||||
*,
|
||||
include_falsey_single: bool = False,
|
||||
) -> List[Any]:
|
||||
"""Normalize piped input to a raw item list without converting item types."""
|
||||
if isinstance(result, list):
|
||||
return list(result)
|
||||
if result is None:
|
||||
return []
|
||||
if include_falsey_single or result:
|
||||
return [result]
|
||||
return []
|
||||
|
||||
|
||||
def value_has_content(value: Any) -> bool:
|
||||
"""Return True when a value should be treated as present for payload building."""
|
||||
if value is None:
|
||||
return False
|
||||
if isinstance(value, str):
|
||||
return bool(value.strip())
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
return len(value) > 0
|
||||
return True
|
||||
|
||||
|
||||
def filter_results_by_temp(results: List[Any], include_temp: bool = False) -> List[Any]:
|
||||
"""Filter results by temporary status.
|
||||
|
||||
@@ -2380,6 +2679,46 @@ def extract_url_from_result(result: Any) -> list[str]:
|
||||
return normalize_urls(url)
|
||||
|
||||
|
||||
def merge_urls(existing: Any, incoming: Sequence[Any]) -> list[str]:
|
||||
"""Merge URL values into a normalized, de-duplicated list."""
|
||||
from SYS.metadata import normalize_urls
|
||||
|
||||
merged: list[str] = []
|
||||
for value in normalize_urls(existing):
|
||||
if value not in merged:
|
||||
merged.append(value)
|
||||
for value in normalize_urls(list(incoming or [])):
|
||||
if value not in merged:
|
||||
merged.append(value)
|
||||
return merged
|
||||
|
||||
|
||||
def remove_urls(existing: Any, remove: Sequence[Any]) -> list[str]:
|
||||
"""Remove URL values from an existing URL field and return survivors."""
|
||||
from SYS.metadata import normalize_urls
|
||||
|
||||
current = normalize_urls(existing)
|
||||
remove_set = {value for value in normalize_urls(list(remove or [])) if value}
|
||||
if not remove_set:
|
||||
return current
|
||||
return [value for value in current if value not in remove_set]
|
||||
|
||||
|
||||
def set_item_urls(item: Any, urls: Sequence[Any]) -> None:
|
||||
"""Persist normalized URL values back onto a dict/object result item."""
|
||||
normalized = merge_urls([], list(urls or []))
|
||||
payload: Any = normalized[0] if len(normalized) == 1 else list(normalized)
|
||||
|
||||
try:
|
||||
if isinstance(item, dict):
|
||||
item["url"] = payload
|
||||
return
|
||||
if hasattr(item, "url"):
|
||||
setattr(item, "url", payload)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
||||
def extract_relationships(result: Any) -> Optional[Dict[str, Any]]:
|
||||
if isinstance(result, models.PipeObject):
|
||||
relationships = result.get_relationships()
|
||||
@@ -3270,14 +3609,9 @@ def check_url_exists_in_storage(
|
||||
ext = extracted.get("ext") if isinstance(extracted, dict) else ""
|
||||
size_val = extracted.get("size") if isinstance(extracted, dict) else None
|
||||
|
||||
return {
|
||||
"title": str(title),
|
||||
"store": str(get_field(hit, "store") or backend_name),
|
||||
"hash": str(file_hash or ""),
|
||||
"ext": str(ext or ""),
|
||||
"size": size_val,
|
||||
"url": original_url,
|
||||
"columns": [
|
||||
return build_table_result_payload(
|
||||
title=str(title),
|
||||
columns=[
|
||||
("Title", str(title)),
|
||||
("Store", str(get_field(hit, "store") or backend_name)),
|
||||
("Hash", str(file_hash or "")),
|
||||
@@ -3285,7 +3619,12 @@ def check_url_exists_in_storage(
|
||||
("Size", size_val),
|
||||
("URL", original_url),
|
||||
],
|
||||
}
|
||||
store=str(get_field(hit, "store") or backend_name),
|
||||
hash=str(file_hash or ""),
|
||||
ext=str(ext or ""),
|
||||
size=size_val,
|
||||
url=original_url,
|
||||
)
|
||||
|
||||
def _search_backend_url_hits(
|
||||
backend: Any,
|
||||
@@ -3443,18 +3782,18 @@ def check_url_exists_in_storage(
|
||||
|
||||
seen_pairs.add((original_url, str(backend_name)))
|
||||
matched_urls.add(original_url)
|
||||
display_row = {
|
||||
"title": "(exists)",
|
||||
"store": str(backend_name),
|
||||
"hash": found_hash or "",
|
||||
"url": original_url,
|
||||
"columns": [
|
||||
display_row = build_table_result_payload(
|
||||
title="(exists)",
|
||||
columns=[
|
||||
("Title", "(exists)"),
|
||||
("Store", str(backend_name)),
|
||||
("Hash", found_hash or ""),
|
||||
("URL", original_url),
|
||||
],
|
||||
}
|
||||
store=str(backend_name),
|
||||
hash=found_hash or "",
|
||||
url=original_url,
|
||||
)
|
||||
match_rows.append(display_row)
|
||||
continue
|
||||
|
||||
@@ -3700,11 +4039,7 @@ def display_and_persist_items(
|
||||
|
||||
setattr(table, "_rendered_by_cmdlet", True)
|
||||
|
||||
# Use provided subject or default to first item
|
||||
if subject is None:
|
||||
subject = items[0] if len(items) == 1 else list(items)
|
||||
|
||||
# Persist table for @N selection across command boundaries
|
||||
pipeline_context.set_last_result_table(table, list(items), subject=subject)
|
||||
publish_result_table(pipeline_context, table, items, subject=subject)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user