This commit is contained in:
2026-02-11 18:16:07 -08:00
parent cc715e1fef
commit 1d0de1118b
27 changed files with 1167 additions and 1075 deletions

37
SYS/field_access.py Normal file
View File

@@ -0,0 +1,37 @@
"""Lightweight helpers for accessing fields on mixed pipeline objects.
This intentionally avoids importing cmdlet modules so it can be used from
providers and core pipeline code without pulling in the full cmdlet stack.
"""
from __future__ import annotations
from typing import Any, Optional
def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
    """Look up ``field`` on a dict-like or attribute-style object.

    Resolution order:
      1. A list input is unwrapped to its first element (empty list -> default).
      2. Dicts answer via ``dict.get``.
      3. Otherwise the attribute of the same name is tried.
      4. Finally a PipeObject-style ``.extra`` dict is consulted.

    Returns ``default`` when nothing matches.
    """
    # Unwrap list inputs EAFP-style: an empty list has no first element.
    if isinstance(obj, list):
        try:
            obj = obj[0]
        except IndexError:
            return default

    # Plain mappings resolve directly.
    if isinstance(obj, dict):
        return obj.get(field, default)

    # Attribute lookup; None is treated as "not found" so the .extra
    # fallback below still gets a chance.
    found = getattr(obj, field, None)
    if found is not None:
        return found

    # PipeObject pattern: spill-over fields live in an `extra` dict.
    spillover = getattr(obj, "extra", None)
    return spillover.get(field, default) if isinstance(spillover, dict) else default

265
SYS/pipe_object.py Normal file
View File

@@ -0,0 +1,265 @@
from __future__ import annotations
from typing import Any, Dict, Optional
from SYS import models
def coerce_to_pipe_object(
    value: Any,
    default_path: Optional[str] = None,
) -> models.PipeObject:
    """Normalize any incoming result to a PipeObject for single-source-of-truth state.

    Uses hash+store canonical pattern.

    Accepts PipeObject (returned unchanged), dicts, objects exposing
    ``to_dict()`` or known attributes (e.g. ResultItem/UrlItem), plain
    strings (URL or filesystem path), or anything else (falls through to
    the bare fallback at the bottom).

    Args:
        value: The raw pipeline result to normalize.
        default_path: Path used by the fallback branch when ``value``
            carries no path of its own.

    Returns:
        A ``models.PipeObject`` with canonical hash/store identifiers;
        unrecognized dict keys are preserved in ``extra``.
    """
    # Debug: Print ResultItem details if coming from search_file.py
    try:
        from SYS.logger import is_debug_enabled, debug
        if (
            is_debug_enabled()
            and hasattr(value, "__class__")
            and value.__class__.__name__ == "ResultItem"
        ):
            debug("[ResultItem -> PipeObject conversion]")
            debug(f"  title={getattr(value, 'title', None)}")
            debug(f"  target={getattr(value, 'target', None)}")
            debug(f"  hash={getattr(value, 'hash', None)}")
            debug(f"  media_kind={getattr(value, 'media_kind', None)}")
            debug(f"  tag={getattr(value, 'tag', None)}")
            debug(f"  tag_summary={getattr(value, 'tag_summary', None)}")
            debug(f"  size_bytes={getattr(value, 'size_bytes', None)}")
            debug(f"  duration_seconds={getattr(value, 'duration_seconds', None)}")
            debug(f"  relationships={getattr(value, 'relationships', None)}")
            debug(f"  url={getattr(value, 'url', None)}")
            debug(
                f"  full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}"
            )
    except Exception:
        # Best-effort debug logging only; never let it break coercion.
        pass
    # Already canonical: pass through untouched.
    if isinstance(value, models.PipeObject):
        return value
    # Keys consumed explicitly below; everything else lands in `extra`.
    known_keys = {
        "hash",
        "store",
        "tag",
        "title",
        "url",
        "source_url",
        "duration",
        "metadata",
        "warnings",
        "path",
        "relationships",
        "is_temp",
        "action",
        "parent_hash",
    }
    # Convert common object-like results into a dict so we can preserve fields like
    # hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
    #
    # Priority:
    #   1) explicit to_dict()
    #   2) best-effort attribute extraction for known PipeObject-ish fields
    if hasattr(value, "to_dict"):
        value = value.to_dict()
    elif not isinstance(value, dict):
        try:
            obj_map: Dict[str, Any] = {}
            # Superset of known_keys: also probes aliases such as
            # "prov", "duration_seconds", "full_metadata", "target".
            for k in (
                "hash",
                "store",
                "provider",
                "prov",
                "tag",
                "title",
                "url",
                "source_url",
                "duration",
                "duration_seconds",
                "metadata",
                "full_metadata",
                "warnings",
                "path",
                "target",
                "relationships",
                "is_temp",
                "action",
                "parent_hash",
                "extra",
                "media_kind",
            ):
                if hasattr(value, k):
                    obj_map[k] = getattr(value, k)
            # Only adopt the mapping if we actually found something;
            # otherwise fall through to the bare-value handling below.
            if obj_map:
                value = obj_map
        except Exception:
            pass
    if isinstance(value, dict):
        # Extract hash and store (canonical identifiers)
        hash_val = value.get("hash")
        store_val = value.get("store") or "PATH"
        # "PATH" is the placeholder default; a store recorded inside the
        # nested `extra` dict (if any) takes precedence over it.
        if not store_val or store_val == "PATH":
            try:
                extra_store = value.get("extra", {}).get("store")
            except Exception:
                # `extra` may not be a dict — ignore and keep the default.
                extra_store = None
            if extra_store:
                store_val = extra_store
        # If no hash, try to compute from path or use placeholder
        if not hash_val:
            path_val = value.get("path")
            if path_val:
                try:
                    from pathlib import Path
                    from SYS.utils import sha256_file
                    hash_val = sha256_file(Path(path_val))
                except Exception:
                    # Unreadable/missing file: fall back to sentinel.
                    hash_val = "unknown"
            else:
                hash_val = "unknown"
        # Extract title from filename if not provided
        title_val = value.get("title")
        if not title_val:
            path_val = value.get("path")
            if path_val:
                try:
                    from pathlib import Path
                    # Filename without extension as a human-readable title.
                    title_val = Path(path_val).stem
                except Exception:
                    pass
        # Everything not consumed explicitly is preserved in `extra`.
        extra = {k: v for k, v in value.items() if k not in known_keys}
        # Extract URL: prefer direct url field, then url list
        # NOTE(review): normalize_urls presumably returns a list of URL
        # strings for any scalar/list input — confirm in SYS.metadata.
        from SYS.metadata import normalize_urls
        url_list = normalize_urls(value.get("url"))
        url_val = url_list[0] if url_list else None
        # Multiple URLs: first one becomes canonical, full list is kept
        # in extra["url"] so nothing is lost.
        if len(url_list) > 1:
            extra["url"] = url_list
        # Extract relationships
        rels = value.get("relationships") or {}
        # Canonical tag: accept list or single string
        tag_val: list[str] = []
        if "tag" in value:
            raw_tag = value["tag"]
            if isinstance(raw_tag, list):
                tag_val = [str(t) for t in raw_tag if t is not None]
            elif isinstance(raw_tag, str):
                tag_val = [raw_tag]
        # Consolidate path: prefer explicit path key, but NOT target if it's a URL
        path_val = value.get("path")
        # Only use target as path if it's not a URL (url should stay in url field)
        if not path_val and "target" in value:
            target = value["target"]
            if target and not (
                isinstance(target, str)
                and (target.startswith("http://") or target.startswith("https://"))
            ):
                path_val = target
        # If the path value is actually a URL, move it to url_val and clear path_val
        try:
            if isinstance(path_val, str) and (
                path_val.startswith("http://") or path_val.startswith("https://")
            ):
                # Prefer existing url_val if present, otherwise move path_val into url_val
                if not url_val:
                    url_val = path_val
                path_val = None
        except Exception:
            pass
        # Extract media_kind if available
        if "media_kind" in value:
            extra["media_kind"] = value["media_kind"]
        # Provider: first non-empty among several aliases; empty string
        # collapses to None after strip().
        pipe_obj = models.PipeObject(
            hash=hash_val,
            store=store_val,
            provider=str(
                value.get("provider")
                or value.get("prov")
                or value.get("source")
                or extra.get("provider")
                or extra.get("source")
                or ""
            ).strip()
            or None,
            tag=tag_val,
            title=title_val,
            url=url_val,
            source_url=value.get("source_url"),
            duration=value.get("duration") or value.get("duration_seconds"),
            metadata=value.get("metadata") or value.get("full_metadata") or {},
            warnings=list(value.get("warnings") or []),
            path=path_val,
            relationships=rels,
            is_temp=bool(value.get("is_temp", False)),
            action=value.get("action"),
            parent_hash=value.get("parent_hash"),
            extra=extra,
        )
        return pipe_obj
    # Fallback: build from path argument or bare value
    hash_val = "unknown"
    path_val = default_path or getattr(value, "path", None)
    url_val: Optional[str] = None
    title_val = None
    # If the raw value is a string, treat it as either a URL or a file path.
    # This is important for @-selection results that are plain URL strings.
    if isinstance(value, str):
        s = value.strip()
        if s.lower().startswith(("http://", "https://")):
            url_val = s
            path_val = None
        else:
            path_val = s
    if path_val and path_val != "unknown":
        try:
            from pathlib import Path
            from SYS.utils import sha256_file
            path_obj = Path(path_val)
            hash_val = sha256_file(path_obj)
            # Extract title from filename (without extension)
            title_val = path_obj.stem
        except Exception:
            # Hashing failure leaves the "unknown" sentinel in place.
            pass
    # When coming from a raw URL string, mark it explicitly as URL.
    # Otherwise treat it as a local path.
    store_val = "URL" if url_val else "PATH"
    pipe_obj = models.PipeObject(
        hash=hash_val,
        store=store_val,
        provider=None,
        path=str(path_val) if path_val and path_val != "unknown" else None,
        title=title_val,
        url=url_val,
        source_url=url_val,
        tag=[],
        extra={},
    )
    return pipe_obj

View File

@@ -1607,7 +1607,7 @@ class PipelineExecutor:
item = selected_items[0]
url = None
try:
from cmdlet._shared import get_field
from SYS.field_access import get_field
url = get_field(item, "url")
except Exception:
@@ -2043,7 +2043,7 @@ class PipelineExecutor:
return False, None
debug(f"@N: _maybe_run_class_selector returned False, continuing")
from cmdlet._shared import coerce_to_pipe_object
from SYS.pipe_object import coerce_to_pipe_object
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
piped_result = (
@@ -2467,7 +2467,7 @@ class PipelineExecutor:
last_items = None
if last_items:
from cmdlet._shared import coerce_to_pipe_object
from SYS.pipe_object import coerce_to_pipe_object
try:
pipe_items = [
@@ -2706,7 +2706,7 @@ class PipelineExecutor:
if (next_cmd in {"delete-tag",
"delete_tag"} and len(filtered) > 1
and all(_is_tag_row(x) for x in filtered)):
from cmdlet._shared import get_field
from SYS.field_access import get_field
tags: List[str] = []
first_hash = None
@@ -2739,7 +2739,7 @@ class PipelineExecutor:
piped_result = grouped
continue
from cmdlet._shared import coerce_to_pipe_object
from SYS.pipe_object import coerce_to_pipe_object
filtered_pipe_objs = [
coerce_to_pipe_object(item) for item in filtered

View File

@@ -8,7 +8,7 @@ import logging
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable, Tuple
from datetime import datetime
from threading import Thread, Lock
from threading import Thread, Lock, Event
import time
from SYS.logger import log
@@ -273,6 +273,7 @@ class WorkerManager:
self.refresh_callbacks: List[Callable] = []
self.refresh_thread: Optional[Thread] = None
self._stop_refresh = False
self._refresh_stop_event = Event()
self._lock = Lock()
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {}
self._worker_last_step: Dict[str, str] = {}
@@ -658,6 +659,7 @@ class WorkerManager:
f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval"
)
self._stop_refresh = False
self._refresh_stop_event.clear()
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
self.refresh_thread.start()
@@ -665,6 +667,7 @@ class WorkerManager:
"""Stop the auto-refresh thread."""
logger.info("[WorkerManager] Stopping auto-refresh")
self._stop_refresh = True
self._refresh_stop_event.set()
if self.refresh_thread:
self.refresh_thread.join(timeout=5)
self.refresh_thread = None
@@ -679,7 +682,8 @@ class WorkerManager:
"""Main auto-refresh loop that periodically queries and notifies."""
try:
while not self._stop_refresh:
time.sleep(self.auto_refresh_interval)
if self._refresh_stop_event.wait(self.auto_refresh_interval):
break
# Check if there are active workers
active = self.get_active_workers()