from __future__ import annotations from typing import Any, Dict, Optional from SYS import models def coerce_to_pipe_object( value: Any, default_path: Optional[str] = None, ) -> models.PipeObject: """Normalize any incoming result to a PipeObject for single-source-of-truth state. Uses hash+store canonical pattern. """ # Debug: Print ResultItem details if coming from search_file.py try: from SYS.logger import is_debug_enabled, debug if ( is_debug_enabled() and hasattr(value, "__class__") and value.__class__.__name__ == "ResultItem" ): debug("[ResultItem -> PipeObject conversion]") debug(f" title={getattr(value, 'title', None)}") debug(f" target={getattr(value, 'target', None)}") debug(f" hash={getattr(value, 'hash', None)}") debug(f" media_kind={getattr(value, 'media_kind', None)}") debug(f" tag={getattr(value, 'tag', None)}") debug(f" tag_summary={getattr(value, 'tag_summary', None)}") debug(f" size_bytes={getattr(value, 'size_bytes', None)}") debug(f" duration_seconds={getattr(value, 'duration_seconds', None)}") debug(f" relationships={getattr(value, 'relationships', None)}") debug(f" url={getattr(value, 'url', None)}") debug( f" full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}" ) except Exception: pass if isinstance(value, models.PipeObject): return value known_keys = { "hash", "store", "tag", "title", "url", "source_url", "duration", "metadata", "warnings", "path", "relationships", "is_temp", "action", "parent_hash", } # Convert common object-like results into a dict so we can preserve fields like # hash/store/url when they come from result tables (e.g., get-url emits UrlItem). # # Priority: # 1) explicit to_dict() # 2) best-effort attribute extraction for known PipeObject-ish fields if hasattr(value, "to_dict"): value = value.to_dict() elif not isinstance(value, dict): try: obj_map: Dict[str, Any] = {} for k in ( "hash", "store", "provider", "prov", "tag", "title", "url", "source_url", "duration", "duration_seconds", "metadata", "full_metadata", "warnings", "path", "target", "relationships", "is_temp", "action", "parent_hash", "extra", "media_kind", ): if hasattr(value, k): obj_map[k] = getattr(value, k) if obj_map: value = obj_map except Exception: pass if isinstance(value, dict): # Extract hash and store (canonical identifiers) hash_val = value.get("hash") store_val = value.get("store") or "PATH" if not store_val or store_val == "PATH": try: extra_store = value.get("extra", {}).get("store") except Exception: extra_store = None if extra_store: store_val = extra_store # If no hash, try to compute from path or use placeholder if not hash_val: path_val = value.get("path") if path_val: try: from pathlib import Path from SYS.utils import sha256_file hash_val = sha256_file(Path(path_val)) except Exception: hash_val = "unknown" else: hash_val = "unknown" # Extract title from filename if not provided title_val = value.get("title") if not title_val: path_val = value.get("path") if path_val: try: from pathlib import Path title_val = Path(path_val).stem except Exception: pass extra = {k: v for k, v in value.items() if k not in known_keys} # Extract URL: prefer direct url field, then url list from SYS.metadata import normalize_urls url_list = normalize_urls(value.get("url")) url_val = url_list[0] if url_list else None if len(url_list) > 1: extra["url"] = url_list # Extract relationships rels = value.get("relationships") or {} # Canonical tag: accept list or single string tag_val: list[str] = [] if "tag" in value: raw_tag = value["tag"] if isinstance(raw_tag, list): tag_val = [str(t) for t in raw_tag if t is not None] elif isinstance(raw_tag, str): tag_val = [raw_tag] # Consolidate path: prefer explicit path key, but NOT target if it's a URL path_val = value.get("path") # Only use target as path if it's not a URL (url should stay in url field) if not path_val and "target" in value: target = value["target"] if target and not ( isinstance(target, str) and (target.startswith("http://") or target.startswith("https://")) ): path_val = target # If the path value is actually a URL, move it to url_val and clear path_val try: if isinstance(path_val, str) and ( path_val.startswith("http://") or path_val.startswith("https://") ): # Prefer existing url_val if present, otherwise move path_val into url_val if not url_val: url_val = path_val path_val = None except Exception: pass # Extract media_kind if available if "media_kind" in value: extra["media_kind"] = value["media_kind"] pipe_obj = models.PipeObject( hash=hash_val, store=store_val, provider=str( value.get("provider") or value.get("prov") or value.get("source") or extra.get("provider") or extra.get("source") or "" ).strip() or None, tag=tag_val, title=title_val, url=url_val, source_url=value.get("source_url"), duration=value.get("duration") or value.get("duration_seconds"), metadata=value.get("metadata") or value.get("full_metadata") or {}, warnings=list(value.get("warnings") or []), path=path_val, relationships=rels, is_temp=bool(value.get("is_temp", False)), action=value.get("action"), parent_hash=value.get("parent_hash"), extra=extra, ) return pipe_obj # Fallback: build from path argument or bare value hash_val = "unknown" path_val = default_path or getattr(value, "path", None) url_val: Optional[str] = None title_val = None # If the raw value is a string, treat it as either a URL or a file path. # This is important for @-selection results that are plain URL strings. if isinstance(value, str): s = value.strip() if s.lower().startswith(("http://", "https://")): url_val = s path_val = None else: path_val = s if path_val and path_val != "unknown": try: from pathlib import Path from SYS.utils import sha256_file path_obj = Path(path_val) hash_val = sha256_file(path_obj) # Extract title from filename (without extension) title_val = path_obj.stem except Exception: pass # When coming from a raw URL string, mark it explicitly as URL. # Otherwise treat it as a local path. store_val = "URL" if url_val else "PATH" pipe_obj = models.PipeObject( hash=hash_val, store=store_val, provider=None, path=str(path_val) if path_val and path_val != "unknown" else None, title=title_val, url=url_val, source_url=url_val, tag=[], extra={}, ) return pipe_obj