Files
Medios-Macina/SYS/pipe_object.py

266 lines
8.5 KiB
Python
Raw Normal View History

2026-02-11 18:16:07 -08:00
from __future__ import annotations
from typing import Any, Dict, Optional
from SYS import models
def coerce_to_pipe_object(
    value: Any,
    default_path: Optional[str] = None,
) -> models.PipeObject:
    """Normalize any incoming result to a PipeObject for single-source-of-truth state.

    Uses hash+store canonical pattern.

    Resolution order:
      1. A ``models.PipeObject`` is returned unchanged.
      2. An object exposing ``to_dict()`` is converted through it. Any other
         non-dict, non-string object has its known, non-callable attributes
         copied into a dict (best effort).
      3. A dict is mapped field by field onto a new PipeObject; keys that are
         not canonical PipeObject fields are preserved in ``extra``.
      4. Anything else (notably a plain string) yields a minimal PipeObject
         built from a URL or a file path.

    Args:
        value: The result to normalize (PipeObject, dict, result-table item,
            URL/path string, or any other object).
        default_path: Path used only by the fallback branch (step 4). It takes
            precedence over a ``path`` attribute on ``value`` but is replaced
            when ``value`` is itself a non-empty string. It is not consulted
            when ``value`` resolves to a dict.

    Returns:
        A ``models.PipeObject``. ``hash`` is the placeholder ``"unknown"``
        when no hash is supplied and none can be computed from the path.
    """

    def _is_http_url(text: Any) -> bool:
        # Scheme match is case-insensitive so "HTTP://..." is not mistaken
        # for a local path in either branch.
        return isinstance(text, str) and text.lower().startswith(
            ("http://", "https://")
        )

    # Debug: Print ResultItem details if coming from search_file.py.
    # Deliberately best-effort: a logging failure must never break coercion.
    try:
        from SYS.logger import is_debug_enabled, debug

        if is_debug_enabled() and value.__class__.__name__ == "ResultItem":
            debug("[ResultItem -> PipeObject conversion]")
            for field in (
                "title",
                "target",
                "hash",
                "media_kind",
                "tag",
                "tag_summary",
                "size_bytes",
                "duration_seconds",
                "relationships",
                "url",
            ):
                debug(f" {field}={getattr(value, field, None)}")
            full_meta = getattr(value, "full_metadata", None)
            meta_keys = list(full_meta.keys()) if full_meta else []
            debug(f" full_metadata keys={meta_keys}")
    except Exception:
        pass

    if isinstance(value, models.PipeObject):
        return value

    # Canonical PipeObject fields; every other dict key is carried in `extra`.
    known_keys = {
        "hash",
        "store",
        "tag",
        "title",
        "url",
        "source_url",
        "duration",
        "metadata",
        "warnings",
        "path",
        "relationships",
        "is_temp",
        "action",
        "parent_hash",
    }

    # Convert common object-like results into a dict so we can preserve fields like
    # hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
    #
    # Priority:
    #   1) explicit to_dict()
    #   2) best-effort attribute extraction for known PipeObject-ish fields
    #
    # Strings are excluded from (2): `str` exposes a `title` *method*, so
    # attribute extraction would turn every string into {"title": <method>}
    # and the URL/path string handling below would never run. Callable
    # attributes are skipped for the same reason (e.g. `bytes.title`).
    if hasattr(value, "to_dict"):
        value = value.to_dict()
    elif not isinstance(value, (dict, str)):
        try:
            obj_map: Dict[str, Any] = {}
            for k in (
                "hash",
                "store",
                "provider",
                "prov",
                "tag",
                "title",
                "url",
                "source_url",
                "duration",
                "duration_seconds",
                "metadata",
                "full_metadata",
                "warnings",
                "path",
                "target",
                "relationships",
                "is_temp",
                "action",
                "parent_hash",
                "extra",
                "media_kind",
            ):
                if not hasattr(value, k):
                    continue
                attr = getattr(value, k)
                if callable(attr):
                    continue
                obj_map[k] = attr
            if obj_map:
                value = obj_map
        except Exception:
            # A misbehaving property leaves `value` untouched; the fallback
            # branch at the bottom then handles it.
            pass

    if isinstance(value, dict):
        raw_path = value.get("path")

        # Extract hash and store (canonical identifiers)
        hash_val = value.get("hash")
        store_val = value.get("store") or "PATH"
        if store_val == "PATH":
            # No explicit store: fall back to one nested under "extra".
            try:
                extra_store = value.get("extra", {}).get("store")
            except Exception:
                extra_store = None
            if extra_store:
                store_val = extra_store

        # If no hash, try to compute from path or use placeholder
        if not hash_val:
            hash_val = "unknown"
            if raw_path:
                try:
                    from pathlib import Path
                    from SYS.utils import sha256_file

                    hash_val = sha256_file(Path(raw_path))
                except Exception:
                    hash_val = "unknown"

        # Extract title from filename if not provided
        title_val = value.get("title")
        if not title_val and raw_path:
            try:
                from pathlib import Path

                title_val = Path(raw_path).stem
            except Exception:
                pass

        extra = {k: v for k, v in value.items() if k not in known_keys}

        # Extract URL: the first normalized URL becomes the canonical one;
        # when there are several, the full list is kept in extra["url"].
        from SYS.metadata import normalize_urls

        url_list = normalize_urls(value.get("url"))
        url_val = url_list[0] if url_list else None
        if len(url_list) > 1:
            extra["url"] = url_list

        # Extract relationships
        rels = value.get("relationships") or {}

        # Canonical tag: accept list or single string
        tag_val: list[str] = []
        if "tag" in value:
            raw_tag = value["tag"]
            if isinstance(raw_tag, list):
                tag_val = [str(t) for t in raw_tag if t is not None]
            elif isinstance(raw_tag, str):
                tag_val = [raw_tag]

        # Consolidate path: prefer explicit path key, but NOT target if it's a URL
        # (url should stay in the url field).
        path_val = raw_path
        if not path_val and "target" in value:
            target = value["target"]
            if target and not _is_http_url(target):
                path_val = target

        # If the path value is actually a URL, move it to url_val (unless a
        # url is already present) and clear path_val.
        if _is_http_url(path_val):
            if not url_val:
                url_val = path_val
            path_val = None

        # Extract media_kind if available
        if "media_kind" in value:
            extra["media_kind"] = value["media_kind"]

        provider_text = str(
            value.get("provider")
            or value.get("prov")
            or value.get("source")
            or extra.get("provider")
            or extra.get("source")
            or ""
        ).strip()

        pipe_obj = models.PipeObject(
            hash=hash_val,
            store=store_val,
            provider=provider_text or None,
            tag=tag_val,
            title=title_val,
            url=url_val,
            source_url=value.get("source_url"),
            duration=value.get("duration") or value.get("duration_seconds"),
            metadata=value.get("metadata") or value.get("full_metadata") or {},
            warnings=list(value.get("warnings") or []),
            path=path_val,
            relationships=rels,
            is_temp=bool(value.get("is_temp", False)),
            action=value.get("action"),
            parent_hash=value.get("parent_hash"),
            extra=extra,
        )
        return pipe_obj

    # Fallback: build from path argument or bare value
    hash_val = "unknown"
    path_val = default_path or getattr(value, "path", None)
    url_val = None
    title_val = None

    # If the raw value is a string, treat it as either a URL or a file path.
    # This is important for @-selection results that are plain URL strings.
    if isinstance(value, str):
        s = value.strip()
        if _is_http_url(s):
            url_val = s
            path_val = None
        elif s:
            # An empty/whitespace-only string carries no path information,
            # so it must not override default_path.
            path_val = s

    if path_val and path_val != "unknown":
        try:
            from pathlib import Path
            from SYS.utils import sha256_file

            path_obj = Path(path_val)
            hash_val = sha256_file(path_obj)
            # Extract title from filename (without extension)
            title_val = path_obj.stem
        except Exception:
            # Unreadable or missing file: keep the "unknown" hash placeholder.
            pass

    # When coming from a raw URL string, mark it explicitly as URL.
    # Otherwise treat it as a local path.
    store_val = "URL" if url_val else "PATH"

    pipe_obj = models.PipeObject(
        hash=hash_val,
        store=store_val,
        provider=None,
        path=str(path_val) if path_val and path_val != "unknown" else None,
        title=title_val,
        url=url_val,
        source_url=url_val,
        tag=[],
        extra={},
    )
    return pipe_obj