This commit is contained in:
nose
2025-12-14 00:53:52 -08:00
parent 52a79b0086
commit a03eb0d1be
24 changed files with 2785 additions and 1868 deletions

View File

@@ -5,10 +5,9 @@ from __future__ import annotations
import json
import sys
import inspect
from collections.abc import Iterable as IterableABC
from SYS.logger import log, debug
from SYS.logger import log
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
from dataclasses import dataclass, field
@@ -690,7 +689,9 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
get_field(result, "table", "unknown") # With default
"""
# Handle lists by accessing the first element
if isinstance(obj, list) and obj:
if isinstance(obj, list):
if not obj:
return default
obj = obj[0]
if isinstance(obj, dict):
@@ -702,8 +703,9 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
return value
# For PipeObjects, also check the extra field
if hasattr(obj, 'extra') and isinstance(obj.extra, dict):
return obj.extra.get(field, default)
extra_val = getattr(obj, 'extra', None)
if isinstance(extra_val, dict):
return extra_val.get(field, default)
return default
@@ -1118,7 +1120,7 @@ def create_pipe_object_result(
Returns:
Dict with all PipeObject fields for emission
"""
result = {
result: Dict[str, Any] = {
'source': source,
'id': identifier,
'path': file_path,
@@ -1546,14 +1548,11 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
extra = {k: v for k, v in value.items() if k not in known_keys}
# Extract URL: prefer direct url field, then url list
url_val = value.get("url")
if not url_val:
url = value.get("url") or value.get("url") or []
if url and isinstance(url, list) and len(url) > 0:
url_val = url[0]
# Preserve url in extra if multiple url exist
if url and len(url) > 1:
extra["url"] = url
from metadata import normalize_urls
url_list = normalize_urls(value.get("url"))
url_val = url_list[0] if url_list else None
if len(url_list) > 1:
extra["url"] = url_list
# Extract relationships
rels = value.get("relationships") or {}