This commit is contained in:
nose
2025-12-19 15:20:08 -08:00
parent d3edd6420c
commit 900a37e210
13 changed files with 729 additions and 32 deletions

View File

@@ -38,6 +38,7 @@ try:
from metadata import (
read_tags_from_file,
dedup_tags_by_namespace,
merge_multiple_tag_lists,
)
HAS_METADATA_API = True
except ImportError:
@@ -58,6 +59,17 @@ except ImportError:
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
    """Fallback stand-in that hands *tags* back untouched.

    This definition sits in the ``except ImportError`` branch of the
    ``metadata`` import, so it performs no de-duplication at all: the very
    same list object that was passed in is returned.
    """
    return tags
def merge_multiple_tag_lists(sources: List[List[str]], strategy: str = 'first') -> List[str]:
    """Flatten several tag lists into one, dropping exact duplicates.

    Fallback used when the ``metadata`` import fails. Every entry is
    converted with ``str()``; entries whose string form is empty are
    discarded, and the first occurrence of each distinct string wins, so the
    result keeps encounter order across *sources*. A falsy source (``None``
    or an empty list) contributes nothing.

    Args:
        sources: Tag lists to combine, in priority order.
        strategy: Accepted for signature compatibility; this fallback does
            not consult it.

    Returns:
        A new list of unique, non-empty tag strings.
    """
    stringified = (str(tag) for tag_list in sources for tag in (tag_list or []))
    # dict keys preserve insertion order, which gives first-seen de-duplication.
    return list(dict.fromkeys(text for text in stringified if text))
def write_metadata(*_args: Any, **_kwargs: Any) -> None:
    """Fallback no-op: accept any arguments, do nothing, and return ``None``.

    Defined in the ``except ImportError`` branch of the ``metadata`` import
    so that call sites keep working when that module is unavailable.
    """
@@ -105,7 +117,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
source_files: List[Path] = []
source_hashes: List[str] = []
source_url: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
source_tags: List[str] = [] # tags read from .tag sidecars
source_item_tag_lists: List[List[str]] = [] # tags carried in-memory on piped items
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
@@ -119,6 +132,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if target_path and target_path.exists():
source_files.append(target_path)
# Track tags carried in the piped items (e.g. add-tag stage) so they survive merge.
try:
raw_tags = get_field(item, 'tag', [])
if isinstance(raw_tags, str) and raw_tags.strip():
source_item_tag_lists.append([raw_tags.strip()])
elif isinstance(raw_tags, list):
source_item_tag_lists.append([str(t) for t in raw_tags if t is not None and str(t).strip()])
except Exception:
pass
# Track tags from the .tag sidecar for this source (if present)
tags_file = target_path.with_suffix(target_path.suffix + '.tag')
if tags_file.exists() and HAS_METADATA_API:
@@ -217,17 +240,46 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
merged_tags: List[str] = [f"title:{output_path.stem}"]
# Merge tags from source files into the emitted PipeObject only.
# Sidecar files (.tag/.metadata) are written only during explicit filesystem export (add-file to a path).
if source_tags and HAS_METADATA_API:
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
def _title_value_from_tags(tags: List[str]) -> Optional[str]:
for t in tags:
try:
s = str(t)
except Exception:
continue
if s.lower().startswith('title:'):
val = s.split(':', 1)[1].strip()
return val or None
return None
# Determine best title:
# - prefer a title tag shared across all inputs (typical when user did add-tag title:...)
# - otherwise fall back to first title tag encountered
shared_title: Optional[str] = None
try:
if source_item_tag_lists:
per_item_titles: List[Optional[str]] = [_title_value_from_tags(tl) for tl in source_item_tag_lists]
non_empty = [t for t in per_item_titles if t]
if non_empty:
candidate = non_empty[0]
if candidate and all((t == candidate) for t in non_empty):
shared_title = candidate
else:
shared_title = non_empty[0]
except Exception:
shared_title = None
merged_title = shared_title or output_path.stem
# Merge tags from:
# - in-memory PipeObject tags (from add-tag etc)
# - .tag sidecars (if present)
# Keep all unique plain tags, and keep the first value for namespaced tags.
merged_tags = merge_multiple_tag_lists(source_item_tag_lists + ([source_tags] if source_tags else []), strategy='combine')
# Ensure we always have a title tag (and make sure it's the chosen title)
merged_tags = [t for t in merged_tags if not str(t).lower().startswith('title:')]
merged_tags.insert(0, f"title:{merged_title}")
# Emit a PipeObject-compatible dict so the merged file can be piped to next command
try:
@@ -238,7 +290,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
identifier=output_path.name,
file_path=str(output_path),
cmdlet_name="merge-file",
title=output_path.stem,
title=merged_title,
hash_value=merged_hash,
tag=merged_tags,
url=source_url,