nose
2025-12-13 00:18:30 -08:00
parent 85750247cc
commit 30eb628aa3
18 changed files with 1056 additions and 407 deletions

View File

@@ -34,6 +34,28 @@ def _extract_title_tag(tags: List[str]) -> Optional[str]:
return None
def _extract_item_tags(res: Any) -> List[str]:
if isinstance(res, models.PipeObject):
raw = getattr(res, "tag", None)
elif isinstance(res, dict):
raw = res.get("tag")
else:
raw = None
if isinstance(raw, list):
return [str(t) for t in raw if t is not None]
if isinstance(raw, str) and raw.strip():
return [raw]
return []
def _set_item_tags(res: Any, tags: List[str]) -> None:
if isinstance(res, models.PipeObject):
res.tag = tags
elif isinstance(res, dict):
res["tag"] = tags
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
@@ -332,6 +354,8 @@ class Add_Tag(Cmdlet):
store_override = parsed.get("store")
store_registry = Store(config)
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
@@ -350,8 +374,90 @@ class Add_Tag(Cmdlet):
continue
if not store_name:
log("[add_tag] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
store_name = None
# If the item isn't in a configured store backend yet (e.g., store=PATH) but has a local file,
# treat add-tag as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tag_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tag_list if isinstance(t, str)}
item_tag_to_add = list(tag_to_add)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tag_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
removed_namespace_tag: list[str] = []
for new_tag in item_tag_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
actual_tag_to_add = [
t
for t in item_tag_to_add
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tag_list = [t for t in existing_tag_list if t not in removed_namespace_tag]
updated_tag_list.extend(actual_tag_to_add)
_set_item_tags(res, updated_tag_list)
final_title = _extract_title_tag(updated_tag_list)
_apply_title_to_result(res, final_title)
total_added += len(actual_tag_to_add)
total_modified += 1 if (removed_namespace_tag or actual_tag_to_add) else 0
ctx.emit(res)
continue
except Exception:
pass
if local_mode_requested:
log("[add_tag] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
@@ -371,7 +477,7 @@ class Add_Tag(Cmdlet):
continue
try:
backend = Store(config)[str(store_name)]
backend = store_registry[str(store_name)]
except Exception as exc:
log(f"[add_tag] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
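
Below is a minimal standalone sketch of the -duplicate handling added in this hunk: the argument is accepted as either source:target1,target2 or source,target1,target2, and values from the source namespace are copied into each target namespace. The helper names are hypothetical and only illustrate the parsing/copy logic from the diff.

from typing import List, Tuple

def parse_duplicate_arg(duplicate_arg: str) -> Tuple[str, List[str]]:
    """Accept 'source:target1,target2' or 'source,target1,target2'."""
    parts = duplicate_arg.split(':')
    if len(parts) > 1:
        source_ns = parts[0]
        targets = [t.strip() for t in parts[1].split(',') if t.strip()]
    else:
        parts2 = duplicate_arg.split(',')
        source_ns = parts2[0] if len(parts2) > 1 else ""
        targets = [t.strip() for t in parts2[1:] if t.strip()]
    return source_ns, targets

def copy_namespace_tags(existing: List[str], duplicate_arg: str) -> List[str]:
    """Return new tags copying source-namespace values into each target namespace."""
    source_ns, targets = parse_duplicate_arg(duplicate_arg)
    if not source_ns or not targets:
        return []
    existing_lower = {t.lower() for t in existing}
    prefix = source_ns.lower() + ":"
    added: List[str] = []
    for tag in existing:
        if not tag.lower().startswith(prefix):
            continue
        value = tag.split(":", 1)[1]
        for target_ns in targets:
            new_tag = f"{target_ns}:{value}"
            if new_tag.lower() not in existing_lower:
                added.append(new_tag)
    return added

# copy_namespace_tags(["artist:Foo"], "artist:creator,author")
# -> ["creator:Foo", "author:Foo"]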

View File

@@ -35,6 +35,28 @@ def _extract_title_tag(tags: List[str]) -> Optional[str]:
return None
def _extract_item_tags(res: Any) -> List[str]:
if isinstance(res, models.PipeObject):
raw = getattr(res, "tag", None)
elif isinstance(res, dict):
raw = res.get("tag")
else:
raw = None
if isinstance(raw, list):
return [str(t) for t in raw if t is not None]
if isinstance(raw, str) and raw.strip():
return [raw]
return []
def _set_item_tags(res: Any, tags: List[str]) -> None:
if isinstance(res, models.PipeObject):
res.tag = tags
elif isinstance(res, dict):
res["tag"] = tags
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
@@ -304,6 +326,8 @@ class Add_Tag(Cmdlet):
store_override = parsed.get("store")
store_registry = Store(config)
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
@@ -322,8 +346,90 @@ class Add_Tag(Cmdlet):
continue
if not store_name:
log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
store_name = None
# If the item isn't in a configured store backend yet (e.g., store=PATH) but has a local file,
# treat add-tags as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tags_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tags_list if isinstance(t, str)}
item_tags_to_add = list(tags_to_add)
item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last")
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tags_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tags_to_add.append(new_tag)
removed_namespace_tags: list[str] = []
for new_tag in item_tags_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tags_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tags.append(t)
removed_namespace_tags = sorted({t for t in removed_namespace_tags})
actual_tags_to_add = [
t
for t in item_tags_to_add
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tags_list = [t for t in existing_tags_list if t not in removed_namespace_tags]
updated_tags_list.extend(actual_tags_to_add)
_set_item_tags(res, updated_tags_list)
final_title = _extract_title_tag(updated_tags_list)
_apply_title_to_result(res, final_title)
total_added += len(actual_tags_to_add)
total_modified += 1 if (removed_namespace_tags or actual_tags_to_add) else 0
ctx.emit(res)
continue
except Exception:
pass
if local_mode_requested:
log("[add_tags] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tags] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
@@ -343,7 +449,7 @@ class Add_Tag(Cmdlet):
continue
try:
backend = Store(config)[str(store_name)]
backend = store_registry[str(store_name)]
except Exception as exc:
log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
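
A small sketch of the namespace-superseding merge used by both tag cmdlets above: adding a namespaced tag (e.g. title:New) drops any existing tag in the same namespace before the genuinely new tags are appended. The function name is illustrative and the logic is simplified from the diff.

from typing import List

def merge_namespaced_tags(existing: List[str], to_add: List[str]) -> List[str]:
    """Add tags, letting an incoming namespaced tag supersede others in its namespace."""
    existing_lower = {t.lower() for t in existing}
    removed: set[str] = set()
    for new_tag in to_add:
        if ":" not in new_tag:
            continue
        ns = new_tag.split(":", 1)[0].strip()
        if not ns:
            continue
        ns_prefix = ns.lower() + ":"
        for t in existing:
            if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
                removed.add(t)
    kept = [t for t in existing if t not in removed]
    actually_added = [t for t in to_add if t.lower() not in existing_lower]
    return kept + actually_added

# merge_namespaced_tags(["title:Old", "genre:rock"], ["title:New"])
# -> ["genre:rock", "title:New"]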

View File

@@ -267,15 +267,13 @@ class Download_File(Cmdlet):
log(f"Invalid storage location: {e}", file=sys.stderr)
return None
# Priority 2: Config outfile
if config and config.get("outfile"):
try:
return Path(config["outfile"]).expanduser()
except Exception:
pass
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Downloads"
# Priority 3: Default (home/Downloads)
final_output_dir = Path.home() / "Downloads"
debug(f"Using default directory: {final_output_dir}")
# Ensure directory exists
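
A sketch of the output-directory fallback this hunk introduces: try the config-driven resolve_output_dir first and fall back to a home-directory default if that fails (~/Downloads here; the Download_Media cmdlet later in this commit falls back to ~/Videos). The wrapper function below is hypothetical.

from pathlib import Path
from typing import Any, Dict, Optional

def pick_output_dir(config: Optional[Dict[str, Any]]) -> Path:
    """Resolve the download directory: config first, home default as last resort."""
    try:
        from config import resolve_output_dir  # project-local module, assumed importable
        return Path(resolve_output_dir(config))
    except Exception:
        # Config missing or unresolvable: same default as the cmdlet's fallback branch.
        return Path.home() / "Downloads"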

View File

@@ -338,6 +338,30 @@ def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dic
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
def _resolve_entries_and_paths(info: Dict[str, Any], output_dir: Path) -> List[tuple[Dict[str, Any], Path]]:
resolved: List[tuple[Dict[str, Any], Path]] = []
seen: set[str] = set()
for entry in _iter_download_entries(info):
chosen: Optional[Path] = None
for candidate in _candidate_paths(entry, output_dir):
if candidate.is_file():
chosen = candidate
break
if not candidate.is_absolute():
maybe = output_dir / candidate
if maybe.is_file():
chosen = maybe
break
if chosen is None:
continue
key = str(chosen.resolve())
if key in seen:
continue
seen.add(key)
resolved.append((entry, chosen))
return resolved
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
for payload in [info] + info.get("entries", []):
if not isinstance(payload, dict):
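
A simplified sketch of the dedup step inside _resolve_entries_and_paths: each chosen file is keyed by its resolved absolute path, so playlist entries that point at the same file are only returned once. The helper below is illustrative, not part of the commit.

from pathlib import Path
from typing import Iterable, List

def dedup_by_resolved_path(paths: Iterable[Path]) -> List[Path]:
    """Keep the first occurrence of each file, keyed by its resolved absolute path."""
    seen: set[str] = set()
    unique: List[Path] = []
    for p in paths:
        key = str(p.resolve())
        if key in seen:
            continue
        seen.add(key)
        unique.append(p)
    return unique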
@@ -679,7 +703,7 @@ def download_media(
opts: DownloadOptions,
*,
debug_logger: Optional[DebugLogger] = None,
) -> DownloadMediaResult:
) -> Any:
"""Download media from URL using yt-dlp or direct HTTP download.
Args:
@@ -935,6 +959,48 @@ def download_media(
},
)
# Playlist/album handling: resolve ALL downloaded entries and return multiple results.
# The cmdlet will emit one PipeObject per downloaded file.
if info_dict.get("entries") and not opts.no_playlist:
resolved = _resolve_entries_and_paths(info_dict, opts.output_dir)
if resolved:
results: List[DownloadMediaResult] = []
for entry, media_path in resolved:
hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
if not hash_value:
try:
hash_value = sha256_file(media_path)
except OSError:
hash_value = None
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(entry)
except Exception as e:
log(f"Error extracting tags: {e}", file=sys.stderr)
source_url = (
entry.get("webpage_url")
or entry.get("original_url")
or entry.get("url")
or opts.url
)
results.append(
DownloadMediaResult(
path=media_path,
info=entry,
tag=tags,
source_url=source_url,
hash_value=hash_value,
)
)
if not opts.quiet:
debug(f"✓ Downloaded playlist items: {len(results)}")
return results
try:
entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
except FileNotFoundError as exc:
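
A sketch of the per-entry hash fallback used when building playlist results in the hunk above: prefer a hash reported for the entry, then one reported at the playlist level, then hash the file on disk. sha256_file is the project's helper; a hashlib-based stand-in is shown here for illustration.

import hashlib
from pathlib import Path
from typing import Optional

def sha256_file(path: Path) -> str:
    """Stand-in for the project's sha256_file helper."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

def resolve_hash(entry_hash: Optional[str], playlist_hash: Optional[str],
                 media_path: Path) -> Optional[str]:
    """Mirror the fallback chain: entry hash, playlist hash, then hash the file."""
    hash_value = entry_hash or playlist_hash
    if not hash_value:
        try:
            hash_value = sha256_file(media_path)
        except OSError:
            hash_value = None
    return hash_value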
@@ -1009,7 +1075,7 @@ def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) ->
timeout_seconds: Max seconds to wait (default 300s = 5 min)
Returns:
DownloadMediaResult
DownloadMediaResult or List[DownloadMediaResult]
Raises:
DownloadError: If timeout exceeded
@@ -1333,16 +1399,20 @@ class Download_Media(Cmdlet):
debug(f"Starting download with 5-minute timeout...")
result_obj = _download_with_timeout(opts, timeout_seconds=300)
debug(f"Download completed, building pipe object...")
pipe_obj_dict = self._build_pipe_object(result_obj, url, opts)
debug(f"Emitting result to pipeline...")
pipeline_context.emit(pipe_obj_dict)
# Automatically register url with local library
if pipe_obj_dict.get("url"):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
downloaded_count += 1
# Emit one PipeObject per downloaded file (playlists/albums return a list)
results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
debug(f"Emitting {len(results_to_emit)} result(s) to pipeline...")
for downloaded in results_to_emit:
pipe_obj_dict = self._build_pipe_object(downloaded, url, opts)
pipeline_context.emit(pipe_obj_dict)
# Automatically register url with local library
if pipe_obj_dict.get("url"):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
downloaded_count += len(results_to_emit)
debug("✓ Downloaded and emitted")
except DownloadError as e:
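
A sketch of how the cmdlet now consumes download_media's return value, given that playlists yield a list: a single result is wrapped in a one-element list so the same emit loop handles both shapes. The emit callback stands in for pipeline_context.emit.

from typing import Callable, List, Union

def emit_results(result_obj: Union[object, List[object]],
                 emit: Callable[[object], None]) -> int:
    """Emit one pipeline object per downloaded file; return how many were emitted."""
    results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
    for downloaded in results_to_emit:
        emit(downloaded)
    return len(results_to_emit)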
@@ -1373,18 +1443,15 @@ class Download_Media(Cmdlet):
log(f"Invalid storage location: {e}", file=sys.stderr)
return None
# Priority 2: Config outfile
if config and config.get("outfile"):
try:
return Path(config["outfile"]).expanduser()
except Exception:
pass
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Videos"
# Priority 3: Default (home/Videos)
final_output_dir = Path.home() / "Videos"
debug(f"Using default directory: {final_output_dir}")
# Ensure directory exists
try:
final_output_dir.mkdir(parents=True, exist_ok=True)
except Exception as e:

View File

@@ -8,6 +8,9 @@ import sys
from SYS.logger import log
import subprocess as _subprocess
import shutil as _shutil
import re as _re
from config import resolve_output_dir
from ._shared import (
Cmdlet,
@@ -34,9 +37,7 @@ except ImportError:
try:
from metadata import (
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
@@ -105,8 +106,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
source_hashes: List[str] = []
source_url: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
source_relationships: List[str] = [] # NEW: collect relationships from source files
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
@@ -191,7 +190,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
output_path = output_override
else:
first_file = source_files[0]
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
try:
base_dir = resolve_output_dir(config)
except Exception:
base_dir = first_file.parent
output_path = Path(base_dir) / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -217,61 +220,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
merged_tags: List[str] = [f"title:{output_path.stem}"]
# Create .tag sidecar file for the merged output using unified API
tags_path = output_path.with_suffix(output_path.suffix + '.tag')
try:
# Merge tags from source files using metadata API
if source_tags and HAS_METADATA_API:
# Use dedup function to normalize and deduplicate
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
# Fallback: simple deduplication if metadata API unavailable
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Write merged tags to sidecar file
if HAS_METADATA_API and write_tags_to_file:
# Use unified API for file writing
source_hashes_list = source_hashes if source_hashes else None
source_url_list = source_url if source_url else None
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_url_list)
else:
# Fallback: manual file writing
tags_lines = []
# Add hash first (if available)
if source_hashes:
tags_lines.append(f"hash:{source_hashes[0]}")
# Add regular tags
tags_lines.extend(merged_tags)
# Add known url
if source_url:
for url in source_url:
tags_lines.append(f"url:{url}")
# Add relationships (if available)
if source_relationships:
for rel in source_relationships:
tags_lines.append(f"relationship:{rel}")
with open(tags_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(tags_lines) + '\n')
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
# Also create .metadata file using centralized function
try:
if HAS_METADATA_API and write_metadata:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
# Merge tags from source files into the emitted PipeObject only.
# Sidecar files (.tag/.metadata) are written only during explicit filesystem export (add-file to a path).
if source_tags and HAS_METADATA_API:
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Emit a PipeObject-compatible dict so the merged file can be piped to next command
try:
@@ -287,6 +243,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag=merged_tags,
url=source_url,
media_kind=file_kind,
store="PATH",
)
# Clear previous results to ensure only the merged file is passed down
ctx.clear_last_result()
@@ -424,6 +381,33 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
current_time_ms += int(duration_sec * 1000)
# If these came from a playlist/album, titles often look like:
# "Book Name - Chapter"
# If *all* titles share the same "Book Name" prefix, strip it.
if len(chapters) >= 2:
split_re = _re.compile(r"^(?P<prefix>.+?)\s+-\s+(?P<chapter>.+)$")
prefixes: List[str] = []
stripped_titles: List[str] = []
all_match = True
for ch in chapters:
raw_title = str(ch.get('title') or '').strip()
m = split_re.match(raw_title)
if not m:
all_match = False
break
prefix = m.group('prefix').strip()
chapter_title = m.group('chapter').strip()
if not prefix or not chapter_title:
all_match = False
break
prefixes.append(prefix.casefold())
stripped_titles.append(chapter_title)
if all_match and prefixes and len(set(prefixes)) == 1:
for idx, ch in enumerate(chapters):
ch['title'] = stripped_titles[idx]
logger.info(f"[merge-file] Stripped common title prefix for chapters: {prefixes[0]}")
# Step 2: Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
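
A standalone sketch of the chapter-title prefix stripping added above: when every title matches "Book Name - Chapter" with the same book name, only the chapter part is kept; otherwise the titles are returned unchanged. The function name is illustrative.

import re
from typing import List

def strip_common_title_prefix(titles: List[str]) -> List[str]:
    """Strip a shared 'Book Name - ' prefix from chapter titles, if all share it."""
    if len(titles) < 2:
        return titles
    split_re = re.compile(r"^(?P<prefix>.+?)\s+-\s+(?P<chapter>.+)$")
    prefixes: List[str] = []
    stripped: List[str] = []
    for raw in titles:
        m = split_re.match(raw.strip())
        if not m:
            return titles
        prefix = m.group("prefix").strip()
        chapter = m.group("chapter").strip()
        if not prefix or not chapter:
            return titles
        prefixes.append(prefix.casefold())
        stripped.append(chapter)
    return stripped if len(set(prefixes)) == 1 else titles

# strip_common_title_prefix(["My Book - Chapter 1", "My Book - Chapter 2"])
# -> ["Chapter 1", "Chapter 2"]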