nose
2025-12-13 00:18:30 -08:00
parent 85750247cc
commit 30eb628aa3
18 changed files with 1056 additions and 407 deletions

View File

@@ -34,6 +34,28 @@ def _extract_title_tag(tags: List[str]) -> Optional[str]:
return None
def _extract_item_tags(res: Any) -> List[str]:
if isinstance(res, models.PipeObject):
raw = getattr(res, "tag", None)
elif isinstance(res, dict):
raw = res.get("tag")
else:
raw = None
if isinstance(raw, list):
return [str(t) for t in raw if t is not None]
if isinstance(raw, str) and raw.strip():
return [raw]
return []
def _set_item_tags(res: Any, tags: List[str]) -> None:
if isinstance(res, models.PipeObject):
res.tag = tags
elif isinstance(res, dict):
res["tag"] = tags
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
@@ -332,6 +354,8 @@ class Add_Tag(Cmdlet):
store_override = parsed.get("store")
store_registry = Store(config)
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
@@ -350,8 +374,90 @@ class Add_Tag(Cmdlet):
continue
if not store_name:
log("[add_tag] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
store_name = None
# If the item isn't in a configured store backend yet (e.g., store=PATH) but has a local file,
# treat add-tag as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tag_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tag_list if isinstance(t, str)}
item_tag_to_add = list(tag_to_add)
item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last")
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tag_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
removed_namespace_tag: list[str] = []
for new_tag in item_tag_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
actual_tag_to_add = [
t
for t in item_tag_to_add
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tag_list = [t for t in existing_tag_list if t not in removed_namespace_tag]
updated_tag_list.extend(actual_tag_to_add)
_set_item_tags(res, updated_tag_list)
final_title = _extract_title_tag(updated_tag_list)
_apply_title_to_result(res, final_title)
total_added += len(actual_tag_to_add)
total_modified += 1 if (removed_namespace_tag or actual_tag_to_add) else 0
ctx.emit(res)
continue
except Exception:
pass
if local_mode_requested:
log("[add_tag] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tag] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
@@ -371,7 +477,7 @@ class Add_Tag(Cmdlet):
continue
try:
backend = Store(config)[str(store_name)]
backend = store_registry[str(store_name)]
except Exception as exc:
log(f"[add_tag] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
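
Below is a minimal standalone sketch of the -duplicate handling added in this hunk: the argument is accepted as either source:target1,target2 or source,target1,target2, and values from the source namespace are copied into each target namespace. The helper names are hypothetical and only illustrate the parsing/copy logic from the diff.

from typing import List, Tuple

def parse_duplicate_arg(duplicate_arg: str) -> Tuple[str, List[str]]:
    """Accept 'source:target1,target2' or 'source,target1,target2'."""
    parts = duplicate_arg.split(':')
    if len(parts) > 1:
        source_ns = parts[0]
        targets = [t.strip() for t in parts[1].split(',') if t.strip()]
    else:
        parts2 = duplicate_arg.split(',')
        source_ns = parts2[0] if len(parts2) > 1 else ""
        targets = [t.strip() for t in parts2[1:] if t.strip()]
    return source_ns, targets

def copy_namespace_tags(existing: List[str], duplicate_arg: str) -> List[str]:
    """Return new tags copying source-namespace values into each target namespace."""
    source_ns, targets = parse_duplicate_arg(duplicate_arg)
    if not source_ns or not targets:
        return []
    existing_lower = {t.lower() for t in existing}
    prefix = source_ns.lower() + ":"
    added: List[str] = []
    for tag in existing:
        if not tag.lower().startswith(prefix):
            continue
        value = tag.split(":", 1)[1]
        for target_ns in targets:
            new_tag = f"{target_ns}:{value}"
            if new_tag.lower() not in existing_lower:
                added.append(new_tag)
    return added

# copy_namespace_tags(["artist:Foo"], "artist:creator,author")
# -> ["creator:Foo", "author:Foo"]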

View File

@@ -35,6 +35,28 @@ def _extract_title_tag(tags: List[str]) -> Optional[str]:
return None
def _extract_item_tags(res: Any) -> List[str]:
if isinstance(res, models.PipeObject):
raw = getattr(res, "tag", None)
elif isinstance(res, dict):
raw = res.get("tag")
else:
raw = None
if isinstance(raw, list):
return [str(t) for t in raw if t is not None]
if isinstance(raw, str) and raw.strip():
return [raw]
return []
def _set_item_tags(res: Any, tags: List[str]) -> None:
if isinstance(res, models.PipeObject):
res.tag = tags
elif isinstance(res, dict):
res["tag"] = tags
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
@@ -304,6 +326,8 @@ class Add_Tag(Cmdlet):
store_override = parsed.get("store")
store_registry = Store(config)
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
@@ -322,8 +346,90 @@ class Add_Tag(Cmdlet):
continue
if not store_name:
log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
store_name = None
# If the item isn't in a configured store backend yet (e.g., store=PATH) but has a local file,
# treat add-tags as a pipeline mutation (carry tags forward for add-file) instead of a store write.
if not store_override:
store_name_str = str(store_name) if store_name is not None else ""
local_mode_requested = (not store_name_str) or (store_name_str.upper() == "PATH") or (store_name_str.lower() == "local")
is_known_backend = bool(store_name_str) and store_registry.is_available(store_name_str)
if local_mode_requested and raw_path:
try:
if Path(str(raw_path)).expanduser().exists():
existing_tags_list = _extract_item_tags(res)
existing_lower = {t.lower() for t in existing_tags_list if isinstance(t, str)}
item_tags_to_add = list(tags_to_add)
item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last")
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tags_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tags_to_add.append(new_tag)
removed_namespace_tags: list[str] = []
for new_tag in item_tags_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tags_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tags.append(t)
removed_namespace_tags = sorted({t for t in removed_namespace_tags})
actual_tags_to_add = [
t
for t in item_tags_to_add
if isinstance(t, str) and t.lower() not in existing_lower
]
updated_tags_list = [t for t in existing_tags_list if t not in removed_namespace_tags]
updated_tags_list.extend(actual_tags_to_add)
_set_item_tags(res, updated_tags_list)
final_title = _extract_title_tag(updated_tags_list)
_apply_title_to_result(res, final_title)
total_added += len(actual_tags_to_add)
total_modified += 1 if (removed_namespace_tags or actual_tags_to_add) else 0
ctx.emit(res)
continue
except Exception:
pass
if local_mode_requested:
log("[add_tags] Error: Missing usable local path for tagging (or provide -store)", file=sys.stderr)
return 1
if store_name_str and not is_known_backend:
log(f"[add_tags] Error: Unknown store '{store_name_str}'. Available: {store_registry.list_backends()}", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
@@ -343,7 +449,7 @@ class Add_Tag(Cmdlet):
continue
try:
backend = Store(config)[str(store_name)]
backend = store_registry[str(store_name)]
except Exception as exc:
log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
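
A small sketch of the namespace-superseding merge used by both tag cmdlets above: adding a namespaced tag (e.g. title:New) drops any existing tag in the same namespace before the genuinely new tags are appended. The function name is illustrative and the logic is simplified from the diff.

from typing import List

def merge_namespaced_tags(existing: List[str], to_add: List[str]) -> List[str]:
    """Add tags, letting an incoming namespaced tag supersede others in its namespace."""
    existing_lower = {t.lower() for t in existing}
    removed: set[str] = set()
    for new_tag in to_add:
        if ":" not in new_tag:
            continue
        ns = new_tag.split(":", 1)[0].strip()
        if not ns:
            continue
        ns_prefix = ns.lower() + ":"
        for t in existing:
            if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
                removed.add(t)
    kept = [t for t in existing if t not in removed]
    actually_added = [t for t in to_add if t.lower() not in existing_lower]
    return kept + actually_added

# merge_namespaced_tags(["title:Old", "genre:rock"], ["title:New"])
# -> ["genre:rock", "title:New"]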

View File

@@ -267,15 +267,13 @@ class Download_File(Cmdlet):
log(f"Invalid storage location: {e}", file=sys.stderr)
return None
# Priority 2: Config outfile
if config and config.get("outfile"):
try:
return Path(config["outfile"]).expanduser()
except Exception:
pass
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Downloads"
# Priority 3: Default (home/Downloads)
final_output_dir = Path.home() / "Downloads"
debug(f"Using default directory: {final_output_dir}")
# Ensure directory exists
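
A sketch of the output-directory fallback this hunk introduces: try the config-driven resolve_output_dir first and fall back to a home-directory default if that fails (~/Downloads here; the Download_Media cmdlet later in this commit falls back to ~/Videos). The wrapper function below is hypothetical.

from pathlib import Path
from typing import Any, Dict, Optional

def pick_output_dir(config: Optional[Dict[str, Any]]) -> Path:
    """Resolve the download directory: config first, home default as last resort."""
    try:
        from config import resolve_output_dir  # project-local module, assumed importable
        return Path(resolve_output_dir(config))
    except Exception:
        # Config missing or unresolvable: same default as the cmdlet's fallback branch.
        return Path.home() / "Downloads"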

View File

@@ -338,6 +338,30 @@ def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dic
raise FileNotFoundError("yt-dlp did not report a downloaded media file")
def _resolve_entries_and_paths(info: Dict[str, Any], output_dir: Path) -> List[tuple[Dict[str, Any], Path]]:
resolved: List[tuple[Dict[str, Any], Path]] = []
seen: set[str] = set()
for entry in _iter_download_entries(info):
chosen: Optional[Path] = None
for candidate in _candidate_paths(entry, output_dir):
if candidate.is_file():
chosen = candidate
break
if not candidate.is_absolute():
maybe = output_dir / candidate
if maybe.is_file():
chosen = maybe
break
if chosen is None:
continue
key = str(chosen.resolve())
if key in seen:
continue
seen.add(key)
resolved.append((entry, chosen))
return resolved
def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
for payload in [info] + info.get("entries", []):
if not isinstance(payload, dict):
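
A simplified sketch of the dedup step inside _resolve_entries_and_paths: each chosen file is keyed by its resolved absolute path, so playlist entries that point at the same file are only returned once. The helper below is illustrative, not part of the commit.

from pathlib import Path
from typing import Iterable, List

def dedup_by_resolved_path(paths: Iterable[Path]) -> List[Path]:
    """Keep the first occurrence of each file, keyed by its resolved absolute path."""
    seen: set[str] = set()
    unique: List[Path] = []
    for p in paths:
        key = str(p.resolve())
        if key in seen:
            continue
        seen.add(key)
        unique.append(p)
    return unique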
@@ -679,7 +703,7 @@ def download_media(
opts: DownloadOptions,
*,
debug_logger: Optional[DebugLogger] = None,
) -> DownloadMediaResult:
) -> Any:
"""Download media from URL using yt-dlp or direct HTTP download.
Args:
@@ -935,6 +959,48 @@ def download_media(
},
)
# Playlist/album handling: resolve ALL downloaded entries and return multiple results.
# The cmdlet will emit one PipeObject per downloaded file.
if info_dict.get("entries") and not opts.no_playlist:
resolved = _resolve_entries_and_paths(info_dict, opts.output_dir)
if resolved:
results: List[DownloadMediaResult] = []
for entry, media_path in resolved:
hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
if not hash_value:
try:
hash_value = sha256_file(media_path)
except OSError:
hash_value = None
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags = extract_ytdlp_tags(entry)
except Exception as e:
log(f"Error extracting tags: {e}", file=sys.stderr)
source_url = (
entry.get("webpage_url")
or entry.get("original_url")
or entry.get("url")
or opts.url
)
results.append(
DownloadMediaResult(
path=media_path,
info=entry,
tag=tags,
source_url=source_url,
hash_value=hash_value,
)
)
if not opts.quiet:
debug(f"✓ Downloaded playlist items: {len(results)}")
return results
try:
entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
except FileNotFoundError as exc:
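
A sketch of the per-entry hash fallback used when building playlist results in the hunk above: prefer a hash reported for the entry, then one reported at the playlist level, then hash the file on disk. sha256_file is the project's helper; a hashlib-based stand-in is shown here for illustration.

import hashlib
from pathlib import Path
from typing import Optional

def sha256_file(path: Path) -> str:
    """Stand-in for the project's sha256_file helper."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

def resolve_hash(entry_hash: Optional[str], playlist_hash: Optional[str],
                 media_path: Path) -> Optional[str]:
    """Mirror the fallback chain: entry hash, playlist hash, then hash the file."""
    hash_value = entry_hash or playlist_hash
    if not hash_value:
        try:
            hash_value = sha256_file(media_path)
        except OSError:
            hash_value = None
    return hash_value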
@@ -1009,7 +1075,7 @@ def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) ->
timeout_seconds: Max seconds to wait (default 300s = 5 min)
Returns:
DownloadMediaResult
DownloadMediaResult or List[DownloadMediaResult]
Raises:
DownloadError: If timeout exceeded
@@ -1333,16 +1399,20 @@ class Download_Media(Cmdlet):
debug(f"Starting download with 5-minute timeout...")
result_obj = _download_with_timeout(opts, timeout_seconds=300)
debug(f"Download completed, building pipe object...")
pipe_obj_dict = self._build_pipe_object(result_obj, url, opts)
debug(f"Emitting result to pipeline...")
pipeline_context.emit(pipe_obj_dict)
# Automatically register url with local library
if pipe_obj_dict.get("url"):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
downloaded_count += 1
# Emit one PipeObject per downloaded file (playlists/albums return a list)
results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
debug(f"Emitting {len(results_to_emit)} result(s) to pipeline...")
for downloaded in results_to_emit:
pipe_obj_dict = self._build_pipe_object(downloaded, url, opts)
pipeline_context.emit(pipe_obj_dict)
# Automatically register url with local library
if pipe_obj_dict.get("url"):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
downloaded_count += len(results_to_emit)
debug("✓ Downloaded and emitted")
except DownloadError as e:
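
A sketch of how the cmdlet now consumes download_media's return value, given that playlists yield a list: a single result is wrapped in a one-element list so the same emit loop handles both shapes. The emit callback stands in for pipeline_context.emit.

from typing import Callable, List, Union

def emit_results(result_obj: Union[object, List[object]],
                 emit: Callable[[object], None]) -> int:
    """Emit one pipeline object per downloaded file; return how many were emitted."""
    results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
    for downloaded in results_to_emit:
        emit(downloaded)
    return len(results_to_emit)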
@@ -1373,18 +1443,15 @@ class Download_Media(Cmdlet):
log(f"Invalid storage location: {e}", file=sys.stderr)
return None
# Priority 2: Config outfile
if config and config.get("outfile"):
try:
return Path(config["outfile"]).expanduser()
except Exception:
pass
# Priority 2: Config default output/temp directory
try:
from config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Videos"
# Priority 3: Default (home/Videos)
final_output_dir = Path.home() / "Videos"
debug(f"Using default directory: {final_output_dir}")
# Ensure directory exists
try:
final_output_dir.mkdir(parents=True, exist_ok=True)
except Exception as e:

View File

@@ -8,6 +8,9 @@ import sys
from SYS.logger import log
import subprocess as _subprocess
import shutil as _shutil
import re as _re
from config import resolve_output_dir
from ._shared import (
Cmdlet,
@@ -34,9 +37,7 @@ except ImportError:
try:
from metadata import (
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
@@ -105,8 +106,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
source_hashes: List[str] = []
source_url: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
source_relationships: List[str] = [] # NEW: collect relationships from source files
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
@@ -191,7 +190,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
output_path = output_override
else:
first_file = source_files[0]
output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
try:
base_dir = resolve_output_dir(config)
except Exception:
base_dir = first_file.parent
output_path = Path(base_dir) / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -217,61 +220,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
merged_tags: List[str] = [f"title:{output_path.stem}"]
# Create .tag sidecar file for the merged output using unified API
tags_path = output_path.with_suffix(output_path.suffix + '.tag')
try:
# Merge tags from source files using metadata API
if source_tags and HAS_METADATA_API:
# Use dedup function to normalize and deduplicate
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
# Fallback: simple deduplication if metadata API unavailable
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Write merged tags to sidecar file
if HAS_METADATA_API and write_tags_to_file:
# Use unified API for file writing
source_hashes_list = source_hashes if source_hashes else None
source_url_list = source_url if source_url else None
write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_url_list)
else:
# Fallback: manual file writing
tags_lines = []
# Add hash first (if available)
if source_hashes:
tags_lines.append(f"hash:{source_hashes[0]}")
# Add regular tags
tags_lines.extend(merged_tags)
# Add known url
if source_url:
for url in source_url:
tags_lines.append(f"url:{url}")
# Add relationships (if available)
if source_relationships:
for rel in source_relationships:
tags_lines.append(f"relationship:{rel}")
with open(tags_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(tags_lines) + '\n')
log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
# Also create .metadata file using centralized function
try:
if HAS_METADATA_API and write_metadata:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
# Merge tags from source files into the emitted PipeObject only.
# Sidecar files (.tag/.metadata) are written only during explicit filesystem export (add-file to a path).
if source_tags and HAS_METADATA_API:
merged_source_tags = dedup_tags_by_namespace(source_tags)
merged_tags.extend(merged_source_tags)
log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
elif source_tags:
merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates
# Emit a PipeObject-compatible dict so the merged file can be piped to next command
try:
@@ -287,6 +243,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tag=merged_tags,
url=source_url,
media_kind=file_kind,
store="PATH",
)
# Clear previous results to ensure only the merged file is passed down
ctx.clear_last_result()
@@ -424,6 +381,33 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
current_time_ms += int(duration_sec * 1000)
# If these came from a playlist/album, titles often look like:
# "Book Name - Chapter"
# If *all* titles share the same "Book Name" prefix, strip it.
if len(chapters) >= 2:
split_re = _re.compile(r"^(?P<prefix>.+?)\s+-\s+(?P<chapter>.+)$")
prefixes: List[str] = []
stripped_titles: List[str] = []
all_match = True
for ch in chapters:
raw_title = str(ch.get('title') or '').strip()
m = split_re.match(raw_title)
if not m:
all_match = False
break
prefix = m.group('prefix').strip()
chapter_title = m.group('chapter').strip()
if not prefix or not chapter_title:
all_match = False
break
prefixes.append(prefix.casefold())
stripped_titles.append(chapter_title)
if all_match and prefixes and len(set(prefixes)) == 1:
for idx, ch in enumerate(chapters):
ch['title'] = stripped_titles[idx]
logger.info(f"[merge-file] Stripped common title prefix for chapters: {prefixes[0]}")
# Step 2: Create concat demuxer file
concat_file = output.parent / f".concat_{output.stem}.txt"
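
A standalone sketch of the chapter-title prefix stripping added above: when every title matches "Book Name - Chapter" with the same book name, only the chapter part is kept; otherwise the titles are returned unchanged. The function name is illustrative.

import re
from typing import List

def strip_common_title_prefix(titles: List[str]) -> List[str]:
    """Strip a shared 'Book Name - ' prefix from chapter titles, if all share it."""
    if len(titles) < 2:
        return titles
    split_re = re.compile(r"^(?P<prefix>.+?)\s+-\s+(?P<chapter>.+)$")
    prefixes: List[str] = []
    stripped: List[str] = []
    for raw in titles:
        m = split_re.match(raw.strip())
        if not m:
            return titles
        prefix = m.group("prefix").strip()
        chapter = m.group("chapter").strip()
        if not prefix or not chapter:
            return titles
        prefixes.append(prefix.casefold())
        stripped.append(chapter)
    return stripped if len(set(prefixes)) == 1 else titles

# strip_common_title_prefix(["My Book - Chapter 1", "My Book - Chapter 2"])
# -> ["Chapter 1", "Chapter 2"]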