This commit is contained in:
nose
2025-12-16 01:45:01 -08:00
parent a03eb0d1be
commit 9873280f0e
36 changed files with 4911 additions and 1225 deletions

View File

@@ -23,6 +23,7 @@ import re
import string
import subprocess
import sys
import tempfile
import time
import traceback
from typing import Any, Dict, Iterator, List, Optional
@@ -274,17 +275,28 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]
if opts.clip_sections:
sections = []
for section_range in opts.clip_sections.split(','):
sections: List[str] = []
def _secs_to_hms(seconds: float) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
for section_range in str(opts.clip_sections).split(","):
section_range = section_range.strip()
if not section_range:
continue
try:
start_s, end_s = [int(x) for x in section_range.split('-')]
def _secs_to_hms(s: int) -> str:
minutes, seconds = divmod(s, 60)
hours, minutes = divmod(minutes, 60)
return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
start_s_raw, end_s_raw = section_range.split("-", 1)
start_s = float(start_s_raw.strip())
end_s = float(end_s_raw.strip())
if start_s >= end_s:
continue
sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}")
except (ValueError, AttributeError):
pass
continue
if sections:
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
@@ -1150,6 +1162,7 @@ class Download_Media(Cmdlet):
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
SharedArgs.PATH
],
detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
exec=self.run,
@@ -1215,11 +1228,11 @@ class Download_Media(Cmdlet):
mode = "audio" if parsed.get("audio") else "video"
# Parse clip range if specified
clip_range = None
# Parse clip range(s) if specified
clip_ranges: Optional[List[tuple[int, int]]] = None
if clip_spec:
clip_range = self._parse_time_range(clip_spec)
if not clip_range:
clip_ranges = self._parse_time_ranges(str(clip_spec))
if not clip_ranges:
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
return 1
@@ -1738,7 +1751,7 @@ class Download_Media(Cmdlet):
# Download each URL
downloaded_count = 0
clip_sections_spec = self._build_clip_sections_spec(clip_range)
clip_sections_spec = self._build_clip_sections_spec(clip_ranges)
for url in supported_url:
try:
@@ -1789,15 +1802,58 @@ class Download_Media(Cmdlet):
result_obj = _download_with_timeout(opts, timeout_seconds=300)
debug(f"Download completed, building pipe object...")
# Emit one PipeObject per downloaded file (playlists/albums return a list)
results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
debug(f"Emitting {len(results_to_emit)} result(s) to pipeline...")
# Expand result set:
# - playlists return a list
# - section clips return a single DownloadMediaResult with `paths` populated
results_to_emit: List[Any] = []
if isinstance(result_obj, list):
results_to_emit = list(result_obj)
else:
paths = getattr(result_obj, "paths", None)
if isinstance(paths, list) and paths:
# Create one DownloadMediaResult per section file
for p in paths:
try:
p_path = Path(p)
except Exception:
continue
if not p_path.exists() or p_path.is_dir():
continue
try:
hv = sha256_file(p_path)
except Exception:
hv = None
results_to_emit.append(
DownloadMediaResult(
path=p_path,
info=getattr(result_obj, "info", {}) or {},
tag=list(getattr(result_obj, "tag", []) or []),
source_url=getattr(result_obj, "source_url", None) or opts.url,
hash_value=hv,
)
)
else:
results_to_emit = [result_obj]
# Build PipeObjects first so we can attach cross-clip relationships.
pipe_objects: List[Dict[str, Any]] = []
for downloaded in results_to_emit:
pipe_objects.append(self._build_pipe_object(downloaded, url, opts))
# If this is a clip download, decorate titles/tags so the title: tag is clip-based.
# Relationship tags are only added when multiple clips exist.
try:
if clip_ranges and len(pipe_objects) == len(clip_ranges):
source_hash = self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
except Exception:
pass
debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...")
stage_ctx = pipeline_context.get_stage_context()
emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
for downloaded in results_to_emit:
pipe_obj_dict = self._build_pipe_object(downloaded, url, opts)
for pipe_obj_dict in pipe_objects:
# Only emit when there is a downstream stage.
# This keeps `download-media` from producing a result table when run standalone.
if emit_enabled:
@@ -1808,7 +1864,7 @@ class Download_Media(Cmdlet):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
downloaded_count += len(results_to_emit)
downloaded_count += len(pipe_objects)
debug("✓ Downloaded and emitted")
except DownloadError as e:
@@ -1828,62 +1884,120 @@ class Download_Media(Cmdlet):
return 1
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
    """Resolve the output directory.

    Rules:
    - If user passes `-path`, use that directory (override).
    - Otherwise default to a temp directory (config["temp"] if present, else OS temp).

    The selected directory is created when missing. When the selected candidate
    cannot be used, the error is logged to stderr and None is returned; a failing
    candidate does not fall through to the next priority.
    """
    # Priority 1: explicit output directory override
    path_override = parsed.get("path")
    if path_override:
        try:
            candidate = Path(str(path_override)).expanduser()
            # If user passed a file path, treat its parent as output dir.
            # An existing directory is kept as-is even when its name contains a
            # dot (e.g. "clips.v2"); `suffix` alone would mistake it for a file.
            if candidate.suffix and not candidate.is_dir():
                candidate = candidate.parent
            candidate.mkdir(parents=True, exist_ok=True)
            debug(f"Using output directory override: {candidate}")
            return candidate
        except Exception as e:
            log(f"Invalid -path output directory: {e}", file=sys.stderr)
            return None

    # Priority 2: config-provided temp/output directory
    temp_value = config.get("temp") if isinstance(config, dict) else None
    if temp_value:
        try:
            candidate = Path(str(temp_value)).expanduser()
            candidate.mkdir(parents=True, exist_ok=True)
            debug(f"Using config temp directory: {candidate}")
            return candidate
        except Exception as e:
            log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr)
            return None

    # Priority 3: OS temp fallback
    try:
        candidate = Path(tempfile.gettempdir()) / "Medios-Macina"
        candidate.mkdir(parents=True, exist_ok=True)
        debug(f"Using OS temp directory: {candidate}")
        return candidate
    except Exception as e:
        log(f"Cannot create OS temp directory: {e}", file=sys.stderr)
        return None
def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]:
"""Parse clip specs into a list of (start_seconds, end_seconds).
def _parse_time_range(self, spec: str) -> Optional[tuple]:
"""Parse 'MM:SS-MM:SS' format into (start_seconds, end_seconds)."""
try:
parts = spec.split("-")
if len(parts) != 2:
Supported inputs:
- "MM:SS-MM:SS"
- "HH:MM:SS-HH:MM:SS"
- seconds: "280-300"
- multiple ranges separated by commas: "4:40-5:00,5:15-5:25"
"""
def _to_seconds(ts: str) -> Optional[int]:
ts = str(ts).strip()
if not ts:
return None
def to_seconds(ts: str) -> int:
ts = ts.strip()
if ":" in ts:
mm, ss = ts.split(":")
return int(mm) * 60 + int(ss)
return int(ts)
start = to_seconds(parts[0])
end = to_seconds(parts[1])
return (start, end) if start < end else None
except Exception:
return None
if ":" in ts:
parts = [p.strip() for p in ts.split(":")]
if len(parts) == 2:
hh_s = "0"
mm_s, ss_s = parts
elif len(parts) == 3:
hh_s, mm_s, ss_s = parts
else:
return None
try:
hours = int(hh_s)
minutes = int(mm_s)
seconds = float(ss_s)
total = (hours * 3600) + (minutes * 60) + seconds
return int(total)
except Exception:
return None
try:
return int(float(ts))
except Exception:
return None
ranges: List[tuple[int, int]] = []
if not spec:
return ranges
for piece in str(spec).split(","):
piece = piece.strip()
if not piece:
continue
if "-" not in piece:
return []
start_s, end_s = [p.strip() for p in piece.split("-", 1)]
start = _to_seconds(start_s)
end = _to_seconds(end_s)
if start is None or end is None or start >= end:
return []
ranges.append((start, end))
return ranges
def _build_clip_sections_spec(
self,
clip_range: Optional[tuple],
clip_ranges: Optional[List[tuple[int, int]]],
) -> Optional[str]:
"""Convert parsed clip range into downloader spec (seconds)."""
ranges: List[str] = []
if clip_range:
ranges.append(f"{clip_range[0]}-{clip_range[1]}")
if clip_ranges:
for start_s, end_s in clip_ranges:
ranges.append(f"{start_s}-{end_s}")
return ",".join(ranges) if ranges else None
def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
@@ -1926,6 +2040,159 @@ class Download_Media(Cmdlet):
"media_kind": "video" if opts.mode == "video" else "audio",
}
@staticmethod
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
if not value or not isinstance(value, str):
return None
candidate = value.strip().lower()
if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate):
return candidate
return None
@classmethod
def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]:
    """Return the first valid hash found in a search-hit dict, or None.

    The keys "hash", "hash_hex", "file_hash" and "hydrus_hash" are tried in that
    order; each value is stringified and validated with `_normalise_hash_hex`.
    Non-dict hits yield None.
    """
    if not isinstance(hit, dict):
        return None
    candidates = (
        cls._normalise_hash_hex(None if raw is None else str(raw))
        for raw in map(hit.get, ("hash", "hash_hex", "file_hash", "hydrus_hash"))
    )
    # Lazy scan: stops at the first key whose value normalises successfully.
    return next((digest for digest in candidates if digest), None)
@classmethod
def _find_existing_hash_for_url(
    cls,
    storage: Any,
    canonical_url: str,
    *,
    hydrus_available: bool,
) -> Optional[str]:
    """Best-effort lookup of an existing stored item hash by url:<canonical_url>.

    Used to make the stored source video the king for multi-clip relationships.

    Every backend reported by `storage.list_searchable_backends()` is queried in
    order, skipping the backend named "temp" and, when `hydrus_available` is
    false, any HydrusNetwork backend. Lookup and search failures are swallowed.
    Returns the first hash extracted from a hit, or None.
    """
    if storage is None or not canonical_url:
        return None

    # Optional import: if it fails, the Hydrus-specific skip below is disabled.
    try:
        from Store.HydrusNetwork import HydrusNetwork as hydrus_cls
    except Exception:
        hydrus_cls = None  # type: ignore

    try:
        names = list(storage.list_searchable_backends() or [])
    except Exception:
        names = []

    query = f"url:{canonical_url}"
    for name in names:
        try:
            store = storage[name]
        except Exception:
            continue

        # Never treat the temp store as a source of existing items.
        try:
            is_temp = str(name).strip().lower() == "temp"
        except Exception:
            is_temp = False
        if is_temp:
            continue

        # Skip Hydrus backends when the caller reports Hydrus as unavailable.
        try:
            hydrus_offline = (
                hydrus_cls is not None
                and isinstance(store, hydrus_cls)
                and not hydrus_available
            )
        except Exception:
            hydrus_offline = False
        if hydrus_offline:
            continue

        try:
            found = store.search(query, limit=5) or []
        except Exception:
            found = []

        for hit in found:
            digest = cls._extract_hash_from_search_hit(hit)
            if digest:
                return digest

    return None
@staticmethod
def _format_timecode(seconds: int, *, force_hours: bool) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
if force_hours:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
@classmethod
def _format_clip_range(cls, start_s: int, end_s: int) -> str:
    """Render a clip as "<start>-<end>" using `_format_timecode` for both ends.

    Both ends share one layout: the hours field is forced on both as soon as
    either bound is 3600 seconds or more.
    """
    show_hours = any(bound >= 3600 for bound in (start_s, end_s))
    begin = cls._format_timecode(start_s, force_hours=show_hours)
    finish = cls._format_timecode(end_s, force_hours=show_hours)
    return f"{begin}-{finish}"
@classmethod
def _apply_clip_decorations(
    cls,
    pipe_objects: List[Dict[str, Any]],
    clip_ranges: List[tuple[int, int]],
    *,
    source_king_hash: Optional[str],
) -> None:
    """Apply clip:{range} tags/titles and relationship metadata for multi-clip downloads.

    - Sets the clip title (and title: tag) to exactly `clip:{range}`.
    - Adds `clip:{range}` tag.
    - Sets `relationships` on each emitted item (king hash first, then alt hashes)
      so downstream can persist relationships into a DB/API without storing relationship tags.

    The dicts in `pipe_objects` are mutated in place. Nothing happens when the
    list is empty or its length differs from `clip_ranges`. Relationships are
    only written when there are at least two clips, a king hash can be
    determined, and at least one other valid hash exists.
    """
    if not pipe_objects or len(pipe_objects) != len(clip_ranges):
        return

    # Existing title: tags are replaced, and relationships must not be stored as tags.
    stale_prefixes = ("title:", "relationship:")

    # Always apply clip titles/tags (even for a single clip).
    for po, (start_s, end_s) in zip(pipe_objects, clip_ranges):
        clip_tag = f"clip:{cls._format_clip_range(start_s, end_s)}"

        # Title: make it generic/consistent for clips.
        po["title"] = clip_tag

        existing = po.get("tag")
        if not isinstance(existing, list):
            existing = []
        tags = [
            t for t in existing
            if not str(t).strip().lower().startswith(stale_prefixes)
        ]
        tags.insert(0, f"title:{clip_tag}")

        # Ensure clip tag exists
        if clip_tag not in tags:
            tags.append(clip_tag)
        po["tag"] = tags

    # Relationship tagging only makes sense when multiple clips exist.
    if len(pipe_objects) < 2:
        return

    # One entry per clip; "" marks a missing or invalid hash.
    hashes: List[str] = [
        cls._normalise_hash_hex(str(po.get("hash") or "")) or ""
        for po in pipe_objects
    ]

    # Determine king: prefer an existing source video hash if present; else first clip becomes king.
    king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None
    if not king_hash:
        king_hash = hashes[0] or None
    if not king_hash:
        return

    alt_hashes: List[str] = [h for h in hashes if h and h != king_hash]
    if not alt_hashes:
        return

    # Carry relationship metadata through the pipeline without using tags.
    # Each item gets its own dict/lists so later mutation of one does not leak to the others.
    for po in pipe_objects:
        po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)}
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib