@@ -23,6 +23,7 @@ import re
 import string
 import subprocess
 import sys
+import tempfile
 import time
 import traceback
 from typing import Any, Dict, Iterator, List, Optional
@@ -274,17 +275,28 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
+        base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]
+
     if opts.clip_sections:
-        sections = []
-        for section_range in opts.clip_sections.split(','):
+        sections: List[str] = []
+
+        def _secs_to_hms(seconds: float) -> str:
+            total = max(0, int(seconds))
+            minutes, secs = divmod(total, 60)
+            hours, minutes = divmod(minutes, 60)
+            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
+
+        for section_range in str(opts.clip_sections).split(","):
             section_range = section_range.strip()
             if not section_range:
                 continue
             try:
-                start_s, end_s = [int(x) for x in section_range.split('-')]
-                def _secs_to_hms(s: int) -> str:
-                    minutes, seconds = divmod(s, 60)
-                    hours, minutes = divmod(minutes, 60)
-                    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+                start_s_raw, end_s_raw = section_range.split("-", 1)
+                start_s = float(start_s_raw.strip())
+                end_s = float(end_s_raw.strip())
+                if start_s >= end_s:
+                    continue
                 sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}")
             except (ValueError, AttributeError):
-                pass
+                continue
+
         if sections:
             base_options["download_sections"] = sections
+            debug(f"Download sections configured: {', '.join(sections)}")
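Not part of the commit, but for reference: a standalone sketch of what the rewritten loop above produces. _secs_to_hms is copied from the diff; build_sections and the sample input are illustrative only.

# Illustrative sketch: seconds-based clip spec -> "*HH:MM:SS-HH:MM:SS" section strings.
from typing import List

def _secs_to_hms(seconds: float) -> str:
    # Same helper as in the diff: clamp to >= 0 and format as HH:MM:SS.
    total = max(0, int(seconds))
    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"

def build_sections(clip_sections: str) -> List[str]:
    # Mirrors the loop in _build_ytdlp_options: skip blanks, drop pieces that do not
    # parse as "<start>-<end>", and reject ranges where start >= end.
    sections: List[str] = []
    for piece in str(clip_sections).split(","):
        piece = piece.strip()
        if not piece:
            continue
        try:
            start_raw, end_raw = piece.split("-", 1)
            start, end = float(start_raw), float(end_raw)
        except ValueError:
            continue
        if start >= end:
            continue
        sections.append(f"*{_secs_to_hms(start)}-{_secs_to_hms(end)}")
    return sections

print(build_sections("280-300,315-325"))
# ['*00:04:40-00:05:00', '*00:05:15-00:05:25']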
@@ -1150,6 +1162,7 @@ class Download_Media(Cmdlet):
             CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
             CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
             CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
+            SharedArgs.PATH
         ],
         detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
         exec=self.run,
@@ -1215,11 +1228,11 @@ class Download_Media(Cmdlet):

         mode = "audio" if parsed.get("audio") else "video"

-        # Parse clip range if specified
-        clip_range = None
+        # Parse clip range(s) if specified
+        clip_ranges: Optional[List[tuple[int, int]]] = None
         if clip_spec:
-            clip_range = self._parse_time_range(clip_spec)
-            if not clip_range:
+            clip_ranges = self._parse_time_ranges(str(clip_spec))
+            if not clip_ranges:
                 log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
                 return 1

@@ -1738,7 +1751,7 @@ class Download_Media(Cmdlet):

         # Download each URL
         downloaded_count = 0
-        clip_sections_spec = self._build_clip_sections_spec(clip_range)
+        clip_sections_spec = self._build_clip_sections_spec(clip_ranges)

         for url in supported_url:
             try:
@@ -1789,15 +1802,58 @@ class Download_Media(Cmdlet):
                 result_obj = _download_with_timeout(opts, timeout_seconds=300)
                 debug(f"Download completed, building pipe object...")

-                # Emit one PipeObject per downloaded file (playlists/albums return a list)
-                results_to_emit = result_obj if isinstance(result_obj, list) else [result_obj]
-                debug(f"Emitting {len(results_to_emit)} result(s) to pipeline...")
+                # Expand result set:
+                # - playlists return a list
+                # - section clips return a single DownloadMediaResult with `paths` populated
+                results_to_emit: List[Any] = []
+                if isinstance(result_obj, list):
+                    results_to_emit = list(result_obj)
+                else:
+                    paths = getattr(result_obj, "paths", None)
+                    if isinstance(paths, list) and paths:
+                        # Create one DownloadMediaResult per section file
+                        for p in paths:
+                            try:
+                                p_path = Path(p)
+                            except Exception:
+                                continue
+                            if not p_path.exists() or p_path.is_dir():
+                                continue
+                            try:
+                                hv = sha256_file(p_path)
+                            except Exception:
+                                hv = None
+                            results_to_emit.append(
+                                DownloadMediaResult(
+                                    path=p_path,
+                                    info=getattr(result_obj, "info", {}) or {},
+                                    tag=list(getattr(result_obj, "tag", []) or []),
+                                    source_url=getattr(result_obj, "source_url", None) or opts.url,
+                                    hash_value=hv,
+                                )
+                            )
+                    else:
+                        results_to_emit = [result_obj]
+
+                # Build PipeObjects first so we can attach cross-clip relationships.
+                pipe_objects: List[Dict[str, Any]] = []
+                for downloaded in results_to_emit:
+                    pipe_objects.append(self._build_pipe_object(downloaded, url, opts))
+
+                # If this is a clip download, decorate titles/tags so the title: tag is clip-based.
+                # Relationship tags are only added when multiple clips exist.
+                try:
+                    if clip_ranges and len(pipe_objects) == len(clip_ranges):
+                        source_hash = self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
+                        self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
+                except Exception:
+                    pass
+
+                debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...")

                 stage_ctx = pipeline_context.get_stage_context()
                 emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
-                for downloaded in results_to_emit:
-                    pipe_obj_dict = self._build_pipe_object(downloaded, url, opts)
-
+                for pipe_obj_dict in pipe_objects:
                     # Only emit when there is a downstream stage.
                     # This keeps `download-media` from producing a result table when run standalone.
                     if emit_enabled:
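For context, a minimal sketch (not the committed code) of the "expand result set" step above: a clip download reports one result whose paths field lists several section files, and the loop turns that into one entry per file that actually exists on disk. The real DownloadMediaResult and sha256_file live elsewhere in the project and are only assumed here.

# Minimal sketch of the expansion step; dict entries stand in for DownloadMediaResult.
from pathlib import Path
from typing import Any, Dict, List, Optional

def expand_result(info: Dict[str, Any], paths: Optional[List[str]]) -> List[Dict[str, Any]]:
    if not paths:
        # Non-clip downloads (single file or playlist entries) pass through unchanged.
        return [info]
    expanded: List[Dict[str, Any]] = []
    for p in paths:
        p_path = Path(p)
        if not p_path.exists() or p_path.is_dir():
            continue  # skip paths the downloader reported but never wrote
        expanded.append({**info, "path": p_path})  # per-file hashing would happen here
    return expanded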
@@ -1808,7 +1864,7 @@ class Download_Media(Cmdlet):
                     pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
                     register_url_with_local_library(pipe_obj, config)

-                downloaded_count += len(results_to_emit)
+                downloaded_count += len(pipe_objects)
                 debug("✓ Downloaded and emitted")

             except DownloadError as e:
@@ -1828,62 +1884,120 @@ class Download_Media(Cmdlet):
             return 1

     def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
-        """Resolve the output directory from storage location or config."""
-        storage_location = parsed.get("storage")
+        """Resolve the output directory.

-        # Priority 1: --storage flag
-        if storage_location:
+        Rules:
+        - If user passes `-path`, use that directory (override).
+        - Otherwise default to a temp directory (config["temp"] if present, else OS temp).
+        """
+
+        # Priority 1: explicit output directory override
+        path_override = parsed.get("path")
+        if path_override:
             try:
-                return SharedArgs.resolve_storage(storage_location)
+                candidate = Path(str(path_override)).expanduser()
+                # If user passed a file path, treat its parent as output dir.
+                if candidate.suffix:
+                    candidate = candidate.parent
+                candidate.mkdir(parents=True, exist_ok=True)
+                debug(f"Using output directory override: {candidate}")
+                return candidate
             except Exception as e:
-                log(f"Invalid storage location: {e}", file=sys.stderr)
+                log(f"Invalid -path output directory: {e}", file=sys.stderr)
                 return None

-        # Priority 2: Config default output/temp directory
+        # Priority 2: config-provided temp/output directory
         try:
-            from config import resolve_output_dir
-            final_output_dir = resolve_output_dir(config)
+            temp_value = (config or {}).get("temp") if isinstance(config, dict) else None
         except Exception:
-            final_output_dir = Path.home() / "Videos"
-
-        debug(f"Using default directory: {final_output_dir}")
+            temp_value = None
+        if temp_value:
+            try:
+                candidate = Path(str(temp_value)).expanduser()
+                candidate.mkdir(parents=True, exist_ok=True)
+                debug(f"Using config temp directory: {candidate}")
+                return candidate
+            except Exception as e:
+                log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr)
+                return None

+        # Priority 3: OS temp fallback
         try:
-            final_output_dir.mkdir(parents=True, exist_ok=True)
+            candidate = Path(tempfile.gettempdir()) / "Medios-Macina"
+            candidate.mkdir(parents=True, exist_ok=True)
+            debug(f"Using OS temp directory: {candidate}")
+            return candidate
         except Exception as e:
-            log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
+            log(f"Cannot create OS temp directory: {e}", file=sys.stderr)
             return None

-        return final_output_dir
-
-    def _parse_time_range(self, spec: str) -> Optional[tuple]:
-        """Parse 'MM:SS-MM:SS' format into (start_seconds, end_seconds)."""
-        try:
-            parts = spec.split("-")
-            if len(parts) != 2:
-                return None
-
-            def to_seconds(ts: str) -> int:
-                ts = ts.strip()
-                if ":" in ts:
-                    mm, ss = ts.split(":")
-                    return int(mm) * 60 + int(ss)
-                return int(ts)
-
-            start = to_seconds(parts[0])
-            end = to_seconds(parts[1])
-            return (start, end) if start < end else None
-        except Exception:
-            return None
+    def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]:
+        """Parse clip specs into a list of (start_seconds, end_seconds).
+
+        Supported inputs:
+        - "MM:SS-MM:SS"
+        - "HH:MM:SS-HH:MM:SS"
+        - seconds: "280-300"
+        - multiple ranges separated by commas: "4:40-5:00,5:15-5:25"
+        """
+
+        def _to_seconds(ts: str) -> Optional[int]:
+            ts = str(ts).strip()
+            if not ts:
+                return None
+
+            if ":" in ts:
+                parts = [p.strip() for p in ts.split(":")]
+                if len(parts) == 2:
+                    hh_s = "0"
+                    mm_s, ss_s = parts
+                elif len(parts) == 3:
+                    hh_s, mm_s, ss_s = parts
+                else:
+                    return None
+
+                try:
+                    hours = int(hh_s)
+                    minutes = int(mm_s)
+                    seconds = float(ss_s)
+                    total = (hours * 3600) + (minutes * 60) + seconds
+                    return int(total)
+                except Exception:
+                    return None
+
+            try:
+                return int(float(ts))
+            except Exception:
+                return None
+
+        ranges: List[tuple[int, int]] = []
+        if not spec:
+            return ranges
+
+        for piece in str(spec).split(","):
+            piece = piece.strip()
+            if not piece:
+                continue
+            if "-" not in piece:
+                return []
+            start_s, end_s = [p.strip() for p in piece.split("-", 1)]
+            start = _to_seconds(start_s)
+            end = _to_seconds(end_s)
+            if start is None or end is None or start >= end:
+                return []
+            ranges.append((start, end))
+
+        return ranges

     def _build_clip_sections_spec(
         self,
-        clip_range: Optional[tuple],
+        clip_ranges: Optional[List[tuple[int, int]]],
     ) -> Optional[str]:
         """Convert parsed clip range into downloader spec (seconds)."""
         ranges: List[str] = []
-        if clip_range:
-            ranges.append(f"{clip_range[0]}-{clip_range[1]}")
+        if clip_ranges:
+            for start_s, end_s in clip_ranges:
+                ranges.append(f"{start_s}-{end_s}")
         return ",".join(ranges) if ranges else None

     def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
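For reference (again not part of the commit): how the new _parse_time_ranges / _build_clip_sections_spec pair is expected to behave for the inputs listed in the docstring. parse_ranges and to_seconds below are a compact re-implementation of the documented rules, not the committed methods.

# Doctest-style sketch of the documented parsing rules and the seconds-based spec.
from typing import List, Optional, Tuple

def to_seconds(ts: str) -> Optional[int]:
    parts = [p.strip() for p in ts.strip().split(":")]
    try:
        nums = [float(p) for p in parts]
    except ValueError:
        return None
    if len(nums) == 1:      # "280"
        return int(nums[0])
    if len(nums) == 2:      # "MM:SS"
        return int(nums[0] * 60 + nums[1])
    if len(nums) == 3:      # "HH:MM:SS"
        return int(nums[0] * 3600 + nums[1] * 60 + nums[2])
    return None

def parse_ranges(spec: str) -> List[Tuple[int, int]]:
    ranges: List[Tuple[int, int]] = []
    for piece in spec.split(","):
        piece = piece.strip()
        if not piece or "-" not in piece:
            return []
        start_s, end_s = (to_seconds(p) for p in piece.split("-", 1))
        if start_s is None or end_s is None or start_s >= end_s:
            return []       # one bad range invalidates the whole spec
        ranges.append((start_s, end_s))
    return ranges

assert parse_ranges("4:40-5:00,5:15-5:25") == [(280, 300), (315, 325)]
assert parse_ranges("01:00:00-01:00:30") == [(3600, 3630)]
assert parse_ranges("280-300") == [(280, 300)]
assert parse_ranges("5:00-4:40") == []          # start >= end rejects the spec

# _build_clip_sections_spec then joins the parsed ranges back into plain seconds:
assert ",".join(f"{a}-{b}" for a, b in parse_ranges("4:40-5:00,5:15-5:25")) == "280-300,315-325"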
@@ -1926,6 +2040,159 @@ class Download_Media(Cmdlet):
             "media_kind": "video" if opts.mode == "video" else "audio",
         }

+    @staticmethod
+    def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
+        if not value or not isinstance(value, str):
+            return None
+        candidate = value.strip().lower()
+        if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate):
+            return candidate
+        return None
+
+    @classmethod
+    def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]:
+        if not isinstance(hit, dict):
+            return None
+        for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"):
+            v = hit.get(key)
+            normalized = cls._normalise_hash_hex(str(v) if v is not None else None)
+            if normalized:
+                return normalized
+        return None
+
+    @classmethod
+    def _find_existing_hash_for_url(
+        cls,
+        storage: Any,
+        canonical_url: str,
+        *,
+        hydrus_available: bool,
+    ) -> Optional[str]:
+        """Best-effort lookup of an existing stored item hash by url:<canonical_url>.
+
+        Used to make the stored source video the king for multi-clip relationships.
+        """
+        if storage is None or not canonical_url:
+            return None
+        try:
+            from Store.HydrusNetwork import HydrusNetwork
+        except Exception:
+            HydrusNetwork = None  # type: ignore
+
+        try:
+            backend_names = list(storage.list_searchable_backends() or [])
+        except Exception:
+            backend_names = []
+
+        for backend_name in backend_names:
+            try:
+                backend = storage[backend_name]
+            except Exception:
+                continue
+            try:
+                if str(backend_name).strip().lower() == "temp":
+                    continue
+            except Exception:
+                pass
+            try:
+                if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available:
+                    continue
+            except Exception:
+                pass
+
+            try:
+                hits = backend.search(f"url:{canonical_url}", limit=5) or []
+            except Exception:
+                hits = []
+            for hit in hits:
+                extracted = cls._extract_hash_from_search_hit(hit)
+                if extracted:
+                    return extracted
+
+        return None
+
+    @staticmethod
+    def _format_timecode(seconds: int, *, force_hours: bool) -> str:
+        total = max(0, int(seconds))
+        minutes, secs = divmod(total, 60)
+        hours, minutes = divmod(minutes, 60)
+        if force_hours:
+            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
+        return f"{minutes:02d}:{secs:02d}"
+
+    @classmethod
+    def _format_clip_range(cls, start_s: int, end_s: int) -> str:
+        force_hours = bool(start_s >= 3600 or end_s >= 3600)
+        return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}"
+
+    @classmethod
+    def _apply_clip_decorations(
+        cls,
+        pipe_objects: List[Dict[str, Any]],
+        clip_ranges: List[tuple[int, int]],
+        *,
+        source_king_hash: Optional[str],
+    ) -> None:
+        """Apply clip:{range} tags/titles and relationship metadata for multi-clip downloads.
+
+        - Sets the clip title (and title: tag) to exactly `clip:{range}`.
+        - Adds `clip:{range}` tag.
+        - Sets `relationships` on each emitted item (king hash first, then alt hashes)
+          so downstream can persist relationships into a DB/API without storing relationship tags.
+        """
+        if not pipe_objects or len(pipe_objects) != len(clip_ranges):
+            return
+
+        # Always apply clip titles/tags (even for a single clip).
+        for po, (start_s, end_s) in zip(pipe_objects, clip_ranges):
+            clip_range = cls._format_clip_range(start_s, end_s)
+            clip_tag = f"clip:{clip_range}"
+
+            # Title: make it generic/consistent for clips.
+            po["title"] = clip_tag
+
+            tags = po.get("tag")
+            if not isinstance(tags, list):
+                tags = []
+
+            # Replace any existing title: tags with title:<clip_tag>
+            tags = [t for t in tags if not str(t).strip().lower().startswith("title:")]
+
+            # Relationships must not be stored as tags.
+            tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")]
+            tags.insert(0, f"title:{clip_tag}")
+
+            # Ensure clip tag exists
+            if clip_tag not in tags:
+                tags.append(clip_tag)
+
+            po["tag"] = tags
+
+        # Relationship tagging only makes sense when multiple clips exist.
+        if len(pipe_objects) < 2:
+            return
+
+        hashes: List[str] = []
+        for po in pipe_objects:
+            h = cls._normalise_hash_hex(str(po.get("hash") or ""))
+            hashes.append(h or "")
+
+        # Determine king: prefer an existing source video hash if present; else first clip becomes king.
+        king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None
+        if not king_hash:
+            king_hash = hashes[0] if hashes and hashes[0] else None
+        if not king_hash:
+            return
+
+        alt_hashes: List[str] = [h for h in hashes if h and h != king_hash]
+        if not alt_hashes:
+            return
+
+        # Carry relationship metadata through the pipeline without using tags.
+        rel_payload = {"king": [king_hash], "alt": list(alt_hashes)}
+        for po in pipe_objects:
+            po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)}
+
     def _compute_file_hash(self, filepath: Path) -> str:
         """Compute SHA256 hash of a file."""
         import hashlib
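For reference, and not part of the commit: the expected shape of the decorated pipe objects for a two-clip download. The hash values are made up, and format_timecode below mirrors _format_timecode from the hunk above.

# Illustrative only: two clips of an already-stored source video ("a"*64 plays the king).
def format_timecode(seconds: int, force_hours: bool) -> str:
    minutes, secs = divmod(max(0, int(seconds)), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}" if force_hours else f"{minutes:02d}:{secs:02d}"

clip_ranges = [(280, 300), (315, 325)]
king, alts = "a" * 64, ["b" * 64, "c" * 64]   # made-up SHA256 hex values

for (start, end), h in zip(clip_ranges, alts):
    force_hours = start >= 3600 or end >= 3600
    label = f"{format_timecode(start, force_hours)}-{format_timecode(end, force_hours)}"
    pipe_obj = {
        "hash": h,
        "title": f"clip:{label}",                       # title becomes clip-based
        "tag": [f"title:clip:{label}", f"clip:{label}"],
        "relationships": {"king": [king], "alt": alts},  # carried as metadata, not tags
    }
    print(pipe_obj["title"], pipe_obj["relationships"]["king"][0][:8])
# clip:04:40-05:00 aaaaaaaa
# clip:05:15-05:25 aaaaaaaa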