dfdsf
195  SYS/background_notifier.py  Normal file
@@ -0,0 +1,195 @@
"""Lightweight console notifier for background WorkerManager tasks.

Registers a refresh callback on WorkerManager and prints concise updates when
workers start, progress, or finish. Intended for CLI background workflows.

Filters to show only workers related to the current pipeline session to avoid
cluttering the terminal with workers from previous sessions.
"""
from __future__ import annotations

from typing import Any, Callable, Dict, Optional, Set

from SYS.logger import log, debug


class BackgroundNotifier:
    """Simple notifier that prints worker status changes for a session."""

    def __init__(
        self,
        manager: Any,
        output: Callable[[str], None] = log,
        session_worker_ids: Optional[Set[str]] = None,
        only_terminal_updates: bool = False,
        overlay_mode: bool = False,
    ) -> None:
        self.manager = manager
        self.output = output
        self.session_worker_ids = session_worker_ids if session_worker_ids is not None else set()
        self.only_terminal_updates = only_terminal_updates
        self.overlay_mode = overlay_mode
        self._filter_enabled = session_worker_ids is not None
        self._last_state: Dict[str, str] = {}

        try:
            self.manager.add_refresh_callback(self._on_refresh)
            self.manager.start_auto_refresh()
        except Exception as exc:  # pragma: no cover - best effort
            debug(f"[notifier] Could not attach refresh callback: {exc}")

    def _render_line(self, worker: Dict[str, Any]) -> Optional[str]:
        # Use worker_id (the actual worker ID we set) for filtering and display
        worker_id = str(worker.get("worker_id") or "").strip()
        if not worker_id:
            # Fallback to database id if worker_id is not set
            worker_id = str(worker.get("id") or "").strip()
        if not worker_id:
            return None

        status = str(worker.get("status") or "running")
        progress_val = worker.get("progress") or worker.get("progress_percent")
        progress = ""
        if isinstance(progress_val, (int, float)):
            progress = f" {progress_val:.1f}%"
        elif progress_val:
            progress = f" {progress_val}"

        step = str(worker.get("current_step") or worker.get("description") or "").strip()
        parts = [f"[worker:{worker_id}] {status}{progress}"]
        if step:
            parts.append(step)
        return " - ".join(parts)

    def _on_refresh(self, workers: list[Dict[str, Any]]) -> None:
        overlay_active_workers = 0

        for worker in workers:
            # Use worker_id (the actual worker ID we set) for filtering
            worker_id = str(worker.get("worker_id") or "").strip()
            if not worker_id:
                # Fallback to database id if worker_id is not set
                worker_id = str(worker.get("id") or "").strip()
            if not worker_id:
                continue

            # If filtering is enabled, skip workers not in this session
            if self._filter_enabled and worker_id not in self.session_worker_ids:
                continue

            status = str(worker.get("status") or "running")

            # Overlay mode: only emit on completion; suppress start/progress spam
            if self.overlay_mode:
                if status in ("completed", "finished", "error"):
                    progress_val = worker.get("progress") or worker.get("progress_percent") or ""
                    step = str(worker.get("current_step") or worker.get("description") or "").strip()
                    signature = f"{status}|{progress_val}|{step}"

                    if self._last_state.get(worker_id) == signature:
                        continue

                    self._last_state[worker_id] = signature
                    line = self._render_line(worker)
                    if line:
                        try:
                            self.output(line)
                        except Exception:
                            pass

                    self._last_state.pop(worker_id, None)
                    self.session_worker_ids.discard(worker_id)
                continue

            # For terminal-only mode, emit once when the worker finishes and skip intermediate updates
            if self.only_terminal_updates:
                if status in ("completed", "finished", "error"):
                    if self._last_state.get(worker_id) == status:
                        continue
                    self._last_state[worker_id] = status
                    line = self._render_line(worker)
                    if line:
                        try:
                            self.output(line)
                        except Exception:
                            pass
                    # Stop tracking this worker after terminal notification
                    self.session_worker_ids.discard(worker_id)
                continue

            # Skip finished workers after showing them once (standard verbose mode)
            if status in ("completed", "finished", "error"):
                if worker_id in self._last_state:
                    # Already shown, remove from tracking
                    self._last_state.pop(worker_id, None)
                    self.session_worker_ids.discard(worker_id)
                    continue

            progress_val = worker.get("progress") or worker.get("progress_percent") or ""
            step = str(worker.get("current_step") or worker.get("description") or "").strip()
            signature = f"{status}|{progress_val}|{step}"

            if self._last_state.get(worker_id) == signature:
                continue

            self._last_state[worker_id] = signature
            line = self._render_line(worker)
            if line:
                try:
                    self.output(line)
                except Exception:
                    pass

        if self.overlay_mode:
            try:
                # If nothing active for this session, clear the overlay text
                if overlay_active_workers == 0:
                    self.output("")
            except Exception:
                pass


def ensure_background_notifier(
    manager: Any,
    output: Callable[[str], None] = log,
    session_worker_ids: Optional[Set[str]] = None,
    only_terminal_updates: bool = False,
    overlay_mode: bool = False,
) -> Optional[BackgroundNotifier]:
    """Attach a BackgroundNotifier to a WorkerManager if not already present.

    Args:
        manager: WorkerManager instance
        output: Function to call for printing updates
        session_worker_ids: Set of worker IDs belonging to this pipeline session.
            If None, show all workers. If a set (even empty), only show workers in that set.
    """
    if manager is None:
        return None

    existing = getattr(manager, "_background_notifier", None)
    if isinstance(existing, BackgroundNotifier):
        # Update session IDs if provided
        if session_worker_ids is not None:
            existing._filter_enabled = True
            existing.session_worker_ids.update(session_worker_ids)
        # Respect the most restrictive setting for terminal-only updates
        if only_terminal_updates:
            existing.only_terminal_updates = True
        # Enable overlay mode if requested later
        if overlay_mode:
            existing.overlay_mode = True
        return existing

    notifier = BackgroundNotifier(
        manager,
        output,
        session_worker_ids=session_worker_ids,
        only_terminal_updates=only_terminal_updates,
        overlay_mode=overlay_mode,
    )
    try:
        manager._background_notifier = notifier  # type: ignore[attr-defined]
    except Exception:
        pass
    return notifier
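A minimal usage sketch for the notifier above (editor's annotation, not part of the commit; the WorkerManager construction and the worker ID value are illustrative, and SYS.background_notifier is assumed to be the import path implied by the file location):

    # Hypothetical wiring: attach a notifier to an existing WorkerManager.
    # `manager` must expose add_refresh_callback()/start_auto_refresh() as used above.
    from SYS.background_notifier import ensure_background_notifier

    def start_session(manager, worker_id: str) -> None:
        notifier = ensure_background_notifier(
            manager,
            session_worker_ids={worker_id},   # only print workers from this session
            only_terminal_updates=True,       # print once, when the worker finishes
        )
        if notifier is None:
            print("no worker manager available; running without notifications")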
767  SYS/download.py  Normal file
@@ -0,0 +1,767 @@
"""Download media files using yt-dlp with support for direct file downloads.

Lean, focused downloader without event infrastructure overhead.
- yt-dlp integration for streaming sites
- Direct file download fallback for PDFs, images, documents
- Tag extraction via metadata.extract_ytdlp_tags()
- Logging via SYS.logger.log()
"""
from __future__ import annotations

import glob  # noqa: F401
import hashlib
import json  # noqa: F401
import random
import re
import string
import subprocess
import sys
import time
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin, urlparse

import httpx

from SYS.logger import log, debug
from SYS.utils import ensure_directory, sha256_file
from API.HTTP import HTTPClient
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar

try:
    import yt_dlp  # type: ignore
    from yt_dlp.extractor import gen_extractors  # type: ignore
except Exception as exc:
    yt_dlp = None  # type: ignore
    YTDLP_IMPORT_ERROR = exc
else:
    YTDLP_IMPORT_ERROR = None

try:
    from metadata import extract_ytdlp_tags
except ImportError:
    extract_ytdlp_tags = None

_EXTRACTOR_CACHE: List[Any] | None = None


def _ensure_yt_dlp_ready() -> None:
    """Verify yt-dlp is available, raise if not."""
    if yt_dlp is not None:
        return
    detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
    raise DownloadError(f"yt-dlp module not available: {detail}")


def _progress_callback(status: Dict[str, Any]) -> None:
    """Simple progress callback using logger."""
    event = status.get("status")
    if event == "downloading":
        percent = status.get("_percent_str", "?")
        speed = status.get("_speed_str", "?")
        eta = status.get("_eta_str", "?")
        sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta}  ")
        sys.stdout.flush()
    elif event == "finished":
        sys.stdout.write("\r" + " " * 70 + "\r")
        sys.stdout.flush()
        debug(f"✓ Download finished: {status.get('filename')}")
    elif event in ("postprocessing", "processing"):
        debug(f"Post-processing: {status.get('postprocessor')}")


def is_url_supported_by_ytdlp(url: str) -> bool:
    """Check if URL is supported by yt-dlp."""
    if yt_dlp is None:
        return False
    global _EXTRACTOR_CACHE
    if _EXTRACTOR_CACHE is None:
        try:
            _EXTRACTOR_CACHE = [ie for ie in gen_extractors()]  # type: ignore[arg-type]
        except Exception:
            _EXTRACTOR_CACHE = []
    for extractor in _EXTRACTOR_CACHE:
        try:
            if not extractor.suitable(url):
                continue
        except Exception:
            continue
        name = getattr(extractor, "IE_NAME", "")
        if name.lower() == "generic":
            continue
        return True
    return False


def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
    """Get list of available formats for a URL using yt-dlp."""
    _ensure_yt_dlp_ready()

    try:
        ydl_opts = {
            "quiet": True,
            "no_warnings": True,
            "socket_timeout": 30,
        }

        if no_playlist:
            ydl_opts["noplaylist"] = True

        if playlist_items:
            ydl_opts["playlist_items"] = playlist_items

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            debug(f"Fetching format list for: {url}")
            info = ydl.extract_info(url, download=False)

            formats = info.get("formats", [])
            if not formats:
                log("No formats available", file=sys.stderr)
                return None

            result_formats = []
            for fmt in formats:
                result_formats.append({
                    "format_id": fmt.get("format_id", ""),
                    "format": fmt.get("format", ""),
                    "ext": fmt.get("ext", ""),
                    "resolution": fmt.get("resolution", ""),
                    "width": fmt.get("width"),
                    "height": fmt.get("height"),
                    "fps": fmt.get("fps"),
                    "vcodec": fmt.get("vcodec", "none"),
                    "acodec": fmt.get("acodec", "none"),
                    "filesize": fmt.get("filesize"),
                    "tbr": fmt.get("tbr"),
                })

            debug(f"Found {len(result_formats)} available formats")
            return result_formats

    except Exception as e:
        log(f"✗ Error fetching formats: {e}", file=sys.stderr)
        return None


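A short usage sketch for the two public helpers above (editor's annotation, not part of the commit; the URL is illustrative):

    # Probe extractor support first, then enumerate formats for a single video.
    from SYS.download import is_url_supported_by_ytdlp, list_formats

    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # illustrative URL
    if is_url_supported_by_ytdlp(url):
        formats = list_formats(url, no_playlist=True)
        for fmt in formats or []:
            # Each entry mirrors the keys assembled in list_formats() above.
            print(fmt["format_id"], fmt["ext"], fmt["resolution"], fmt["filesize"])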
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
    """Download each section separately so merge-file can combine them.

    yt-dlp with multiple --download-sections args merges them into one file.
    We need separate files for merge-file, so download each section individually.

    Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
    thinking sections are already downloaded. The title is extracted and stored in tags.

    Returns:
        (session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
    """

    sections_list = ytdl_options.get("download_sections", [])
    if not sections_list:
        return "", {}

    # Generate a unique hash-based ID for this download session
    # This ensures different videos/downloads don't have filename collisions
    session_id = hashlib.md5(
        (url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
    ).hexdigest()[:12]

    first_section_info = None
    title_from_first = None

    # Download each section separately with unique output template using session ID
    for section_idx, section in enumerate(sections_list, 1):
        # Build unique output template for this section using session-based filename
        # e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
        base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
        output_dir_path = Path(base_outtmpl).parent

        # Use session_id + section index for temp filename
        # e.g., "/path/{session_id}_1.%(ext)s"
        filename_tmpl = f"{session_id}_{section_idx}"
        if base_outtmpl.endswith(".%(ext)s"):
            filename_tmpl += ".%(ext)s"

        # Use Path to handle separators correctly for the OS
        section_outtmpl = str(output_dir_path / filename_tmpl)

        # For the first section, extract metadata first (separate call)
        if section_idx == 1:
            metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
            if ytdl_options.get("cookiefile"):
                cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
                metadata_cmd.extend(["--cookies", cookies_path])
            if ytdl_options.get("noplaylist"):
                metadata_cmd.append("--no-playlist")
            metadata_cmd.append(url)

            try:
                meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
                if meta_result.returncode == 0 and meta_result.stdout:
                    try:
                        info_dict = json.loads(meta_result.stdout.strip())
                        first_section_info = info_dict
                        title_from_first = info_dict.get('title')
                        if not quiet:
                            debug(f"Extracted title from metadata: {title_from_first}")
                    except json.JSONDecodeError:
                        if not quiet:
                            debug("Could not parse JSON metadata")
            except Exception as e:
                if not quiet:
                    debug(f"Error extracting metadata: {e}")

        # Build yt-dlp command for downloading this section
        cmd = ["yt-dlp"]

        # Add format
        if ytdl_options.get("format"):
            cmd.extend(["-f", ytdl_options["format"]])

        # Add ONLY this section (not all sections)
        cmd.extend(["--download-sections", section])

        # Add force-keyframes-at-cuts if specified
        if ytdl_options.get("force_keyframes_at_cuts"):
            cmd.append("--force-keyframes-at-cuts")

        # Add output template for this section
        cmd.extend(["-o", section_outtmpl])

        # Add cookies file if present
        if ytdl_options.get("cookiefile"):
            # Convert backslashes to forward slashes for better compatibility
            cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
            cmd.extend(["--cookies", cookies_path])

        # Add no-playlist if specified
        if ytdl_options.get("noplaylist"):
            cmd.append("--no-playlist")

        # Add the URL
        cmd.append(url)

        if not quiet:
            debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
            debug(f"Command: {' '.join(cmd)}")

        # Run the subprocess - don't capture output so progress is shown
        try:
            result = subprocess.run(cmd)

            if result.returncode != 0:
                raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
        except Exception as exc:
            raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc

    return session_id, first_section_info or {}
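A sketch of how a caller might collect the per-section files produced above (editor's annotation, not part of the commit; the helper name and downstream merge step are assumptions based on the docstring):

    # Sections are written as "{session_id}_{n}.<ext>" in the output directory,
    # so they can be gathered in order once the helper returns.
    from pathlib import Path

    def collect_section_files(output_dir: Path, session_id: str) -> list[Path]:
        # Sort by the numeric section index embedded in the filename.
        return sorted(
            output_dir.glob(f"{session_id}_*"),
            key=lambda p: int(p.stem.split("_")[-1]),
        )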
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
    """Build yt-dlp download options."""
    ensure_directory(opts.output_dir)

    # Build output template
    # When downloading sections, each section will have .section_N_of_M added by _download_with_sections_via_cli
    outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())

    base_options: Dict[str, Any] = {
        "outtmpl": outtmpl,
        "quiet": True,
        "no_warnings": True,
        "noprogress": True,
        "socket_timeout": 30,
        "retries": 10,
        "fragment_retries": 10,
        "http_chunk_size": 10_485_760,
        "restrictfilenames": True,
        "progress_hooks": [] if opts.quiet else [_progress_callback],
    }

    if opts.cookies_path and opts.cookies_path.is_file():
        base_options["cookiefile"] = str(opts.cookies_path)
    else:
        # Check global cookies file lazily to avoid import cycles
        from hydrus_health_check import get_cookies_file_path  # local import

        global_cookies = get_cookies_file_path()
        if global_cookies:
            base_options["cookiefile"] = global_cookies
        else:
            # Fallback to browser cookies
            base_options["cookiesfrombrowser"] = ("chrome",)

    # Add no-playlist option if specified (for single video from playlist url)
    if opts.no_playlist:
        base_options["noplaylist"] = True

    # Configure based on mode
    if opts.mode == "audio":
        base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
        base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
    else:  # video
        base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
        base_options["format_sort"] = [
            "res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
        ]

    # Add clip sections if provided (yt-dlp will download only these sections)
    if opts.clip_sections:
        # Parse section ranges like "48-65,120-152,196-205" (seconds)
        # and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
        sections = []
        for section_range in opts.clip_sections.split(','):
            try:
                start_str, end_str = section_range.strip().split('-')
                start_sec = float(start_str)
                end_sec = float(end_str)

                # Convert seconds to HH:MM:SS format
                def sec_to_hhmmss(seconds):
                    hours = int(seconds // 3600)
                    minutes = int((seconds % 3600) // 60)
                    secs = int(seconds % 60)
                    return f"{hours:02d}:{minutes:02d}:{secs:02d}"

                start_time = sec_to_hhmmss(start_sec)
                end_time = sec_to_hhmmss(end_sec)
                sections.append(f"*{start_time}-{end_time}")
            except (ValueError, AttributeError):
                pass

        if sections:
            # Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
            base_options["download_sections"] = sections
            debug(f"Download sections configured: {', '.join(sections)}")
            # Note: Not using --force-keyframes-at-cuts to avoid re-encoding
            # This may result in less precise cuts but faster downloads

    # Add playlist items selection if provided
    if opts.playlist_items:
        base_options["playlist_items"] = opts.playlist_items

    if not opts.quiet:
        debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
    return base_options
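For reference, a worked example of the clip_sections conversion performed above (editor's annotation, not part of the commit; the standalone function is a rephrasing of the nested sec_to_hhmmss helper):

    # "48-65,120-152" in seconds becomes two yt-dlp section specifiers.
    def to_section_spec(start_sec: float, end_sec: float) -> str:
        def hhmmss(seconds: float) -> str:
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            secs = int(seconds % 60)
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
        return f"*{hhmmss(start_sec)}-{hhmmss(end_sec)}"

    assert to_section_spec(48, 65) == "*00:00:48-00:01:05"
    assert to_section_spec(120, 152) == "*00:02:00-00:02:32"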
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
    """Iterate through download entries, handling playlists."""
    queue: List[Dict[str, Any]] = [info]
    seen: set[int] = set()
    while queue:
        current = queue.pop(0)
        obj_id = id(current)
        if obj_id in seen:
            continue
        seen.add(obj_id)
        entries = current.get("entries")
        if isinstance(entries, list):
            for entry in entries:
                if isinstance(entry, dict):
                    queue.append(entry)
        if current.get("requested_downloads") or not entries:
            yield current


def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
    """Get candidate file paths for downloaded media."""
    requested = entry.get("requested_downloads")
    if isinstance(requested, list):
        for item in requested:
            if isinstance(item, dict):
                for key in ("filepath", "_filename", "filename"):
                    value = item.get(key)
                    if value:
                        yield Path(value)
    for key in ("filepath", "_filename", "filename"):
        value = entry.get(key)
        if value:
            yield Path(value)
    if entry.get("filename"):
        yield output_dir / entry["filename"]


def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
    """Find downloaded file in yt-dlp metadata."""
    for entry in _iter_download_entries(info):
        for candidate in _candidate_paths(entry, output_dir):
            if candidate.is_file():
                return entry, candidate
            if not candidate.is_absolute():
                resolved = output_dir / candidate
                if resolved.is_file():
                    return entry, resolved
    raise FileNotFoundError("yt-dlp did not report a downloaded media file")


def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
    """Extract SHA256 hash from yt-dlp metadata."""
    for payload in [info] + info.get("entries", []):
        if not isinstance(payload, dict):
            continue
        hashes = payload.get("hashes")
        if isinstance(hashes, dict):
            for key in ("sha256", "sha-256", "sha_256"):
                value = hashes.get(key)
                if isinstance(value, str) and value.strip():
                    return value.strip().lower()
        for key in ("sha256", "sha-256", "sha_256"):
            value = payload.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip().lower()
    return None
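An illustration of the lookup order in _extract_sha256 above (editor's annotation, not part of the commit; the hash strings are hypothetical placeholders):

    # The top-level info dict is checked first, then each playlist entry;
    # a nested "hashes" mapping wins over a flat "sha256"-style key.
    sample_info = {
        "title": "clip",
        "hashes": {"sha256": "AB12CD34"},        # found first, returned lowercased
        "entries": [{"sha_256": "0011FF22"}],    # only consulted if the above is absent
    }
    # _extract_sha256(sample_info) -> "ab12cd34"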
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
    """Extract the actual download link from a LibGen redirect URL.

    LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to an
    actual mirror URL. This follows the redirect chain to get the real file.

    Args:
        libgen_url: LibGen file.php URL

    Returns:
        Actual download URL or None if extraction fails
    """
    try:
        import requests
        from urllib.parse import urlparse

        # Check if this is a LibGen URL
        parsed = urlparse(libgen_url)
        if 'libgen' not in parsed.netloc.lower():
            return None

        if '/file.php' not in parsed.path.lower():
            return None

        # LibGen redirects to actual mirrors, follow redirects to get final URL
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

        debug(f"Following LibGen redirect chain for: {libgen_url}")

        # First, get the page and look for direct download link
        try:
            response = session.get(libgen_url, timeout=10, allow_redirects=True)
            final_url = response.url

            # Try to find actual download link in the page
            try:
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.content, 'html.parser')

                # Look for download links - LibGen typically has forms with download buttons
                # Look for all links and forms that might lead to download
                for link in soup.find_all('a'):
                    href = link.get('href')
                    if href and isinstance(href, str):
                        # Look for direct file links or get.php redirects
                        if 'get.php' in href.lower() or href.endswith(('.pdf', '.epub', '.djvu', '.mobi')):
                            download_url = href if href.startswith('http') else urljoin(final_url, href)
                            debug(f"Found download link: {download_url}")
                            return download_url
            except ImportError:
                pass  # BeautifulSoup not available

            # If we followed redirects successfully, return the final URL
            # This handles cases where libgen redirects to a direct download mirror
            if final_url != libgen_url:
                debug(f"LibGen resolved to mirror: {final_url}")
                return final_url

        except requests.RequestException as e:
            log(f"Error following LibGen redirects: {e}", file=sys.stderr)
            # Try a HEAD request as a fallback
            try:
                response = session.head(libgen_url, allow_redirects=True, timeout=10)
                if response.url != libgen_url:
                    debug(f"LibGen HEAD resolved to: {response.url}")
                    return response.url
            except Exception:
                pass

        return None

    except Exception as e:
        log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
        return None
def _download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) without yt-dlp."""
    ensure_directory(output_dir)

    from urllib.parse import unquote, urlparse, parse_qs
    import re

    # Extract filename from URL
    parsed_url = urlparse(url)
    url_path = parsed_url.path

    # Try to get filename from query parameters first (for LibGen and similar services)
    # e.g., ?filename=Book+Title.pdf or &download=filename.pdf
    filename = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ('filename', 'download', 'file', 'name'):
            if param_name in query_params and query_params[param_name]:
                filename = query_params[param_name][0]
                filename = unquote(filename)
                break

    # If not found in query params, extract from URL path
    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)

    # Remove query strings from filename if any
    if "?" in filename:
        filename = filename.split("?")[0]

    # Try to get real filename from Content-Disposition header (HEAD request)
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            if content_disposition:
                # Extract filename from Content-Disposition header
                # Format: attachment; filename="filename.pdf" or filename=filename.pdf
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as e:
        if not quiet:
            log(f"Could not get filename from headers: {e}", file=sys.stderr)

    # Fallback if we still don't have a good filename
    if not filename or "." not in filename:
        filename = "downloaded_file.bin"

    file_path = output_dir / filename
    progress_bar = ProgressBar()

    if not quiet:
        debug(f"Direct download: {filename}")

    try:
        start_time = time.time()
        downloaded_bytes = [0]
        total_bytes = [0]
        last_progress_time = [start_time]

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            downloaded_bytes[0] = bytes_downloaded
            total_bytes[0] = content_length

            now = time.time()
            if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
                elapsed = now - start_time
                percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
                speed = bytes_downloaded / elapsed if elapsed > 0 else 0
                eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0

                speed_str = progress_bar.format_bytes(speed) + "/s"
                minutes, seconds = divmod(int(eta_seconds), 60)
                hours, minutes = divmod(minutes, 60)
                eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"

                progress_line = progress_bar.format_progress(
                    percent_str=f"{percent:.1f}%",
                    downloaded=bytes_downloaded,
                    total=content_length,
                    speed_str=speed_str,
                    eta_str=eta_str,
                )
                if not quiet:
                    debug(progress_line)
                last_progress_time[0] = now

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)

        elapsed = time.time() - start_time
        avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")

        # For direct file downloads, create minimal info dict without filename as title
        # This prevents creating duplicate title: tags when filename gets auto-generated
        # We'll add title back later only if we couldn't extract meaningful tags
        info = {
            "id": filename.rsplit(".", 1)[0],
            "ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
            "webpage_url": url,
        }

        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            pass

        tags = []
        if extract_ytdlp_tags:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as e:
                log(f"Error extracting tags: {e}", file=sys.stderr)

        # Only use filename as a title tag if we couldn't extract any meaningful tags
        # This prevents duplicate title: tags when the filename could be mistaken for metadata
        if not any(t.startswith('title:') for t in tags):
            # Re-extract tags with filename as title only if needed
            info['title'] = filename
            tags = []
            if extract_ytdlp_tags:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as e:
                    log(f"Error extracting tags with filename: {e}", file=sys.stderr)

        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )

        return DownloadMediaResult(
            path=file_path,
            info=info,
            tags=tags,
            source_url=url,
            hash_value=hash_value,
        )

    except (httpx.HTTPError, httpx.RequestError) as exc:
        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc
    except Exception as exc:
        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
    """Probe URL to extract metadata WITHOUT downloading.

    Args:
        url: URL to probe
        no_playlist: If True, ignore playlists and probe only the single video
        timeout_seconds: Max seconds to wait for probe (default 15s)

    Returns:
        Dict with keys: extractor, title, entries (if playlist), duration, etc.
        Returns None if not supported by yt-dlp or on timeout.
    """
    if not is_url_supported_by_ytdlp(url):
        return None

    # Wrap probe in timeout to prevent hanging on large playlists
    import threading
    from typing import cast

    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _do_probe() -> None:
        try:
            _ensure_yt_dlp_ready()

            assert yt_dlp is not None
            # Extract info without downloading
            # Use extract_flat='in_playlist' to get full metadata for playlist items
            ydl_opts = {
                "quiet": True,  # Suppress all output
                "no_warnings": True,
                "socket_timeout": 10,
                "retries": 2,  # Reduce retries for faster timeout
                "skip_download": True,  # Don't actually download
                "extract_flat": "in_playlist",  # Get playlist with metadata for each entry
                "noprogress": True,  # No progress bars
            }

            # Add cookies if available (lazy import to avoid circular dependency)
            from hydrus_health_check import get_cookies_file_path  # local import

            global_cookies = get_cookies_file_path()
            if global_cookies:
                ydl_opts["cookiefile"] = global_cookies

            # Add no_playlist option if specified
            if no_playlist:
                ydl_opts["noplaylist"] = True

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(url, download=False)

            if not isinstance(info, dict):
                result_container[0] = None
                return

            # Extract relevant fields
            result_container[0] = {
                "extractor": info.get("extractor", ""),
                "title": info.get("title", ""),
                "entries": info.get("entries", []),  # Will be populated if playlist
                "duration": info.get("duration"),
                "uploader": info.get("uploader"),
                "description": info.get("description"),
                "url": url,
            }
        except Exception as exc:
            log(f"Probe error for {url}: {exc}")
            result_container[1] = exc

    thread = threading.Thread(target=_do_probe, daemon=False)
    thread.start()
    thread.join(timeout=timeout_seconds)

    if thread.is_alive():
        # Probe timed out - return None to fall back to direct download
        debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
        return None

    if result_container[1] is not None:
        # Probe error - return None to proceed anyway
        return None

    return cast(Optional[Dict[str, Any]], result_container[0])


__all__ = [
    "is_url_supported_by_ytdlp",
    "list_formats",
    "probe_url",
    "DownloadError",
    "DownloadOptions",
    "DownloadMediaResult",
]
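A usage sketch for probe_url above (editor's annotation, not part of the commit; the URL and branching are illustrative):

    # Probe before downloading so playlists can be expanded or confirmed first.
    from SYS.download import probe_url

    info = probe_url("https://www.youtube.com/playlist?list=PL123", timeout_seconds=10)
    if info is None:
        # Unsupported URL, probe error, or timeout: fall back to a direct download path.
        pass
    elif info["entries"]:
        print(f"playlist '{info['title']}' with {len(info['entries'])} entries")
    else:
        print(f"single item: {info['title']} ({info['duration']} s)")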
180  SYS/file_server.py  Normal file
@@ -0,0 +1,180 @@
"""Simple HTTP file server for serving files in web mode."""

import threading
import socket
import logging
from http.server import HTTPServer, SimpleHTTPRequestHandler
from pathlib import Path
from typing import Optional
import mimetypes
import urllib.parse

logger = logging.getLogger(__name__)

# Global server instance
_file_server: Optional[HTTPServer] = None
_server_thread: Optional[threading.Thread] = None
_server_port: int = 8001


class FileServerHandler(SimpleHTTPRequestHandler):
    """HTTP request handler for file serving."""

    def do_GET(self):
        """Handle GET requests."""
        # Parse the path
        parsed_path = urllib.parse.urlparse(self.path)
        file_path = urllib.parse.unquote(parsed_path.path)

        # Remove leading slash
        if file_path.startswith('/'):
            file_path = file_path[1:]

        # Decode the file path (it's URL encoded)
        try:
            full_path = Path(file_path).resolve()

            # Security check: ensure the path is within allowed directories
            # For now, allow all paths (can be restricted later)

            if full_path.is_file() and full_path.exists():
                # Serve the file
                logger.debug(f"Serving file: {full_path}")

                # Determine content type
                content_type, _ = mimetypes.guess_type(str(full_path))
                if content_type is None:
                    content_type = 'application/octet-stream'

                try:
                    with open(full_path, 'rb') as f:
                        file_content = f.read()

                    self.send_response(200)
                    self.send_header('Content-type', content_type)
                    self.send_header('Content-Length', str(len(file_content)))
                    self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
                    self.end_headers()
                    self.wfile.write(file_content)
                    logger.info(f"Successfully served file: {full_path.name}")
                    return
                except Exception as e:
                    logger.error(f"Error serving file: {e}")
                    self.send_error(500, "Internal server error")
                    return
            else:
                logger.warning(f"File not found: {full_path}")
                self.send_error(404, "File not found")
                return

        except Exception as e:
            logger.error(f"Error handling request: {e}")
            self.send_error(400, "Bad request")

    def log_message(self, format, *args):
        """Override to use our logger instead of stderr."""
        logger.debug(format % args)


def get_local_ip() -> Optional[str]:
    """Get the local IP address that's accessible from other devices."""
    try:
        # Connect to a remote server to determine local IP
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(("8.8.8.8", 80))
        ip = s.getsockname()[0]
        s.close()
        return ip
    except Exception as e:
        logger.warning(f"Failed to determine local IP: {e}")
        return None


def start_file_server(port: int = 8001) -> Optional[str]:
    """Start the HTTP file server.

    Args:
        port: Port to serve on

    Returns:
        Server URL if successful, None otherwise
    """
    global _file_server, _server_thread, _server_port

    if _file_server is not None:
        logger.debug(f"File server already running on port {_server_port}")
        local_ip = get_local_ip()
        if local_ip:
            return f"http://{local_ip}:{_server_port}"
        return None

    try:
        _server_port = port

        # Create server
        server_address = ('', port)
        _file_server = HTTPServer(server_address, FileServerHandler)

        # Start in daemon thread
        _server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
        _server_thread.start()

        logger.info(f"File server started on port {port}")

        # Get local IP
        local_ip = get_local_ip()
        if local_ip:
            server_url = f"http://{local_ip}:{port}"
            logger.info(f"File server accessible at: {server_url}")
            return server_url
        else:
            logger.warning("Could not determine local IP")
            return None

    except Exception as e:
        logger.error(f"Failed to start file server: {e}")
        _file_server = None
        _server_thread = None
        return None


def stop_file_server():
    """Stop the HTTP file server."""
    global _file_server, _server_thread

    if _file_server is not None:
        try:
            _file_server.shutdown()
            _file_server.server_close()
            logger.info("File server stopped")
        except Exception as e:
            logger.error(f"Error stopping file server: {e}")
        finally:
            _file_server = None
            _server_thread = None


def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
    """Get the HTTP URL for a file.

    Args:
        file_path: Path to the file
        server_url: Base server URL (gets determined if None)

    Returns:
        HTTP URL to the file, or None if server not running
    """
    if not file_path.exists():
        logger.warning(f"File does not exist: {file_path}")
        return None

    if server_url is None:
        local_ip = get_local_ip()
        if not local_ip:
            logger.error("Cannot determine local IP for file URL")
            return None
        server_url = f"http://{local_ip}:{_server_port}"

    # URL encode the file path
    encoded_path = urllib.parse.quote(str(file_path.resolve()))
    return f"{server_url}/{encoded_path}"
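A minimal usage sketch for the file server module above (editor's annotation, not part of the commit; the served path is illustrative):

    # Start the server once, then hand out per-file URLs to other devices on the LAN.
    from pathlib import Path
    from SYS.file_server import start_file_server, get_file_url, stop_file_server

    base_url = start_file_server(port=8001)          # e.g. "http://192.168.1.20:8001"
    if base_url:
        url = get_file_url(Path("downloads/report.pdf"), server_url=base_url)
        print(f"share this link: {url}")
        # ... later, when the web session ends:
        stop_file_server()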
104  SYS/logger.py  Normal file
@@ -0,0 +1,104 @@
"""Unified logging utility for automatic file and function name tracking."""

import sys
import inspect
import threading
from pathlib import Path

_DEBUG_ENABLED = False
_thread_local = threading.local()

def set_thread_stream(stream):
    """Set a custom output stream for the current thread."""
    _thread_local.stream = stream

def get_thread_stream():
    """Get the custom output stream for the current thread, if any."""
    return getattr(_thread_local, 'stream', None)

def set_debug(enabled: bool) -> None:
    """Enable or disable debug logging."""
    global _DEBUG_ENABLED
    _DEBUG_ENABLED = enabled

def is_debug_enabled() -> bool:
    """Check if debug logging is enabled."""
    return _DEBUG_ENABLED

def debug(*args, **kwargs) -> None:
    """Print debug message if debug logging is enabled.

    Automatically prepends [filename.function_name] to all output.
    """
    if not _DEBUG_ENABLED:
        return

    # Check if stderr has been redirected to /dev/null (quiet mode)
    # If so, skip output to avoid queuing in background worker's capture
    try:
        stderr_name = getattr(sys.stderr, 'name', '')
        if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name):
            return
    except Exception:
        pass

    # Check for thread-local stream first
    stream = get_thread_stream()
    if stream:
        kwargs['file'] = stream
    # Set default to stderr for debug messages
    elif 'file' not in kwargs:
        kwargs['file'] = sys.stderr

    # Prepend DEBUG label
    args = ("DEBUG:", *args)

    # Use the same logic as log()
    log(*args, **kwargs)

def log(*args, **kwargs) -> None:
    """Print with automatic file.function prefix.

    Automatically prepends [filename.function_name] to all output.
    Defaults to stdout if not specified.

    Example:
        log("Upload started")  # Output: [add_file.run] Upload started
    """
    # When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
    add_prefix = _DEBUG_ENABLED

    # Get the calling frame
    frame = inspect.currentframe()
    if frame is None:
        print(*args, **kwargs)
        return

    caller_frame = frame.f_back
    if caller_frame is None:
        print(*args, **kwargs)
        return

    try:
        # Get file name without extension
        file_name = Path(caller_frame.f_code.co_filename).stem

        # Get function name
        func_name = caller_frame.f_code.co_name

        # Check for thread-local stream first
        stream = get_thread_stream()
        if stream:
            kwargs['file'] = stream
        # Set default to stdout if not specified
        elif 'file' not in kwargs:
            kwargs['file'] = sys.stdout

        if add_prefix:
            prefix = f"[{file_name}.{func_name}]"
            print(prefix, *args, **kwargs)
        else:
            print(*args, **kwargs)
    finally:
        del frame
        del caller_frame
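A short usage sketch for the logger above (editor's annotation, not part of the commit):

    # debug() is a no-op until set_debug(True); log() always prints, and both honor
    # a per-thread stream set via set_thread_stream() (useful for captured workers).
    import io
    from SYS.logger import set_debug, log, debug, set_thread_stream

    set_debug(True)
    log("upload started")        # with debug on, prefixed like "[myfile.myfunc] upload started"
    debug("raw response:", 200)  # goes to stderr as "DEBUG: raw response: 200"

    buffer = io.StringIO()
    set_thread_stream(buffer)    # redirect this thread's output, e.g. inside a worker
    log("captured line")
    print(buffer.getvalue())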
102  SYS/progress.py  Normal file
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""Text-based progress bar utilities for consistent display across all downloads."""

import sys

from SYS.logger import log, debug


def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
    """Create a text-based progress bar.

    Args:
        current: Current progress (bytes/items)
        total: Total to complete (bytes/items)
        width: Width of the bar in characters (default 40)
        label: Optional label prefix

    Returns:
        Formatted progress bar string

    Examples:
        format_progress_bar(50, 100)
        # Returns: "[████████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"

        format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
        # Returns: "download.zip: [██████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
    """
    if total <= 0:
        percentage = 0
        filled = 0
    else:
        percentage = (current / total) * 100
        filled = int((current / total) * width)

    bar = "█" * filled + "░" * (width - filled)
    pct_str = f"{percentage:.1f}%"

    if label:
        result = f"{label}: [{bar}] {pct_str}"
    else:
        result = f"[{bar}] {pct_str}"

    return result


def format_size(bytes_val: float) -> str:
    """Format bytes to human-readable size."""
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if bytes_val < 1024:
            return f"{bytes_val:.2f} {unit}"
        bytes_val /= 1024
    return f"{bytes_val:.2f} PB"


def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
    """Format download status with progress bar and details."""
    bar = format_progress_bar(current, total, width=30)
    size_current = format_size(current)
    size_total = format_size(total)

    if speed > 0:
        speed_str = f" @ {format_size(speed)}/s"
    else:
        speed_str = ""

    return f"{bar} ({size_current} / {size_total}{speed_str})"


def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
    """Print download progress to stderr (doesn't interfere with piped output)."""
    status = format_download_status(filename, current, total, speed)
    debug(status, end=end, flush=True)


def print_final_progress(filename: str, total: int, elapsed: float) -> None:
    """Print final progress line (100%) with time elapsed."""
    bar = format_progress_bar(total, total, width=30)
    size_str = format_size(total)

    if elapsed < 60:
        time_str = f"{elapsed:.1f}s"
    elif elapsed < 3600:
        minutes = elapsed / 60
        time_str = f"{minutes:.1f}m"
    else:
        hours = elapsed / 3600
        time_str = f"{hours:.2f}h"

    debug(f"{bar} ({size_str}) - {time_str}")


if __name__ == "__main__":
    import time

    log("Progress Bar Demo:", file=sys.stderr)

    for i in range(101):
        print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
        time.sleep(0.02)

    print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
    log()
155  SYS/tasks.py  Normal file
@@ -0,0 +1,155 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
import os
import socket
import subprocess
import sys

from SYS.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
    """Connect to the mpv IPC server located at *path*."""
    deadline = time.time() + timeout
    if not path:
        return None
    if os.name == 'nt':
        # mpv exposes a named pipe on Windows. Keep retrying until it is ready.
        while True:
            try:
                return open(path, 'r+b', buffering=0)
            except FileNotFoundError:
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
            except OSError as exc:  # Pipe busy
                if exc.errno not in (errno.ENOENT, errno.EPIPE, errno.EBUSY):
                    raise
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
    else:
        sock = socket.socket(socket.AF_UNIX)
        while True:
            try:
                sock.connect(path)
                return sock.makefile('r+b', buffering=0)
            except FileNotFoundError:
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
            except OSError as exc:
                if exc.errno not in (errno.ENOENT, errno.ECONNREFUSED):
                    raise
                if time.time() > deadline:
                    return None
                time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
    """Create a helper function for sending script messages via IPC."""
    if ipc is None:
        def _noop(_event: str, _payload: dict) -> None:
            return None
        return _noop
    lock = threading.Lock()
    def _send(event: str, payload: dict) -> None:
        message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
        encoded = message.encode('utf-8') + b'\n'
        with lock:
            try:
                ipc.write(encoded)
                ipc.flush()
            except OSError:
                pass
    return _send
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
    for raw in stream:
        yield raw.rstrip('\r\n')
def _run_task(args, parser) -> int:
    if not args.command:
        parser.error('run-task requires a command to execute (use "--" before the command).')
    env = os.environ.copy()
    for entry in args.env:
        key, sep, value = entry.partition('=')
        if not sep:
            parser.error(f'Invalid environment variable definition: {entry!r}')
        env[key] = value
    command = list(args.command)
    if command and command[0] == '--':
        command.pop(0)
    notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
    if not command:
        notifier('downlow-task-event', {
            'id': args.task_id,
            'event': 'error',
            'message': 'No command provided after separator',
        })
        log('[downlow.py] No command provided for run-task', file=sys.stderr)
        return 1
    if command and isinstance(command[0], str) and sys.executable:
        first = command[0].lower()
        if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
            command[0] = sys.executable
    if os.environ.get('DOWNLOW_DEBUG'):
        log(f"Launching command: {command}", file=sys.stderr)
    notifier('downlow-task-event', {
        'id': args.task_id,
        'event': 'start',
        'command': command,
        'cwd': args.cwd or os.getcwd(),
    })
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=args.cwd or None,
            env=env,
            text=True,
            bufsize=1,
            universal_newlines=True,
        )
    except FileNotFoundError as exc:
        notifier('downlow-task-event', {
            'id': args.task_id,
            'event': 'error',
            'message': f'Executable not found: {exc.filename}',
        })
        log(f"{exc}", file=sys.stderr)
        return 1
    stdout_lines: list[str] = []
    stderr_lines: list[str] = []
    def pump(stream: IO[str], label: str, sink: list[str]) -> None:
        for line in iter_stream(stream):
            sink.append(line)
            notifier('downlow-task-event', {
                'id': args.task_id,
                'event': label,
                'line': line,
            })
    threads = []
    if process.stdout:
        t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
        t_out.start()
        threads.append(t_out)
    if process.stderr:
        t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
        t_err.start()
        threads.append(t_err)
    return_code = process.wait()
    for t in threads:
        t.join(timeout=0.1)
    notifier('downlow-task-event', {
        'id': args.task_id,
        'event': 'exit',
        'returncode': return_code,
        'success': return_code == 0,
    })
    # Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
    if stdout_lines:
        log('\n'.join(stdout_lines))
    if stderr_lines:
        log('\n'.join(stderr_lines), file=sys.stderr)
    return return_code
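A sketch of how the IPC helpers above can be used on their own (editor's annotation, not part of the commit; the pipe/socket path is whatever mpv was started with via --input-ipc-server, and the payload values are illustrative):

    # Send a script-message to a running mpv instance over its IPC server.
    import os
    from SYS.tasks import connect_ipc, ipc_sender

    ipc_path = r"\\.\pipe\mpv-ipc" if os.name == "nt" else "/tmp/mpv-ipc"  # illustrative path
    send = ipc_sender(connect_ipc(ipc_path, timeout=2.0))  # no-op sender if mpv is unreachable
    send("downlow-task-event", {"id": "demo-1", "event": "start", "command": ["echo", "hi"]})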
492  SYS/utils.py  Normal file
@@ -0,0 +1,492 @@
"""General-purpose helpers used across the downlow CLI."""
from __future__ import annotations

import json
import hashlib
import ffmpeg
import base64
import logging
import time
from pathlib import Path
from typing import Any, Iterable
from datetime import datetime
from dataclasses import dataclass, field
from fnmatch import fnmatch
from urllib.parse import urlparse

import SYS.utils_constant

try:
    import cbor2
except ImportError:
    cbor2 = None  # type: ignore

CHUNK_SIZE = 1024 * 1024  # 1 MiB
_format_logger = logging.getLogger(__name__)
def ensure_directory(path: Path) -> None:
    """Ensure *path* exists as a directory."""
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError as exc:  # pragma: no cover - surfaced to caller
        raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
def unique_path(path: Path) -> Path:
    """Return a unique path by appending " (n)" if needed."""
    if not path.exists():
        return path
    stem = path.stem
    suffix = path.suffix
    parent = path.parent
    counter = 1
    while True:
        candidate = parent / f"{stem} ({counter}){suffix}"
        if not candidate.exists():
            return candidate
        counter += 1

def sanitize_metadata_value(value: Any) -> str | None:
    if value is None:
        return None
    if not isinstance(value, str):
        value = str(value)
    value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
    if not value:
        return None
    return value
def unique_preserve_order(values: Iterable[str]) -> list[str]:
    seen: set[str] = set()
    ordered: list[str] = []
    for value in values:
        if value not in seen:
            seen.add(value)
            ordered.append(value)
    return ordered
def sha256_file(file_path: Path) -> str:
    """Return the SHA-256 hex digest of *path*."""
    hasher = hashlib.sha256()
    with file_path.open('rb') as handle:
        for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
            hasher.update(chunk)
    return hasher.hexdigest()


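A brief usage sketch for the path and hashing helpers above (editor's annotation, not part of the commit; the paths and file content are illustrative):

    # Pick a collision-free output path, then hash the finished file.
    from pathlib import Path
    from SYS.utils import ensure_directory, unique_path, sha256_file

    out_dir = Path("exports")
    ensure_directory(out_dir)
    target = unique_path(out_dir / "report.pdf")   # becomes "report (1).pdf" if taken
    target.write_bytes(b"%PDF-1.4 placeholder")    # placeholder content
    print(sha256_file(target))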
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
|
||||
"""Create a .metadata sidecar file with JSON metadata.
|
||||
|
||||
The metadata dict should contain title. If not present, it will be derived from
|
||||
the filename. This ensures the .metadata file can be matched during batch import.
|
||||
|
||||
Args:
|
||||
file_path: Path to the exported file
|
||||
metadata: Dictionary of metadata to save
|
||||
"""
|
||||
if not metadata:
|
||||
return
|
||||
file_name = file_path.stem
|
||||
file_ext = file_path.suffix.lower()
|
||||
# Ensure metadata has a title field that matches the filename (without extension)
|
||||
# This allows the sidecar to be matched and imported properly during batch import
|
||||
if 'title' not in metadata or not metadata.get('title'):
|
||||
metadata['title'] = file_name
|
||||
metadata['hash'] = sha256_file(file_path)
|
||||
metadata['size'] = file_path.stat().st_size
|
||||
format_found = False
|
||||
for mime_type, ext_map in SYS.utils_constant.mime_maps.items():
|
||||
for key, info in ext_map.items():
|
||||
if info.get("ext") == file_ext:
|
||||
metadata['type'] = mime_type
|
||||
format_found = True
|
||||
break
|
||||
if format_found:
|
||||
break
|
||||
else:
|
||||
metadata['type'] = 'unknown'
|
||||
metadata.update(ffprobe(str(file_path)))
|
||||
|
||||
|
||||
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
|
||||
try:
|
||||
with open(metadata_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(metadata, f, ensure_ascii=False, indent=2)
|
||||
except OSError as exc:
|
||||
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
|
||||
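A sketch of the sidecar this produces for a hypothetical export; the exact ffprobe fields depend on the file:
from pathlib import Path

create_metadata_sidecar(Path("exports/clip.mp4"), {"artist": "Example"})
# Writes "exports/clip.mp4.metadata" containing JSON roughly like:
# {
#   "artist": "Example",
#   "title": "clip",              # derived from the filename when missing
#   "hash": "<sha256 of the file>",
#   "size": 1234567,
#   "type": "video",              # looked up from the extension via mime_maps
#   "duration": 12.5, "video_codec": "h264", ...   # merged from ffprobe
# }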
|
||||
def create_tags_sidecar(file_path: Path, tags: set) -> None:
|
||||
"""Create a .tags sidecar file with tags (one per line).
|
||||
|
||||
Args:
|
||||
file_path: Path to the exported file
|
||||
tags: Set of tag strings
|
||||
"""
|
||||
if not tags:
|
||||
return
|
||||
|
||||
tags_path = file_path.with_suffix(file_path.suffix + '.tags')
|
||||
try:
|
||||
with open(tags_path, 'w', encoding='utf-8') as f:
|
||||
for tag in sorted(tags):
|
||||
f.write(f"{tag}\n")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
|
||||
|
||||
|
||||
def ffprobe(file_path: str) -> dict:
|
||||
probe = ffmpeg.probe(file_path)
|
||||
metadata = {}
|
||||
|
||||
# Format-level info
|
||||
fmt = probe.get("format", {})
|
||||
metadata["duration"] = float(fmt.get("duration", 0)) if "duration" in fmt else None
|
||||
metadata["size"] = int(fmt.get("size", 0)) if "size" in fmt else None
|
||||
metadata["format_name"] = fmt.get("format_name", None)
|
||||
|
||||
# Stream-level info
|
||||
for stream in probe.get("streams", []):
|
||||
codec_type = stream.get("codec_type")
|
||||
if codec_type == "audio":
|
||||
metadata["audio_codec"] = stream.get("codec_name")
|
||||
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
|
||||
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
|
||||
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
|
||||
elif codec_type == "video":
|
||||
metadata["video_codec"] = stream.get("codec_name")
|
||||
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||
elif codec_type == "image":
|
||||
metadata["image_codec"] = stream.get("codec_name")
|
||||
metadata["width"] = int(stream.get("width", 0)) if "width" in stream else None
|
||||
metadata["height"] = int(stream.get("height", 0)) if "height" in stream else None
|
||||
|
||||
return metadata
|
||||
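The shape of the returned dict, sketched for a hypothetical audio file (keys for stream types that are not present are simply absent or None):
info = ffprobe("downloads/song.flac")
# e.g. {
#   "duration": 215.3, "size": 23456789, "format_name": "flac",
#   "audio_codec": "flac", "bitrate": None, "samplerate": 44100, "channels": 2,
# }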
|
||||
|
||||
# ============================================================================
|
||||
# CBOR Utilities - Consolidated from cbor.py
|
||||
# ============================================================================
|
||||
"""CBOR utilities backed by the `cbor2` library."""
|
||||
|
||||
|
||||
def decode_cbor(data: bytes) -> Any:
|
||||
"""Decode *data* from CBOR into native Python objects."""
|
||||
if not data:
|
||||
return None
|
||||
if cbor2 is None:
|
||||
raise ImportError("cbor2 library is required for CBOR decoding")
|
||||
return cbor2.loads(data)
|
||||
|
||||
|
||||
def jsonify(value: Any) -> Any:
|
||||
"""Convert *value* into a JSON-friendly structure."""
|
||||
if isinstance(value, dict):
|
||||
return {str(key): jsonify(val) for key, val in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [jsonify(item) for item in value]
|
||||
if isinstance(value, bytes):
|
||||
return {"__bytes__": base64.b64encode(value).decode("ascii")}
|
||||
return value
|
||||
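A small example of how bytes survive the conversion (values are illustrative):
payload = {"name": "cover", "data": b"\x89PNG"}
jsonify(payload)
# -> {"name": "cover", "data": {"__bytes__": "iVBORw=="}}   (base64 of the raw bytes)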
|
||||
|
||||
# ============================================================================
|
||||
# Format Utilities - Consolidated from format_utils.py
|
||||
# ============================================================================
|
||||
"""Formatting utilities for displaying metadata consistently across the application."""
|
||||
|
||||
|
||||
def format_bytes(bytes_value) -> str:
|
||||
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
|
||||
|
||||
Args:
|
||||
bytes_value: Size in bytes (int or float)
|
||||
|
||||
Returns:
|
||||
Formatted string like '1.5 MB' or '756.0 MB'
|
||||
"""
|
||||
if bytes_value is None or bytes_value <= 0:
|
||||
return "0 B"
|
||||
|
||||
if isinstance(bytes_value, (int, float)):
|
||||
for unit in ("B", "KB", "MB", "GB", "TB"):
|
||||
if bytes_value < 1024:
|
||||
if unit == "B":
|
||||
return f"{int(bytes_value)} {unit}"
|
||||
return f"{bytes_value:.1f} {unit}"
|
||||
bytes_value /= 1024
|
||||
return f"{bytes_value:.1f} PB"
|
||||
return str(bytes_value)
|
||||
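A few concrete values, for reference:
format_bytes(0)           # "0 B"
format_bytes(512)         # "512 B"   (whole bytes are shown without a decimal)
format_bytes(1536)        # "1.5 KB"
format_bytes(792723456)   # "756.0 MB"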
|
||||
|
||||
def format_duration(seconds) -> str:
|
||||
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
|
||||
|
||||
Args:
|
||||
seconds: Duration in seconds (int or float)
|
||||
|
||||
Returns:
|
||||
Formatted string like '1:23:45' or '5:30'
|
||||
"""
|
||||
if seconds is None or seconds == '':
|
||||
return "N/A"
|
||||
|
||||
if isinstance(seconds, str):
|
||||
try:
|
||||
seconds = float(seconds)
|
||||
except ValueError:
|
||||
return str(seconds)
|
||||
|
||||
if not isinstance(seconds, (int, float)):
|
||||
return str(seconds)
|
||||
|
||||
total_seconds = int(seconds)
|
||||
if total_seconds < 0:
|
||||
return "N/A"
|
||||
|
||||
hours = total_seconds // 3600
|
||||
minutes = (total_seconds % 3600) // 60
|
||||
secs = total_seconds % 60
|
||||
|
||||
if hours > 0:
|
||||
return f"{hours}:{minutes:02d}:{secs:02d}"
|
||||
elif minutes > 0:
|
||||
return f"{minutes}:{secs:02d}"
|
||||
else:
|
||||
return f"{secs}s"
|
||||
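Concrete examples of the clock-style output:
format_duration(45)     # "45s"
format_duration(330)    # "5:30"
format_duration(5025)   # "1:23:45"
format_duration("bad")  # "bad"  (unparseable strings are returned unchanged)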
|
||||
|
||||
def format_timestamp(timestamp_str) -> str:
|
||||
"""Format ISO timestamp to readable format.
|
||||
|
||||
Args:
|
||||
timestamp_str: ISO format timestamp string or None
|
||||
|
||||
Returns:
|
||||
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
|
||||
"""
|
||||
if not timestamp_str:
|
||||
return "N/A"
|
||||
|
||||
try:
|
||||
# Handle ISO format timestamps
|
||||
if isinstance(timestamp_str, str):
|
||||
# Try parsing ISO format
|
||||
if 'T' in timestamp_str:
|
||||
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
|
||||
else:
|
||||
# Try other common formats
|
||||
dt = datetime.fromisoformat(timestamp_str)
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
except Exception as e:
|
||||
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
|
||||
|
||||
return str(timestamp_str)
|
||||
|
||||
|
||||
def format_metadata_value(key: str, value) -> str:
|
||||
"""Format a metadata value based on its key for display.
|
||||
|
||||
This is the central formatting rule for all metadata display.
|
||||
|
||||
Args:
|
||||
key: Metadata field name
|
||||
value: Value to format
|
||||
|
||||
Returns:
|
||||
Formatted string for display
|
||||
"""
|
||||
if value is None or value == '':
|
||||
return "N/A"
|
||||
|
||||
# Apply field-specific formatting
|
||||
if key in ('size', 'file_size'):
|
||||
return format_bytes(value)
|
||||
elif key in ('duration', 'length'):
|
||||
return format_duration(value)
|
||||
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
|
||||
return format_timestamp(value)
|
||||
else:
|
||||
return str(value)
|
||||
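How the dispatch above plays out for a few typical fields (illustrative values):
format_metadata_value("size", 1536)                            # "1.5 KB"
format_metadata_value("duration", 330)                         # "5:30"
format_metadata_value("time_imported", "2025-10-28T19:36:01")  # "2025-10-28 19:36:01"
format_metadata_value("title", "My Clip")                      # "My Clip"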
|
||||
|
||||
# ============================================================================
|
||||
# Link Utilities - Consolidated from link_utils.py
|
||||
# ============================================================================
|
||||
"""Link utilities - Extract and process url from various sources."""
|
||||
|
||||
|
||||
def extract_link_from_args(args: Iterable[str]) -> Any | None:
|
||||
"""Extract HTTP/HTTPS URL from command arguments.
|
||||
|
||||
Args:
|
||||
args: Command arguments
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
args_list = list(args) if not isinstance(args, (list, tuple)) else args
|
||||
if not args_list:
|
||||
return None
|
||||
|
||||
potential_link = str(args_list[0])
|
||||
if potential_link.startswith(('http://', 'https://')):
|
||||
return potential_link
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def extract_link_from_result(result: Any) -> Any | None:
|
||||
"""Extract URL from a result object (dict or object with attributes).
|
||||
|
||||
Args:
|
||||
result: Result object from pipeline (dict or object)
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
if isinstance(result, dict):
|
||||
return result.get('url') or result.get('link') or result.get('href')
|
||||
|
||||
return (
|
||||
getattr(result, 'url', None) or
|
||||
getattr(result, 'link', None) or
|
||||
getattr(result, 'href', None)
|
||||
)
|
||||
|
||||
|
||||
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
|
||||
"""Extract link from args or result (args take priority).
|
||||
|
||||
Args:
|
||||
result: Pipeline result object
|
||||
args: Command arguments
|
||||
|
||||
Returns:
|
||||
URL string if found, None otherwise
|
||||
"""
|
||||
# Try args first
|
||||
link = extract_link_from_args(args)
|
||||
if link:
|
||||
return link
|
||||
|
||||
# Fall back to result
|
||||
return extract_link_from_result(result)
|
||||
|
||||
|
||||
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
|
||||
"""Get API key from config with fallback support.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary
|
||||
service: Service name for logging
|
||||
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
|
||||
|
||||
Returns:
|
||||
API key if found and not empty, None otherwise
|
||||
"""
|
||||
try:
|
||||
parts = key_path.split('.')
|
||||
value = config
|
||||
for part in parts:
|
||||
if isinstance(value, dict):
|
||||
value = value.get(part)
|
||||
else:
|
||||
return None
|
||||
|
||||
if isinstance(value, str):
|
||||
return value.strip() or None
|
||||
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
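A minimal sketch of the dot-notation lookup, using a made-up config:
config = {"Debrid": {"All-debrid": " abc123 "}}
get_api_key(config, "alldebrid", "Debrid.All-debrid")  # "abc123" (whitespace stripped)
get_api_key(config, "alldebrid", "Debrid.Missing")     # None
get_api_key(config, "alldebrid", "Debrid")             # None (value is a dict, not a string)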
|
||||
|
||||
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
|
||||
"""Add direct link information to result object.
|
||||
|
||||
Args:
|
||||
result: Result object to modify (dict or object)
|
||||
direct_link: The unlocked/direct URL
|
||||
original_link: The original restricted URL
|
||||
"""
|
||||
if isinstance(result, dict):
|
||||
result['direct_link'] = direct_link
|
||||
result['original_link'] = original_link
|
||||
else:
|
||||
setattr(result, 'direct_link', direct_link)
|
||||
setattr(result, 'original_link', original_link)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# URL Policy Resolution - Consolidated from url_parser.py
|
||||
# ============================================================================
|
||||
"""URL policy resolution for downlow workflows."""
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class UrlPolicy:
|
||||
"""Describe how a URL should be handled by download and screenshot flows."""
|
||||
|
||||
skip_download: bool = False
|
||||
skip_metadata: bool = False
|
||||
force_screenshot: bool = False
|
||||
extra_tags: list[str] = field(default_factory=list)
|
||||
|
||||
def apply_tags(self, sources: Iterable[str]) -> list[str]:
|
||||
tags = [tag.strip() for tag in self.extra_tags if tag and tag.strip()]
|
||||
for value in sources:
|
||||
text = str(value).strip()
|
||||
if text:
|
||||
tags.append(text)
|
||||
return tags
|
||||
|
||||
|
||||
def _normalise_rule(rule: dict[str, Any]) -> dict[str, Any] | None:
|
||||
pattern = str(rule.get("pattern") or rule.get("host") or "").strip()
|
||||
if not pattern:
|
||||
return None
|
||||
skip_download = bool(rule.get("skip_download"))
|
||||
skip_metadata = bool(rule.get("skip_metadata"))
|
||||
force_screenshot = bool(rule.get("force_screenshot"))
|
||||
extra_tags_raw = rule.get("extra_tags")
|
||||
if isinstance(extra_tags_raw, str):
|
||||
extra_tags = [part.strip() for part in extra_tags_raw.split(",") if part.strip()]
|
||||
elif isinstance(extra_tags_raw, (list, tuple, set)):
|
||||
extra_tags = [str(item).strip() for item in extra_tags_raw if str(item).strip()]
|
||||
else:
|
||||
extra_tags = []
|
||||
return {
|
||||
"pattern": pattern,
|
||||
"skip_download": skip_download,
|
||||
"skip_metadata": skip_metadata,
|
||||
"force_screenshot": force_screenshot,
|
||||
"extra_tags": extra_tags,
|
||||
}
|
||||
|
||||
|
||||
def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
|
||||
policies_raw = config.get("url_policies")
|
||||
if not policies_raw:
|
||||
return UrlPolicy()
|
||||
if not isinstance(policies_raw, list):
|
||||
return UrlPolicy()
|
||||
parsed = urlparse(url)
|
||||
subject = f"{parsed.netloc}{parsed.path}"
|
||||
host = parsed.netloc
|
||||
resolved = UrlPolicy()
|
||||
for rule_raw in policies_raw:
|
||||
if not isinstance(rule_raw, dict):
|
||||
continue
|
||||
rule = _normalise_rule(rule_raw)
|
||||
if rule is None:
|
||||
continue
|
||||
pattern = rule["pattern"]
|
||||
if not (fnmatch(host, pattern) or fnmatch(subject, pattern)):
|
||||
continue
|
||||
if rule["skip_download"]:
|
||||
resolved.skip_download = True
|
||||
if rule["skip_metadata"]:
|
||||
resolved.skip_metadata = True
|
||||
if rule["force_screenshot"]:
|
||||
resolved.force_screenshot = True
|
||||
if rule["extra_tags"]:
|
||||
for tag in rule["extra_tags"]:
|
||||
if tag not in resolved.extra_tags:
|
||||
resolved.extra_tags.append(tag)
|
||||
return resolved
|
||||
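A sketch of how a rule from config is matched and merged, using a hypothetical url_policies entry:
config = {
    "url_policies": [
        {"pattern": "*.example.com", "skip_download": True, "extra_tags": "mirror, archive"},
    ]
}
policy = resolve_url_policy(config, "https://cdn.example.com/file.bin")
# policy.skip_download -> True
# policy.extra_tags    -> ["mirror", "archive"]
# policy.apply_tags(["source:web"]) -> ["mirror", "archive", "source:web"]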
100
SYS/utils_constant.py
Normal file
@@ -0,0 +1,100 @@
|
||||
mime_maps = {
|
||||
"image": {
|
||||
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
|
||||
"png": { "ext": ".png", "mimes": ["image/png"] },
|
||||
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
|
||||
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
|
||||
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
|
||||
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
|
||||
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
|
||||
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
|
||||
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
|
||||
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
|
||||
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
|
||||
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
|
||||
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
|
||||
},
|
||||
"image_sequence": {
|
||||
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
|
||||
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
|
||||
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
|
||||
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
|
||||
},
|
||||
"video": {
|
||||
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
|
||||
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
|
||||
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
|
||||
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
|
||||
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
|
||||
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
|
||||
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
|
||||
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
|
||||
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
|
||||
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
|
||||
},
|
||||
"audio": {
|
||||
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
|
||||
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
|
||||
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
|
||||
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
|
||||
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
|
||||
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
|
||||
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
|
||||
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
|
||||
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
|
||||
},
|
||||
"document": {
|
||||
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
|
||||
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
|
||||
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
|
||||
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
|
||||
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
|
||||
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
|
||||
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
|
||||
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
|
||||
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
|
||||
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
|
||||
},
|
||||
"archive": {
|
||||
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
|
||||
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
|
||||
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
|
||||
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
|
||||
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
|
||||
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
|
||||
},
|
||||
"project": {
|
||||
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
|
||||
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
|
||||
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
|
||||
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
|
||||
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
|
||||
},
|
||||
"other": {
|
||||
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
|
||||
"json": { "ext": ".json", "mimes": ["application/json"] },
|
||||
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
|
||||
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def get_type_from_ext(ext: str) -> str:
|
||||
"""Determine the type (e.g., 'image', 'video', 'audio') from file extension.
|
||||
|
||||
Args:
|
||||
ext: File extension (with or without leading dot, e.g., 'jpg' or '.jpg')
|
||||
|
||||
Returns:
|
||||
Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown
|
||||
"""
|
||||
if not ext:
|
||||
return 'other'
|
||||
|
||||
ext_clean = ext.lstrip('.').lower()
|
||||
|
||||
for type_name, extensions_dict in mime_maps.items():
|
||||
if ext_clean in extensions_dict:
|
||||
return type_name
|
||||
|
||||
return 'other'
|
||||
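For reference, a few lookups against the table above:
get_type_from_ext(".jpg")  # "image"
get_type_from_ext("mkv")   # "video"
get_type_from_ext("JPG")   # "image" (case-insensitive, leading dot optional)
get_type_from_ext(".xyz")  # "other"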
671
SYS/worker_manager.py
Normal file
@@ -0,0 +1,671 @@
|
||||
"""Worker task management with persistent database storage.
|
||||
|
||||
Manages worker tasks for downloads, searches, imports, etc. with automatic
|
||||
persistence to database and optional auto-refresh callbacks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List, Callable
|
||||
from datetime import datetime
|
||||
from threading import Thread, Lock
|
||||
import time
|
||||
|
||||
from ..API.folder import API_folder_store
|
||||
from SYS.logger import log
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Worker:
|
||||
"""Represents a single worker task with state management."""
|
||||
|
||||
def __init__(self, worker_id: str, worker_type: str, title: str = "",
|
||||
description: str = "", manager: Optional['WorkerManager'] = None):
|
||||
"""Initialize a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for this worker
|
||||
worker_type: Type of work (e.g., 'download', 'search', 'import')
|
||||
title: Human-readable title
|
||||
description: Detailed description
|
||||
manager: Reference to parent WorkerManager for state updates
|
||||
"""
|
||||
self.id = worker_id
|
||||
self.type = worker_type
|
||||
self.title = title or worker_type
|
||||
self.description = description
|
||||
self.manager = manager
|
||||
self.status = "running"
|
||||
self.progress = ""
|
||||
self.details = ""
|
||||
self.error_message = ""
|
||||
self.result = "pending"
|
||||
self._stdout_buffer = []
|
||||
self._steps_buffer = []
|
||||
|
||||
def log_step(self, step_text: str) -> None:
|
||||
"""Log a step for this worker.
|
||||
|
||||
Args:
|
||||
step_text: Text describing the step
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.log_step(self.id, step_text)
|
||||
else:
|
||||
logger.info(f"[{self.id}] {step_text}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error logging step for worker {self.id}: {e}")
|
||||
|
||||
def append_stdout(self, text: str) -> None:
|
||||
"""Append text to stdout log.
|
||||
|
||||
Args:
|
||||
text: Text to append
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.append_worker_stdout(self.id, text)
|
||||
else:
|
||||
self._stdout_buffer.append(text)
|
||||
except Exception as e:
|
||||
logger.error(f"Error appending stdout for worker {self.id}: {e}")
|
||||
|
||||
def get_stdout(self) -> str:
|
||||
"""Get all stdout for this worker.
|
||||
|
||||
Returns:
|
||||
Complete stdout text
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
return self.manager.get_stdout(self.id)
|
||||
else:
|
||||
return "\n".join(self._stdout_buffer)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting stdout for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def get_steps(self) -> str:
|
||||
"""Get all steps for this worker.
|
||||
|
||||
Returns:
|
||||
Complete steps text
|
||||
"""
|
||||
try:
|
||||
if self.manager:
|
||||
return self.manager.get_steps(self.id)
|
||||
else:
|
||||
return "\n".join(self._steps_buffer)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting steps for worker {self.id}: {e}")
|
||||
return ""
|
||||
|
||||
def update_progress(self, progress: str = "", details: str = "") -> None:
|
||||
"""Update worker progress.
|
||||
|
||||
Args:
|
||||
progress: Progress string (e.g., "50%")
|
||||
details: Additional details
|
||||
"""
|
||||
self.progress = progress
|
||||
self.details = details
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.update_worker(self.id, progress, details)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating worker {self.id}: {e}")
|
||||
|
||||
def finish(self, result: str = "completed", message: str = "") -> None:
|
||||
"""Mark worker as finished.
|
||||
|
||||
Args:
|
||||
result: Result status ('completed', 'error', 'cancelled')
|
||||
message: Result message/error details
|
||||
"""
|
||||
self.result = result
|
||||
self.status = "finished"
|
||||
self.error_message = message
|
||||
try:
|
||||
if self.manager:
|
||||
# Flush and disable logging handler before marking finished
|
||||
self.manager.disable_logging_for_worker(self.id)
|
||||
# Then mark as finished in database
|
||||
self.manager.finish_worker(self.id, result, message)
|
||||
except Exception as e:
|
||||
logger.error(f"Error finishing worker {self.id}: {e}")
|
||||
|
||||
|
||||
class WorkerLoggingHandler(logging.StreamHandler):
|
||||
"""Custom logging handler that captures logs for a worker."""
|
||||
|
||||
def __init__(self, worker_id: str, db: API_folder_store,
|
||||
manager: Optional['WorkerManager'] = None,
|
||||
buffer_size: int = 50):
|
||||
"""Initialize the handler.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to capture logs for
|
||||
db: Worker database (API_folder_store) used to persist captured logs
manager: Optional parent WorkerManager; when set, flushed logs are routed through it
buffer_size: Number of logs to buffer before flushing to DB
|
||||
"""
|
||||
super().__init__()
|
||||
self.worker_id = worker_id
|
||||
self.db = db
|
||||
self.manager = manager
|
||||
self.buffer_size = buffer_size
|
||||
self.buffer = []
|
||||
self._lock = Lock()
|
||||
|
||||
# Set a format that includes timestamp and level
|
||||
formatter = logging.Formatter(
|
||||
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
self.setFormatter(formatter)
|
||||
|
||||
def emit(self, record):
|
||||
"""Emit a log record."""
|
||||
try:
|
||||
# Try to format the record normally
|
||||
try:
|
||||
msg = self.format(record)
|
||||
except (TypeError, ValueError):
|
||||
# If formatting fails (e.g., %d format with non-int arg),
|
||||
# build message manually without calling getMessage()
|
||||
try:
|
||||
# Try to format with args if possible
|
||||
if record.args:
|
||||
msg = record.msg % record.args
|
||||
else:
|
||||
msg = record.msg
|
||||
except (TypeError, ValueError):
|
||||
# If that fails too, just use the raw message string
|
||||
msg = str(record.msg)
|
||||
|
||||
# Add timestamp and level if not already in message ('time' is imported at module level)
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
|
||||
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
|
||||
|
||||
with self._lock:
|
||||
self.buffer.append(msg)
|
||||
|
||||
# Flush to DB when buffer reaches size
|
||||
if len(self.buffer) >= self.buffer_size:
|
||||
self._flush()
|
||||
except Exception:
|
||||
self.handleError(record)
|
||||
|
||||
def _flush(self):
|
||||
"""Flush buffered logs to database."""
|
||||
if self.buffer:
|
||||
log_text = '\n'.join(self.buffer)
|
||||
try:
|
||||
if self.manager:
|
||||
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||
else:
|
||||
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||||
except Exception as e:
|
||||
# If we can't write to DB, at least log it
|
||||
log(f"Error flushing worker logs: {e}")
|
||||
self.buffer = []
|
||||
|
||||
def flush(self):
|
||||
"""Flush any buffered records."""
|
||||
with self._lock:
|
||||
self._flush()
|
||||
super().flush()
|
||||
|
||||
def close(self):
|
||||
"""Close the handler."""
|
||||
self.flush()
|
||||
super().close()
|
||||
|
||||
|
||||
class WorkerManager:
|
||||
"""Manages persistent worker tasks with auto-refresh capability."""
|
||||
|
||||
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
|
||||
"""Initialize the worker manager.
|
||||
|
||||
Args:
|
||||
library_root: Root directory for the local library database
|
||||
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
|
||||
"""
|
||||
self.library_root = Path(library_root)
|
||||
self.db = API_folder_store(library_root)
|
||||
self.auto_refresh_interval = auto_refresh_interval
|
||||
self.refresh_callbacks: List[Callable] = []
|
||||
self.refresh_thread: Optional[Thread] = None
|
||||
self._stop_refresh = False
|
||||
self._lock = Lock()
|
||||
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
|
||||
self._worker_last_step: Dict[str, str] = {}
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the database connection."""
|
||||
if self.db:
|
||||
try:
|
||||
self.db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
"""Context manager entry."""
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Context manager exit - close database."""
|
||||
self.close()
|
||||
|
||||
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
|
||||
"""Register a callback to be called on worker updates.
|
||||
|
||||
Args:
|
||||
callback: Function that receives list of active workers
|
||||
"""
|
||||
with self._lock:
|
||||
self.refresh_callbacks.append(callback)
|
||||
|
||||
def expire_running_workers(
|
||||
self,
|
||||
older_than_seconds: int = 300,
|
||||
worker_id_prefix: Optional[str] = None,
|
||||
reason: Optional[str] = None,
|
||||
status: str = "error",
|
||||
) -> int:
|
||||
"""Mark stale running workers as finished.
|
||||
|
||||
Args:
|
||||
older_than_seconds: Idle threshold before expiring.
|
||||
worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
|
||||
reason: Error message if none already exists.
|
||||
status: New status to apply.
|
||||
|
||||
Returns:
|
||||
Count of workers updated.
|
||||
"""
|
||||
try:
|
||||
return self.db.expire_running_workers(
|
||||
older_than_seconds=older_than_seconds,
|
||||
status=status,
|
||||
reason=reason,
|
||||
worker_id_prefix=worker_id_prefix,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
|
||||
return 0
|
||||
|
||||
def remove_refresh_callback(self, callback: Callable) -> None:
|
||||
"""Remove a refresh callback.
|
||||
|
||||
Args:
|
||||
callback: The callback function to remove
|
||||
"""
|
||||
with self._lock:
|
||||
if callback in self.refresh_callbacks:
|
||||
self.refresh_callbacks.remove(callback)
|
||||
|
||||
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
|
||||
"""Enable logging capture for a worker.
|
||||
|
||||
Creates a logging handler that captures all logs for this worker.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to capture logs for
|
||||
|
||||
Returns:
|
||||
The logging handler that was created, or None if there was an error
|
||||
"""
|
||||
try:
|
||||
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
|
||||
with self._lock:
|
||||
self.worker_handlers[worker_id] = handler
|
||||
|
||||
# Add the handler to the root logger so it captures all logs
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.addHandler(handler)
|
||||
root_logger.setLevel(logging.DEBUG) # Capture all levels
|
||||
|
||||
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
|
||||
return handler
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
def disable_logging_for_worker(self, worker_id: str) -> None:
|
||||
"""Disable logging capture for a worker and flush any pending logs.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to stop capturing logs for
|
||||
"""
|
||||
try:
|
||||
with self._lock:
|
||||
handler = self.worker_handlers.pop(worker_id, None)
|
||||
|
||||
if handler:
|
||||
# Flush and close the handler
|
||||
handler.flush()
|
||||
handler.close()
|
||||
|
||||
# Remove from root logger
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.removeHandler(handler)
|
||||
|
||||
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
|
||||
|
||||
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
|
||||
description: str = "", total_steps: int = 0,
|
||||
pipe: Optional[str] = None) -> bool:
|
||||
"""Start tracking a new worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
worker_type: Type of worker (e.g., 'download', 'search', 'import')
|
||||
title: Worker title/name
|
||||
description: Worker description
|
||||
total_steps: Total number of steps for progress tracking
|
||||
pipe: Text of the originating pipe/prompt, if any
|
||||
|
||||
Returns:
|
||||
True if worker was inserted successfully
|
||||
"""
|
||||
try:
|
||||
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
|
||||
if result > 0:
|
||||
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
|
||||
self._start_refresh_if_needed()
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
|
||||
details: str = "", error: str = "") -> bool:
|
||||
"""Update worker progress and status.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
progress: Progress percentage (0-100)
|
||||
current_step: Current step description
|
||||
details: Additional details
|
||||
error: Error message if any
|
||||
|
||||
Returns:
|
||||
True if update was successful
|
||||
"""
|
||||
try:
|
||||
kwargs = {}
|
||||
if progress > 0:
|
||||
kwargs['progress'] = progress
|
||||
if current_step:
|
||||
kwargs['current_step'] = current_step
|
||||
if details:
|
||||
kwargs['description'] = details
|
||||
if error:
|
||||
kwargs['error_message'] = error
|
||||
|
||||
if kwargs:
|
||||
kwargs['last_updated'] = datetime.now().isoformat()
|
||||
if 'current_step' in kwargs and kwargs['current_step']:
|
||||
self._worker_last_step[worker_id] = str(kwargs['current_step'])
|
||||
return self.db.update_worker(worker_id, **kwargs)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def finish_worker(self, worker_id: str, result: str = "completed",
|
||||
error_msg: str = "", result_data: str = "") -> bool:
|
||||
"""Mark a worker as finished.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
result: Result status ('completed', 'error', 'cancelled')
|
||||
error_msg: Error message if any
|
||||
result_data: Result data as JSON string
|
||||
|
||||
Returns:
|
||||
True if update was successful
|
||||
"""
|
||||
try:
|
||||
kwargs = {
|
||||
'status': result,
|
||||
'completed_at': datetime.now().isoformat()
|
||||
}
|
||||
if error_msg:
|
||||
kwargs['error_message'] = error_msg
|
||||
if result_data:
|
||||
kwargs['result_data'] = result_data
|
||||
|
||||
success = self.db.update_worker(worker_id, **kwargs)
|
||||
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
|
||||
self._worker_last_step.pop(worker_id, None)
|
||||
return success
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def get_active_workers(self) -> List[Dict[str, Any]]:
|
||||
"""Get all active (running) workers.
|
||||
|
||||
Returns:
|
||||
List of active worker dictionaries
|
||||
"""
|
||||
try:
|
||||
return self.db.get_active_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
|
||||
return []
|
||||
|
||||
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||||
"""Get all finished workers (completed, errored, or cancelled).
|
||||
|
||||
Args:
|
||||
limit: Maximum number of workers to retrieve
|
||||
|
||||
Returns:
|
||||
List of finished worker dictionaries
|
||||
"""
|
||||
try:
|
||||
all_workers = self.db.get_all_workers(limit=limit)
|
||||
# Filter to only finished workers
|
||||
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
|
||||
return finished
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
|
||||
return []
|
||||
|
||||
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a specific worker's data.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
Worker data or None if not found
|
||||
"""
|
||||
try:
|
||||
return self.db.get_worker(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
|
||||
"""Fetch recorded worker timeline events."""
|
||||
return self.db.get_worker_events(worker_id, limit)
|
||||
|
||||
def log_step(self, worker_id: str, step_text: str) -> bool:
|
||||
"""Log a step to a worker's step history.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
step_text: Step description to log
|
||||
|
||||
Returns:
|
||||
True if successful
|
||||
"""
|
||||
try:
|
||||
success = self.db.append_worker_steps(worker_id, step_text)
|
||||
if success:
|
||||
self._worker_last_step[worker_id] = step_text
|
||||
return success
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _get_last_step(self, worker_id: str) -> Optional[str]:
|
||||
"""Return the most recent step description for a worker."""
|
||||
return self._worker_last_step.get(worker_id)
|
||||
|
||||
def get_steps(self, worker_id: str) -> str:
|
||||
"""Get step logs for a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
Steps text or empty string if not found
|
||||
"""
|
||||
try:
|
||||
return self.db.get_worker_steps(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
|
||||
return ''
|
||||
|
||||
def start_auto_refresh(self) -> None:
|
||||
"""Start the auto-refresh thread for periodic worker updates."""
|
||||
if self.auto_refresh_interval <= 0:
|
||||
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
|
||||
return
|
||||
|
||||
if self.refresh_thread and self.refresh_thread.is_alive():
|
||||
logger.debug("[WorkerManager] Auto-refresh already running")
|
||||
return
|
||||
|
||||
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
|
||||
self._stop_refresh = False
|
||||
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
|
||||
self.refresh_thread.start()
|
||||
|
||||
def stop_auto_refresh(self) -> None:
|
||||
"""Stop the auto-refresh thread."""
|
||||
logger.info("[WorkerManager] Stopping auto-refresh")
|
||||
self._stop_refresh = True
|
||||
if self.refresh_thread:
|
||||
self.refresh_thread.join(timeout=5)
|
||||
self.refresh_thread = None
|
||||
|
||||
def _start_refresh_if_needed(self) -> None:
|
||||
"""Start auto-refresh if we have active workers and callbacks."""
|
||||
active = self.get_active_workers()
|
||||
if active and self.refresh_callbacks and not self._stop_refresh:
|
||||
self.start_auto_refresh()
|
||||
|
||||
def _auto_refresh_loop(self) -> None:
|
||||
"""Main auto-refresh loop that periodically queries and notifies."""
|
||||
try:
|
||||
while not self._stop_refresh:
|
||||
time.sleep(self.auto_refresh_interval)
|
||||
|
||||
# Check if there are active workers
|
||||
active = self.get_active_workers()
|
||||
|
||||
if not active:
|
||||
# No more active workers, stop refreshing
|
||||
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
|
||||
break
|
||||
|
||||
# Call all registered callbacks with the active workers
|
||||
with self._lock:
|
||||
for callback in self.refresh_callbacks:
|
||||
try:
|
||||
callback(active)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
|
||||
finally:
|
||||
logger.debug("[WorkerManager] Auto-refresh loop ended")
|
||||
|
||||
def cleanup_old_workers(self, days: int = 7) -> int:
|
||||
"""Clean up completed/errored workers older than specified days.
|
||||
|
||||
Args:
|
||||
days: Delete workers completed more than this many days ago
|
||||
|
||||
Returns:
|
||||
Number of workers deleted
|
||||
"""
|
||||
try:
|
||||
count = self.db.cleanup_old_workers(days)
|
||||
if count > 0:
|
||||
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
|
||||
return count
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
|
||||
return 0
|
||||
|
||||
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||||
"""Append text to a worker's stdout log.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
text: Text to append
|
||||
channel: Logical channel (stdout, stderr, log, etc.)
|
||||
|
||||
Returns:
|
||||
True if append was successful
|
||||
"""
|
||||
try:
|
||||
step_label = self._get_last_step(worker_id)
|
||||
return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def get_stdout(self, worker_id: str) -> str:
|
||||
"""Get stdout logs for a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
Worker's stdout or empty string
|
||||
"""
|
||||
try:
|
||||
return self.db.get_worker_stdout(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
|
||||
return ""
|
||||
|
||||
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||||
"""Compatibility wrapper for append_stdout."""
|
||||
return self.append_stdout(worker_id, text, channel=channel)
|
||||
|
||||
def clear_stdout(self, worker_id: str) -> bool:
|
||||
"""Clear stdout logs for a worker.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for the worker
|
||||
|
||||
Returns:
|
||||
True if clear was successful
|
||||
"""
|
||||
try:
|
||||
return self.db.clear_worker_stdout(worker_id)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the worker manager and database connection."""
|
||||
self.stop_auto_refresh()
|
||||
self.db.close()
|
||||
logger.info("[WorkerManager] Closed")
|
||||
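A minimal end-to-end sketch of the lifecycle this class manages, assuming a library root at ./library (illustrative only; real worker IDs and types come from the CLI):
from pathlib import Path

with WorkerManager(Path("./library")) as manager:
    manager.track_worker("cli_demo_1", "download", title="Demo download")
    manager.enable_logging_for_worker("cli_demo_1")   # route root-logger output into the worker log
    manager.update_worker("cli_demo_1", progress=50.0, current_step="Fetching")
    manager.log_step("cli_demo_1", "Wrote 10 MiB to disk")
    manager.disable_logging_for_worker("cli_demo_1")
    manager.finish_worker("cli_demo_1", result="completed")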