df

2025-12-29 17:05:03 -08:00
parent 226de9316a
commit c019c00aed
104 changed files with 19669 additions and 12954 deletions

View File

@@ -6,6 +6,7 @@ workers start, progress, or finish. Intended for CLI background workflows.
Filters to show only workers related to the current pipeline session to avoid
cluttering the terminal with workers from previous sessions.
"""
from __future__ import annotations
from typing import Any, Callable, Dict, Optional, Set
@@ -72,18 +73,20 @@ class BackgroundNotifier:
worker_id = str(worker.get("id") or "").strip()
if not worker_id:
continue
# If filtering is enabled, skip workers not in this session
if self._filter_enabled and worker_id not in self.session_worker_ids:
continue
status = str(worker.get("status") or "running")
# Overlay mode: only emit on completion; suppress start/progress spam
if self.overlay_mode:
if status in ("completed", "finished", "error"):
progress_val = worker.get("progress") or worker.get("progress_percent") or ""
step = str(worker.get("current_step") or worker.get("description") or "").strip()
step = str(
worker.get("current_step") or worker.get("description") or ""
).strip()
signature = f"{status}|{progress_val}|{step}"
if self._last_state.get(worker_id) == signature:
@@ -157,7 +160,7 @@ def ensure_background_notifier(
overlay_mode: bool = False,
) -> Optional[BackgroundNotifier]:
"""Attach a BackgroundNotifier to a WorkerManager if not already present.
Args:
manager: WorkerManager instance
output: Function to call for printing updates
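The notifier above suppresses duplicate updates by composing a per-worker signature from status, progress, and step, and only emitting when that signature changes. A minimal, standalone sketch of the same change-detection pattern, independent of the WorkerManager API (all names here are illustrative):

from typing import Dict

class ChangeDetector:
    """Emit a worker update only when its (status, progress, step) signature changes."""

    def __init__(self) -> None:
        self._last_state: Dict[str, str] = {}

    def should_emit(self, worker_id: str, status: str, progress: str, step: str) -> bool:
        signature = f"{status}|{progress}|{step}"
        if self._last_state.get(worker_id) == signature:
            return False  # nothing new since the last notification
        self._last_state[worker_id] = signature
        return True

detector = ChangeDetector()
print(detector.should_emit("w1", "running", "50%", "downloading"))  # True
print(detector.should_emit("w1", "running", "50%", "downloading"))  # False (duplicate)
print(detector.should_emit("w1", "completed", "100%", "done"))      # True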

View File

@@ -6,6 +6,7 @@ Lean, focused downloader without event infrastructure overhead.
- Tag extraction via metadata.extract_ytdlp_tags()
- Logging via helper.logger.log()
"""
from __future__ import annotations
import glob # noqa: F401
@@ -97,7 +98,9 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
return False
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
def list_formats(
url: str, no_playlist: bool = False, playlist_items: Optional[str] = None
) -> Optional[List[Dict[str, Any]]]:
"""Get list of available formats for a URL using yt-dlp."""
_ensure_yt_dlp_ready()
@@ -125,19 +128,21 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
result_formats = []
for fmt in formats:
result_formats.append({
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
"resolution": fmt.get("resolution", ""),
"width": fmt.get("width"),
"height": fmt.get("height"),
"fps": fmt.get("fps"),
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
})
result_formats.append(
{
"format_id": fmt.get("format_id", ""),
"format": fmt.get("format", ""),
"ext": fmt.get("ext", ""),
"resolution": fmt.get("resolution", ""),
"width": fmt.get("width"),
"height": fmt.get("height"),
"fps": fmt.get("fps"),
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"tbr": fmt.get("tbr"),
}
)
debug(f"Found {len(result_formats)} available formats")
return result_formats
@@ -147,48 +152,50 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
def _download_with_sections_via_cli(
url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False
) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.
yt-dlp with multiple --download-sections args merges them into one file.
We need separate files for merge-file, so download each section individually.
Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
thinking sections are already downloaded. The title is extracted and stored in tags.
Returns:
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
"""
sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
return "", {}
# Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
(url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
title_from_first = None
# Download each section separately with unique output template using session ID
for section_idx, section in enumerate(sections_list, 1):
# Build unique output template for this section using session-based filename
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
output_dir_path = Path(base_outtmpl).parent
# Use session_id + section index for temp filename
# e.g., "/path/{session_id}_1.%(ext)s"
filename_tmpl = f"{session_id}_{section_idx}"
if base_outtmpl.endswith(".%(ext)s"):
filename_tmpl += ".%(ext)s"
# Use Path to handle separators correctly for the OS
section_outtmpl = str(output_dir_path / filename_tmpl)
# For the first section, extract metadata first (separate call)
if section_idx == 1:
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
@@ -198,14 +205,14 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
if ytdl_options.get("noplaylist"):
metadata_cmd.append("--no-playlist")
metadata_cmd.append(url)
try:
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
if meta_result.returncode == 0 and meta_result.stdout:
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
title_from_first = info_dict.get("title")
if not quiet:
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
@@ -214,50 +221,54 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
except Exception as e:
if not quiet:
debug(f"Error extracting metadata: {e}")
# Build yt-dlp command for downloading this section
cmd = ["yt-dlp"]
# Add format
if ytdl_options.get("format"):
cmd.extend(["-f", ytdl_options["format"]])
# Add ONLY this section (not all sections)
cmd.extend(["--download-sections", section])
# Add force-keyframes-at-cuts if specified
if ytdl_options.get("force_keyframes_at_cuts"):
cmd.append("--force-keyframes-at-cuts")
# Add output template for this section
cmd.extend(["-o", section_outtmpl])
# Add cookies file if present
if ytdl_options.get("cookiefile"):
# Convert backslashes to forward slashes for better compatibility
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
cmd.extend(["--cookies", cookies_path])
# Add no-playlist if specified
if ytdl_options.get("noplaylist"):
cmd.append("--no-playlist")
# Add the URL
cmd.append(url)
if not quiet:
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
debug(f"Command: {' '.join(cmd)}")
# Run the subprocess - don't capture output so progress is shown
try:
result = subprocess.run(cmd)
if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
raise DownloadError(
f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}"
)
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
raise DownloadError(
f"yt-dlp subprocess error for section {section_idx}: {exc}"
) from exc
return session_id, first_section_info or {}
@@ -299,33 +310,39 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
else: # video
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
base_options["format_sort"] = [
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
"res:4320",
"res:2880",
"res:2160",
"res:1440",
"res:1080",
"res:720",
"res",
]
# Add clip sections if provided (yt-dlp will download only these sections)
if opts.clip_sections:
# Parse section ranges like "48-65,120-152,196-205" (seconds)
# Parse section ranges like "48-65,120-152,196-205" (seconds)
# and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
sections = []
for section_range in opts.clip_sections.split(','):
for section_range in opts.clip_sections.split(","):
try:
start_str, end_str = section_range.strip().split('-')
start_str, end_str = section_range.strip().split("-")
start_sec = float(start_str)
end_sec = float(end_str)
# Convert seconds to HH:MM:SS format
def sec_to_hhmmss(seconds):
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = int(seconds % 60)
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
start_time = sec_to_hhmmss(start_sec)
end_time = sec_to_hhmmss(end_sec)
sections.append(f"*{start_time}-{end_time}")
except (ValueError, AttributeError):
pass
if sections:
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
base_options["download_sections"] = sections
@@ -412,41 +429,41 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
"""Extract the actual download link from LibGen redirect URL.
LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to an
actual mirror URL. This helper follows the redirect chain to get the real file.
Args:
libgen_url: LibGen file.php URL
Returns:
Actual download URL or None if extraction fails
"""
try:
import requests
from urllib.parse import urlparse
# Check if this is a LibGen URL
parsed = urlparse(libgen_url)
if 'libgen' not in parsed.netloc.lower():
if "libgen" not in parsed.netloc.lower():
return None
if '/file.php' not in parsed.path.lower():
if "/file.php" not in parsed.path.lower():
return None
# LibGen redirects to actual mirrors, follow redirects to get final URL
session = requests.Session()
session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
session.headers.update(
{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
)
debug(f"Following LibGen redirect chain for: {libgen_url}")
# First, get the page and look for direct download link
try:
response = session.get(libgen_url, timeout=10, allow_redirects=True)
final_url = response.url
# Try to find actual download link in the page
try:
try:
@@ -462,8 +479,12 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
if "get.php" in href_lower or href_lower.endswith(
(".pdf", ".epub", ".djvu", ".mobi")
):
download_url = (
href if href.startswith("http") else urljoin(final_url, href)
)
debug(f"Found download link: {download_url}")
return download_url
else:
@@ -477,19 +498,23 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
if not href or href.lower().startswith("javascript:"):
continue
href_lower = href.lower()
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
download_url = href if href.startswith("http") else urljoin(final_url, href)
if "get.php" in href_lower or href_lower.endswith(
(".pdf", ".epub", ".djvu", ".mobi")
):
download_url = (
href if href.startswith("http") else urljoin(final_url, href)
)
debug(f"Found download link: {download_url}")
return download_url
except Exception:
pass
# If we followed redirects successfully, return the final URL
# This handles cases where libgen redirects to a direct download mirror
if final_url != libgen_url:
debug(f"LibGen resolved to mirror: {final_url}")
return final_url
except requests.RequestException as e:
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
# Try head request as fallback
@@ -500,9 +525,9 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
return response.url
except:
pass
return None
except Exception as e:
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
return None
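The resolver above first checks that the URL really is a LibGen file.php link, then follows redirects and scans the landing page for a get.php or direct file link. A simplified sketch of the host/path check and the redirect-following step (scanning the landing page for download links is omitted; the helper name is illustrative):

from typing import Optional
from urllib.parse import urlparse

import requests

def resolve_libgen_mirror(libgen_url: str, timeout: float = 10.0) -> Optional[str]:
    """Follow LibGen's file.php redirect chain and return the final mirror URL, if any."""
    parsed = urlparse(libgen_url)
    if "libgen" not in parsed.netloc.lower() or "/file.php" not in parsed.path.lower():
        return None  # not a LibGen file.php link
    session = requests.Session()
    session.headers.update({"User-Agent": "Mozilla/5.0"})
    try:
        response = session.get(libgen_url, timeout=timeout, allow_redirects=True)
    except requests.RequestException:
        return None
    # If redirects moved us somewhere else, that is usually the download mirror.
    return response.url if response.url != libgen_url else None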
@@ -521,7 +546,7 @@ def _download_direct_file(
from urllib.parse import unquote, urlparse, parse_qs
import re
def _sanitize_filename(name: str) -> str:
# Windows-safe filename sanitization.
# Keep it simple: strip path parts, drop invalid chars, collapse whitespace.
@@ -563,27 +588,27 @@ def _download_direct_file(
# Extract filename from URL
parsed_url = urlparse(url)
url_path = parsed_url.path
# Try to get filename from query parameters first (for LibGen and similar services)
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
filename = None
if parsed_url.query:
query_params = parse_qs(parsed_url.query)
for param_name in ('filename', 'download', 'file', 'name'):
for param_name in ("filename", "download", "file", "name"):
if param_name in query_params and query_params[param_name]:
filename = query_params[param_name][0]
filename = unquote(filename)
break
# If not found in query params, extract from URL path
if not filename or not filename.strip():
filename = url_path.split("/")[-1] if url_path else ""
filename = unquote(filename)
# Remove query strings from filename if any
if "?" in filename:
filename = filename.split("?")[0]
# Try to get real filename from Content-Disposition header (HEAD request)
content_type = ""
try:
@@ -627,7 +652,12 @@ def _download_direct_file(
with HTTPClient(timeout=10.0) as client:
with client._request_stream("GET", url, follow_redirects=True) as resp:
resp.raise_for_status()
ct = str(resp.headers.get("content-type", "") or "").split(";", 1)[0].strip().lower()
ct = (
str(resp.headers.get("content-type", "") or "")
.split(";", 1)[0]
.strip()
.lower()
)
if ct.startswith("text/html"):
raise DownloadError("URL appears to be an HTML page, not a direct file")
except DownloadError:
@@ -635,7 +665,7 @@ def _download_direct_file(
except Exception:
# If we can't probe, keep going; later logic may still infer a safe extension.
pass
# Apply suggested filename (from provider title) if given.
suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
if suggested:
@@ -683,7 +713,9 @@ def _download_direct_file(
# Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
if not filename or not str(filename).strip():
raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")
raise DownloadError(
"Could not determine filename for URL (no Content-Disposition and no path filename)"
)
file_path = _unique_path(output_dir / filename)
@@ -719,12 +751,18 @@ def _download_direct_file(
if transfer_started[0]:
return
try:
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
except Exception:
total_val = None
try:
if hasattr(pipeline_progress, "begin_transfer"):
pipeline_progress.begin_transfer(label=str(filename or "download"), total=total_val)
pipeline_progress.begin_transfer(
label=str(filename or "download"), total=total_val
)
transfer_started[0] = True
except Exception:
return
@@ -737,7 +775,11 @@ def _download_direct_file(
try:
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
_maybe_begin_transfer(content_length)
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
total_val: Optional[int] = (
int(content_length)
if isinstance(content_length, int) and content_length > 0
else None
)
pipeline_progress.update_transfer(
label=str(filename or "download"),
completed=int(bytes_downloaded) if bytes_downloaded is not None else None,
@@ -790,14 +832,21 @@ def _download_direct_file(
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
try:
if progress_bar is not None:
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
avg_speed_str = (
progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0)
+ "/s"
)
else:
avg_speed_str = f"{(downloaded_bytes[0] / elapsed if elapsed > 0 else 0):.1f} B/s"
except Exception:
@@ -835,9 +884,9 @@ def _download_direct_file(
# Only use filename as a title tag if we couldn't extract any meaningful tags
# This prevents duplicate title: tags when the filename could be mistaken for metadata
if not any(t.startswith('title:') for t in tags):
if not any(t.startswith("title:") for t in tags):
# Re-extract tags with filename as title only if needed
info['title'] = filename
info["title"] = filename
tags = []
if extract_ytdlp_tags:
try:
@@ -866,7 +915,11 @@ def _download_direct_file(
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
@@ -884,7 +937,11 @@ def _download_direct_file(
except Exception:
pass
try:
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
if (
pipeline_progress is not None
and transfer_started[0]
and hasattr(pipeline_progress, "finish_transfer")
):
pipeline_progress.finish_transfer(label=str(filename or "download"))
except Exception:
pass
@@ -902,31 +959,33 @@ def _download_direct_file(
raise DownloadError(f"Error downloading file: {exc}") from exc
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
def probe_url(
url: str, no_playlist: bool = False, timeout_seconds: int = 15
) -> Optional[Dict[str, Any]]:
"""Probe URL to extract metadata WITHOUT downloading.
Args:
url: URL to probe
no_playlist: If True, ignore playlists and probe only the single video
timeout_seconds: Max seconds to wait for probe (default 15s)
Returns:
Dict with keys: extractor, title, entries (if playlist), duration, etc.
Returns None if not supported by yt-dlp or on timeout.
"""
if not is_url_supported_by_ytdlp(url):
return None
# Wrap probe in timeout to prevent hanging on large playlists
import threading
from typing import cast
result_container: List[Optional[Any]] = [None, None] # [result, error]
def _do_probe() -> None:
try:
_ensure_yt_dlp_ready()
assert yt_dlp is not None
# Extract info without downloading
# Use extract_flat='in_playlist' to get full metadata for playlist items
@@ -939,20 +998,20 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
"noprogress": True, # No progress bars
}
# Cookies are optional for probing; callers should pass cookiefile via DownloadOptions when needed.
# Add no_playlist option if specified
if no_playlist:
ydl_opts["noplaylist"] = True
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
info = ydl.extract_info(url, download=False)
if not isinstance(info, dict):
result_container[0] = None
return
# Extract relevant fields
result_container[0] = {
"extractor": info.get("extractor", ""),
@@ -966,20 +1025,20 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
except Exception as exc:
log(f"Probe error for {url}: {exc}")
result_container[1] = exc
thread = threading.Thread(target=_do_probe, daemon=False)
thread.start()
thread.join(timeout=timeout_seconds)
if thread.is_alive():
# Probe timed out - return None to fall back to direct download
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
return None
if result_container[1] is not None:
# Probe error - return None to proceed anyway
return None
return cast(Optional[Dict[str, Any]], result_container[0])
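probe_url bounds a potentially slow yt-dlp metadata probe by running it in a worker thread and joining with a timeout; if the thread is still alive, the caller falls back to a plain download. A minimal, library-agnostic sketch of that pattern (the probed function here is a stand-in):

import threading
import time
from typing import Any, Callable, List, Optional

def run_with_timeout(fn: Callable[[], Any], timeout_seconds: float) -> Optional[Any]:
    """Run fn() in a thread; return its result, or None if it errored or did not finish in time."""
    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _worker() -> None:
        try:
            result_container[0] = fn()
        except Exception as exc:
            result_container[1] = exc

    # daemon=True in this sketch so a hung call cannot block interpreter exit
    thread = threading.Thread(target=_worker, daemon=True)
    thread.start()
    thread.join(timeout=timeout_seconds)
    if thread.is_alive() or result_container[1] is not None:
        return None  # timed out or errored: caller should fall back
    return result_container[0]

print(run_with_timeout(lambda: "fast", 1.0))                    # "fast"
print(run_with_timeout(lambda: time.sleep(2) or "slow", 0.5))   # None (timeout)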
@@ -991,4 +1050,3 @@ __all__ = [
"DownloadOptions",
"DownloadMediaResult",
]

View File

@@ -8,6 +8,7 @@ It is intentionally lightweight and safe to import early at process
startup so the CLI can detect and surface environment problems before
trying to import cmdlets or other modules.
"""
from __future__ import annotations
import importlib
@@ -99,7 +100,11 @@ def check_urllib3_compat() -> Tuple[bool, str]:
return False, "\n".join(lines)
# Looks good
debug("urllib3 appears usable: version=%s, exceptions=%s", getattr(urllib3, "__version__", "<unknown>"), hasattr(urllib3, "exceptions"))
debug(
"urllib3 appears usable: version=%s, exceptions=%s",
getattr(urllib3, "__version__", "<unknown>"),
hasattr(urllib3, "exceptions"),
)
return True, "OK"

View File

@@ -19,41 +19,43 @@ _server_port: int = 8001
class FileServerHandler(SimpleHTTPRequestHandler):
"""HTTP request handler for file serving."""
def do_GET(self):
"""Handle GET requests."""
# Parse the path
parsed_path = urllib.parse.urlparse(self.path)
file_path = urllib.parse.unquote(parsed_path.path)
# Remove leading slash
if file_path.startswith('/'):
if file_path.startswith("/"):
file_path = file_path[1:]
# Decode the file path (it's URL encoded)
try:
full_path = Path(file_path).resolve()
# Security check: ensure the path is within allowed directories
# For now, allow all paths (can be restricted later)
if full_path.is_file() and full_path.exists():
# Serve the file
logger.debug(f"Serving file: {full_path}")
# Determine content type
content_type, _ = mimetypes.guess_type(str(full_path))
if content_type is None:
content_type = 'application/octet-stream'
content_type = "application/octet-stream"
try:
with open(full_path, 'rb') as f:
with open(full_path, "rb") as f:
file_content = f.read()
self.send_response(200)
self.send_header('Content-type', content_type)
self.send_header('Content-Length', str(len(file_content)))
self.send_header('Content-Disposition', f'attachment; filename="{full_path.name}"')
self.send_header("Content-type", content_type)
self.send_header("Content-Length", str(len(file_content)))
self.send_header(
"Content-Disposition", f'attachment; filename="{full_path.name}"'
)
self.end_headers()
self.wfile.write(file_content)
logger.info(f"Successfully served file: {full_path.name}")
@@ -66,11 +68,11 @@ class FileServerHandler(SimpleHTTPRequestHandler):
logger.warning(f"File not found: {full_path}")
self.send_error(404, "File not found")
return
except Exception as e:
logger.error(f"Error handling request: {e}")
self.send_error(400, "Bad request")
def log_message(self, format, *args):
"""Override to use our logger instead of stderr."""
logger.debug(format % args)
@@ -92,35 +94,35 @@ def get_local_ip() -> Optional[str]:
def start_file_server(port: int = 8001) -> Optional[str]:
"""Start the HTTP file server.
Args:
port: Port to serve on
Returns:
Server URL if successful, None otherwise
"""
global _file_server, _server_thread, _server_port
if _file_server is not None:
logger.debug(f"File server already running on port {_server_port}")
local_ip = get_local_ip()
if local_ip:
return f"http://{local_ip}:{_server_port}"
return None
try:
_server_port = port
# Create server
server_address = ('', port)
server_address = ("", port)
_file_server = HTTPServer(server_address, FileServerHandler)
# Start in daemon thread
_server_thread = threading.Thread(target=_file_server.serve_forever, daemon=True)
_server_thread.start()
logger.info(f"File server started on port {port}")
# Get local IP
local_ip = get_local_ip()
if local_ip:
@@ -130,7 +132,7 @@ def start_file_server(port: int = 8001) -> Optional[str]:
else:
logger.warning("Could not determine local IP")
return None
except Exception as e:
logger.error(f"Failed to start file server: {e}")
_file_server = None
@@ -141,7 +143,7 @@ def start_file_server(port: int = 8001) -> Optional[str]:
def stop_file_server():
"""Stop the HTTP file server."""
global _file_server, _server_thread
if _file_server is not None:
try:
_file_server.shutdown()
@@ -156,25 +158,25 @@ def stop_file_server():
def get_file_url(file_path: Path, server_url: Optional[str] = None) -> Optional[str]:
"""Get the HTTP URL for a file.
Args:
file_path: Path to the file
server_url: Base server URL (gets determined if None)
Returns:
HTTP URL to the file, or None if server not running
"""
if not file_path.exists():
logger.warning(f"File does not exist: {file_path}")
return None
if server_url is None:
local_ip = get_local_ip()
if not local_ip:
logger.error("Cannot determine local IP for file URL")
return None
server_url = f"http://{local_ip}:{_server_port}"
# URL encode the file path
encoded_path = urllib.parse.quote(str(file_path.resolve()))
return f"{server_url}/{encoded_path}"

View File

@@ -10,51 +10,56 @@ from rich_display import console_for
_DEBUG_ENABLED = False
_thread_local = threading.local()
def set_thread_stream(stream):
"""Set a custom output stream for the current thread."""
_thread_local.stream = stream
def get_thread_stream():
"""Get the custom output stream for the current thread, if any."""
return getattr(_thread_local, 'stream', None)
return getattr(_thread_local, "stream", None)
def set_debug(enabled: bool) -> None:
"""Enable or disable debug logging."""
global _DEBUG_ENABLED
_DEBUG_ENABLED = enabled
def is_debug_enabled() -> bool:
"""Check if debug logging is enabled."""
return _DEBUG_ENABLED
def debug(*args, **kwargs) -> None:
"""Print debug message if debug logging is enabled.
Automatically prepends [filename.function_name] to all output.
"""
if not _DEBUG_ENABLED:
return
# Check if stderr has been redirected to /dev/null (quiet mode)
# If so, skip output to avoid queuing in background worker's capture
try:
stderr_name = getattr(sys.stderr, 'name', '')
if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name):
stderr_name = getattr(sys.stderr, "name", "")
if "nul" in str(stderr_name).lower() or "/dev/null" in str(stderr_name):
return
except Exception:
pass
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
kwargs["file"] = stream
# Set default to stderr for debug messages
elif 'file' not in kwargs:
kwargs['file'] = sys.stderr
elif "file" not in kwargs:
kwargs["file"] = sys.stderr
# Prepend DEBUG label
args = ("DEBUG:", *args)
# Use the same logic as log()
log(*args, **kwargs)
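Both debug() and log() in this module prepend a [module.function] prefix derived from the caller's stack frame (see the frame handling in log() below). A minimal sketch of that introspection pattern on its own, independent of the thread-local stream handling:

import sys
from pathlib import Path

def prefixed_log(*args, **kwargs) -> None:
    """Print with an automatic [module.function] prefix taken from the caller's frame."""
    frame = sys._getframe(1)  # the frame of whoever called prefixed_log()
    prefix = f"[{Path(frame.f_code.co_filename).stem}.{frame.f_code.co_name}]"
    file = kwargs.pop("file", sys.stdout)
    print(prefix, *args, file=file, **kwargs)

def upload() -> None:
    prefixed_log("Upload started")  # -> [<module>.upload] Upload started (prefix depends on the file name)

upload()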
@@ -150,12 +155,13 @@ def debug_inspect(
value=value,
)
def log(*args, **kwargs) -> None:
"""Print with automatic file.function prefix.
Automatically prepends [filename.function_name] to all output.
Defaults to stdout if not specified.
Example:
log("Upload started") # Output: [add_file.run] Upload started
"""
@@ -170,7 +176,7 @@ def log(*args, **kwargs) -> None:
end = kwargs.pop("end", "\n")
console_for(file).print(*args, sep=sep, end=end)
return
caller_frame = frame.f_back
if caller_frame is None:
file = kwargs.pop("file", sys.stdout)
@@ -178,21 +184,21 @@ def log(*args, **kwargs) -> None:
end = kwargs.pop("end", "\n")
console_for(file).print(*args, sep=sep, end=end)
return
try:
# Get file name without extension
file_name = Path(caller_frame.f_code.co_filename).stem
# Get function name
func_name = caller_frame.f_code.co_name
# Check for thread-local stream first
stream = get_thread_stream()
if stream:
kwargs['file'] = stream
kwargs["file"] = stream
# Set default to stdout if not specified
elif 'file' not in kwargs:
kwargs['file'] = sys.stdout
elif "file" not in kwargs:
kwargs["file"] = sys.stdout
file = kwargs.pop("file", sys.stdout)
sep = kwargs.pop("sep", " ")

View File

@@ -33,7 +33,9 @@ class PipelineProgress:
pipe_idx: int = 0
try:
stage_ctx = self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
stage_ctx = (
self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
)
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
@@ -108,7 +110,9 @@ class PipelineProgress:
except Exception:
return
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
def update_transfer(
self, *, label: str, completed: Optional[int], total: Optional[int] = None
) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
@@ -144,11 +148,15 @@ class PipelineProgress:
except Exception:
return
def ensure_local_ui(self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None) -> bool:
def ensure_local_ui(
self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None
) -> bool:
"""Start a local PipelineLiveProgress panel if no shared UI exists."""
try:
existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
existing = (
self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
)
except Exception:
existing = None
@@ -170,7 +178,9 @@ class PipelineProgress:
self._local_attached = False
try:
ui.begin_pipe(0, total_items=max(1, int(total_items)), items_preview=list(items_preview or []))
ui.begin_pipe(
0, total_items=max(1, int(total_items)), items_preview=list(items_preview or [])
)
except Exception:
pass
@@ -210,7 +220,9 @@ class PipelineProgress:
total_items: int,
items_preview: Optional[Sequence[Any]] = None,
) -> Iterator["PipelineProgress"]:
created = self.ensure_local_ui(label=label, total_items=total_items, items_preview=items_preview)
created = self.ensure_local_ui(
label=label, total_items=total_items, items_preview=items_preview
)
try:
yield self
finally:

View File

@@ -14,8 +14,15 @@ from models import ProgressBar
_BAR = ProgressBar()
def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
_BAR.update(downloaded=int(current), total=int(total) if total else None, label=str(filename or "progress"), file=sys.stderr)
def print_progress(
filename: str, current: int, total: int, speed: float = 0, end: str = "\r"
) -> None:
_BAR.update(
downloaded=int(current),
total=int(total) if total else None,
label=str(filename or "progress"),
file=sys.stderr,
)
def print_final_progress(filename: str, total: int, elapsed: float) -> None:

View File

@@ -1,4 +1,5 @@
"""Background task handling and IPC helpers for mpv integration."""
from __future__ import annotations
import errno
import json
@@ -11,16 +12,18 @@ from SYS.logger import log
import threading
import time
from typing import IO, Iterable
def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
"""Connect to the mpv IPC server located at *path*."""
deadline = time.time() + timeout
if not path:
return None
if os.name == 'nt':
if os.name == "nt":
# mpv exposes a named pipe on Windows. Keep retrying until it is ready.
while True:
try:
return open(path, 'r+b', buffering=0)
return open(path, "r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
@@ -38,7 +41,7 @@ def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
while True:
try:
sock.connect(path)
return sock.makefile('r+b', buffering=0)
return sock.makefile("r+b", buffering=0)
except FileNotFoundError:
if time.time() > deadline:
return None
@@ -49,74 +52,92 @@ def connect_ipc(path: str, timeout: float = 5.0) -> IO[bytes] | None:
if time.time() > deadline:
return None
time.sleep(0.05)
def ipc_sender(ipc: IO[bytes] | None):
"""Create a helper function for sending script messages via IPC."""
if ipc is None:
def _noop(_event: str, _payload: dict) -> None:
return None
return _noop
lock = threading.Lock()
def _send(event: str, payload: dict) -> None:
message = json.dumps({'command': ['script-message', event, json.dumps(payload)]}, ensure_ascii=False)
encoded = message.encode('utf-8') + b'\n'
message = json.dumps(
{"command": ["script-message", event, json.dumps(payload)]}, ensure_ascii=False
)
encoded = message.encode("utf-8") + b"\n"
with lock:
try:
ipc.write(encoded)
ipc.flush()
except OSError:
pass
return _send
def iter_stream(stream: Iterable[str]) -> Iterable[str]:
for raw in stream:
yield raw.rstrip('\r\n')
yield raw.rstrip("\r\n")
def _run_task(args, parser) -> int:
if not args.command:
parser.error('run-task requires a command to execute (use "--" before the command).')
env = os.environ.copy()
for entry in args.env:
key, sep, value = entry.partition('=')
key, sep, value = entry.partition("=")
if not sep:
parser.error(f'Invalid environment variable definition: {entry!r}')
parser.error(f"Invalid environment variable definition: {entry!r}")
env[key] = value
command = list(args.command)
if command and command[0] == '--':
if command and command[0] == "--":
command.pop(0)
notifier = ipc_sender(connect_ipc(args.ipc, timeout=args.ipc_timeout))
if not command:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': 'No command provided after separator',
})
log('[downlow.py] No command provided for run-task', file=sys.stderr)
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "error",
"message": "No command provided after separator",
},
)
log("[downlow.py] No command provided for run-task", file=sys.stderr)
return 1
if command and isinstance(command[0], str) and sys.executable:
first = command[0].lower()
if first in {'python', 'python3', 'py', 'python.exe', 'python3.exe', 'py.exe'}:
if first in {"python", "python3", "py", "python.exe", "python3.exe", "py.exe"}:
command[0] = sys.executable
if os.environ.get('DOWNLOW_DEBUG'):
if os.environ.get("DOWNLOW_DEBUG"):
log(f"Launching command: {command}", file=sys.stderr)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'start',
'command': command,
'cwd': args.cwd or os.getcwd(),
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "start",
"command": command,
"cwd": args.cwd or os.getcwd(),
},
)
popen_kwargs = {}
if os.name == 'nt':
if os.name == "nt":
# Avoid flashing a console window when spawning console-subsystem executables.
flags = 0
try:
flags |= int(getattr(subprocess, 'CREATE_NO_WINDOW', 0x08000000))
flags |= int(getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000))
except Exception:
flags |= 0x08000000
popen_kwargs['creationflags'] = flags
popen_kwargs["creationflags"] = flags
try:
si = subprocess.STARTUPINFO()
si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
si.wShowWindow = subprocess.SW_HIDE
popen_kwargs['startupinfo'] = si
popen_kwargs["startupinfo"] = si
except Exception:
pass
try:
@@ -132,44 +153,59 @@ def _run_task(args, parser) -> int:
**popen_kwargs,
)
except FileNotFoundError as exc:
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'error',
'message': f'Executable not found: {exc.filename}',
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "error",
"message": f"Executable not found: {exc.filename}",
},
)
log(f"{exc}", file=sys.stderr)
return 1
stdout_lines: list[str] = []
stderr_lines: list[str] = []
def pump(stream: IO[str], label: str, sink: list[str]) -> None:
for line in iter_stream(stream):
sink.append(line)
notifier('downlow-task-event', {
'id': args.task_id,
'event': label,
'line': line,
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": label,
"line": line,
},
)
threads = []
if process.stdout:
t_out = threading.Thread(target=pump, args=(process.stdout, 'stdout', stdout_lines), daemon=True)
t_out = threading.Thread(
target=pump, args=(process.stdout, "stdout", stdout_lines), daemon=True
)
t_out.start()
threads.append(t_out)
if process.stderr:
t_err = threading.Thread(target=pump, args=(process.stderr, 'stderr', stderr_lines), daemon=True)
t_err = threading.Thread(
target=pump, args=(process.stderr, "stderr", stderr_lines), daemon=True
)
t_err.start()
threads.append(t_err)
return_code = process.wait()
for t in threads:
t.join(timeout=0.1)
notifier('downlow-task-event', {
'id': args.task_id,
'event': 'exit',
'returncode': return_code,
'success': return_code == 0,
})
notifier(
"downlow-task-event",
{
"id": args.task_id,
"event": "exit",
"returncode": return_code,
"success": return_code == 0,
},
)
# Also mirror aggregated output to stdout/stderr for compatibility when IPC is unavailable.
if stdout_lines:
log('\n'.join(stdout_lines))
log("\n".join(stdout_lines))
if stderr_lines:
log('\n'.join(stderr_lines), file=sys.stderr)
log("\n".join(stderr_lines), file=sys.stderr)
return return_code
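The IPC helpers above talk to mpv's JSON IPC server: each command is a JSON object terminated by a newline, and task events are forwarded as script-message commands carrying a JSON payload. A minimal sketch of sending one such message over an already-connected binary stream (connection handling as in connect_ipc above is omitted):

import json
from typing import IO

def send_script_message(ipc: IO[bytes], event: str, payload: dict) -> None:
    """Write one mpv JSON-IPC command: a script-message carrying a JSON payload."""
    message = json.dumps(
        {"command": ["script-message", event, json.dumps(payload)]},
        ensure_ascii=False,
    )
    ipc.write(message.encode("utf-8") + b"\n")  # mpv expects newline-terminated JSON commands
    ipc.flush()

# Example mirroring the task events emitted above (the stream object is illustrative):
# send_script_message(stream, "downlow-task-event", {"id": "task-1", "event": "start"})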

View File

@@ -1,10 +1,12 @@
"""General-purpose helpers used across the downlow CLI."""
from __future__ import annotations
import json
import hashlib
import subprocess
import shutil
try:
import ffmpeg # type: ignore
except Exception:
@@ -28,12 +30,16 @@ except ImportError:
CHUNK_SIZE = 1024 * 1024 # 1 MiB
_format_logger = logging.getLogger(__name__)
def ensure_directory(path: Path) -> None:
"""Ensure *path* exists as a directory."""
try:
path.mkdir(parents=True, exist_ok=True)
except OSError as exc: # pragma: no cover - surfaced to caller
raise RuntimeError(f"Failed to create directory {path}: {exc}") from exc
def unique_path(path: Path) -> Path:
"""Return a unique path by appending " (n)" if needed."""
if not path.exists():
@@ -48,15 +54,18 @@ def unique_path(path: Path) -> Path:
return candidate
counter += 1
def sanitize_metadata_value(value: Any) -> str | None:
if value is None:
return None
if not isinstance(value, str):
value = str(value)
value = value.replace('\x00', ' ').replace('\r', ' ').replace('\n', ' ').strip()
value = value.replace("\x00", " ").replace("\r", " ").replace("\n", " ").strip()
if not value:
return None
return value
def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []
@@ -65,21 +74,23 @@ def unique_preserve_order(values: Iterable[str]) -> list[str]:
seen.add(value)
ordered.append(value)
return ordered
def sha256_file(file_path: Path) -> str:
"""Return the SHA-256 hex digest of *path*."""
hasher = hashlib.sha256()
with file_path.open('rb') as handle:
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b''):
with file_path.open("rb") as handle:
for chunk in iter(lambda: handle.read(CHUNK_SIZE), b""):
hasher.update(chunk)
return hasher.hexdigest()
def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
"""Create a .metadata sidecar file with JSON metadata.
The metadata dict should contain title. If not present, it will be derived from
the filename. This ensures the .metadata file can be matched during batch import.
Args:
file_path: Path to the exported file
metadata: Dictionary of metadata to save
@@ -90,50 +101,50 @@ def create_metadata_sidecar(file_path: Path, metadata: dict) -> None:
file_ext = file_path.suffix.lower()
# Ensure metadata has a title field that matches the filename (without extension)
# This allows the sidecar to be matched and imported properly during batch import
if 'title' not in metadata or not metadata.get('title'):
metadata['title'] = file_name
metadata['hash'] = sha256_file(file_path)
metadata['size'] = Path(file_path).stat().st_size
if "title" not in metadata or not metadata.get("title"):
metadata["title"] = file_name
metadata["hash"] = sha256_file(file_path)
metadata["size"] = Path(file_path).stat().st_size
format_found = False
for mime_type, ext_map in SYS.utils_constant.mime_maps.items():
for key, info in ext_map.items():
if info.get("ext") == file_ext:
metadata['type'] = mime_type
metadata["type"] = mime_type
format_found = True
break
if format_found:
break
else:
metadata['type'] = 'unknown'
metadata["type"] = "unknown"
metadata.update(ffprobe(str(file_path)))
metadata_path = file_path.with_suffix(file_path.suffix + '.metadata')
metadata_path = file_path.with_suffix(file_path.suffix + ".metadata")
try:
with open(metadata_path, 'w', encoding='utf-8') as f:
with open(metadata_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, ensure_ascii=False, indent=2)
except OSError as exc:
raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc
def create_tags_sidecar(file_path: Path, tags: set) -> None:
"""Create a .tag sidecar file with tags (one per line).
Args:
file_path: Path to the exported file
tags: Set of tag strings
"""
if not tags:
return
tags_path = file_path.with_suffix(file_path.suffix + '.tag')
tags_path = file_path.with_suffix(file_path.suffix + ".tag")
try:
with open(tags_path, 'w', encoding='utf-8') as f:
with open(tags_path, "w", encoding="utf-8") as f:
for tag in sorted(tags):
f.write(f"{str(tag).strip().lower()}\n")
except Exception as e:
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e
def ffprobe(file_path: str) -> dict:
"""Probe a media file and return a metadata dictionary.
@@ -157,7 +168,16 @@ def ffprobe(file_path: str) -> dict:
if ffprobe_cmd:
try:
proc = subprocess.run(
[ffprobe_cmd, "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(file_path)],
[
ffprobe_cmd,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
str(file_path),
],
check=True,
capture_output=True,
text=True,
@@ -185,7 +205,9 @@ def ffprobe(file_path: str) -> dict:
if codec_type == "audio":
metadata["audio_codec"] = stream.get("codec_name")
metadata["bitrate"] = int(stream.get("bit_rate", 0)) if "bit_rate" in stream else None
metadata["samplerate"] = int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
metadata["samplerate"] = (
int(stream.get("sample_rate", 0)) if "sample_rate" in stream else None
)
metadata["channels"] = int(stream.get("channels", 0)) if "channels" in stream else None
elif codec_type == "video":
metadata["video_codec"] = stream.get("codec_name")
@@ -233,16 +255,16 @@ def jsonify(value: Any) -> Any:
def format_bytes(bytes_value) -> str:
"""Format bytes to human-readable format (e.g., '1.5 MB', '250 KB').
Args:
bytes_value: Size in bytes (int or float)
Returns:
Formatted string like '1.5 MB' or '756 MB'
"""
if bytes_value is None or bytes_value <= 0:
return "0 B"
if isinstance(bytes_value, (int, float)):
for unit in ("B", "KB", "MB", "GB", "TB"):
if bytes_value < 1024:
@@ -256,33 +278,33 @@ def format_bytes(bytes_value) -> str:
def format_duration(seconds) -> str:
"""Format duration in seconds to human-readable format (e.g., '1h 23m 5s', '5m 30s').
Args:
seconds: Duration in seconds (int or float)
Returns:
Formatted string like '1:23:45' or '5:30'
"""
if seconds is None or seconds == '':
if seconds is None or seconds == "":
return "N/A"
if isinstance(seconds, str):
try:
seconds = float(seconds)
except ValueError:
return str(seconds)
if not isinstance(seconds, (int, float)):
return str(seconds)
total_seconds = int(seconds)
if total_seconds < 0:
return "N/A"
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
secs = total_seconds % 60
if hours > 0:
return f"{hours}:{minutes:02d}:{secs:02d}"
elif minutes > 0:
@@ -293,53 +315,60 @@ def format_duration(seconds) -> str:
def format_timestamp(timestamp_str) -> str:
"""Format ISO timestamp to readable format.
Args:
timestamp_str: ISO format timestamp string or None
Returns:
Formatted string like "2025-10-28 19:36:01" or original string if parsing fails
"""
if not timestamp_str:
return "N/A"
try:
# Handle ISO format timestamps
if isinstance(timestamp_str, str):
# Try parsing ISO format
if 'T' in timestamp_str:
dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
if "T" in timestamp_str:
dt = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
else:
# Try other common formats
dt = datetime.fromisoformat(timestamp_str)
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception as e:
_format_logger.debug(f"Could not parse timestamp '{timestamp_str}': {e}")
return str(timestamp_str)
def format_metadata_value(key: str, value) -> str:
"""Format a metadata value based on its key for display.
This is the central formatting rule for all metadata display.
Args:
key: Metadata field name
value: Value to format
Returns:
Formatted string for display
"""
if value is None or value == '':
if value is None or value == "":
return "N/A"
# Apply field-specific formatting
if key in ('size', 'file_size'):
if key in ("size", "file_size"):
return format_bytes(value)
elif key in ('duration', 'length'):
elif key in ("duration", "length"):
return format_duration(value)
elif key in ('time_modified', 'time_imported', 'created_at', 'updated_at', 'indexed_at', 'timestamp'):
elif key in (
"time_modified",
"time_imported",
"created_at",
"updated_at",
"indexed_at",
"timestamp",
):
return format_timestamp(value)
else:
return str(value)
@@ -353,50 +382,50 @@ def format_metadata_value(key: str, value) -> str:
def extract_link_from_args(args: Iterable[str]) -> Any | None:
"""Extract HTTP/HTTPS URL from command arguments.
Args:
args: Command arguments
Returns:
URL string if found, None otherwise
"""
args_list = list(args) if not isinstance(args, (list, tuple)) else args
if not args_list or len(args_list) == 0:
return None
potential_link = str(args_list[0])
if potential_link.startswith(('http://', 'https://')):
if potential_link.startswith(("http://", "https://")):
return potential_link
return None
def extract_link_from_result(result: Any) -> Any | None:
"""Extract URL from a result object (dict or object with attributes).
Args:
result: Result object from pipeline (dict or object)
Returns:
URL string if found, None otherwise
"""
if isinstance(result, dict):
return result.get('url') or result.get('link') or result.get('href')
return result.get("url") or result.get("link") or result.get("href")
return (
getattr(result, 'url', None) or
getattr(result, 'link', None) or
getattr(result, 'href', None)
getattr(result, "url", None)
or getattr(result, "link", None)
or getattr(result, "href", None)
)
def extract_link(result: Any, args: Iterable[str]) -> Any | None:
"""Extract link from args or result (args take priority).
Args:
result: Pipeline result object
args: Command arguments
Returns:
URL string if found, None otherwise
"""
@@ -404,34 +433,34 @@ def extract_link(result: Any, args: Iterable[str]) -> Any | None:
link = extract_link_from_args(args)
if link:
return link
# Fall back to result
return extract_link_from_result(result)
def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | None:
"""Get API key from config with fallback support.
Args:
config: Configuration dictionary
service: Service name for logging
key_path: Dot-notation path to key (e.g., "Debrid.All-debrid")
Returns:
API key if found and not empty, None otherwise
"""
try:
parts = key_path.split('.')
parts = key_path.split(".")
value = config
for part in parts:
if isinstance(value, dict):
value = value.get(part)
else:
return None
if isinstance(value, str):
return value.strip() or None
return None
except Exception:
return None
@@ -439,18 +468,18 @@ def get_api_key(config: dict[str, Any], service: str, key_path: str) -> str | No
def add_direct_link_to_result(result: Any, direct_link: str, original_link: str) -> None:
"""Add direct link information to result object.
Args:
result: Result object to modify (dict or object)
direct_link: The unlocked/direct URL
original_link: The original restricted URL
"""
if isinstance(result, dict):
result['direct_link'] = direct_link
result['original_link'] = original_link
result["direct_link"] = direct_link
result["original_link"] = original_link
else:
setattr(result, 'direct_link', direct_link)
setattr(result, 'original_link', original_link)
setattr(result, "direct_link", direct_link)
setattr(result, "original_link", original_link)
# ============================================================================
@@ -529,4 +558,4 @@ def resolve_url_policy(config: dict[str, Any], url: str) -> UrlPolicy:
for tag in rule["extra_tags"]:
if tag not in resolved.extra_tags:
resolved.extra_tags.append(tag)
return resolved
return resolved
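get_api_key above walks a dot-notation path such as "Debrid.All-debrid" through nested config dicts and returns the stripped string, or None when any level is missing or the value is empty. A standalone sketch of that lookup (the config contents are made up for illustration):

from typing import Any, Optional

def lookup_dotted(config: dict[str, Any], key_path: str) -> Optional[str]:
    """Resolve "A.B.C" against nested dicts; return a non-empty stripped string or None."""
    value: Any = config
    for part in key_path.split("."):
        if isinstance(value, dict):
            value = value.get(part)
        else:
            return None
    if isinstance(value, str):
        return value.strip() or None
    return None

config = {"Debrid": {"All-debrid": "  abc123  "}}
print(lookup_dotted(config, "Debrid.All-debrid"))  # abc123
print(lookup_dotted(config, "Debrid.Missing"))     # None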

View File

@@ -1,82 +1,99 @@
mime_maps = {
"image": {
"jpg": { "ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"] },
"png": { "ext": ".png", "mimes": ["image/png"] },
"gif": { "ext": ".gif", "mimes": ["image/gif"] },
"webp": { "ext": ".webp", "mimes": ["image/webp"] },
"avif": { "ext": ".avif", "mimes": ["image/avif"] },
"jxl": { "ext": ".jxl", "mimes": ["image/jxl"] },
"bmp": { "ext": ".bmp", "mimes": ["image/bmp"] },
"heic": { "ext": ".heic", "mimes": ["image/heic"] },
"heif": { "ext": ".heif", "mimes": ["image/heif"] },
"ico": { "ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"] },
"qoi": { "ext": ".qoi", "mimes": ["image/qoi"] },
"tiff": { "ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"] },
"svg": { "ext": ".svg", "mimes": ["image/svg+xml"] }
},
"image_sequence": {
"apng": { "ext": ".apng", "mimes": ["image/apng"], "sequence": True },
"avifs": { "ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True },
"heics": { "ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True },
"heifs": { "ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True }
},
"video": {
"mp4": { "ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"] },
"webm": { "ext": ".webm", "mimes": ["video/webm", "audio/webm"] },
"mov": { "ext": ".mov", "mimes": ["video/quicktime"] },
"ogv": { "ext": ".ogv", "mimes": ["video/ogg"] },
"mpeg": { "ext": ".mpeg", "mimes": ["video/mpeg"] },
"avi": { "ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"] },
"flv": { "ext": ".flv", "mimes": ["video/x-flv"] },
"mkv": { "ext": ".mkv", "mimes": ["video/x-matroska", "application/x-matroska"], "audio_only_ext": ".mka" },
"wmv": { "ext": ".wmv", "mimes": ["video/x-ms-wmv"] },
"rv": { "ext": ".rv", "mimes": ["video/vnd.rn-realvideo"] }
},
"audio": {
"mp3": { "ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"] },
"m4a": { "ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"] },
"ogg": { "ext": ".ogg", "mimes": ["audio/ogg"] },
"opus": { "ext": ".opus", "mimes": ["audio/opus"] },
"flac": { "ext": ".flac", "mimes": ["audio/flac"] },
"wav": { "ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"] },
"wma": { "ext": ".wma", "mimes": ["audio/x-ms-wma"] },
"tta": { "ext": ".tta", "mimes": ["audio/x-tta"] },
"wv": { "ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"] },
"mka": { "ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"] }
},
"document": {
"pdf": { "ext": ".pdf", "mimes": ["application/pdf"] },
"epub": { "ext": ".epub", "mimes": ["application/epub+zip"] },
"djvu": { "ext": ".djvu", "mimes": ["application/vnd.djvu"] },
"rtf": { "ext": ".rtf", "mimes": ["application/rtf"] },
"docx": { "ext": ".docx", "mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
"xlsx": { "ext": ".xlsx", "mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
"pptx": { "ext": ".pptx", "mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
"doc": { "ext": ".doc", "mimes": ["application/msword"] },
"xls": { "ext": ".xls", "mimes": ["application/vnd.ms-excel"] },
"ppt": { "ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"] }
},
"archive": {
"zip": { "ext": ".zip", "mimes": ["application/zip"] },
"7z": { "ext": ".7z", "mimes": ["application/x-7z-compressed"] },
"rar": { "ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"] },
"gz": { "ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"] },
"tar": { "ext": ".tar", "mimes": ["application/x-tar"] },
"cbz": { "ext": ".cbz", "mimes": ["application/zip"], "note": "zip archive of images; prefer extension-based detection for comics" }
},
"project": {
"clip": { "ext": ".clip", "mimes": ["application/clip"] },
"kra": { "ext": ".kra", "mimes": ["application/x-krita"] },
"procreate": { "ext": ".procreate", "mimes": ["application/x-procreate"] },
"psd": { "ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"] },
"swf": { "ext": ".swf", "mimes": ["application/x-shockwave-flash"] }
},
"other": {
"octet-stream": { "ext": "", "mimes": ["application/octet-stream"] },
"json": { "ext": ".json", "mimes": ["application/json"] },
"xml": { "ext": ".xml", "mimes": ["application/xml", "text/xml"] },
"csv": { "ext": ".csv", "mimes": ["text/csv"] }
}
"image": {
"jpg": {"ext": ".jpg", "mimes": ["image/jpeg", "image/jpg"]},
"png": {"ext": ".png", "mimes": ["image/png"]},
"gif": {"ext": ".gif", "mimes": ["image/gif"]},
"webp": {"ext": ".webp", "mimes": ["image/webp"]},
"avif": {"ext": ".avif", "mimes": ["image/avif"]},
"jxl": {"ext": ".jxl", "mimes": ["image/jxl"]},
"bmp": {"ext": ".bmp", "mimes": ["image/bmp"]},
"heic": {"ext": ".heic", "mimes": ["image/heic"]},
"heif": {"ext": ".heif", "mimes": ["image/heif"]},
"ico": {"ext": ".ico", "mimes": ["image/x-icon", "image/vnd.microsoft.icon"]},
"qoi": {"ext": ".qoi", "mimes": ["image/qoi"]},
"tiff": {"ext": ".tiff", "mimes": ["image/tiff", "image/x-tiff"]},
"svg": {"ext": ".svg", "mimes": ["image/svg+xml"]},
},
"image_sequence": {
"apng": {"ext": ".apng", "mimes": ["image/apng"], "sequence": True},
"avifs": {"ext": ".avifs", "mimes": ["image/avif-sequence"], "sequence": True},
"heics": {"ext": ".heics", "mimes": ["image/heic-sequence"], "sequence": True},
"heifs": {"ext": ".heifs", "mimes": ["image/heif-sequence"], "sequence": True},
},
"video": {
"mp4": {"ext": ".mp4", "mimes": ["video/mp4", "audio/mp4"]},
"webm": {"ext": ".webm", "mimes": ["video/webm", "audio/webm"]},
"mov": {"ext": ".mov", "mimes": ["video/quicktime"]},
"ogv": {"ext": ".ogv", "mimes": ["video/ogg"]},
"mpeg": {"ext": ".mpeg", "mimes": ["video/mpeg"]},
"avi": {"ext": ".avi", "mimes": ["video/x-msvideo", "video/avi"]},
"flv": {"ext": ".flv", "mimes": ["video/x-flv"]},
"mkv": {
"ext": ".mkv",
"mimes": ["video/x-matroska", "application/x-matroska"],
"audio_only_ext": ".mka",
},
"wmv": {"ext": ".wmv", "mimes": ["video/x-ms-wmv"]},
"rv": {"ext": ".rv", "mimes": ["video/vnd.rn-realvideo"]},
},
"audio": {
"mp3": {"ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"]},
"m4a": {"ext": ".m4a", "mimes": ["audio/mp4", "audio/x-m4a"]},
"ogg": {"ext": ".ogg", "mimes": ["audio/ogg"]},
"opus": {"ext": ".opus", "mimes": ["audio/opus"]},
"flac": {"ext": ".flac", "mimes": ["audio/flac"]},
"wav": {"ext": ".wav", "mimes": ["audio/wav", "audio/x-wav", "audio/vnd.wave"]},
"wma": {"ext": ".wma", "mimes": ["audio/x-ms-wma"]},
"tta": {"ext": ".tta", "mimes": ["audio/x-tta"]},
"wv": {"ext": ".wv", "mimes": ["audio/x-wavpack", "audio/wavpack"]},
"mka": {"ext": ".mka", "mimes": ["audio/x-matroska", "video/x-matroska"]},
},
"document": {
"pdf": {"ext": ".pdf", "mimes": ["application/pdf"]},
"epub": {"ext": ".epub", "mimes": ["application/epub+zip"]},
"djvu": {"ext": ".djvu", "mimes": ["application/vnd.djvu"]},
"rtf": {"ext": ".rtf", "mimes": ["application/rtf"]},
"docx": {
"ext": ".docx",
"mimes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
},
"xlsx": {
"ext": ".xlsx",
"mimes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
},
"pptx": {
"ext": ".pptx",
"mimes": ["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
},
"doc": {"ext": ".doc", "mimes": ["application/msword"]},
"xls": {"ext": ".xls", "mimes": ["application/vnd.ms-excel"]},
"ppt": {"ext": ".ppt", "mimes": ["application/vnd.ms-powerpoint"]},
},
"archive": {
"zip": {"ext": ".zip", "mimes": ["application/zip"]},
"7z": {"ext": ".7z", "mimes": ["application/x-7z-compressed"]},
"rar": {"ext": ".rar", "mimes": ["application/x-rar-compressed", "application/vnd.rar"]},
"gz": {"ext": ".gz", "mimes": ["application/gzip", "application/x-gzip"]},
"tar": {"ext": ".tar", "mimes": ["application/x-tar"]},
"cbz": {
"ext": ".cbz",
"mimes": ["application/zip"],
"note": "zip archive of images; prefer extension-based detection for comics",
},
},
"project": {
"clip": {"ext": ".clip", "mimes": ["application/clip"]},
"kra": {"ext": ".kra", "mimes": ["application/x-krita"]},
"procreate": {"ext": ".procreate", "mimes": ["application/x-procreate"]},
"psd": {"ext": ".psd", "mimes": ["image/vnd.adobe.photoshop"]},
"swf": {"ext": ".swf", "mimes": ["application/x-shockwave-flash"]},
},
"other": {
"octet-stream": {"ext": "", "mimes": ["application/octet-stream"]},
"json": {"ext": ".json", "mimes": ["application/json"]},
"xml": {"ext": ".xml", "mimes": ["application/xml", "text/xml"]},
"csv": {"ext": ".csv", "mimes": ["text/csv"]},
},
}
@@ -90,15 +107,15 @@ def get_type_from_ext(ext: str) -> str:
Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown
"""
if not ext:
return 'other'
return "other"
ext_clean = ext.lstrip('.').lower()
ext_clean = ext.lstrip(".").lower()
for type_name, extensions_dict in mime_maps.items():
if ext_clean in extensions_dict:
return type_name
return 'other'
return "other"
# Canonical supported extension set for all stores/cmdlets.
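get_type_from_ext above normalizes the extension (strips the dot, lowercases) and then scans each category's map for a matching key. A short usage sketch against a pared-down map (the real mime_maps table is above; this subset exists only for illustration):

mime_maps = {
    "video": {"mkv": {"ext": ".mkv", "mimes": ["video/x-matroska"]}},
    "audio": {"flac": {"ext": ".flac", "mimes": ["audio/flac"]}},
}

def get_type_from_ext(ext: str) -> str:
    """Map a file extension to a coarse media type, or 'other' if unknown."""
    if not ext:
        return "other"
    ext_clean = ext.lstrip(".").lower()
    for type_name, extensions_dict in mime_maps.items():
        if ext_clean in extensions_dict:
            return type_name
    return "other"

print(get_type_from_ext(".MKV"))  # video
print(get_type_from_ext("flac"))  # audio
print(get_type_from_ext(".xyz"))  # other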

View File

@@ -19,11 +19,17 @@ logger = logging.getLogger(__name__)
class Worker:
"""Represents a single worker task with state management."""
    def __init__(
        self,
        worker_id: str,
        worker_type: str,
        title: str = "",
        description: str = "",
        manager: Optional["WorkerManager"] = None,
    ):
"""Initialize a worker.
Args:
worker_id: Unique identifier for this worker
worker_type: Type of work (e.g., 'download', 'search', 'import')
@@ -43,10 +49,10 @@ class Worker:
self.result = "pending"
self._stdout_buffer = []
self._steps_buffer = []
def log_step(self, step_text: str) -> None:
"""Log a step for this worker.
Args:
step_text: Text describing the step
"""
@@ -57,10 +63,10 @@ class Worker:
logger.info(f"[{self.id}] {step_text}")
except Exception as e:
logger.error(f"Error logging step for worker {self.id}: {e}")
def append_stdout(self, text: str) -> None:
"""Append text to stdout log.
Args:
text: Text to append
"""
@@ -71,10 +77,10 @@ class Worker:
self._stdout_buffer.append(text)
except Exception as e:
logger.error(f"Error appending stdout for worker {self.id}: {e}")
def get_stdout(self) -> str:
"""Get all stdout for this worker.
Returns:
Complete stdout text
"""
@@ -86,10 +92,10 @@ class Worker:
except Exception as e:
logger.error(f"Error getting stdout for worker {self.id}: {e}")
return ""
def get_steps(self) -> str:
"""Get all steps for this worker.
Returns:
Complete steps text
"""
@@ -101,10 +107,10 @@ class Worker:
except Exception as e:
logger.error(f"Error getting steps for worker {self.id}: {e}")
return ""
def update_progress(self, progress: str = "", details: str = "") -> None:
"""Update worker progress.
Args:
progress: Progress string (e.g., "50%")
details: Additional details
@@ -116,10 +122,10 @@ class Worker:
self.manager.update_worker(self.id, progress, details)
except Exception as e:
logger.error(f"Error updating worker {self.id}: {e}")
def finish(self, result: str = "completed", message: str = "") -> None:
"""Mark worker as finished.
Args:
result: Result status ('completed', 'error', 'cancelled')
message: Result message/error details
@@ -139,12 +145,16 @@ class Worker:
class WorkerLoggingHandler(logging.StreamHandler):
"""Custom logging handler that captures logs for a worker."""
    def __init__(
        self,
        worker_id: str,
        db: API_folder_store,
        manager: Optional["WorkerManager"] = None,
        buffer_size: int = 50,
    ):
"""Initialize the handler.
Args:
worker_id: ID of the worker to capture logs for
db: Reference to LocalLibraryDB for storing logs
@@ -157,14 +167,13 @@ class WorkerLoggingHandler(logging.StreamHandler):
self.buffer_size = buffer_size
self.buffer = []
self._lock = Lock()
# Set a format that includes timestamp and level
formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
self.setFormatter(formatter)
def emit(self, record):
"""Emit a log record."""
try:
@@ -183,41 +192,42 @@ class WorkerLoggingHandler(logging.StreamHandler):
except (TypeError, ValueError):
# If that fails too, just use the raw message string
msg = str(record.msg)
# Add timestamp and level if not already in message
import time
            timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
with self._lock:
self.buffer.append(msg)
# Flush to DB when buffer reaches size
if len(self.buffer) >= self.buffer_size:
self._flush()
except Exception:
self.handleError(record)
def _flush(self):
"""Flush buffered logs to database."""
if self.buffer:
            log_text = "\n".join(self.buffer)
try:
if self.manager:
                    self.manager.append_worker_stdout(self.worker_id, log_text, channel="log")
                else:
                    self.db.append_worker_stdout(self.worker_id, log_text, channel="log")
except Exception as e:
# If we can't write to DB, at least log it
log(f"Error flushing worker logs: {e}")
self.buffer = []
def flush(self):
"""Flush any buffered records."""
with self._lock:
self._flush()
super().flush()
def close(self):
"""Close the handler."""
self.flush()
@@ -226,10 +236,10 @@ class WorkerLoggingHandler(logging.StreamHandler):
class WorkerManager:
"""Manages persistent worker tasks with auto-refresh capability."""
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
"""Initialize the worker manager.
Args:
library_root: Root directory for the local library database
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
@@ -243,7 +253,7 @@ class WorkerManager:
self._lock = Lock()
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
self._worker_last_step: Dict[str, str] = {}
def close(self) -> None:
"""Close the database connection."""
if self.db:
@@ -251,18 +261,18 @@ class WorkerManager:
self.db.close()
except Exception:
pass
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - close database."""
self.close()
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
"""Register a callback to be called on worker updates.
Args:
callback: Function that receives list of active workers
"""
@@ -297,25 +307,25 @@ class WorkerManager:
except Exception as exc:
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
return 0
def remove_refresh_callback(self, callback: Callable) -> None:
"""Remove a refresh callback.
Args:
callback: The callback function to remove
"""
with self._lock:
if callback in self.refresh_callbacks:
self.refresh_callbacks.remove(callback)
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
"""Enable logging capture for a worker.
Creates a logging handler that captures all logs for this worker.
Args:
worker_id: ID of the worker to capture logs for
Returns:
The logging handler that was created, or None if there was an error
"""
@@ -323,46 +333,57 @@ class WorkerManager:
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
with self._lock:
self.worker_handlers[worker_id] = handler
# Add the handler to the root logger so it captures all logs
root_logger = logging.getLogger()
root_logger.addHandler(handler)
root_logger.setLevel(logging.DEBUG) # Capture all levels
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
return handler
except Exception as e:
            logger.error(
                f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True
            )
return None
def disable_logging_for_worker(self, worker_id: str) -> None:
"""Disable logging capture for a worker and flush any pending logs.
Args:
worker_id: ID of the worker to stop capturing logs for
"""
try:
with self._lock:
handler = self.worker_handlers.pop(worker_id, None)
if handler:
# Flush and close the handler
handler.flush()
handler.close()
# Remove from root logger
root_logger = logging.getLogger()
root_logger.removeHandler(handler)
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
except Exception as e:
            logger.error(
                f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}",
                exc_info=True,
            )
def track_worker(
self,
worker_id: str,
worker_type: str,
title: str = "",
description: str = "",
total_steps: int = 0,
pipe: Optional[str] = None,
) -> bool:
"""Start tracking a new worker.
Args:
worker_id: Unique identifier for the worker
worker_type: Type of worker (e.g., 'download', 'search', 'import')
@@ -370,12 +391,14 @@ class WorkerManager:
description: Worker description
total_steps: Total number of steps for progress tracking
pipe: Text of the originating pipe/prompt, if any
Returns:
True if worker was inserted successfully
"""
try:
            result = self.db.insert_worker(
                worker_id, worker_type, title, description, total_steps, pipe=pipe
            )
if result > 0:
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
self._start_refresh_if_needed()
@@ -384,65 +407,69 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
return False
    def update_worker(
        self,
        worker_id: str,
        progress: float = 0.0,
        current_step: str = "",
        details: str = "",
        error: str = "",
    ) -> bool:
"""Update worker progress and status.
Args:
worker_id: Unique identifier for the worker
progress: Progress percentage (0-100)
current_step: Current step description
details: Additional details
error: Error message if any
Returns:
True if update was successful
"""
try:
kwargs = {}
if progress > 0:
                kwargs["progress"] = progress
            if current_step:
                kwargs["current_step"] = current_step
            if details:
                kwargs["description"] = details
            if error:
                kwargs["error_message"] = error
            if kwargs:
                kwargs["last_updated"] = datetime.now().isoformat()
                if "current_step" in kwargs and kwargs["current_step"]:
                    self._worker_last_step[worker_id] = str(kwargs["current_step"])
return self.db.update_worker(worker_id, **kwargs)
return True
except Exception as e:
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
return False
    def finish_worker(
        self, worker_id: str, result: str = "completed", error_msg: str = "", result_data: str = ""
    ) -> bool:
"""Mark a worker as finished.
Args:
worker_id: Unique identifier for the worker
result: Result status ('completed', 'error', 'cancelled')
error_msg: Error message if any
result_data: Result data as JSON string
Returns:
True if update was successful
"""
try:
            kwargs = {"status": result, "completed_at": datetime.now().isoformat()}
            if error_msg:
                kwargs["error_message"] = error_msg
            if result_data:
                kwargs["result_data"] = result_data
success = self.db.update_worker(worker_id, **kwargs)
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
self._worker_last_step.pop(worker_id, None)
@@ -450,10 +477,10 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
return False
def get_active_workers(self) -> List[Dict[str, Any]]:
"""Get all active (running) workers.
Returns:
List of active worker dictionaries
"""
@@ -462,31 +489,33 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
return []
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
"""Get all finished workers (completed, errored, or cancelled).
Args:
limit: Maximum number of workers to retrieve
Returns:
List of finished worker dictionaries
"""
try:
all_workers = self.db.get_all_workers(limit=limit)
# Filter to only finished workers
            finished = [
                w for w in all_workers if w.get("status") in ["completed", "error", "cancelled"]
            ]
return finished
except Exception as e:
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
return []
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
"""Get a specific worker's data.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker data or None if not found
"""
@@ -495,18 +524,18 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
return None
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
"""Fetch recorded worker timeline events."""
return self.db.get_worker_events(worker_id, limit)
def log_step(self, worker_id: str, step_text: str) -> bool:
"""Log a step to a worker's step history.
Args:
worker_id: Unique identifier for the worker
step_text: Step description to log
Returns:
True if successful
"""
@@ -516,43 +545,49 @@ class WorkerManager:
self._worker_last_step[worker_id] = step_text
return success
except Exception as e:
            logger.error(
                f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True
            )
return False
def _get_last_step(self, worker_id: str) -> Optional[str]:
"""Return the most recent step description for a worker."""
return self._worker_last_step.get(worker_id)
def get_steps(self, worker_id: str) -> str:
"""Get step logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Steps text or empty string if not found
"""
try:
return self.db.get_worker_steps(worker_id)
except Exception as e:
            logger.error(
                f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True
            )
            return ""
def start_auto_refresh(self) -> None:
"""Start the auto-refresh thread for periodic worker updates."""
if self.auto_refresh_interval <= 0:
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
return
if self.refresh_thread and self.refresh_thread.is_alive():
logger.debug("[WorkerManager] Auto-refresh already running")
return
        logger.info(
            f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval"
        )
self._stop_refresh = False
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
self.refresh_thread.start()
def stop_auto_refresh(self) -> None:
"""Stop the auto-refresh thread."""
logger.info("[WorkerManager] Stopping auto-refresh")
@@ -560,46 +595,48 @@ class WorkerManager:
if self.refresh_thread:
self.refresh_thread.join(timeout=5)
self.refresh_thread = None
def _start_refresh_if_needed(self) -> None:
"""Start auto-refresh if we have active workers and callbacks."""
active = self.get_active_workers()
if active and self.refresh_callbacks and not self._stop_refresh:
self.start_auto_refresh()
def _auto_refresh_loop(self) -> None:
"""Main auto-refresh loop that periodically queries and notifies."""
try:
while not self._stop_refresh:
time.sleep(self.auto_refresh_interval)
# Check if there are active workers
active = self.get_active_workers()
if not active:
# No more active workers, stop refreshing
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
break
# Call all registered callbacks with the active workers
with self._lock:
for callback in self.refresh_callbacks:
try:
callback(active)
except Exception as e:
                            logger.error(
                                f"[WorkerManager] Error in refresh callback: {e}", exc_info=True
                            )
except Exception as e:
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
finally:
logger.debug("[WorkerManager] Auto-refresh loop ended")
def cleanup_old_workers(self, days: int = 7) -> int:
"""Clean up completed/errored workers older than specified days.
Args:
days: Delete workers completed more than this many days ago
Returns:
Number of workers deleted
"""
@@ -611,15 +648,15 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
return 0
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Append text to a worker's stdout log.
Args:
worker_id: Unique identifier for the worker
text: Text to append
channel: Logical channel (stdout, stderr, log, etc.)
Returns:
True if append was successful
"""
@@ -629,13 +666,13 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
return False
def get_stdout(self, worker_id: str) -> str:
"""Get stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
Worker's stdout or empty string
"""
@@ -644,17 +681,17 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
return ""
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
"""Compatibility wrapper for append_stdout."""
return self.append_stdout(worker_id, text, channel=channel)
def clear_stdout(self, worker_id: str) -> bool:
"""Clear stdout logs for a worker.
Args:
worker_id: Unique identifier for the worker
Returns:
True if clear was successful
"""
@@ -663,7 +700,7 @@ class WorkerManager:
except Exception as e:
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
return False
def close(self) -> None:
"""Close the worker manager and database connection."""
self.stop_auto_refresh()