656 lines
24 KiB
Python
656 lines
24 KiB
Python
|
|
"""Worker task management with persistent database storage.
|
||
|
|
|
||
|
|
Manages worker tasks for downloads, searches, imports, etc. with automatic
|
||
|
|
persistence to database and optional auto-refresh callbacks.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import logging
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional, Dict, Any, List, Callable
|
||
|
|
from datetime import datetime
|
||
|
|
from threading import Thread, Lock
|
||
|
|
import time
|
||
|
|
|
||
|
|
from .local_library import LocalLibraryDB
|
||
|
|
from helper.logger import log
|
||
|
|
|
||
|
|
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class Worker:
|
||
|
|
"""Represents a single worker task with state management."""
|
||
|
|
|
||
|
|
def __init__(self, worker_id: str, worker_type: str, title: str = "",
|
||
|
|
description: str = "", manager: Optional['WorkerManager'] = None):
|
||
|
|
"""Initialize a worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for this worker
|
||
|
|
worker_type: Type of work (e.g., 'download', 'search', 'import')
|
||
|
|
title: Human-readable title
|
||
|
|
description: Detailed description
|
||
|
|
manager: Reference to parent WorkerManager for state updates
|
||
|
|
"""
|
||
|
|
self.id = worker_id
|
||
|
|
self.type = worker_type
|
||
|
|
self.title = title or worker_type
|
||
|
|
self.description = description
|
||
|
|
self.manager = manager
|
||
|
|
self.status = "running"
|
||
|
|
self.progress = ""
|
||
|
|
self.details = ""
|
||
|
|
self.error_message = ""
|
||
|
|
self.result = "pending"
|
||
|
|
self._stdout_buffer = []
|
||
|
|
self._steps_buffer = []
|
||
|
|
|
||
|
|
def log_step(self, step_text: str) -> None:
|
||
|
|
"""Log a step for this worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
step_text: Text describing the step
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
self.manager.log_step(self.id, step_text)
|
||
|
|
else:
|
||
|
|
logger.info(f"[{self.id}] {step_text}")
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"Error logging step for worker {self.id}: {e}")
|
||
|
|
|
||
|
|
def append_stdout(self, text: str) -> None:
|
||
|
|
"""Append text to stdout log.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
text: Text to append
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
self.manager.append_worker_stdout(self.id, text)
|
||
|
|
else:
|
||
|
|
self._stdout_buffer.append(text)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"Error appending stdout for worker {self.id}: {e}")
|
||
|
|
|
||
|
|
def get_stdout(self) -> str:
|
||
|
|
"""Get all stdout for this worker.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Complete stdout text
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
return self.manager.get_stdout(self.id)
|
||
|
|
else:
|
||
|
|
return "\n".join(self._stdout_buffer)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"Error getting stdout for worker {self.id}: {e}")
|
||
|
|
return ""
|
||
|
|
|
||
|
|
def get_steps(self) -> str:
|
||
|
|
"""Get all steps for this worker.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Complete steps text
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
return self.manager.get_steps(self.id)
|
||
|
|
else:
|
||
|
|
return "\n".join(self._steps_buffer)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"Error getting steps for worker {self.id}: {e}")
|
||
|
|
return ""
|
||
|
|
|
||
|
|
def update_progress(self, progress: str = "", details: str = "") -> None:
|
||
|
|
"""Update worker progress.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
progress: Progress string (e.g., "50%")
|
||
|
|
details: Additional details
|
||
|
|
"""
|
||
|
|
self.progress = progress
|
||
|
|
self.details = details
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
self.manager.update_worker(self.id, progress, details)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"Error updating worker {self.id}: {e}")
|
||
|
|
|
||
|
|
def finish(self, result: str = "completed", message: str = "") -> None:
|
||
|
|
"""Mark worker as finished.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
result: Result status ('completed', 'error', 'cancelled')
|
||
|
|
message: Result message/error details
|
||
|
|
"""
|
||
|
|
self.result = result
|
||
|
|
self.status = "finished"
|
||
|
|
self.error_message = message
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
# Flush and disable logging handler before marking finished
|
||
|
|
self.manager.disable_logging_for_worker(self.id)
|
||
|
|
# Then mark as finished in database
|
||
|
|
self.manager.finish_worker(self.id, result, message)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"Error finishing worker {self.id}: {e}")
|
||
|
|
|
||
|
|
|
||
|
|
class WorkerLoggingHandler(logging.StreamHandler):
|
||
|
|
"""Custom logging handler that captures logs for a worker."""
|
||
|
|
|
||
|
|
def __init__(self, worker_id: str, db: LocalLibraryDB,
|
||
|
|
manager: Optional['WorkerManager'] = None,
|
||
|
|
buffer_size: int = 50):
|
||
|
|
"""Initialize the handler.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: ID of the worker to capture logs for
|
||
|
|
db: Reference to LocalLibraryDB for storing logs
|
||
|
|
buffer_size: Number of logs to buffer before flushing to DB
|
||
|
|
"""
|
||
|
|
super().__init__()
|
||
|
|
self.worker_id = worker_id
|
||
|
|
self.db = db
|
||
|
|
self.manager = manager
|
||
|
|
self.buffer_size = buffer_size
|
||
|
|
self.buffer = []
|
||
|
|
self._lock = Lock()
|
||
|
|
|
||
|
|
# Set a format that includes timestamp and level
|
||
|
|
formatter = logging.Formatter(
|
||
|
|
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||
|
|
datefmt='%Y-%m-%d %H:%M:%S'
|
||
|
|
)
|
||
|
|
self.setFormatter(formatter)
|
||
|
|
|
||
|
|
def emit(self, record):
|
||
|
|
"""Emit a log record."""
|
||
|
|
try:
|
||
|
|
# Try to format the record normally
|
||
|
|
try:
|
||
|
|
msg = self.format(record)
|
||
|
|
except (TypeError, ValueError):
|
||
|
|
# If formatting fails (e.g., %d format with non-int arg),
|
||
|
|
# build message manually without calling getMessage()
|
||
|
|
try:
|
||
|
|
# Try to format with args if possible
|
||
|
|
if record.args:
|
||
|
|
msg = record.msg % record.args
|
||
|
|
else:
|
||
|
|
msg = record.msg
|
||
|
|
except (TypeError, ValueError):
|
||
|
|
# If that fails too, just use the raw message string
|
||
|
|
msg = str(record.msg)
|
||
|
|
|
||
|
|
# Add timestamp and level if not already in message
|
||
|
|
import time
|
||
|
|
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(record.created))
|
||
|
|
msg = f"{timestamp} - {record.name} - {record.levelname} - {msg}"
|
||
|
|
|
||
|
|
with self._lock:
|
||
|
|
self.buffer.append(msg)
|
||
|
|
|
||
|
|
# Flush to DB when buffer reaches size
|
||
|
|
if len(self.buffer) >= self.buffer_size:
|
||
|
|
self._flush()
|
||
|
|
except Exception:
|
||
|
|
self.handleError(record)
|
||
|
|
|
||
|
|
def _flush(self):
|
||
|
|
"""Flush buffered logs to database."""
|
||
|
|
if self.buffer:
|
||
|
|
log_text = '\n'.join(self.buffer)
|
||
|
|
try:
|
||
|
|
if self.manager:
|
||
|
|
self.manager.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||
|
|
else:
|
||
|
|
self.db.append_worker_stdout(self.worker_id, log_text, channel='log')
|
||
|
|
except Exception as e:
|
||
|
|
# If we can't write to DB, at least log it
|
||
|
|
log(f"Error flushing worker logs: {e}")
|
||
|
|
self.buffer = []
|
||
|
|
|
||
|
|
def flush(self):
|
||
|
|
"""Flush any buffered records."""
|
||
|
|
with self._lock:
|
||
|
|
self._flush()
|
||
|
|
super().flush()
|
||
|
|
|
||
|
|
def close(self):
|
||
|
|
"""Close the handler."""
|
||
|
|
self.flush()
|
||
|
|
super().close()
|
||
|
|
|
||
|
|
|
||
|
|
class WorkerManager:
|
||
|
|
"""Manages persistent worker tasks with auto-refresh capability."""
|
||
|
|
|
||
|
|
def __init__(self, library_root: Path, auto_refresh_interval: float = 2.0):
|
||
|
|
"""Initialize the worker manager.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
library_root: Root directory for the local library database
|
||
|
|
auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled)
|
||
|
|
"""
|
||
|
|
self.library_root = Path(library_root)
|
||
|
|
self.db = LocalLibraryDB(library_root)
|
||
|
|
self.auto_refresh_interval = auto_refresh_interval
|
||
|
|
self.refresh_callbacks: List[Callable] = []
|
||
|
|
self.refresh_thread: Optional[Thread] = None
|
||
|
|
self._stop_refresh = False
|
||
|
|
self._lock = Lock()
|
||
|
|
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers
|
||
|
|
self._worker_last_step: Dict[str, str] = {}
|
||
|
|
|
||
|
|
def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None:
|
||
|
|
"""Register a callback to be called on worker updates.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
callback: Function that receives list of active workers
|
||
|
|
"""
|
||
|
|
with self._lock:
|
||
|
|
self.refresh_callbacks.append(callback)
|
||
|
|
|
||
|
|
def expire_running_workers(
|
||
|
|
self,
|
||
|
|
older_than_seconds: int = 300,
|
||
|
|
worker_id_prefix: Optional[str] = None,
|
||
|
|
reason: Optional[str] = None,
|
||
|
|
status: str = "error",
|
||
|
|
) -> int:
|
||
|
|
"""Mark stale running workers as finished.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
older_than_seconds: Idle threshold before expiring.
|
||
|
|
worker_id_prefix: Optional wildcard filter (e.g., 'cli_%').
|
||
|
|
reason: Error message if none already exists.
|
||
|
|
status: New status to apply.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Count of workers updated.
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
return self.db.expire_running_workers(
|
||
|
|
older_than_seconds=older_than_seconds,
|
||
|
|
status=status,
|
||
|
|
reason=reason,
|
||
|
|
worker_id_prefix=worker_id_prefix,
|
||
|
|
)
|
||
|
|
except Exception as exc:
|
||
|
|
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
|
||
|
|
return 0
|
||
|
|
|
||
|
|
def remove_refresh_callback(self, callback: Callable) -> None:
|
||
|
|
"""Remove a refresh callback.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
callback: The callback function to remove
|
||
|
|
"""
|
||
|
|
with self._lock:
|
||
|
|
if callback in self.refresh_callbacks:
|
||
|
|
self.refresh_callbacks.remove(callback)
|
||
|
|
|
||
|
|
def enable_logging_for_worker(self, worker_id: str) -> Optional[WorkerLoggingHandler]:
|
||
|
|
"""Enable logging capture for a worker.
|
||
|
|
|
||
|
|
Creates a logging handler that captures all logs for this worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: ID of the worker to capture logs for
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
The logging handler that was created, or None if there was an error
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
handler = WorkerLoggingHandler(worker_id, self.db, manager=self)
|
||
|
|
with self._lock:
|
||
|
|
self.worker_handlers[worker_id] = handler
|
||
|
|
|
||
|
|
# Add the handler to the root logger so it captures all logs
|
||
|
|
root_logger = logging.getLogger()
|
||
|
|
root_logger.addHandler(handler)
|
||
|
|
root_logger.setLevel(logging.DEBUG) # Capture all levels
|
||
|
|
|
||
|
|
logger.debug(f"[WorkerManager] Enabled logging for worker: {worker_id}")
|
||
|
|
return handler
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error enabling logging for worker {worker_id}: {e}", exc_info=True)
|
||
|
|
return None
|
||
|
|
|
||
|
|
def disable_logging_for_worker(self, worker_id: str) -> None:
|
||
|
|
"""Disable logging capture for a worker and flush any pending logs.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: ID of the worker to stop capturing logs for
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
with self._lock:
|
||
|
|
handler = self.worker_handlers.pop(worker_id, None)
|
||
|
|
|
||
|
|
if handler:
|
||
|
|
# Flush and close the handler
|
||
|
|
handler.flush()
|
||
|
|
handler.close()
|
||
|
|
|
||
|
|
# Remove from root logger
|
||
|
|
root_logger = logging.getLogger()
|
||
|
|
root_logger.removeHandler(handler)
|
||
|
|
|
||
|
|
logger.debug(f"[WorkerManager] Disabled logging for worker: {worker_id}")
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}", exc_info=True)
|
||
|
|
|
||
|
|
def track_worker(self, worker_id: str, worker_type: str, title: str = "",
|
||
|
|
description: str = "", total_steps: int = 0,
|
||
|
|
pipe: Optional[str] = None) -> bool:
|
||
|
|
"""Start tracking a new worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
worker_type: Type of worker (e.g., 'download', 'search', 'import')
|
||
|
|
title: Worker title/name
|
||
|
|
description: Worker description
|
||
|
|
total_steps: Total number of steps for progress tracking
|
||
|
|
pipe: Text of the originating pipe/prompt, if any
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
True if worker was inserted successfully
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
result = self.db.insert_worker(worker_id, worker_type, title, description, total_steps, pipe=pipe)
|
||
|
|
if result > 0:
|
||
|
|
logger.debug(f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})")
|
||
|
|
self._start_refresh_if_needed()
|
||
|
|
return True
|
||
|
|
return False
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error tracking worker: {e}", exc_info=True)
|
||
|
|
return False
|
||
|
|
|
||
|
|
def update_worker(self, worker_id: str, progress: float = 0.0, current_step: str = "",
|
||
|
|
details: str = "", error: str = "") -> bool:
|
||
|
|
"""Update worker progress and status.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
progress: Progress percentage (0-100)
|
||
|
|
current_step: Current step description
|
||
|
|
details: Additional details
|
||
|
|
error: Error message if any
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
True if update was successful
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
kwargs = {}
|
||
|
|
if progress > 0:
|
||
|
|
kwargs['progress'] = progress
|
||
|
|
if current_step:
|
||
|
|
kwargs['current_step'] = current_step
|
||
|
|
if details:
|
||
|
|
kwargs['description'] = details
|
||
|
|
if error:
|
||
|
|
kwargs['error_message'] = error
|
||
|
|
|
||
|
|
if kwargs:
|
||
|
|
kwargs['last_updated'] = datetime.now().isoformat()
|
||
|
|
if 'current_step' in kwargs and kwargs['current_step']:
|
||
|
|
self._worker_last_step[worker_id] = str(kwargs['current_step'])
|
||
|
|
return self.db.update_worker(worker_id, **kwargs)
|
||
|
|
return True
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error updating worker {worker_id}: {e}", exc_info=True)
|
||
|
|
return False
|
||
|
|
|
||
|
|
def finish_worker(self, worker_id: str, result: str = "completed",
|
||
|
|
error_msg: str = "", result_data: str = "") -> bool:
|
||
|
|
"""Mark a worker as finished.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
result: Result status ('completed', 'error', 'cancelled')
|
||
|
|
error_msg: Error message if any
|
||
|
|
result_data: Result data as JSON string
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
True if update was successful
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
kwargs = {
|
||
|
|
'status': result,
|
||
|
|
'completed_at': datetime.now().isoformat()
|
||
|
|
}
|
||
|
|
if error_msg:
|
||
|
|
kwargs['error_message'] = error_msg
|
||
|
|
if result_data:
|
||
|
|
kwargs['result_data'] = result_data
|
||
|
|
|
||
|
|
success = self.db.update_worker(worker_id, **kwargs)
|
||
|
|
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
|
||
|
|
self._worker_last_step.pop(worker_id, None)
|
||
|
|
return success
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error finishing worker {worker_id}: {e}", exc_info=True)
|
||
|
|
return False
|
||
|
|
|
||
|
|
def get_active_workers(self) -> List[Dict[str, Any]]:
|
||
|
|
"""Get all active (running) workers.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
List of active worker dictionaries
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
return self.db.get_active_workers()
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error getting active workers: {e}", exc_info=True)
|
||
|
|
return []
|
||
|
|
|
||
|
|
def get_finished_workers(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||
|
|
"""Get all finished workers (completed, errored, or cancelled).
|
||
|
|
|
||
|
|
Args:
|
||
|
|
limit: Maximum number of workers to retrieve
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
List of finished worker dictionaries
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
all_workers = self.db.get_all_workers(limit=limit)
|
||
|
|
# Filter to only finished workers
|
||
|
|
finished = [w for w in all_workers if w.get('status') in ['completed', 'error', 'cancelled']]
|
||
|
|
return finished
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error getting finished workers: {e}", exc_info=True)
|
||
|
|
return []
|
||
|
|
|
||
|
|
def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]:
|
||
|
|
"""Get a specific worker's data.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Worker data or None if not found
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
return self.db.get_worker(worker_id)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error getting worker {worker_id}: {e}", exc_info=True)
|
||
|
|
return None
|
||
|
|
|
||
|
|
def get_worker_events(self, worker_id: str, limit: int = 500) -> List[Dict[str, Any]]:
|
||
|
|
"""Fetch recorded worker timeline events."""
|
||
|
|
return self.db.get_worker_events(worker_id, limit)
|
||
|
|
|
||
|
|
def log_step(self, worker_id: str, step_text: str) -> bool:
|
||
|
|
"""Log a step to a worker's step history.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
step_text: Step description to log
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
True if successful
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
success = self.db.append_worker_steps(worker_id, step_text)
|
||
|
|
if success:
|
||
|
|
self._worker_last_step[worker_id] = step_text
|
||
|
|
return success
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error logging step for worker {worker_id}: {e}", exc_info=True)
|
||
|
|
return False
|
||
|
|
|
||
|
|
def _get_last_step(self, worker_id: str) -> Optional[str]:
|
||
|
|
"""Return the most recent step description for a worker."""
|
||
|
|
return self._worker_last_step.get(worker_id)
|
||
|
|
|
||
|
|
def get_steps(self, worker_id: str) -> str:
|
||
|
|
"""Get step logs for a worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Steps text or empty string if not found
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
return self.db.get_worker_steps(worker_id)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error getting steps for worker {worker_id}: {e}", exc_info=True)
|
||
|
|
return ''
|
||
|
|
|
||
|
|
def start_auto_refresh(self) -> None:
|
||
|
|
"""Start the auto-refresh thread for periodic worker updates."""
|
||
|
|
if self.auto_refresh_interval <= 0:
|
||
|
|
logger.debug("[WorkerManager] Auto-refresh disabled (interval <= 0)")
|
||
|
|
return
|
||
|
|
|
||
|
|
if self.refresh_thread and self.refresh_thread.is_alive():
|
||
|
|
logger.debug("[WorkerManager] Auto-refresh already running")
|
||
|
|
return
|
||
|
|
|
||
|
|
logger.info(f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval")
|
||
|
|
self._stop_refresh = False
|
||
|
|
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
|
||
|
|
self.refresh_thread.start()
|
||
|
|
|
||
|
|
def stop_auto_refresh(self) -> None:
|
||
|
|
"""Stop the auto-refresh thread."""
|
||
|
|
logger.info("[WorkerManager] Stopping auto-refresh")
|
||
|
|
self._stop_refresh = True
|
||
|
|
if self.refresh_thread:
|
||
|
|
self.refresh_thread.join(timeout=5)
|
||
|
|
self.refresh_thread = None
|
||
|
|
|
||
|
|
def _start_refresh_if_needed(self) -> None:
|
||
|
|
"""Start auto-refresh if we have active workers and callbacks."""
|
||
|
|
active = self.get_active_workers()
|
||
|
|
if active and self.refresh_callbacks and not self._stop_refresh:
|
||
|
|
self.start_auto_refresh()
|
||
|
|
|
||
|
|
def _auto_refresh_loop(self) -> None:
|
||
|
|
"""Main auto-refresh loop that periodically queries and notifies."""
|
||
|
|
try:
|
||
|
|
while not self._stop_refresh:
|
||
|
|
time.sleep(self.auto_refresh_interval)
|
||
|
|
|
||
|
|
# Check if there are active workers
|
||
|
|
active = self.get_active_workers()
|
||
|
|
|
||
|
|
if not active:
|
||
|
|
# No more active workers, stop refreshing
|
||
|
|
logger.debug("[WorkerManager] No active workers, stopping auto-refresh")
|
||
|
|
break
|
||
|
|
|
||
|
|
# Call all registered callbacks with the active workers
|
||
|
|
with self._lock:
|
||
|
|
for callback in self.refresh_callbacks:
|
||
|
|
try:
|
||
|
|
callback(active)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error in refresh callback: {e}", exc_info=True)
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error in auto-refresh loop: {e}", exc_info=True)
|
||
|
|
finally:
|
||
|
|
logger.debug("[WorkerManager] Auto-refresh loop ended")
|
||
|
|
|
||
|
|
def cleanup_old_workers(self, days: int = 7) -> int:
|
||
|
|
"""Clean up completed/errored workers older than specified days.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
days: Delete workers completed more than this many days ago
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Number of workers deleted
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
count = self.db.cleanup_old_workers(days)
|
||
|
|
if count > 0:
|
||
|
|
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
|
||
|
|
return count
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error cleaning up old workers: {e}", exc_info=True)
|
||
|
|
return 0
|
||
|
|
|
||
|
|
def append_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||
|
|
"""Append text to a worker's stdout log.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
text: Text to append
|
||
|
|
channel: Logical channel (stdout, stderr, log, etc.)
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
True if append was successful
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
step_label = self._get_last_step(worker_id)
|
||
|
|
return self.db.append_worker_stdout(worker_id, text, step=step_label, channel=channel)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
|
||
|
|
return False
|
||
|
|
|
||
|
|
def get_stdout(self, worker_id: str) -> str:
|
||
|
|
"""Get stdout logs for a worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Worker's stdout or empty string
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
return self.db.get_worker_stdout(worker_id)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
|
||
|
|
return ""
|
||
|
|
|
||
|
|
def append_worker_stdout(self, worker_id: str, text: str, channel: str = "stdout") -> bool:
|
||
|
|
"""Compatibility wrapper for append_stdout."""
|
||
|
|
return self.append_stdout(worker_id, text, channel=channel)
|
||
|
|
|
||
|
|
def clear_stdout(self, worker_id: str) -> bool:
|
||
|
|
"""Clear stdout logs for a worker.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
worker_id: Unique identifier for the worker
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
True if clear was successful
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
return self.db.clear_worker_stdout(worker_id)
|
||
|
|
except Exception as e:
|
||
|
|
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
|
||
|
|
return False
|
||
|
|
|
||
|
|
def close(self) -> None:
|
||
|
|
"""Close the worker manager and database connection."""
|
||
|
|
self.stop_auto_refresh()
|
||
|
|
self.db.close()
|
||
|
|
logger.info("[WorkerManager] Closed")
|