This commit is contained in:
2026-01-02 02:28:59 -08:00
parent deb05c0d44
commit 6e9a0c28ff
13 changed files with 1402 additions and 2334 deletions

View File

@@ -263,6 +263,9 @@ class WorkerManager:
self.refresh_thread: Optional[Thread] = None
self._stop_refresh = False
self._lock = Lock()
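# Reuse the DB's own lock so that exactly one lock guards the sqlite
# connection. NOTE: nested acquisition (this manager taking the lock and
# then calling DB methods that may take it again) is only safe if that
# lock is re-entrant, e.g. a threading.RLock -- confirm against the DB class.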
self._db_lock = self.db._db_lock
self.worker_handlers: Dict[str,
WorkerLoggingHandler] = {} # Track active handlers
self._worker_last_step: Dict[str,
@@ -272,7 +275,8 @@ class WorkerManager:
"""Close the database connection."""
if self.db:
try:
self.db.close()
with self._db_lock:
self.db.close()
except Exception:
pass
@@ -317,12 +321,13 @@ class WorkerManager:
Count of workers updated.
"""
try:
return self.db.expire_running_workers(
older_than_seconds=older_than_seconds,
status=status,
reason=reason,
worker_id_prefix=worker_id_prefix,
)
with self._db_lock:
return self.db.expire_running_workers(
older_than_seconds=older_than_seconds,
status=status,
reason=reason,
worker_id_prefix=worker_id_prefix,
)
except Exception as exc:
logger.error(f"Failed to expire stale workers: {exc}", exc_info=True)
return 0
@@ -419,14 +424,15 @@ class WorkerManager:
True if worker was inserted successfully
"""
try:
result = self.db.insert_worker(
worker_id,
worker_type,
title,
description,
total_steps,
pipe=pipe
)
with self._db_lock:
result = self.db.insert_worker(
worker_id,
worker_type,
title,
description,
total_steps,
pipe=pipe
)
if result > 0:
logger.debug(
f"[WorkerManager] Tracking worker: {worker_id} ({worker_type})"
@@ -473,7 +479,8 @@ class WorkerManager:
kwargs["last_updated"] = datetime.now().isoformat()
if "current_step" in kwargs and kwargs["current_step"]:
self._worker_last_step[worker_id] = str(kwargs["current_step"])
return self.db.update_worker(worker_id, **kwargs)
with self._db_lock:
return self.db.update_worker(worker_id, **kwargs)
return True
except Exception as e:
logger.error(
@@ -510,7 +517,8 @@ class WorkerManager:
if result_data:
kwargs["result_data"] = result_data
success = self.db.update_worker(worker_id, **kwargs)
with self._db_lock:
success = self.db.update_worker(worker_id, **kwargs)
logger.info(f"[WorkerManager] Worker finished: {worker_id} ({result})")
self._worker_last_step.pop(worker_id, None)
return success
@@ -528,7 +536,8 @@ class WorkerManager:
List of active worker dictionaries
"""
try:
return self.db.get_active_workers()
with self._db_lock:
return self.db.get_active_workers()
except Exception as e:
logger.error(
f"[WorkerManager] Error getting active workers: {e}",
@@ -546,7 +555,8 @@ class WorkerManager:
List of finished worker dictionaries
"""
try:
all_workers = self.db.get_all_workers(limit=limit)
with self._db_lock:
all_workers = self.db.get_all_workers(limit=limit)
# Filter to only finished workers
finished = [
w for w in all_workers
@@ -570,7 +580,8 @@ class WorkerManager:
Worker data or None if not found
"""
try:
return self.db.get_worker(worker_id)
with self._db_lock:
return self.db.get_worker(worker_id)
except Exception as e:
logger.error(
f"[WorkerManager] Error getting worker {worker_id}: {e}",
@@ -583,7 +594,8 @@ class WorkerManager:
limit: int = 500) -> List[Dict[str,
Any]]:
"""Fetch recorded worker timeline events."""
return self.db.get_worker_events(worker_id, limit)
with self._db_lock:
return self.db.get_worker_events(worker_id, limit)
def log_step(self, worker_id: str, step_text: str) -> bool:
"""Log a step to a worker's step history.
@@ -596,7 +608,8 @@ class WorkerManager:
True if successful
"""
try:
success = self.db.append_worker_steps(worker_id, step_text)
with self._db_lock:
success = self.db.append_worker_steps(worker_id, step_text)
if success:
self._worker_last_step[worker_id] = step_text
return success
@@ -621,7 +634,8 @@ class WorkerManager:
Steps text or empty string if not found
"""
try:
return self.db.get_worker_steps(worker_id)
with self._db_lock:
return self.db.get_worker_steps(worker_id)
except Exception as e:
logger.error(
f"[WorkerManager] Error getting steps for worker {worker_id}: {e}",
@@ -705,7 +719,8 @@ class WorkerManager:
Number of workers deleted
"""
try:
count = self.db.cleanup_old_workers(days)
with self._db_lock:
count = self.db.cleanup_old_workers(days)
if count > 0:
logger.info(f"[WorkerManager] Cleaned up {count} old workers")
return count
@@ -729,12 +744,13 @@ class WorkerManager:
"""
try:
step_label = self._get_last_step(worker_id)
return self.db.append_worker_stdout(
worker_id,
text,
step=step_label,
channel=channel
)
with self._db_lock:
return self.db.append_worker_stdout(
worker_id,
text,
step=step_label,
channel=channel
)
except Exception as e:
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
return False
@@ -749,7 +765,8 @@ class WorkerManager:
Worker's stdout or empty string
"""
try:
return self.db.get_worker_stdout(worker_id)
with self._db_lock:
return self.db.get_worker_stdout(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error getting stdout: {e}", exc_info=True)
return ""
@@ -773,7 +790,8 @@ class WorkerManager:
True if clear was successful
"""
try:
return self.db.clear_worker_stdout(worker_id)
with self._db_lock:
return self.db.clear_worker_stdout(worker_id)
except Exception as e:
logger.error(f"[WorkerManager] Error clearing stdout: {e}", exc_info=True)
return False
@@ -781,5 +799,6 @@ class WorkerManager:
def close(self) -> None:
"""Close the worker manager and database connection."""
self.stop_auto_refresh()
self.db.close()
with self._db_lock:
self.db.close()
logger.info("[WorkerManager] Closed")