This commit is contained in:
2026-01-18 03:18:48 -08:00
parent 3f874af54a
commit aa675a625a
8 changed files with 508 additions and 161 deletions

View File

@@ -6,7 +6,7 @@ persistence to database and optional auto-refresh callbacks.
import logging
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable
from typing import Optional, Dict, Any, List, Callable, Tuple
from datetime import datetime
from threading import Thread, Lock
import time
@@ -270,6 +270,13 @@ class WorkerManager:
WorkerLoggingHandler] = {} # Track active handlers
self._worker_last_step: Dict[str,
str] = {}
# Buffered stdout/log batching to reduce DB lock contention.
self._stdout_buffers: Dict[Tuple[str, str], List[str]] = {}
self._stdout_buffer_sizes: Dict[Tuple[str, str], int] = {}
self._stdout_buffer_steps: Dict[Tuple[str, str], Optional[str]] = {}
self._stdout_last_flush: Dict[Tuple[str, str], float] = {}
self._stdout_flush_bytes = 4096
self._stdout_flush_interval = 0.75
def close(self) -> None:
"""Close the database connection."""
@@ -392,9 +399,15 @@ class WorkerManager:
root_logger = logging.getLogger()
root_logger.removeHandler(handler)
logger.debug(
f"[WorkerManager] Disabled logging for worker: {worker_id}"
)
# Flush any buffered stdout/log data for this worker
try:
self.flush_worker_stdout(worker_id)
except Exception:
pass
logger.debug(
f"[WorkerManager] Disabled logging for worker: {worker_id}"
)
except Exception as e:
logger.error(
f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}",
@@ -508,6 +521,10 @@ class WorkerManager:
True if update was successful
"""
try:
try:
self.flush_worker_stdout(worker_id)
except Exception:
pass
kwargs = {
"status": result,
"completed_at": datetime.now().isoformat()
@@ -742,17 +759,119 @@ class WorkerManager:
Returns:
True if append was successful
"""
if not text:
return True
now = time.monotonic()
step_label = self._get_last_step(worker_id)
key = (worker_id, channel)
pending_flush: List[Tuple[str, str, Optional[str], str]] = []
try:
with self._lock:
# Initialize last flush time for this buffer
if key not in self._stdout_last_flush:
self._stdout_last_flush[key] = now
current_step = self._stdout_buffer_steps.get(key)
if current_step is None:
self._stdout_buffer_steps[key] = step_label
current_step = step_label
# If step changes, flush existing buffer to keep step tags coherent
if current_step != step_label:
buffered = "".join(self._stdout_buffers.get(key, []))
if buffered:
pending_flush.append((worker_id, channel, current_step, buffered))
self._stdout_buffers[key] = []
self._stdout_buffer_sizes[key] = 0
self._stdout_last_flush[key] = now
self._stdout_buffer_steps[key] = step_label
buf = self._stdout_buffers.setdefault(key, [])
buf.append(text)
size = self._stdout_buffer_sizes.get(key, 0) + len(text)
self._stdout_buffer_sizes[key] = size
last_flush = self._stdout_last_flush.get(key, now)
should_flush = (
size >= self._stdout_flush_bytes
or (now - last_flush) >= self._stdout_flush_interval
)
if should_flush:
buffered = "".join(self._stdout_buffers.get(key, []))
if buffered:
pending_flush.append(
(worker_id, channel, self._stdout_buffer_steps.get(key), buffered)
)
self._stdout_buffers[key] = []
self._stdout_buffer_sizes[key] = 0
self._stdout_last_flush[key] = now
self._stdout_buffer_steps[key] = None
except Exception as e:
logger.error(f"[WorkerManager] Error buffering stdout: {e}", exc_info=True)
return False
ok = True
for wid, ch, step, payload in pending_flush:
try:
with self._db_lock:
result = self.db.append_worker_stdout(
wid,
payload,
step=step,
channel=ch
)
ok = ok and result
except Exception as e:
logger.error(
f"[WorkerManager] Error flushing stdout for {wid}: {e}",
exc_info=True,
)
ok = False
return ok
def flush_worker_stdout(self, worker_id: str) -> bool:
    """Force out any buffered stdout/log text belonging to *worker_id*.

    Snapshots the matching (worker_id, channel) buffer keys under the
    lock, then flushes each channel outside the lock.

    Returns:
        True when every per-channel flush succeeded.
    """
    with self._lock:
        matching = [k for k in self._stdout_buffers if k[0] == worker_id]
    # Flush every channel even if an earlier one fails, then AND the results.
    results = [self._flush_stdout_buffer(wid, ch) for wid, ch in matching]
    return all(results)
def _flush_stdout_buffer(self, worker_id: str, channel: str) -> bool:
key = (worker_id, channel)
with self._lock:
chunks = self._stdout_buffers.get(key)
if not chunks:
return True
text = "".join(chunks)
step = self._stdout_buffer_steps.get(key)
self._stdout_buffers[key] = []
self._stdout_buffer_sizes[key] = 0
self._stdout_last_flush[key] = time.monotonic()
self._stdout_buffer_steps[key] = None
if not text:
return True
try:
step_label = self._get_last_step(worker_id)
with self._db_lock:
return self.db.append_worker_stdout(
worker_id,
text,
step=step_label,
channel=channel
step=step,
channel=channel,
)
except Exception as e:
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
logger.error(
f"[WorkerManager] Error flushing stdout for {worker_id}: {e}",
exc_info=True,
)
return False
def get_stdout(self, worker_id: str) -> str:
@@ -799,6 +918,17 @@ class WorkerManager:
def close(self) -> None:
    """Close the worker manager and database connection.

    Shutdown order matters: stop the auto-refresh thread first so no new
    writes arrive, flush any buffered stdout so it is persisted, then
    close the underlying database connection.
    """
    self.stop_auto_refresh()
    # Best-effort flush of buffered stdout; a failure here must not
    # prevent the database from being closed.
    try:
        self._flush_all_stdout_buffers()
    except Exception:
        pass
    # Serialize against other DB users before closing the connection.
    with self._db_lock:
        self.db.close()
    logger.info("[WorkerManager] Closed")
def _flush_all_stdout_buffers(self) -> None:
keys_to_flush: List[Tuple[str, str]] = []
with self._lock:
keys_to_flush = list(self._stdout_buffers.keys())
for wid, channel in keys_to_flush:
self._flush_stdout_buffer(wid, channel)