d
This commit is contained in:
@@ -6,7 +6,7 @@ persistence to database and optional auto-refresh callbacks.
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List, Callable
|
||||
from typing import Optional, Dict, Any, List, Callable, Tuple
|
||||
from datetime import datetime
|
||||
from threading import Thread, Lock
|
||||
import time
|
||||
@@ -270,6 +270,13 @@ class WorkerManager:
|
||||
WorkerLoggingHandler] = {} # Track active handlers
|
||||
self._worker_last_step: Dict[str,
|
||||
str] = {}
|
||||
# Buffered stdout/log batching to reduce DB lock contention.
|
||||
self._stdout_buffers: Dict[Tuple[str, str], List[str]] = {}
|
||||
self._stdout_buffer_sizes: Dict[Tuple[str, str], int] = {}
|
||||
self._stdout_buffer_steps: Dict[Tuple[str, str], Optional[str]] = {}
|
||||
self._stdout_last_flush: Dict[Tuple[str, str], float] = {}
|
||||
self._stdout_flush_bytes = 4096
|
||||
self._stdout_flush_interval = 0.75
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the database connection."""
|
||||
@@ -392,9 +399,15 @@ class WorkerManager:
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.removeHandler(handler)
|
||||
|
||||
logger.debug(
|
||||
f"[WorkerManager] Disabled logging for worker: {worker_id}"
|
||||
)
|
||||
# Flush any buffered stdout/log data for this worker
|
||||
try:
|
||||
self.flush_worker_stdout(worker_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.debug(
|
||||
f"[WorkerManager] Disabled logging for worker: {worker_id}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[WorkerManager] Error disabling logging for worker {worker_id}: {e}",
|
||||
@@ -508,6 +521,10 @@ class WorkerManager:
|
||||
True if update was successful
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
self.flush_worker_stdout(worker_id)
|
||||
except Exception:
|
||||
pass
|
||||
kwargs = {
|
||||
"status": result,
|
||||
"completed_at": datetime.now().isoformat()
|
||||
@@ -742,17 +759,119 @@ class WorkerManager:
|
||||
Returns:
|
||||
True if append was successful
|
||||
"""
|
||||
if not text:
|
||||
return True
|
||||
|
||||
now = time.monotonic()
|
||||
step_label = self._get_last_step(worker_id)
|
||||
key = (worker_id, channel)
|
||||
pending_flush: List[Tuple[str, str, Optional[str], str]] = []
|
||||
|
||||
try:
|
||||
with self._lock:
|
||||
# Initialize last flush time for this buffer
|
||||
if key not in self._stdout_last_flush:
|
||||
self._stdout_last_flush[key] = now
|
||||
|
||||
current_step = self._stdout_buffer_steps.get(key)
|
||||
if current_step is None:
|
||||
self._stdout_buffer_steps[key] = step_label
|
||||
current_step = step_label
|
||||
|
||||
# If step changes, flush existing buffer to keep step tags coherent
|
||||
if current_step != step_label:
|
||||
buffered = "".join(self._stdout_buffers.get(key, []))
|
||||
if buffered:
|
||||
pending_flush.append((worker_id, channel, current_step, buffered))
|
||||
self._stdout_buffers[key] = []
|
||||
self._stdout_buffer_sizes[key] = 0
|
||||
self._stdout_last_flush[key] = now
|
||||
self._stdout_buffer_steps[key] = step_label
|
||||
|
||||
buf = self._stdout_buffers.setdefault(key, [])
|
||||
buf.append(text)
|
||||
size = self._stdout_buffer_sizes.get(key, 0) + len(text)
|
||||
self._stdout_buffer_sizes[key] = size
|
||||
|
||||
last_flush = self._stdout_last_flush.get(key, now)
|
||||
should_flush = (
|
||||
size >= self._stdout_flush_bytes
|
||||
or (now - last_flush) >= self._stdout_flush_interval
|
||||
)
|
||||
if should_flush:
|
||||
buffered = "".join(self._stdout_buffers.get(key, []))
|
||||
if buffered:
|
||||
pending_flush.append(
|
||||
(worker_id, channel, self._stdout_buffer_steps.get(key), buffered)
|
||||
)
|
||||
self._stdout_buffers[key] = []
|
||||
self._stdout_buffer_sizes[key] = 0
|
||||
self._stdout_last_flush[key] = now
|
||||
self._stdout_buffer_steps[key] = None
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error buffering stdout: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
ok = True
|
||||
for wid, ch, step, payload in pending_flush:
|
||||
try:
|
||||
with self._db_lock:
|
||||
result = self.db.append_worker_stdout(
|
||||
wid,
|
||||
payload,
|
||||
step=step,
|
||||
channel=ch
|
||||
)
|
||||
ok = ok and result
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[WorkerManager] Error flushing stdout for {wid}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
ok = False
|
||||
return ok
|
||||
|
||||
def flush_worker_stdout(self, worker_id: str) -> bool:
|
||||
"""Flush any buffered stdout/log data for a worker."""
|
||||
keys_to_flush: List[Tuple[str, str]] = []
|
||||
with self._lock:
|
||||
for key in list(self._stdout_buffers.keys()):
|
||||
if key[0] == worker_id:
|
||||
keys_to_flush.append(key)
|
||||
|
||||
ok = True
|
||||
for wid, channel in keys_to_flush:
|
||||
ok = self._flush_stdout_buffer(wid, channel) and ok
|
||||
return ok
|
||||
|
||||
def _flush_stdout_buffer(self, worker_id: str, channel: str) -> bool:
|
||||
key = (worker_id, channel)
|
||||
with self._lock:
|
||||
chunks = self._stdout_buffers.get(key)
|
||||
if not chunks:
|
||||
return True
|
||||
text = "".join(chunks)
|
||||
step = self._stdout_buffer_steps.get(key)
|
||||
self._stdout_buffers[key] = []
|
||||
self._stdout_buffer_sizes[key] = 0
|
||||
self._stdout_last_flush[key] = time.monotonic()
|
||||
self._stdout_buffer_steps[key] = None
|
||||
|
||||
if not text:
|
||||
return True
|
||||
try:
|
||||
step_label = self._get_last_step(worker_id)
|
||||
with self._db_lock:
|
||||
return self.db.append_worker_stdout(
|
||||
worker_id,
|
||||
text,
|
||||
step=step_label,
|
||||
channel=channel
|
||||
step=step,
|
||||
channel=channel,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[WorkerManager] Error appending stdout: {e}", exc_info=True)
|
||||
logger.error(
|
||||
f"[WorkerManager] Error flushing stdout for {worker_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
def get_stdout(self, worker_id: str) -> str:
|
||||
@@ -799,6 +918,17 @@ class WorkerManager:
|
||||
def close(self) -> None:
|
||||
"""Close the worker manager and database connection."""
|
||||
self.stop_auto_refresh()
|
||||
try:
|
||||
self._flush_all_stdout_buffers()
|
||||
except Exception:
|
||||
pass
|
||||
with self._db_lock:
|
||||
self.db.close()
|
||||
logger.info("[WorkerManager] Closed")
|
||||
|
||||
def _flush_all_stdout_buffers(self) -> None:
|
||||
keys_to_flush: List[Tuple[str, str]] = []
|
||||
with self._lock:
|
||||
keys_to_flush = list(self._stdout_buffers.keys())
|
||||
for wid, channel in keys_to_flush:
|
||||
self._flush_stdout_buffer(wid, channel)
|
||||
|
||||
Reference in New Issue
Block a user