2026-01-30 10:47:47 -08:00
parent a44b80fd1d
commit ab94c57244
5 changed files with 872 additions and 99 deletions

@@ -5,21 +5,37 @@ from __future__ import annotations
import json
import sqlite3
import time
import os
import traceback
import datetime
import sys
import getpass
import hashlib
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from SYS.logger import log
from SYS.utils import expand_path
from SYS.database import db, get_config_all, save_config_value, rows_to_config
SCRIPT_DIR = Path(__file__).resolve().parent
# Save lock settings (cross-process)
_SAVE_LOCK_DIRNAME = ".medios_save_lock"
_SAVE_LOCK_TIMEOUT = 30.0 # seconds to wait for save lock
_SAVE_LOCK_STALE_SECONDS = 3600 # consider lock stale after 1 hour
_CONFIG_CACHE: Dict[str, Any] = {}
_LAST_SAVED_CONFIG: Dict[str, Any] = {}
_CONFIG_SAVE_MAX_RETRIES = 5
_CONFIG_SAVE_RETRY_DELAY = 0.15
class ConfigSaveConflict(Exception):
"""Raised when a save would overwrite external changes present on disk."""
pass
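# Illustrative only (an assumption about intended usage, not part of this
# module's API): callers are expected to catch ConfigSaveConflict, reload,
# reapply their edits, and retry, e.g.:
#
#     try:
#         save_config(cfg)
#     except ConfigSaveConflict:
#         cfg = reload_config()          # pick up the external changes
#         cfg.update(my_local_edits)     # hypothetical dict of local edits
#         save_config(cfg)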
def global_config() -> List[Dict[str, Any]]:
"""Return configuration schema for global settings."""
return [
@@ -455,6 +471,70 @@ def load_config() -> Dict[str, Any]:
_sync_alldebrid_api_key(db_config)
_CONFIG_CACHE = db_config
_LAST_SAVED_CONFIG = deepcopy(db_config)
try:
# Log a compact summary to help detect startup overwrites/mismatches
provs = list(db_config.get("provider", {}).keys()) if isinstance(db_config.get("provider"), dict) else []
stores = list(db_config.get("store", {}).keys()) if isinstance(db_config.get("store"), dict) else []
mtime = None
try:
mtime = datetime.datetime.utcfromtimestamp(db.db_path.stat().st_mtime).isoformat() + "Z"
except Exception:
mtime = None
summary = (
f"Loaded config from {db.db_path.name}: providers={len(provs)} ({', '.join(provs[:10])}{'...' if len(provs)>10 else ''}), "
f"stores={len(stores)} ({', '.join(stores[:10])}{'...' if len(stores)>10 else ''}), mtime={mtime}"
)
log(summary)
# Detect whether the most recent audit entry recorded providers that are no
# longer present in the loaded config (a possible overwrite/restore)
try:
audit_path = Path(db.db_path).with_name("config_audit.log")
if audit_path.exists():
last_line = None
with audit_path.open("r", encoding="utf-8") as fh:
for line in fh:
if line and line.strip():
last_line = line
if last_line:
try:
last_entry = json.loads(last_line)
last_provs = set(last_entry.get("providers") or [])
current_provs = set(provs)
missing = sorted(list(last_provs - current_provs))
if missing:
log(
f"WARNING: Config mismatch on load - last saved providers {sorted(list(last_provs))} "
f"are missing from loaded config: {missing} (last saved {last_entry.get('dt')})"
)
try:
# Write a forensic mismatch record to help diagnose potential overwrites
mismatch_path = Path(db.db_path).with_name("config_mismatch.log")
record = {
"detected": datetime.datetime.utcnow().isoformat() + "Z",
"db": str(db.db_path),
"db_mtime": mtime,
"last_saved_dt": last_entry.get("dt"),
"last_saved_providers": sorted(list(last_provs)),
"missing": missing,
}
try:
backup_dir = Path(db.db_path).with_name("config_backups")
if backup_dir.exists():
files = sorted(backup_dir.glob("medios-backup-*.db"), key=lambda p: p.stat().st_mtime, reverse=True)
record["latest_backup"] = str(files[0]) if files else None
except Exception:
pass
with mismatch_path.open("a", encoding="utf-8") as fh:
fh.write(json.dumps(record) + "\n")
except Exception:
pass
except Exception:
pass
except Exception:
pass
except Exception:
pass
return db_config
_LAST_SAVED_CONFIG = {}
@@ -466,64 +546,320 @@ def reload_config() -> Dict[str, Any]:
return load_config()
def _acquire_save_lock(timeout: float = _SAVE_LOCK_TIMEOUT):
"""Acquire a cross-process save lock implemented as a directory.
Returns the Path to the created lock directory. Raises ConfigSaveConflict
if the lock cannot be acquired within the timeout.
"""
lock_dir = Path(db.db_path).with_name(_SAVE_LOCK_DIRNAME)
start = time.time()
while True:
try:
lock_dir.mkdir(exist_ok=False)
# Write owner metadata for diagnostics
try:
(lock_dir / "owner.json").write_text(json.dumps({
"pid": os.getpid(),
"ts": time.time(),
"cmdline": " ".join(sys.argv),
}))
except Exception:
pass
return lock_dir
except FileExistsError:
# Check for stale lock
try:
owner = lock_dir / "owner.json"
if owner.exists():
data = json.loads(owner.read_text())
ts = data.get("ts") or 0
if time.time() - ts > _SAVE_LOCK_STALE_SECONDS:
try:
import shutil
shutil.rmtree(lock_dir)
continue
except Exception:
pass
else:
# No owner file; if directory is old enough consider it stale
try:
if time.time() - lock_dir.stat().st_mtime > _SAVE_LOCK_STALE_SECONDS:
import shutil
shutil.rmtree(lock_dir)
continue
except Exception:
pass
except Exception:
pass
if time.time() - start > timeout:
raise ConfigSaveConflict("Save lock busy; could not acquire in time")
time.sleep(0.1)
def _release_save_lock(lock_dir: Path) -> None:
try:
owner = lock_dir / "owner.json"
try:
if owner.exists():
owner.unlink()
except Exception:
pass
lock_dir.rmdir()
except Exception:
pass
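# Usage sketch (illustrative): the pair above is meant to bracket a write in
# try/finally so the lock directory is always removed, even on error:
#
#     lock_dir = _acquire_save_lock()
#     try:
#         ...  # mutate the config table
#     finally:
#         _release_save_lock(lock_dir)
#
# mkdir() is atomic, which is what makes a directory usable as a
# cross-process mutex here.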
def save_config(config: Dict[str, Any]) -> int:
global _CONFIG_CACHE, _LAST_SAVED_CONFIG
_sync_alldebrid_api_key(config)
# Acquire the cross-process save lock so concurrent saves from different
# processes cannot race and overwrite each other at the DB level.
lock_dir = None
try:
lock_dir = _acquire_save_lock()
except ConfigSaveConflict:
# Surface a clear exception to callers so they can retry or handle it.
raise
previous_config = deepcopy(_LAST_SAVED_CONFIG)
changed_count = _count_changed_entries(previous_config, config)
def _write_entries() -> int:
global _CONFIG_CACHE, _LAST_SAVED_CONFIG
count = 0
# Use the transaction-provided connection directly rather than db.* helpers,
# which would re-acquire the connection lock and can deadlock.
with db.transaction() as conn:
# Detect concurrent changes by reading the current DB state inside the
# same transaction before mutating it. Use the transaction connection
# directly to avoid acquiring the connection lock again (deadlock).
try:
cur = conn.cursor()
cur.execute("SELECT category, subtype, item_name, key, value FROM config")
rows = cur.fetchall()
current_disk = rows_to_config(rows)
cur.close()
except Exception:
current_disk = {}
if current_disk != _LAST_SAVED_CONFIG:
# If we have no local changes, refresh caches and skip the write.
if changed_count == 0:
log("Skip save: disk configuration changed since last load and no local changes; not writing to DB.")
# Refresh local caches to match the disk
_CONFIG_CACHE = current_disk
_LAST_SAVED_CONFIG = deepcopy(current_disk)
return 0
# Otherwise, abort to avoid overwriting external changes
raise ConfigSaveConflict(
"Configuration on disk changed since you started editing; save aborted to prevent overwrite. Reload and reapply your changes."
)
# Proceed with writing when no conflicting external changes detected
conn.execute("DELETE FROM config")
for key, value in config.items():
if key in ('store', 'provider', 'tool') and isinstance(value, dict):
for subtype, instances in value.items():
if not isinstance(instances, dict):
continue
if key == 'store':
for name, settings in instances.items():
if isinstance(settings, dict):
for k, v in settings.items():
val_str = json.dumps(v) if not isinstance(v, str) else v
conn.execute(
"INSERT OR REPLACE INTO config (category, subtype, item_name, key, value) VALUES (?, ?, ?, ?, ?)",
(key, subtype, name, k, val_str),
)
count += 1
else:
normalized_subtype = subtype
if key == 'provider':
normalized_subtype = _normalize_provider_name(subtype)
if not normalized_subtype:
continue
for k, v in instances.items():
val_str = json.dumps(v) if not isinstance(v, str) else v
conn.execute(
"INSERT OR REPLACE INTO config (category, subtype, item_name, key, value) VALUES (?, ?, ?, ?, ?)",
(key, normalized_subtype, "default", k, val_str),
)
count += 1
else:
if not key.startswith("_") and value is not None:
save_config_value("global", "none", "none", key, value)
val_str = json.dumps(value) if not isinstance(value, str) else value
conn.execute(
"INSERT OR REPLACE INTO config (category, subtype, item_name, key, value) VALUES (?, ?, ?, ?, ?)",
("global", "none", "none", key, val_str),
)
count += 1
return count
saved_entries = 0
attempts = 0
while True:
try:
saved_entries = _write_entries()
# Central log entry
log(
f"Synced {saved_entries} entries to {db.db_path} "
f"({changed_count} changed entries)"
)
# Try to checkpoint the WAL so the main DB file reflects the latest state.
# Use a separate short-lived connection to perform the checkpoint so
# we don't contend with our main connection lock or active transactions.
try:
try:
with sqlite3.connect(str(db.db_path), timeout=5.0) as _con:
_con.execute("PRAGMA wal_checkpoint(TRUNCATE)")
except Exception:
with sqlite3.connect(str(db.db_path), timeout=5.0) as _con:
_con.execute("PRAGMA wal_checkpoint")
except Exception as exc:
log(f"Warning: WAL checkpoint failed: {exc}")
# Audit to disk so we can correlate saves across restarts and processes.
try:
audit_path = Path(db.db_path).with_name("config_audit.log")
# Gather non-secret summary info (provider/store names)
provider_names = []
store_names = []
try:
pblock = config.get("provider")
if isinstance(pblock, dict):
provider_names = [str(k) for k in pblock.keys()]
except Exception:
provider_names = []
try:
sblock = config.get("store")
if isinstance(sblock, dict):
store_names = [str(k) for k in sblock.keys()]
except Exception:
store_names = []
stack = traceback.format_stack()
# stack[-1] is this frame; the external caller is one frame up.
caller = stack[-2].strip() if len(stack) > 1 else ""
# Try to include the database file modification time for correlation
db_mtime = None
try:
db_mtime = datetime.datetime.utcfromtimestamp(db.db_path.stat().st_mtime).isoformat() + "Z"
except Exception:
db_mtime = None
# Create a consistent timestamped backup of the DB so we can recover later
backup_path = None
try:
backup_dir = Path(db.db_path).with_name("config_backups")
backup_dir.mkdir(parents=False, exist_ok=True)
ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
candidate = backup_dir / f"medios-backup-{ts}.db"
try:
# Use sqlite backup API for a consistent copy
src_con = sqlite3.connect(str(db.db_path))
dest_con = sqlite3.connect(str(candidate))
src_con.backup(dest_con)
dest_con.close()
src_con.close()
backup_path = str(candidate)
except Exception as e:
log(f"Warning: Failed to create DB backup: {e}")
# Prune older backups (keep last 20)
try:
files = sorted(backup_dir.glob("medios-backup-*.db"), key=lambda p: p.stat().st_mtime, reverse=True)
for old in files[20:]:
try:
old.unlink()
except Exception:
pass
except Exception:
pass
except Exception:
backup_path = None
# Collect process/exec info and a short hash of the config for forensic tracing
try:
exe = sys.executable
argv = list(sys.argv)
cwd = os.getcwd()
user = getpass.getuser()
try:
cfg_hash = hashlib.md5(json.dumps(config, sort_keys=True).encode('utf-8')).hexdigest()
except Exception:
cfg_hash = None
except Exception:
exe = None
argv = None
cwd = None
user = None
cfg_hash = None
entry = {
"ts": time.time(),
"dt": datetime.datetime.utcnow().isoformat() + "Z",
"pid": os.getpid(),
"exe": exe,
"argv": argv,
"cwd": cwd,
"user": user,
"stack": "".join(stack[-20:]),
"caller": caller,
"config_hash": cfg_hash,
"saved_entries": saved_entries,
"changed_count": changed_count,
"db": str(db.db_path),
"db_mtime": db_mtime,
"backup": backup_path,
"providers": provider_names,
"stores": store_names,
}
try:
with audit_path.open("a", encoding="utf-8") as fh:
fh.write(json.dumps(entry) + "\n")
except Exception:
# Best-effort; don't fail the save if audit write fails
log("Warning: Failed to write config audit file")
except Exception:
pass
finally:
# Release the save lock we acquired earlier
try:
if lock_dir is not None and lock_dir.exists():
_release_save_lock(lock_dir)
except Exception:
pass
break
except sqlite3.OperationalError as exc:
attempts += 1
locked_error = "locked" in str(exc).lower()
if not locked_error or attempts >= _CONFIG_SAVE_MAX_RETRIES:
log(f"CRITICAL: Database write failed: {exc}")
# Ensure we release the save lock before bubbling up the error
try:
if lock_dir is not None and lock_dir.exists():
_release_save_lock(lock_dir)
except Exception:
pass
raise
delay = _CONFIG_SAVE_RETRY_DELAY * attempts
log(f"Database locked; retry {attempts}/{_CONFIG_SAVE_MAX_RETRIES} in {delay:.2f}s")
time.sleep(delay)
except Exception as exc:
log(f"CRITICAL: Configuration save failed: {exc}")
try:
if lock_dir is not None and lock_dir.exists():
_release_save_lock(lock_dir)
except Exception:
pass
raise
clear_config_cache()
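# Illustrative: config_audit.log is NDJSON (one JSON object per line), so the
# most recent save can be inspected with a few lines of Python, assuming the
# file sits next to the database as written above:
#
#     import json
#     from pathlib import Path
#     lines = Path("config_audit.log").read_text(encoding="utf-8").splitlines()
#     last = json.loads(lines[-1])
#     print(last["dt"], last["providers"], last["backup"])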
@@ -540,3 +876,12 @@ def load() -> Dict[str, Any]:
def save(config: Dict[str, Any]) -> int:
"""Persist *config* back to disk."""
return save_config(config)
def count_changed_entries(config: Dict[str, Any]) -> int:
"""Return the number of changed configuration entries compared to the last saved snapshot.
This is useful for user-facing messages that want to indicate how many entries
were actually modified, not the total number of rows persisted to the database.
"""
return _count_changed_entries(_LAST_SAVED_CONFIG, config)
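# Usage sketch (illustrative): a front end might combine these helpers to
# report how many settings actually changed; "some_global_key" is a
# hypothetical key name:
#
#     cfg = load()
#     cfg["some_global_key"] = "new value"
#     changed = count_changed_entries(cfg)
#     save(cfg)
#     print(f"Saved {changed} changed setting(s)")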