This commit is contained in:
2026-01-16 01:47:00 -08:00
parent 41e95d0360
commit 12436e5a6a
4 changed files with 492 additions and 130 deletions

View File

@@ -217,6 +217,7 @@ class API_folder_store:
self.connection: Optional[sqlite3.Connection] = None
# Use the shared lock
self._db_lock = self._shared_db_lock
mm_debug(f"[folder-db] init: root={self.library_root} db={self.db_path}")
self._init_db()
@contextmanager
@@ -284,6 +285,7 @@ class API_folder_store:
"""Initialize database connection and create tables if needed."""
with self._with_db_lock():
try:
mm_debug(f"[folder-db] opening sqlite db: {self.db_path}")
# Ensure the library root exists; sqlite cannot create parent dirs.
try:
# User safety: Folder store must be created in a blank folder/no files in it.
@@ -326,6 +328,7 @@ class API_folder_store:
timeout=20.0
)
self.connection.row_factory = sqlite3.Row
mm_debug(f"[folder-db] sqlite connection opened: {self.db_path}")
# Ensure busy_timeout is set immediately for all subsequent ops (including pragmas)
try:
@@ -337,7 +340,14 @@ class API_folder_store:
# 1. WAL mode for better concurrency and fewer locks
self.connection.execute("PRAGMA journal_mode=WAL")
# 2. auto_vacuum=FULL to automatically reclaim space from deleted rows/logs
self.connection.execute("PRAGMA auto_vacuum = FULL")
try:
self.connection.execute("PRAGMA auto_vacuum = FULL")
except sqlite3.OperationalError as exc:
if "locked" not in str(exc).lower():
raise
logger.warning(
"Database locked; skipping PRAGMA auto_vacuum setup for this session."
)
# 3. Increase page size for modern file systems
self.connection.execute("PRAGMA page_size = 4096")
# 4. Memory and Sync optimizations
@@ -2657,12 +2667,30 @@ class DatabaseAPI:
def __init__(self, search_dir: Path):
    """Bind this API wrapper to *search_dir* and open its folder store."""
    # Normalize the requested directory once, then reuse the resolved path.
    resolved_root = expand_path(search_dir).resolve()
    self.search_dir = resolved_root
    self.db = API_folder_store(resolved_root)
    # Best-effort trace only — construction must never fail because of logging.
    try:
        mm_debug(f"[folder-db] DatabaseAPI init: root={self.search_dir} db={self.db.db_path}")
    except Exception:
        pass
def __enter__(self):
try:
mm_debug(
f"[folder-db] DatabaseAPI enter: root={self.search_dir} db={self.db.db_path}"
)
except Exception:
pass
self.db.__enter__()
return self
def __exit__(self, *args):
try:
mm_debug(
f"[folder-db] DatabaseAPI exit: root={self.search_dir} db={self.db.db_path}"
)
except Exception:
pass
return self.db.__exit__(*args)
def get_cursor(self):
@@ -2730,6 +2758,9 @@ class DatabaseAPI:
def get_file_hashes_with_any_url(self, limit: Optional[int] = None) -> Set[str]:
"""Get hashes of files that have any non-empty URL metadata."""
mm_debug(
f"[folder-db] get_file_hashes_with_any_url start: limit={limit or 10000}"
)
cursor = self.get_cursor()
cursor.execute(
"""
@@ -2744,8 +2775,11 @@ class DatabaseAPI:
(limit or 10000,
),
)
return {row[0]
for row in cursor.fetchall()}
rows = cursor.fetchall()
mm_debug(
f"[folder-db] get_file_hashes_with_any_url done: {len(rows)} row(s)"
)
return {row[0] for row in rows}
def get_file_hashes_by_url_like(
self,
@@ -2753,6 +2787,9 @@ class DatabaseAPI:
limit: Optional[int] = None
) -> Set[str]:
"""Get hashes of files whose URL metadata contains a substring (case-insensitive)."""
mm_debug(
f"[folder-db] get_file_hashes_by_url_like start: pattern={like_pattern} limit={limit or 10000}"
)
cursor = self.get_cursor()
cursor.execute(
"""
@@ -2766,8 +2803,11 @@ class DatabaseAPI:
(like_pattern.lower(),
limit or 10000),
)
return {row[0]
for row in cursor.fetchall()}
rows = cursor.fetchall()
mm_debug(
f"[folder-db] get_file_hashes_by_url_like done: {len(rows)} row(s)"
)
return {row[0] for row in rows}
def get_file_hashes_by_ext(self,
ext_value: str,
@@ -2847,14 +2887,18 @@ class DatabaseAPI:
def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
"""Get files that have any non-empty URL metadata.
Returns (hash, file_path, size, ext) tuples.
Returns (hash, file_path, size, ext, url) tuples.
"""
mm_debug(
f"[folder-db] get_files_with_any_url start: limit={limit or 10000}"
)
cursor = self.get_cursor()
cursor.execute(
"""
SELECT f.hash, f.file_path,
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
SELECT f.hash, f.file_path,
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext,
COALESCE(m.url, '') as url
FROM file f
JOIN metadata m ON f.hash = m.hash
WHERE m.url IS NOT NULL
@@ -2866,21 +2910,29 @@ class DatabaseAPI:
(limit or 10000,
),
)
return cursor.fetchall()
rows = cursor.fetchall()
mm_debug(
f"[folder-db] get_files_with_any_url done: {len(rows)} row(s)"
)
return rows
def get_files_by_url_like(self,
like_pattern: str,
limit: Optional[int] = None) -> List[tuple]:
"""Get files whose URL metadata contains a substring (case-insensitive).
Returns (hash, file_path, size, ext) tuples.
Returns (hash, file_path, size, ext, url) tuples.
"""
mm_debug(
f"[folder-db] get_files_by_url_like start: pattern={like_pattern} limit={limit or 10000}"
)
cursor = self.get_cursor()
cursor.execute(
"""
SELECT f.hash, f.file_path,
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
SELECT f.hash, f.file_path,
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext,
COALESCE(m.url, '') as url
FROM file f
JOIN metadata m ON f.hash = m.hash
WHERE m.url IS NOT NULL
@@ -2891,7 +2943,11 @@ class DatabaseAPI:
(like_pattern.lower(),
limit or 10000),
)
return cursor.fetchall()
rows = cursor.fetchall()
mm_debug(
f"[folder-db] get_files_by_url_like done: {len(rows)} row(s)"
)
return rows
def get_file_metadata(self,
file_hashes: Set[str],
@@ -2899,6 +2955,9 @@ class DatabaseAPI:
"""Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
if not file_hashes:
return []
mm_debug(
f"[folder-db] get_file_metadata start: hashes={len(file_hashes)} limit={limit or len(file_hashes)}"
)
cursor = self.get_cursor()
placeholders = ",".join(["?"] * len(file_hashes))
fetch_sql = f"""
@@ -2911,7 +2970,11 @@ class DatabaseAPI:
LIMIT ?
"""
cursor.execute(fetch_sql, (*file_hashes, limit or len(file_hashes)))
return cursor.fetchall()
rows = cursor.fetchall()
mm_debug(
f"[folder-db] get_file_metadata done: {len(rows)} row(s)"
)
return rows
def get_all_files(self, limit: Optional[int] = None) -> List[tuple]:
"""Get all files in database. Returns (hash, file_path, size, ext) tuples."""
@@ -2932,11 +2995,18 @@ class DatabaseAPI:
def get_tags_for_file(self, file_hash: str) -> List[str]:
    """Get all tags for a file given its hash.

    Args:
        file_hash: Content hash identifying the file in the ``tag`` table.

    Returns:
        List of tag strings attached to the file (empty list if none).
    """
    # Fix: a stale early `return [row[0] for row in cursor.fetchall()]`
    # preceded the logging block, making the "done" debug line and the
    # second fetch unreachable dead code; removed so rows are fetched once.
    mm_debug(
        f"[folder-db] get_tags_for_file start: hash={file_hash}"
    )
    cursor = self.get_cursor()
    cursor.execute("SELECT tag FROM tag WHERE hash = ?",
                   (file_hash,
                    ))
    # Materialize once so the row count can be logged before returning.
    rows = cursor.fetchall()
    mm_debug(
        f"[folder-db] get_tags_for_file done: {len(rows)} row(s)"
    )
    return [row[0] for row in rows]
def get_tags_by_namespace_and_file(self,
file_hash: str,