This commit is contained in:
nose
2025-12-20 23:57:44 -08:00
parent b75faa49a2
commit 8ca5783970
39 changed files with 4294 additions and 1722 deletions

View File

@@ -258,6 +258,7 @@ class API_folder_store:
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(file_path)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_hash ON tags(hash)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_metadata_ext ON metadata(ext)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_id ON worker(worker_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_status ON worker(status)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)")
@@ -1858,6 +1859,73 @@ class DatabaseAPI:
)
return {row[0] for row in cursor.fetchall()}
def get_file_hashes_by_ext(self, ext_value: str, limit: Optional[int] = None) -> Set[str]:
    """Get hashes of files whose metadata ext matches the given extension.

    Matching is case-insensitive and ignores a leading '.' on both the
    query and the stored ext. The query may contain the glob wildcards
    '*' (any run of characters, including none) and '?' (exactly one
    character). Without wildcards the query is reduced to its alphanumeric
    characters and compared for equality.

    Args:
        ext_value: Extension or glob pattern, e.g. "mp3", ".MP3", "mp?", "*".
        limit: Maximum number of rows to read. Falsy values (None or 0)
            fall back to 10000.

    Returns:
        Set of matching file hashes. Empty when the query is blank or,
        for a non-glob query, contains no alphanumeric characters.
    """
    # Coerce once so glob detection and matching work on the same text
    # (and a non-string argument cannot break the `in` checks below).
    query = str(ext_value or "").strip().lower().lstrip(".")
    row_limit = limit or 10000

    if "*" in query or "?" in query:
        # Escape the escape character first, then LIKE's own wildcards, so
        # that only the translated glob wildcards act as wildcards.
        pattern = query.replace("\\", "\\\\")
        pattern = pattern.replace("%", "\\%").replace("_", "\\_")
        pattern = pattern.replace("*", "%").replace("?", "_")
        cursor = self.get_cursor()
        cursor.execute(
            """
            SELECT DISTINCT f.hash
            FROM files f
            JOIN metadata m ON f.hash = m.hash
            WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) LIKE ? ESCAPE '\\'
            LIMIT ?
            """,
            (pattern, row_limit),
        )
        return {row[0] for row in cursor.fetchall()}

    # Exact match: only alphanumeric characters of the query are significant.
    ext_clean = "".join(ch for ch in query if ch.isalnum())
    if not ext_clean:
        return set()

    cursor = self.get_cursor()
    cursor.execute(
        """
        SELECT DISTINCT f.hash
        FROM files f
        JOIN metadata m ON f.hash = m.hash
        WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) = ?
        LIMIT ?
        """,
        (ext_clean, row_limit),
    )
    return {row[0] for row in cursor.fetchall()}
def get_files_by_ext(self, ext_value: str, limit: Optional[int] = None) -> List[tuple]:
    """Get files whose metadata ext matches the given extension.

    The query is lower-cased, stripped of a leading '.', and reduced to its
    alphanumeric characters, then compared for equality against the stored
    ext (itself lower-cased with any leading '.' removed). Wildcard
    characters such as '*' and '?' are dropped by that reduction, not
    interpreted.

    Args:
        ext_value: Extension to look for, e.g. "mp3" or ".MP3".
        limit: Maximum number of rows to return. Falsy values (None or 0)
            fall back to 10000.

    Returns:
        Rows of (hash, file_path, size, ext) ordered by file_path. A NULL
        size is reported as 0, and ext is returned as stored. Empty list
        when the query contains no alphanumeric characters.
    """
    normalized = str(ext_value or "").strip().lower().lstrip(".")
    ext_clean = "".join(ch for ch in normalized if ch.isalnum())
    if not ext_clean:
        return []

    cursor = self.get_cursor()
    # size and ext are read from the already-joined metadata row instead of
    # re-querying the metadata table once per column for every result row.
    cursor.execute(
        """
        SELECT f.hash, f.file_path,
               COALESCE(m.size, 0) AS size,
               COALESCE(m.ext, '') AS ext
        FROM files f
        JOIN metadata m ON f.hash = m.hash
        WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) = ?
        ORDER BY f.file_path
        LIMIT ?
        """,
        (ext_clean, limit or 10000),
    )
    return cursor.fetchall()
def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
"""Get files that have any non-empty URL metadata.