dfdfsdd
API/folder.py
@@ -407,38 +407,53 @@ class API_folder_store:
             logger.error(f"Error clearing worker log for {worker_id}: {exc}", exc_info=True)
 
     def _migrate_metadata_schema(self, cursor) -> None:
-        """Import legacy metadata from old schema if present. Existing hash-based schema is ready to use."""
+        """Ensure metadata schema is up-to-date.
+
+        - If a legacy schema is detected, attempt to import/upgrade (best-effort).
+        - If the hash-based schema exists, add any missing columns expected by current code.
+        """
         try:
             # Check if this is a fresh new database (hash-based schema)
             cursor.execute('PRAGMA table_info(metadata)')
             existing_columns = {row[1] for row in cursor.fetchall()}
 
-            # If hash column exists, we're already on the new schema
-            if 'hash' in existing_columns:
-                logger.info("Database is already using hash-based schema - no migration needed")
-                return
-
-            # Legacy migration: If old schema exists, try to import data
+            # Legacy migration: If old schema exists, try to import data.
             # Old schema would have had: id (INTEGER PRIMARY KEY), file_hash (TEXT), etc.
-            if 'id' in existing_columns and 'file_hash' in existing_columns:
-                logger.info("Detected legacy metadata schema - importing to new hash-based schema")
-                # This would be complex legacy migration - for now just note it
-                logger.info("Legacy metadata table detected but import not yet implemented")
+            if 'hash' not in existing_columns:
+                if 'id' in existing_columns and 'file_hash' in existing_columns:
+                    logger.info("Detected legacy metadata schema - importing to new hash-based schema")
+                    # This would be complex legacy migration - for now just note it.
+                    logger.info("Legacy metadata table detected but import not yet implemented")
+                    return
+
+                # Unknown/unsupported schema; nothing we can safely do here.
+                return
 
-            # Add any missing columns to the new schema
-            for col_name, col_def in [('size', 'INTEGER'), ('ext', 'TEXT'),
-                                      ('type', 'TEXT'),
-                                      ('time_imported', 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'),
-                                      ('time_modified', 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP')]:
+            # Hash-based schema exists: add any missing columns expected by current code.
+            # These are safe ALTER TABLE additions for older DBs.
+            column_specs = {
+                'size': 'INTEGER',
+                'ext': 'TEXT',
+                'type': 'TEXT',
+                'url': 'TEXT',
+                'relationships': 'TEXT',
+                'duration': 'REAL',
+                'time_imported': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
+                'time_modified': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
+                'created_at': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
+                'updated_at': 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP',
+            }
+
+            for col_name, col_def in column_specs.items():
                 if col_name not in existing_columns:
                     try:
                         cursor.execute(f"ALTER TABLE metadata ADD COLUMN {col_name} {col_def}")
                         existing_columns.add(col_name)
                         logger.info(f"Added '{col_name}' column to metadata table")
                     except Exception as e:
                         logger.debug(f"Column '{col_name}' may already exist: {e}")
 
-            # Populate type column from ext if not already populated
+            # Populate type column from ext if not already populated.
             if 'type' in existing_columns and 'ext' in existing_columns:
                 try:
                     from SYS.utils_constant import get_type_from_ext
@@ -451,7 +466,7 @@ class API_folder_store:
                     logger.info(f"Populated type column for {len(rows)} metadata entries")
                 except Exception as e:
                     logger.debug(f"Could not populate type column: {e}")
 
             self.connection.commit()
         except Exception as e:
             logger.debug(f"Note: Schema import/migration completed with status: {e}")
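The migration above relies on an additive pattern: discover the current columns with PRAGMA table_info, then issue ALTER TABLE ... ADD COLUMN for anything missing. Below is a minimal, self-contained sketch of that pattern against an in-memory SQLite database; the table name and the reduced column set are illustrative only and are not taken from API/folder.py.

import sqlite3

# Columns this sketch wants to guarantee; values are SQLite column definitions.
EXPECTED_COLUMNS = {
    "size": "INTEGER",
    "ext": "TEXT",
    "url": "TEXT",
}

def ensure_columns(conn: sqlite3.Connection, table: str) -> None:
    cur = conn.cursor()
    # PRAGMA table_info returns one row per column; index 1 is the column name.
    cur.execute(f"PRAGMA table_info({table})")
    existing = {row[1] for row in cur.fetchall()}
    for name, definition in EXPECTED_COLUMNS.items():
        if name not in existing:
            # ADD COLUMN is additive and leaves existing rows untouched.
            cur.execute(f"ALTER TABLE {table} ADD COLUMN {name} {definition}")
    conn.commit()

if __name__ == "__main__":
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE metadata (hash TEXT PRIMARY KEY)")
    ensure_columns(conn, "metadata")
    print([row[1] for row in conn.execute("PRAGMA table_info(metadata)")])
    # ['hash', 'size', 'ext', 'url']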
@@ -929,6 +944,13 @@ class API_folder_store:
         if not fields:
             return
 
+        # Ensure a metadata row exists so updates don't silently no-op.
+        # This can happen for older DBs or entries created without explicit metadata.
+        cursor.execute(
+            "INSERT OR IGNORE INTO metadata (hash) VALUES (?)",
+            (file_hash,),
+        )
+
         values.append(file_hash)
 
         sql = f"UPDATE metadata SET {', '.join(fields)}, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?"
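The hunk above pairs an INSERT OR IGNORE with the subsequent UPDATE so that writing metadata for a hash that has no row yet still takes effect. A minimal sketch of that pattern, assuming a toy metadata table with only hash and size columns:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE metadata (hash TEXT PRIMARY KEY, size INTEGER)")

def set_size(conn: sqlite3.Connection, file_hash: str, size: int) -> None:
    cur = conn.cursor()
    # Create the row if it does not exist yet; a plain UPDATE on a missing
    # row would match nothing and silently do no work.
    cur.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
    cur.execute("UPDATE metadata SET size = ? WHERE hash = ?", (size, file_hash))
    conn.commit()

set_size(conn, "abc123", 2048)
print(conn.execute("SELECT hash, size FROM metadata").fetchall())
# [('abc123', 2048)]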
@@ -1681,6 +1703,84 @@ class DatabaseAPI:
             )
             return {row[0] for row in cursor.fetchall()}
 
+    def get_file_hashes_with_any_url(self, limit: Optional[int] = None) -> Set[str]:
+        """Get hashes of files that have any non-empty URL metadata."""
+        cursor = self.get_cursor()
+        cursor.execute(
+            """
+            SELECT DISTINCT f.hash
+            FROM files f
+            JOIN metadata m ON f.hash = m.hash
+            WHERE m.url IS NOT NULL
+              AND TRIM(m.url) != ''
+              AND TRIM(m.url) != '[]'
+            LIMIT ?
+            """,
+            (limit or 10000,),
+        )
+        return {row[0] for row in cursor.fetchall()}
+
+    def get_file_hashes_by_url_like(self, like_pattern: str, limit: Optional[int] = None) -> Set[str]:
+        """Get hashes of files whose URL metadata contains a substring (case-insensitive)."""
+        cursor = self.get_cursor()
+        cursor.execute(
+            """
+            SELECT DISTINCT f.hash
+            FROM files f
+            JOIN metadata m ON f.hash = m.hash
+            WHERE m.url IS NOT NULL
+              AND LOWER(m.url) LIKE ?
+            LIMIT ?
+            """,
+            (like_pattern.lower(), limit or 10000),
+        )
+        return {row[0] for row in cursor.fetchall()}
+
+    def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
+        """Get files that have any non-empty URL metadata.
+
+        Returns (hash, file_path, size, ext) tuples.
+        """
+        cursor = self.get_cursor()
+        cursor.execute(
+            """
+            SELECT f.hash, f.file_path,
+                   COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
+                   COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
+            FROM files f
+            JOIN metadata m ON f.hash = m.hash
+            WHERE m.url IS NOT NULL
+              AND TRIM(m.url) != ''
+              AND TRIM(m.url) != '[]'
+            ORDER BY f.file_path
+            LIMIT ?
+            """,
+            (limit or 10000,),
+        )
+        return cursor.fetchall()
+
+    def get_files_by_url_like(self, like_pattern: str, limit: Optional[int] = None) -> List[tuple]:
+        """Get files whose URL metadata contains a substring (case-insensitive).
+
+        Returns (hash, file_path, size, ext) tuples.
+        """
+        cursor = self.get_cursor()
+        cursor.execute(
+            """
+            SELECT f.hash, f.file_path,
+                   COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
+                   COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
+            FROM files f
+            JOIN metadata m ON f.hash = m.hash
+            WHERE m.url IS NOT NULL
+              AND LOWER(m.url) LIKE ?
+            ORDER BY f.file_path
+            LIMIT ?
+            """,
+            (like_pattern.lower(), limit or 10000),
+        )
+        return cursor.fetchall()
+
     def get_file_metadata(self, file_hashes: Set[str], limit: Optional[int] = None) -> List[tuple]:
         """Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
         if not file_hashes:
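To illustrate what the new URL filters match, here is a small self-contained sketch that runs the same WHERE clauses against an in-memory SQLite database with invented rows; note that the LIKE pattern is passed through unchanged apart from lowercasing, so the caller supplies the % wildcards.

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE files (hash TEXT PRIMARY KEY, file_path TEXT);
    CREATE TABLE metadata (hash TEXT PRIMARY KEY, url TEXT);
    INSERT INTO files VALUES ('h1', '/a.mp4'), ('h2', '/b.jpg'), ('h3', '/c.png');
    INSERT INTO metadata VALUES ('h1', 'https://example.com/v'), ('h2', ''), ('h3', '[]');
""")

# Mirrors the "any URL" filter: NULL, empty, and '[]' values are excluded.
any_url = conn.execute("""
    SELECT DISTINCT f.hash
    FROM files f JOIN metadata m ON f.hash = m.hash
    WHERE m.url IS NOT NULL AND TRIM(m.url) != '' AND TRIM(m.url) != '[]'
""").fetchall()
print(any_url)  # [('h1',)]

# Mirrors the LIKE filter: the caller supplies the % wildcards.
pattern = "%example.com%"
like_match = conn.execute("""
    SELECT DISTINCT f.hash
    FROM files f JOIN metadata m ON f.hash = m.hash
    WHERE m.url IS NOT NULL AND LOWER(m.url) LIKE ?
""", (pattern.lower(),)).fetchall()
print(like_match)  # [('h1',)]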