fdf
This commit is contained in:
162
API/folder.py
162
API/folder.py
@@ -1842,8 +1842,21 @@ class LocalLibraryInitializer:
|
||||
self.db.connection.commit()
|
||||
self._import_sidecars_batch()
|
||||
self.db.connection.commit()
|
||||
|
||||
# Ensure files without sidecars are still imported + renamed to hash.
|
||||
self._hash_and_rename_non_sidecar_media_files()
|
||||
self.db.connection.commit()
|
||||
|
||||
self._cleanup_orphaned_sidecars()
|
||||
self.db.connection.commit()
|
||||
|
||||
try:
|
||||
cursor = self.db.connection.cursor()
|
||||
cursor.execute("SELECT COUNT(*) FROM files")
|
||||
row = cursor.fetchone()
|
||||
self.stats['files_total_db'] = int(row[0]) if row and row[0] is not None else 0
|
||||
except Exception:
|
||||
self.stats['files_total_db'] = 0
|
||||
|
||||
logger.info(f"Library scan complete. Stats: {self.stats}")
|
||||
return self.stats
|
||||
@@ -1853,12 +1866,140 @@ class LocalLibraryInitializer:
|
||||
raise
|
||||
finally:
|
||||
self.db.close()
|
||||
|
||||
def _hash_and_rename_non_sidecar_media_files(self) -> None:
    """Ensure media files are hash-named even when they have no sidecars.

    This keeps the library stable across restarts:
    - New files get hashed + renamed to ``<sha256><ext>``
    - DB ``file_path`` is updated by hash so the same file isn't re-counted
      as "new" on the next scan.

    Files that have any sidecar (``.tag`` / ``.metadata`` / ``.notes``) are
    left alone here — the sidecar importer owns those. Files whose content
    already exists under the canonical hash name are quarantined into
    ``<library_root>/.duplicates`` instead of renamed.

    Side effects: renames/moves files on disk, inserts/updates rows in the
    ``files`` table, bumps counters in ``self.stats``. Never raises — all
    errors are logged and counted in ``self.stats['errors']``.
    """
    try:
        renamed = 0
        skipped_existing_target = 0
        duplicates_quarantined = 0

        for file_path in self._find_media_files():
            try:
                # File may have vanished between directory scan and now.
                if not file_path.is_file():
                    continue

                # Already canonical: a 64-char lowercase hex stem is assumed
                # to be a prior sha256 rename. NOTE(review): a non-hash file
                # whose name happens to be 64 hex chars would be skipped —
                # accepted risk, astronomically unlikely for real filenames.
                stem = file_path.stem.lower()
                is_hash_named = len(stem) == 64 and all(ch in "0123456789abcdef" for ch in stem)
                if is_hash_named:
                    continue

                # If any sidecars exist for this file, let the sidecar importer handle it.
                if (
                    file_path.with_name(file_path.name + ".tag").exists()
                    or file_path.with_name(file_path.name + ".metadata").exists()
                    or file_path.with_name(file_path.name + ".notes").exists()
                ):
                    continue

                file_hash = sha256_file(file_path)
                # Canonical name keeps the original extension (including case).
                target_path = file_path.with_name(f"{file_hash}{file_path.suffix}")

                # Ensure the DB entry exists with a title tag derived from the
                # original filename. This intentionally happens BEFORE rename,
                # while the human-readable name is still available.
                self.db.get_or_create_file_entry(file_path, file_hash)

                if target_path == file_path:
                    continue

                if target_path.exists():
                    skipped_existing_target += 1
                    # The canonical file already exists as a hash-named file. Keep the DB pointing
                    # at the canonical hash-named path and quarantine this duplicate so it doesn't
                    # get counted as "new" again on future restarts.
                    try:
                        cursor = self.db.connection.cursor()
                        cursor.execute(
                            "UPDATE files SET file_path = ?, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
                            (str(target_path.resolve()), file_hash),
                        )
                    except Exception as exc:
                        logger.debug(f"Failed to reset DB path to canonical file for {file_hash}: {exc}")

                    try:
                        dup_dir = self.library_root / ".duplicates"
                        dup_dir.mkdir(parents=True, exist_ok=True)

                        # Avoid clobbering an earlier quarantined file with
                        # the same name by appending a timestamp suffix.
                        dest = dup_dir / file_path.name
                        if dest.exists():
                            ts = int(datetime.now().timestamp())
                            dest = dup_dir / f"{file_path.stem}__dup__{ts}{file_path.suffix}"

                        logger.warning(
                            f"Duplicate content (hash={file_hash}) detected; moving {file_path} -> {dest}"
                        )
                        file_path.rename(dest)
                        duplicates_quarantined += 1
                    except Exception as exc:
                        # Best-effort: a failed quarantine leaves the duplicate
                        # in place; it will be reconsidered next scan.
                        logger.warning(
                            f"Duplicate content (hash={file_hash}) detected but could not quarantine {file_path}: {exc}"
                        )
                    continue

                try:
                    file_path.rename(target_path)
                except Exception as exc:
                    logger.warning(f"Failed to rename {file_path} -> {target_path}: {exc}")
                    self.stats['errors'] += 1
                    continue

                # Update DB path by hash (more robust than matching the old path).
                # Best-effort: the file is already renamed on disk; a failed path
                # update is self-healing on the next scan. Log it (consistent with
                # the duplicate branch above) instead of swallowing silently.
                try:
                    cursor = self.db.connection.cursor()
                    cursor.execute(
                        "UPDATE files SET file_path = ?, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
                        (str(target_path.resolve()), file_hash),
                    )
                except Exception as exc:
                    logger.debug(f"Failed to update DB path for {file_hash}: {exc}")

                # Ensure basic metadata exists. Also best-effort; log on failure
                # rather than aborting the rename that already succeeded.
                try:
                    stat_result = target_path.stat()
                    self.db.save_metadata(
                        target_path,
                        {
                            "hash": file_hash,
                            "ext": target_path.suffix,
                            "size": stat_result.st_size,
                        },
                    )
                except Exception as exc:
                    logger.debug(f"Failed to save basic metadata for {target_path}: {exc}")

                renamed += 1
            except Exception as exc:
                # Per-file guard: one bad file must not stop the whole pass.
                logger.warning(f"Error hashing/renaming file {file_path}: {exc}")
                self.stats['errors'] += 1

        # Fold counters into stats; `or 0` guards against a None/"" value
        # left by an earlier writer of these keys.
        if renamed:
            self.stats['files_hashed_renamed'] = int(self.stats.get('files_hashed_renamed', 0) or 0) + renamed
        if skipped_existing_target:
            self.stats['files_hashed_skipped_target_exists'] = int(
                self.stats.get('files_hashed_skipped_target_exists', 0) or 0
            ) + skipped_existing_target
        if duplicates_quarantined:
            self.stats['duplicates_quarantined'] = int(self.stats.get('duplicates_quarantined', 0) or 0) + duplicates_quarantined
    except Exception as exc:
        # Outermost guard: this pass is auxiliary to the scan and must not
        # propagate; record the failure and move on.
        logger.error(f"Error hashing/renaming non-sidecar media files: {exc}", exc_info=True)
        self.stats['errors'] += 1
|
||||
|
||||
def _find_media_files(self) -> List[Path]:
|
||||
"""Find all media files in the library folder."""
|
||||
media_files = []
|
||||
try:
|
||||
for file_path in self.library_root.rglob("*"):
|
||||
# Don't repeatedly re-scan quarantined duplicates.
|
||||
try:
|
||||
if ".duplicates" in file_path.parts:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
if file_path.is_file() and file_path.suffix.lower() in MEDIA_EXTENSIONS:
|
||||
media_files.append(file_path)
|
||||
except Exception as e:
|
||||
@@ -1882,7 +2023,7 @@ class LocalLibraryInitializer:
|
||||
logger.error(f"Error getting database files: {e}", exc_info=True)
|
||||
return {}
|
||||
|
||||
def _process_file(self, file_path: Path, db_files: Dict[str, int]) -> None:
|
||||
def _process_file(self, file_path: Path, db_files: Dict[str, str]) -> None:
|
||||
"""Process a single media file."""
|
||||
try:
|
||||
normalized = str(file_path.resolve()).lower()
|
||||
@@ -1890,8 +2031,23 @@ class LocalLibraryInitializer:
|
||||
if normalized in db_files:
|
||||
self.stats['files_existing'] += 1
|
||||
else:
|
||||
self.db.get_or_create_file_entry(file_path)
|
||||
self.stats['files_new'] += 1
|
||||
# Path not known. If this file's hash is already in DB, it's duplicate content and
|
||||
# should not be counted as "new".
|
||||
file_hash = sha256_file(file_path)
|
||||
try:
|
||||
cursor = self.db.connection.cursor()
|
||||
cursor.execute("SELECT 1 FROM files WHERE hash = ?", (file_hash,))
|
||||
exists_by_hash = cursor.fetchone() is not None
|
||||
except Exception:
|
||||
exists_by_hash = False
|
||||
|
||||
if exists_by_hash:
|
||||
self.stats['files_existing'] += 1
|
||||
self.stats['duplicates_found'] = int(self.stats.get('duplicates_found', 0) or 0) + 1
|
||||
logger.info(f"Duplicate content detected during scan (hash={file_hash}): {file_path}")
|
||||
else:
|
||||
self.db.get_or_create_file_entry(file_path, file_hash)
|
||||
self.stats['files_new'] += 1
|
||||
|
||||
self.stats['files_scanned'] += 1
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user