This commit is contained in:
nose
2025-12-03 15:18:57 -08:00
parent 89aa24961b
commit 5e4df11dbf
12 changed files with 1953 additions and 346 deletions

View File

@@ -19,6 +19,8 @@ from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List, Tuple, Set
from .utils import sha256_file
logger = logging.getLogger(__name__)
# Try to import optional dependencies
@@ -455,6 +457,18 @@ class LocalLibraryDB:
logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True)
raise
def get_file_id(self, file_path: Path) -> Optional[int]:
    """Look up the ``files.id`` value stored for a file path.

    Args:
        file_path: Path of the file to look up. It is resolved and converted
            to a string before being compared with the ``file_path`` column.

    Returns:
        The first column of the matching row, or ``None`` when no row matches.
        Any exception raised during the lookup is logged and also results in
        ``None`` (nothing is re-raised).
    """
    lookup_sql = "SELECT id FROM files WHERE file_path = ?"
    try:
        resolved = str(file_path.resolve())
        db_cursor = self.connection.cursor()
        db_cursor.execute(lookup_sql, (resolved,))
        match = db_cursor.fetchone()
        if not match:
            return None
        # Positional access, so the result does not depend on the row type.
        return match[0]
    except Exception as e:
        logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True)
        return None
def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]:
"""Get metadata for a file."""
try:
@@ -748,6 +762,177 @@ class LocalLibraryDB:
logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True)
raise
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
    """Record a bidirectional relationship between two local files.

    Both paths are passed through ``get_or_create_file_entry`` to obtain their
    file ids, their SHA-256 hashes are written to ``files.file_hash``, and each
    file's hash is added to the other file's ``metadata.relationships`` JSON
    object under ``rel_type``. All writes are committed together; if anything
    fails the pending transaction is rolled back so that a half-written
    (one-directional) relationship cannot be committed later by accident.

    Args:
        file_path: Path to the file being related.
        related_file_path: Path to the related file.
        rel_type: Type of relationship ('king', 'alt', 'related').

    Raises:
        Exception: Any error is logged and re-raised after the rollback attempt.
    """
    try:
        str_path = str(file_path.resolve())
        str_related_path = str(related_file_path.resolve())
        file_id = self.get_or_create_file_entry(file_path)
        related_file_id = self.get_or_create_file_entry(related_file_path)
        cursor = self.connection.cursor()

        # Relationships are stored by content hash, so both hashes are required.
        file_hash = sha256_file(file_path)
        related_file_hash = sha256_file(related_file_path)
        if not file_hash or not related_file_hash:
            logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}")
            return

        # Store the hashes in the files table for future lookups.
        for known_hash, known_id in ((file_hash, file_id), (related_file_hash, related_file_id)):
            cursor.execute("""
                UPDATE files SET file_hash = ? WHERE id = ?
            """, (known_hash, known_id))

        # Forward direction: file -> related file.
        self._add_relationship_hash(cursor, file_id, rel_type, related_file_hash)
        logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})")

        # Reverse direction (bidirectional). For 'alt' and 'related' the reverse
        # is the same type; 'king' could map to a distinct reverse type, but the
        # same type is used to stay consistent with Hydrus 'alternates'.
        reverse_type = rel_type
        self._add_relationship_hash(cursor, related_file_id, reverse_type, file_hash)

        self.connection.commit()
    except Exception as e:
        logger.error(f"Error setting relationship: {e}", exc_info=True)
        # Discard partial writes so a later commit() elsewhere cannot persist
        # only one direction of the relationship.
        try:
            self.connection.rollback()
        except Exception:
            logger.warning("Rollback failed after error setting relationship", exc_info=True)
        raise

def _add_relationship_hash(self, cursor, file_id: int, rel_type: str, target_hash: str) -> None:
    """Add ``target_hash`` under ``rel_type`` in one file's stored relationships.

    Reads ``metadata.relationships`` for ``file_id``, merges the hash in, and
    upserts the row. Does not commit; the caller owns the transaction.
    """
    cursor.execute("""
        SELECT relationships FROM metadata WHERE file_id = ?
    """, (file_id,))
    row = cursor.fetchone()
    # Use index access to be safe regardless of row_factory.
    stored = row[0] if row else None

    try:
        relationships = json.loads(stored) if stored else {}
    except (json.JSONDecodeError, TypeError):
        relationships = {}
    # Malformed stored data (e.g. a JSON list at the top level) is replaced.
    if not isinstance(relationships, dict):
        relationships = {}

    # Same treatment for a malformed per-type value: anything that is not a
    # list would break .append() or give a substring match on a string.
    hashes = relationships.get(rel_type)
    if not isinstance(hashes, list):
        hashes = []
        relationships[rel_type] = hashes

    if target_hash not in hashes:
        hashes.append(target_hash)

    cursor.execute("""
        INSERT INTO metadata (file_id, relationships)
        VALUES (?, ?)
        ON CONFLICT(file_id) DO UPDATE SET
            relationships = excluded.relationships,
            time_modified = CURRENT_TIMESTAMP
    """, (file_id, json.dumps(relationships)))
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
    """List the files whose stored relationships contain the target's hash.

    The target is hashed with ``sha256_file``; candidate rows are pre-filtered
    in SQL with a ``LIKE`` on the raw relationships JSON and then confirmed by
    parsing the JSON and checking each relationship type's list.

    Args:
        target_path: The file path to look for in other files' relationships.

    Returns:
        A list of ``{"path": ..., "type": ...}`` dicts, one per (file,
        relationship type) pair that contains the target hash. An empty list
        is returned when the hash cannot be computed or any error occurs
        (errors are logged, not raised).
    """
    try:
        wanted_hash = sha256_file(target_path)
        if not wanted_hash:
            logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash")
            return []

        db_cursor = self.connection.cursor()
        # Full scan of metadata filtered by substring; may be slow on very
        # large databases but acceptable for a local library.
        db_cursor.execute("""
            SELECT f.file_path, m.relationships
            FROM metadata m
            JOIN files f ON m.file_id = f.id
            WHERE m.relationships LIKE ?
        """, (f"%{wanted_hash}%",))

        matches = []
        for record in db_cursor.fetchall():
            source_path = record[0]
            raw_relationships = record[1]
            try:
                parsed = json.loads(raw_relationships)
            except (json.JSONDecodeError, TypeError):
                # Unparseable relationships are ignored for this row.
                continue
            if not isinstance(parsed, dict):
                continue
            # The LIKE filter is only a substring match; confirm exact
            # membership in each relationship type's hash list.
            matches.extend(
                {
                    "path": source_path,
                    "type": kind
                }
                for kind, hash_list in parsed.items()
                if isinstance(hash_list, list) and wanted_hash in hash_list
            )
        return matches
    except Exception as e:
        logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True)
        return []
def get_note(self, file_path: Path) -> Optional[str]:
"""Get note for a file."""
try:
@@ -1076,6 +1261,11 @@ class LocalLibraryDB:
if not text:
return True
try:
# Check if connection is valid
if not self.connection:
logger.warning(f"Database connection not available for worker {worker_id}")
return False
cursor = self.connection.cursor()
cursor.execute("SELECT stdout FROM worker WHERE worker_id = ?", (worker_id,))
row = cursor.fetchone()
@@ -1097,6 +1287,13 @@ class LocalLibraryDB:
self.connection.commit()
return cursor.rowcount > 0
except sqlite3.ProgrammingError as e:
# Handle "Cannot operate on a closed database" gracefully
if "closed database" in str(e).lower():
logger.warning(f"Database connection closed, cannot append stdout for worker {worker_id}")
return False
logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
return False
except Exception as e:
logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
return False
@@ -1599,3 +1796,23 @@ class LocalLibrarySearchOptimizer:
if not self.db:
return None
return self.db.search_by_hash(file_hash)
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
    """Forward a relationship request to the underlying database object.

    Calls ``self.db.set_relationship()`` with the same arguments. When
    ``self.db`` is falsy the call is silently skipped.

    Args:
        file_path: Path to the first file.
        related_file_path: Path to the related file.
        rel_type: Type of relationship ('king', 'alt', 'related', etc.).
    """
    if self.db:
        self.db.set_relationship(file_path, related_file_path, rel_type)
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
    """Return the database's list of files related to the target path.

    Delegates to ``self.db.find_files_pointing_to()``; yields an empty list
    when ``self.db`` is falsy.
    """
    return self.db.find_files_pointing_to(target_path) if self.db else []