This commit is contained in:
nose
2025-12-16 01:45:01 -08:00
parent a03eb0d1be
commit 9873280f0e
36 changed files with 4911 additions and 1225 deletions

View File

@@ -609,12 +609,153 @@ class API_folder_store:
try:
metadata[field] = json.loads(metadata[field])
except (json.JSONDecodeError, TypeError):
metadata[field] = [] if field == 'url' else []
metadata[field] = [] if field == 'url' else {}
# Ensure relationships is always a dict
if metadata.get('relationships') is None:
metadata['relationships'] = {}
if not isinstance(metadata.get('relationships'), dict):
metadata['relationships'] = {}
return metadata
except Exception as e:
logger.error(f"Error getting metadata for hash {file_hash}: {e}", exc_info=True)
return None
def set_relationship_by_hash(self, file_hash: str, related_file_hash: str, rel_type: str = "alt", *, bidirectional: bool = True) -> None:
    """Set a relationship between two files by hash.

    This is the store/hash-first API. It avoids any dependency on local filesystem
    paths and only requires that both hashes exist in the DB.

    Args:
        file_hash: Hash of the file that receives the relationship entry.
            Stripped and lower-cased before use.
        related_file_hash: Hash of the file being pointed to; normalized the
            same way as ``file_hash``.
        rel_type: Key under which the related hash is stored in the
            relationships JSON object. Blank values fall back to ``"alt"``.
        bidirectional: When true, ``file_hash`` is also recorded under the same
            ``rel_type`` on the related file's metadata row.

    Raises:
        ValueError: If either hash is empty, or is not present in the
            ``files`` table.

    Relating a hash to itself is a silent no-op.
    """
    upsert_sql = """
        INSERT INTO metadata (hash, relationships)
        VALUES (?, ?)
        ON CONFLICT(hash) DO UPDATE SET
            relationships = excluded.relationships,
            time_modified = CURRENT_TIMESTAMP,
            updated_at = CURRENT_TIMESTAMP
    """

    def _normalize(value: object) -> str:
        """Return the canonical (stripped, lower-case) form of a hash value."""
        return str(value or "").strip().lower()

    def _link(cursor, source_hash: str, target_hash: str, kind: str) -> None:
        """Add target_hash to source_hash's ``kind`` list and upsert the row."""
        cursor.execute("SELECT relationships FROM metadata WHERE hash = ?", (source_hash,))
        row = cursor.fetchone()
        raw = row[0] if row else None
        try:
            rels = json.loads(raw) if raw else {}
        except (json.JSONDecodeError, TypeError):
            rels = {}
        # Anything that is not the expected JSON object is discarded and rebuilt.
        if not isinstance(rels, dict):
            rels = {}
        entries = rels.get(kind)
        if not isinstance(entries, list):
            entries = rels[kind] = []
        # Compare normalized values so an entry stored with different case or
        # stray whitespace is recognised and not duplicated.
        if all(_normalize(existing) != target_hash for existing in entries):
            entries.append(target_hash)
        cursor.execute(upsert_sql, (source_hash, json.dumps(rels)))

    pending_write = False
    try:
        file_hash = _normalize(file_hash)
        related_file_hash = _normalize(related_file_hash)
        rel_type = str(rel_type or "alt").strip() or "alt"
        if not file_hash or not related_file_hash:
            raise ValueError("Missing file hash for relationship")
        if file_hash == related_file_hash:
            return

        cursor = self.connection.cursor()

        # Ensure both hashes exist in files table (metadata has FK to files)
        for candidate in (file_hash, related_file_hash):
            cursor.execute("SELECT 1 FROM files WHERE hash = ?", (candidate,))
            if not cursor.fetchone():
                raise ValueError(f"Hash not found in store DB: {candidate}")

        pending_write = True
        _link(cursor, file_hash, related_file_hash, rel_type)
        if bidirectional:
            # Mirror the link on the related file as well.
            _link(cursor, related_file_hash, file_hash, rel_type)

        self.connection.commit()
    except Exception as e:
        if pending_write:
            # Do not leave a half-applied (one-directional) update pending on
            # the connection where a later, unrelated commit would persist it.
            try:
                self.connection.rollback()
            except Exception as rollback_error:
                logger.warning(f"Rollback failed after relationship error: {rollback_error}")
        logger.error(f"Error setting relationship by hash: {e}", exc_info=True)
        raise
def find_files_pointing_to_hash(self, target_hash: str) -> List[Dict[str, Any]]:
    """Find all files that have a relationship pointing to the target hash.

    The target is stripped and lower-cased first; an empty target yields an
    empty list. Candidate rows are pre-filtered in SQL with a substring match
    on the relationships JSON and then confirmed by parsing that JSON.

    Returns:
        One dict with keys ``hash``, ``path`` and ``type`` for every
        (file, relationship type) pair whose list contains the target.
        Any error is logged and results in an empty list.
    """
    try:
        target_hash = str(target_hash or "").strip().lower()
        if not target_hash:
            return []

        query = """
            SELECT f.hash, f.file_path, m.relationships
            FROM metadata m
            JOIN files f ON m.hash = f.hash
            WHERE m.relationships LIKE ?
        """
        cursor = self.connection.cursor()
        cursor.execute(query, (f"%{target_hash}%",))

        matches: List[Dict[str, Any]] = []
        for src_hash, src_path, raw_rels in cursor.fetchall():
            try:
                parsed = json.loads(raw_rels) if raw_rels else {}
            except (json.JSONDecodeError, TypeError):
                # Unparseable relationship data cannot point anywhere.
                continue
            if not isinstance(parsed, dict):
                continue
            # The LIKE filter is only a substring match, so each list is
            # checked for an exact (normalized) hit.
            matches.extend(
                {"hash": src_hash, "path": src_path, "type": kind}
                for kind, members in parsed.items()
                if isinstance(members, list)
                and any(str(member or "").strip().lower() == target_hash for member in members)
            )
        return matches
    except Exception as e:
        logger.error(f"Error finding files pointing to hash {target_hash}: {e}", exc_info=True)
        return []
def save_metadata(self, file_path: Path, metadata: Dict[str, Any]) -> None:
"""Save metadata for a file."""
@@ -961,7 +1102,7 @@ class API_folder_store:
logger.error(f"Error updating metadata for hash {file_hash}: {e}", exc_info=True)
raise
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt", *, bidirectional: bool = True) -> None:
"""Set a relationship between two local files.
Args:
@@ -1018,47 +1159,50 @@ class API_folder_store:
logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})")
# Set reverse relationship (bidirectional)
# For 'alt' and 'related', the reverse is the same
# For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does)
# Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates'
reverse_type = rel_type
# Update the related file
cursor.execute("""
SELECT relationships FROM metadata WHERE hash = ?
""", (related_file_hash,))
row = cursor.fetchone()
relationships_str = row[0] if row else None
try:
if relationships_str:
reverse_relationships = json.loads(relationships_str)
else:
if bidirectional:
# Set reverse relationship (bidirectional)
# For 'alt' and 'related', the reverse is the same
# For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does)
# Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates'
reverse_type = rel_type
# Update the related file
cursor.execute("""
SELECT relationships FROM metadata WHERE hash = ?
""", (related_file_hash,))
row = cursor.fetchone()
relationships_str = row[0] if row else None
try:
if relationships_str:
reverse_relationships = json.loads(relationships_str)
else:
reverse_relationships = {}
except (json.JSONDecodeError, TypeError):
reverse_relationships = {}
except (json.JSONDecodeError, TypeError):
reverse_relationships = {}
if not isinstance(reverse_relationships, dict):
reverse_relationships = {}
if reverse_type not in reverse_relationships:
reverse_relationships[reverse_type] = []
if file_hash not in reverse_relationships[reverse_type]:
reverse_relationships[reverse_type].append(file_hash)
# Save the updated reverse relationships
cursor.execute("""
INSERT INTO metadata (hash, relationships)
VALUES (?, ?)
ON CONFLICT(hash) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (related_file_hash, json.dumps(reverse_relationships)))
self.connection.commit()
if not isinstance(reverse_relationships, dict):
reverse_relationships = {}
if reverse_type not in reverse_relationships:
reverse_relationships[reverse_type] = []
if file_hash not in reverse_relationships[reverse_type]:
reverse_relationships[reverse_type].append(file_hash)
# Save the updated reverse relationships
cursor.execute("""
INSERT INTO metadata (hash, relationships)
VALUES (?, ?)
ON CONFLICT(hash) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (related_file_hash, json.dumps(reverse_relationships)))
self.connection.commit()
else:
self.connection.commit()
except Exception as e:
logger.error(f"Error setting relationship: {e}", exc_info=True)
@@ -1074,44 +1218,22 @@ class API_folder_store:
List of dicts with {path, type} for files pointing to target
"""
try:
# Get the hash of the target file
target_hash = sha256_file(target_path)
# Prefer the DB's stored identity hash for the target.
target_hash = None
try:
target_hash = self.get_file_hash(target_path)
except Exception:
target_hash = None
# Fall back to hashing bytes if the path isn't known to the DB.
if not target_hash:
target_hash = sha256_file(target_path)
if not target_hash:
logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash")
return []
cursor = self.connection.cursor()
# Scan all metadata (this might be slow on huge DBs but fine for local library)
# We select file_path and relationships json
cursor.execute("""
SELECT f.file_path, m.relationships
FROM metadata m
JOIN files f ON m.hash = f.hash
WHERE m.relationships LIKE ?
""", (f"%{target_hash}%",))
results = []
for row in cursor.fetchall():
f_path = row[0]
rels_json = row[1]
try:
rels = json.loads(rels_json)
if isinstance(rels, dict):
for r_type, hashes in rels.items():
if isinstance(hashes, list):
# Check if target hash is in this relationship type
if target_hash in hashes:
results.append({
"path": f_path,
"type": r_type
})
except (json.JSONDecodeError, TypeError):
continue
return results
return self.find_files_pointing_to_hash(target_hash)
except Exception as e:
logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True)
return []
@@ -2630,7 +2752,7 @@ class LocalLibrarySearchOptimizer:
return None
return self.db.search_hash(file_hash)
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt", *, bidirectional: bool = True) -> None:
"""Set a relationship between two files in the database.
Delegates to LocalLibraryDB.set_relationship().
@@ -2642,7 +2764,7 @@ class LocalLibrarySearchOptimizer:
"""
if not self.db:
return
self.db.set_relationship(file_path, related_file_path, rel_type)
self.db.set_relationship(file_path, related_file_path, rel_type, bidirectional=bidirectional)
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
"""Find all files that have a relationship pointing to the target path."""