nose
2025-12-03 15:18:57 -08:00
parent 89aa24961b
commit 5e4df11dbf
12 changed files with 1953 additions and 346 deletions

View File

@@ -68,8 +68,7 @@ def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config:
if db_root:
try:
- db = LocalLibraryDB(Path(db_root))
- try:
+ with LocalLibraryDB(Path(db_root)) as db:
# Get tags and metadata from database
tags = db.get_tags(media_path) or []
metadata = db.get_metadata(media_path) or {}
@@ -79,8 +78,6 @@ def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config:
if tags or known_urls or file_hash:
debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
return None, file_hash, tags, known_urls
- finally:
- db.close()
except Exception as exc:
log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
except Exception:
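
The refactors in this commit repeatedly replace manual LocalLibraryDB open/close pairs with `with` blocks, which only works if the class implements the context-manager protocol. A minimal sketch of what that support presumably looks like (the `library.db` filename and sqlite3 backend are assumptions, not taken from this diff):

import sqlite3
from pathlib import Path

class LocalLibraryDB:
    """Sketch: context-manager support so `with LocalLibraryDB(root) as db:` always closes the connection."""

    def __init__(self, root: Path) -> None:
        self.root = Path(root)
        self.connection = sqlite3.connect(self.root / "library.db")  # assumed DB filename

    def close(self) -> None:
        self.connection.close()

    def __enter__(self) -> "LocalLibraryDB":
        return self

    def __exit__(self, exc_type, exc, tb) -> bool:
        # Always close; returning False lets exceptions from the with-block propagate.
        self.close()
        return False
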

View File

@@ -14,22 +14,25 @@ from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
- from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
+ from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input
from helper.local_library import read_sidecar, find_sidecar
CMDLET = Cmdlet(
name="add-relationship",
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
- usage="add-relationship OR add-relationship -path <file>",
+ usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
args=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
+ CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
+ CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
],
details=[
- "- Reads relationship tags from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>')",
- "- Calls Hydrus API to associate the hashes as relationships",
+ "- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
+ "- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4",
+ "- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>...')",
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
- "- Works with piped file results or -path argument for direct invocation",
+ "- When using -king, all piped items become the specified relationship type to the king",
],
)
@@ -67,6 +70,81 @@ def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
return result
def _resolve_king_reference(king_arg: str) -> Optional[str]:
"""Resolve a king reference like '@4' to its actual hash or path.
Supports:
- Direct hash: '0123456789abcdef...' (64 chars)
- Selection reference: '@4' (resolves from pipeline context)
Returns:
- For Hydrus items: normalized hash
- For local storage items: file path
- None if not found
"""
if not king_arg:
return None
# Check if it's already a valid hash
normalized = _normalise_hash_hex(king_arg)
if normalized:
return normalized
# Try to resolve as @N selection from pipeline context
if king_arg.startswith('@'):
try:
# Get the result items from the pipeline context
from pipeline import get_last_result_items
items = get_last_result_items()
if not items:
log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr)
return None
# Parse @N to get the index (1-based)
index_str = king_arg[1:] # Remove '@'
index = int(index_str) - 1 # Convert to 0-based
if 0 <= index < len(items):
item = items[index]
# Try to extract hash from the item (could be dict or object)
item_hash = None
if isinstance(item, dict):
# Dictionary: try common hash field names
item_hash = item.get('hash_hex') or item.get('hash') or item.get('file_hash')
else:
# Object: use getattr
item_hash = getattr(item, 'hash_hex', None) or getattr(item, 'hash', None)
if item_hash:
normalized = _normalise_hash_hex(item_hash)
if normalized:
return normalized
# If no hash, try to get file path (for local storage)
file_path = None
if isinstance(item, dict):
# Dictionary: try common path field names
file_path = item.get('file_path') or item.get('path') or item.get('target')
else:
# Object: use getattr
file_path = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'target', None)
if file_path:
return str(file_path)
log(f"Item {king_arg} has no hash or path information", file=sys.stderr)
return None
else:
log(f"Index {king_arg} out of range", file=sys.stderr)
return None
except (ValueError, IndexError) as e:
log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr)
return None
return None
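
`_resolve_king_reference` leans on `_normalise_hash_hex`, which is not shown in this diff. A hedged sketch of what such a helper is assumed to do (accept a 64-character hex SHA-256 digest in any case, return it lowercased, otherwise return None):

import re
from typing import Optional

_HEX64 = re.compile(r"^[0-9a-f]{64}$")

def _normalise_hash_hex(value: object) -> Optional[str]:
    """Assumed behaviour: return a lowercased 64-char hex digest, or None if the input is not one."""
    if not value:
        return None
    candidate = str(value).strip().lower()
    return candidate if _HEX64.fullmatch(candidate) else None
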
@register(["add-relationship", "add-rel"]) # primary name and alias @register(["add-relationship", "add-rel"]) # primary name and alias
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus. """Associate file relationships in Hydrus.
@@ -88,6 +166,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
+ king_arg = parsed.get("king") # New: explicit king argument
+ rel_type = parsed.get("type", "alt") # New: relationship type (default: alt)
if parsed:
# Get the first arg value (e.g., -path)
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
@@ -98,62 +179,160 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
arg_path = Path(str(arg_value))
- # Get Hydrus client
- try:
- client = hydrus_wrapper.get_client(config)
- except Exception as exc:
- log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
- return 1
- if client is None:
- log("Hydrus client unavailable", file=sys.stderr)
- return 1
- # Handle @N selection which creates a list - extract the first item
- if isinstance(result, list) and len(result) > 0:
- result = result[0]
- # Check if we're in pipeline mode (have a hash) or file mode
- file_hash = getattr(result, "hash_hex", None)
- # PIPELINE MODE: Track relationships across multiple items
- if file_hash:
- file_hash = _normalise_hash_hex(file_hash)
- if not file_hash:
- log("Invalid file hash format", file=sys.stderr)
- return 1
- # Load or initialize king hash from pipeline context
- try:
- king_hash = ctx.load_value("relationship_king")
- except Exception:
- king_hash = None
- # If this is the first item, make it the king
- if not king_hash:
- try:
- ctx.store_value("relationship_king", file_hash)
- log(f"Established king hash: {file_hash}", file=sys.stderr)
- return 0 # First item just becomes the king, no relationships yet
- except Exception:
- pass
- # If we already have a king and this is a different hash, link them
- if king_hash and king_hash != file_hash:
- try:
- client.set_relationship(file_hash, king_hash, "alt")
- log(
- f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}",
- file=sys.stderr
- )
- return 0
- except Exception as exc:
- log(f"Failed to set relationship: {exc}", file=sys.stderr)
- return 1
- return 0
+ # Handle @N selection which creates a list
+ # Use normalize_result_input to handle both single items and lists
+ items_to_process = normalize_result_input(result)
+ if not items_to_process and not arg_path:
+ log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
+ return 1
+ # If no items from pipeline, just process the -path arg
+ if not items_to_process and arg_path:
+ items_to_process = [{"file_path": arg_path}]
+ # Import local storage utilities
+ from helper.local_library import LocalLibrarySearchOptimizer
+ from config import get_local_storage_path
+ local_storage_path = get_local_storage_path(config) if config else None
+ # Check if any items have Hydrus hashes (file_hash or hash_hex fields)
+ has_hydrus_hashes = any(
+ (isinstance(item, dict) and (item.get('hash_hex') or item.get('hash')))
+ or (hasattr(item, 'hash_hex') or hasattr(item, 'hash'))
+ for item in items_to_process
+ )
+ # Only try to initialize Hydrus if we actually have Hydrus hashes to work with
+ hydrus_client = None
+ if has_hydrus_hashes:
+ try:
+ hydrus_client = hydrus_wrapper.get_client(config)
+ except Exception as exc:
+ log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr)
+ # Use local storage if it's available and either Hydrus is not available or items are local files
+ use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists()))
+ # Resolve the king reference once (if provided)
+ king_hash = None
+ if king_arg:
+ # Resolve the king reference (could be @4 or a direct hash)
+ king_hash = _resolve_king_reference(king_arg)
+ if not king_hash:
+ log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr)
+ return 1
+ log(f"Using king hash: {king_hash}", file=sys.stderr)
+ # Process each item in the list
+ for item_idx, item in enumerate(items_to_process):
+ # Extract hash and path from current item
+ file_hash = None
+ file_path_from_result = None
+ if isinstance(item, dict):
+ file_hash = item.get("hash_hex") or item.get("hash")
+ file_path_from_result = item.get("file_path") or item.get("path") or item.get("target")
+ else:
+ file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
+ file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)
+ # PIPELINE MODE with Hydrus: Track relationships using hash
+ if file_hash and hydrus_client:
+ file_hash = _normalise_hash_hex(file_hash)
+ if not file_hash:
+ log("Invalid file hash format", file=sys.stderr)
+ return 1
+ # If explicit -king provided, use it
if king_hash:
try:
hydrus_client.set_relationship(file_hash, king_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
else:
# Original behavior: no explicit king, first becomes king, rest become alts
try:
existing_king = ctx.load_value("relationship_king")
except Exception:
existing_king = None
- # FILE MODE: Read relationships from sidecar
+ # If this is the first item, make it the king
if not existing_king:
try:
ctx.store_value("relationship_king", file_hash)
log(f"Established king hash: {file_hash}", file=sys.stderr)
continue # Move to next item
except Exception:
pass
# If we already have a king and this is a different hash, link them
if existing_king and existing_king != file_hash:
try:
hydrus_client.set_relationship(file_hash, existing_king, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
# LOCAL STORAGE MODE: Handle relationships for local files
elif use_local_storage and file_path_from_result:
try:
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
if king_hash:
# king_hash is a file path from _resolve_king_reference (or a Hydrus hash)
king_file_path = Path(str(king_hash)) if king_hash else None
if king_file_path and king_file_path.exists():
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, king_file_path, rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
else:
log(f"King file not found or invalid: {king_hash}", file=sys.stderr)
return 1
else:
# Original behavior: first becomes king, rest become alts
try:
king_path = ctx.load_value("relationship_king_path")
except Exception:
king_path = None
if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
continue # Move to next item
except Exception:
pass
if king_path and king_path != str(file_path_obj):
try:
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, Path(king_path), rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1
return 0
# FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr) log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)
# Resolve media path from -path arg or result target # Resolve media path from -path arg or result target
@@ -235,7 +414,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue
try:
- client.set_relationship(file_hash, related_hash, rel_type)
+ hydrus_client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",

View File

@@ -0,0 +1,168 @@
"""Delete file relationships."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence
import json
from pathlib import Path
import sys
from helper.logger import log
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input
from helper.local_library import LocalLibrarySearchOptimizer
from config import get_local_storage_path
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete relationships from files.
Args:
result: Input result(s) from previous cmdlet
args: Command arguments
config: CLI configuration
Returns:
Exit code (0 = success)
"""
try:
# Parse arguments
parsed_args = parse_cmdlet_args(args, CMDLET)
delete_all_flag = parsed_args.get("all", False)
rel_type_filter = parsed_args.get("type")
# Get storage path
local_storage_path = get_local_storage_path(config)
if not local_storage_path:
log("Local storage path not configured", file=sys.stderr)
return 1
# Normalize input
results = normalize_result_input(result)
if not results:
log("No results to process", file=sys.stderr)
return 1
deleted_count = 0
for single_result in results:
try:
# Get file path from result
file_path_from_result = None
if isinstance(single_result, dict):
file_path_from_result = (
single_result.get("file_path") or
single_result.get("path") or
single_result.get("target")
)
else:
file_path_from_result = (
getattr(single_result, "file_path", None) or
getattr(single_result, "path", None) or
getattr(single_result, "target", None) or
str(single_result)
)
if not file_path_from_result:
log("Could not extract file path from result", file=sys.stderr)
return 1
file_path_obj = Path(str(file_path_from_result))
if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1
with LocalLibrarySearchOptimizer(local_storage_path) as db:
file_id = db.db.get_file_id(file_path_obj)
if not file_id:
log(f"File not in database: {file_path_obj.name}", file=sys.stderr)
continue
# Get current relationships
cursor = db.db.connection.cursor()
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (file_id,))
row = cursor.fetchone()
if not row:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
relationships_str = row[0]
if not relationships_str:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
try:
relationships = json.loads(relationships_str)
except json.JSONDecodeError:
log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr)
continue
if not isinstance(relationships, dict):
relationships = {}
# Determine what to delete
if delete_all_flag:
# Delete all relationships
deleted_types = list(relationships.keys())
relationships = {}
log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr)
elif rel_type_filter:
# Delete specific type
if rel_type_filter in relationships:
deleted_count_for_type = len(relationships[rel_type_filter])
del relationships[rel_type_filter]
log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr)
else:
log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
return 1
# Save updated relationships
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (file_id, json.dumps(relationships) if relationships else None))
db.db.connection.commit()
deleted_count += 1
except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1
log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0
except Exception as exc:
log(f"Error in delete-relationship: {exc}", file=sys.stderr)
return 1
CMDLET = Cmdlet(
name="delete-relationship",
summary="Remove relationships from files.",
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
args=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
],
details=[
"- Delete all relationships: pipe files | delete-relationship --all",
"- Delete specific type: pipe files | delete-relationship -type alt",
"- Delete all from file: delete-relationship -path <file> --all",
],
)
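
The save step above relies on SQLite upsert semantics so a metadata row is created on first write and updated afterwards. A self-contained sketch of that pattern (the schema here is an assumption that mirrors the columns referenced in the query; ON CONFLICT(file_id) requires file_id to be a primary key or unique column):

import json
import sqlite3

SCHEMA = """
CREATE TABLE IF NOT EXISTS metadata (
    file_id INTEGER PRIMARY KEY,
    relationships TEXT,
    time_modified TEXT DEFAULT CURRENT_TIMESTAMP
)
"""

def save_relationships(conn: sqlite3.Connection, file_id: int, relationships: dict) -> None:
    """Insert or update the relationships JSON for one file."""
    conn.execute(
        """
        INSERT INTO metadata (file_id, relationships)
        VALUES (?, ?)
        ON CONFLICT(file_id) DO UPDATE SET
            relationships = excluded.relationships,
            time_modified = CURRENT_TIMESTAMP
        """,
        (file_id, json.dumps(relationships) if relationships else None),
    )
    conn.commit()

# conn = sqlite3.connect(":memory:"); conn.executescript(SCHEMA)
# save_relationships(conn, 1, {"alt": ["<hash>"]})
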

View File

@@ -243,8 +243,8 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No
# Fallback: assume file is in a library root or use its parent
local_root = path_obj.parent
- db = LocalLibraryDB(local_root)
+ with LocalLibraryDB(local_root) as db:
db.remove_tags(path_obj, tags)
debug(f"Removed {len(tags)} tag(s) from {path_obj.name} (local)")
return True

View File

@@ -1224,12 +1224,31 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
# Define LazyDB proxy to avoid keeping DB connection open for long duration
class LazyDB:
def __init__(self, root):
self.root = root
def _op(self, func_name, *args, **kwargs):
try:
with LocalLibraryDB(self.root) as db:
func = getattr(db, func_name)
return func(*args, **kwargs)
except Exception as e:
# Log error but don't crash
pass
def insert_worker(self, *args, **kwargs): self._op('insert_worker', *args, **kwargs)
def update_worker_status(self, *args, **kwargs): self._op('update_worker_status', *args, **kwargs)
def append_worker_stdout(self, *args, **kwargs): self._op('append_worker_stdout', *args, **kwargs)
def close(self): pass
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
db = None
if library_root:
try:
- db = LocalLibraryDB(library_root)
+ db = LazyDB(library_root)
db.insert_worker(
worker_id,
"download",
@@ -1812,12 +1831,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
current_format_selector = format_id
# If it's a video-only format (has vcodec but no acodec), add bestaudio
+ # BUT: Skip this for -section downloads because combining formats causes re-encoding
+ # For -section, use formats that already have audio (muxed) to avoid FFmpeg re-encoding
vcodec = url.get('vcodec', '')
acodec = url.get('acodec', '')
if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
- # Video-only format, add bestaudio automatically
- current_format_selector = f"{format_id}+bestaudio"
- debug(f" Video-only format detected, automatically adding bestaudio")
+ if not clip_range and not section_ranges:
+ # Only add bestaudio if NOT doing -section or -clip
+ # For section downloads, we need muxed formats to avoid re-encoding
+ current_format_selector = f"{format_id}+bestaudio"
+ debug(f" Video-only format detected, automatically adding bestaudio")
+ else:
+ debug(f" Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)")
actual_url = url.get('source_url')
url = actual_url # Use the actual URL for further processing
@@ -2488,11 +2513,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
current_format_selector = fmt.get("format_id") current_format_selector = fmt.get("format_id")
# If video-only format is selected, append +bestaudio to merge with best audio # If video-only format is selected, append +bestaudio to merge with best audio
# BUT: Skip this for -section downloads because combining formats causes re-encoding
vcodec = fmt.get("vcodec") vcodec = fmt.get("vcodec")
acodec = fmt.get("acodec") acodec = fmt.get("acodec")
if vcodec and vcodec != "none" and (not acodec or acodec == "none"): if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
current_format_selector = f"{current_format_selector}+bestaudio" if not clip_range and not section_ranges:
debug(f"Video-only format selected, appending bestaudio: {current_format_selector}") # Only add bestaudio if NOT doing -section or -clip
current_format_selector = f"{current_format_selector}+bestaudio"
debug(f"Video-only format selected, appending bestaudio: {current_format_selector}")
else:
debug(f"Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)")
debug(f"Selected format #{idx}: {current_format_selector}") debug(f"Selected format #{idx}: {current_format_selector}")
playlist_items = None # Clear so it doesn't affect download options playlist_items = None # Clear so it doesn't affect download options
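
Both hunks above implement the same rule: only append "+bestaudio" to a video-only format when no -clip or -section ranges are requested, because merging separate video and audio streams around a cut forces FFmpeg to re-encode. A small hedged sketch of that decision in isolation:

from typing import Optional

def choose_format_selector(format_id: str,
                           vcodec: Optional[str],
                           acodec: Optional[str],
                           cutting_sections: bool) -> str:
    """Pick a yt-dlp format selector for one chosen format.

    Video-only formats normally get "+bestaudio" so yt-dlp merges in an audio stream;
    when sections or clips are being cut the format is used as-is so the download
    stays a single muxed stream and avoids a re-encode.
    """
    video_only = bool(vcodec) and vcodec != "none" and (not acodec or acodec == "none")
    if video_only and not cutting_sections:
        return f"{format_id}+bestaudio"
    return format_id

# choose_format_selector("299", "avc1", "none", cutting_sections=True) -> "299"
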

View File

@@ -75,23 +75,32 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not source_title or source_title == "Unknown":
source_title = path_obj.name
+ print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
+ print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
if path_obj.exists():
storage_path = get_local_storage_path(config)
+ print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with LocalLibraryDB(storage_path) as db:
metadata = db.get_metadata(path_obj)
+ print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
+ print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
+ print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
- # Try to resolve hash to filename if possible
+ # h is now a file hash (not a path)
+ print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
+ # Resolve hash to file path
resolved_path = db.search_by_hash(h)
- title = h
+ title = h[:16] + "..."
path = None
- if resolved_path:
+ if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
@@ -114,6 +123,177 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"path": path, "path": path,
"origin": "local" "origin": "local"
}) })
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
# Also add the king/parent itself if not already in results
if not any(str(r['hash']).lower() == str(path).lower() for r in found_relationships):
parent_title = parent_path_obj.stem
try:
parent_tags = db.get_tags(parent_path_obj)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
found_relationships.append({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"origin": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
for r in found_relationships:
if str(r['hash']).lower() == str(path).lower():
if rel_type.lower() == "alt":
r['type'] = "king"
break
# 1. Check forward relationships from parent (siblings)
parent_metadata = db.get_metadata(parent_path_obj)
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_by_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue
# Skip the current file we're querying
if str(child_path_obj).lower() == str(path_obj).lower():
print(f"[DEBUG] ⏭️ Skipping current file: {child_path_obj}", file=sys.stderr)
continue
# Check if already added (case-insensitive hash check)
if any(str(r['hash']).lower() == str(child_h).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue
# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
found_relationships.append({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"origin": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to parent but parent doesn't point to siblings
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
# Skip the current file
if str(child_path).lower() == str(path_obj).lower():
print(f"[DEBUG] ⏭️ Skipping self", file=sys.stderr)
continue
# Skip if already added (check by path, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue
child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
found_relationships.append({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"origin": "local"
})
except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "origin": "local"
# })
except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)

View File

@@ -223,6 +223,42 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pause_mode = parsed.get("pause") pause_mode = parsed.get("pause")
save_mode = parsed.get("save") save_mode = parsed.get("save")
load_mode = parsed.get("load") load_mode = parsed.get("load")
current_mode = parsed.get("current")
# Handle --current flag: emit currently playing item to pipeline
if current_mode:
items = _get_playlist()
if items is None:
debug("MPV is not running or not accessible.", file=sys.stderr)
return 1
# Find the currently playing item
current_item = None
for item in items:
if item.get("current", False):
current_item = item
break
if current_item is None:
debug("No item is currently playing.", file=sys.stderr)
return 1
# Build result object with file info
title = _extract_title_from_item(current_item)
filename = current_item.get("filename", "")
# Emit the current item to pipeline
result_obj = {
'file_path': filename,
'title': title,
'cmdlet_name': '.pipe',
'source': 'pipe',
'__pipe_index': items.index(current_item),
}
ctx.emit(result_obj)
debug(f"Emitted current item: {title}")
return 0
# Handle URL queuing
mpv_started = False
@@ -599,7 +635,7 @@ CMDLET = Cmdlet(
name=".pipe", name=".pipe",
aliases=["pipe", "playlist", "queue", "ls-pipe"], aliases=["pipe", "playlist", "queue", "ls-pipe"],
summary="Manage and play items in the MPV playlist via IPC", summary="Manage and play items in the MPV playlist via IPC",
usage=".pipe [index|url] [-clear] [-url URL]", usage=".pipe [index|url] [-current] [-clear] [-list] [-url URL]",
args=[ args=[
CmdletArg( CmdletArg(
name="index", name="index",
@@ -643,6 +679,11 @@ CMDLET = Cmdlet(
type="flag", type="flag",
description="List saved playlists" description="List saved playlists"
), ),
CmdletArg(
name="current",
type="flag",
description="Emit the currently playing item to pipeline for further processing"
),
],
exec=_run
)
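
The new -current flag depends on reading MPV's playlist over its JSON IPC endpoint and picking the entry flagged as current; the _get_playlist and _extract_title_from_item helpers it calls are not shown in this diff. A minimal, hedged sketch of that query (the socket path is an assumption; on Windows mpv exposes a named pipe instead of a Unix socket):

import json
import socket
from typing import Any, Dict, List, Optional

def get_mpv_playlist(ipc_path: str = "/tmp/mpv-socket") -> Optional[List[Dict[str, Any]]]:
    """Ask a running mpv instance for its playlist via JSON IPC."""
    try:
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
            sock.connect(ipc_path)
            sock.sendall(json.dumps({"command": ["get_property", "playlist"]}).encode() + b"\n")
            raw = sock.recv(1 << 20).decode()
    except OSError:
        return None  # mpv not running or socket missing
    for line in raw.splitlines():
        reply = json.loads(line)
        if "data" in reply:
            return reply["data"]  # entries look like {"filename": ..., "current": bool, ...}
    return None

def current_playlist_item(items: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Return the entry mpv marks as currently playing, if any."""
    return next((item for item in items if item.get("current")), None)
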

View File

@@ -88,9 +88,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
from helper.local_library import LocalLibraryDB
- db: LocalLibraryDB | None = None
- try:
- db = LocalLibraryDB(library_root)
+ with LocalLibraryDB(library_root) as db:
if clear_requested:
count = db.clear_finished_workers()
log(f"Cleared {count} finished workers.")
@@ -115,9 +113,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if selection_requested:
return _render_worker_selection(db, result)
return _render_worker_list(db, status_filter, limit)
- finally:
- if db:
- db.close()
except Exception as exc:
log(f"Workers query failed: {exc}", file=sys.stderr)
import traceback

View File

@@ -8,7 +8,12 @@ Lean, focused downloader without event infrastructure overhead.
""" """
from __future__ import annotations from __future__ import annotations
import re # noqa: F401 import glob # noqa: F401
import hashlib
import json # noqa: F401
import random
import re
import string
import subprocess import subprocess
import sys import sys
import time import time
@@ -157,31 +162,72 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None
- def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> None:
+ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.
yt-dlp with multiple --download-sections args merges them into one file.
We need separate files for merge-file, so download each section individually.
+ Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
+ thinking sections are already downloaded. The title is extracted and stored in tags.
+ Returns:
+ (session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
"""
sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
- return
+ return "", {}
- # Download each section separately with unique output template
+ # Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
title_from_first = None
# Download each section separately with unique output template using session ID
for section_idx, section in enumerate(sections_list, 1):
- # Build unique output template for this section
- # e.g., "title.section_1_of_3.ext" for the first section
+ # Build unique output template for this section using session-based filename
+ # e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
+ output_dir_path = Path(base_outtmpl).parent
- # Insert section number before extension
- # e.g., "/path/title.hash.webm" → "/path/title.hash.section_1_of_3.webm"
+ # Use session_id + section index for temp filename
+ # e.g., "/path/{session_id}_1.%(ext)s"
+ filename_tmpl = f"{session_id}_{section_idx}"
if base_outtmpl.endswith(".%(ext)s"):
- section_outtmpl = base_outtmpl.replace(".%(ext)s", f".section_{section_idx}_of_{len(sections_list)}.%(ext)s")
+ filename_tmpl += ".%(ext)s"
- else:
- section_outtmpl = base_outtmpl + f".section_{section_idx}_of_{len(sections_list)}"
- # Build yt-dlp command for this section
+ # Use Path to handle separators correctly for the OS
+ section_outtmpl = str(output_dir_path / filename_tmpl)
# For the first section, extract metadata first (separate call)
if section_idx == 1:
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
if ytdl_options.get("cookiefile"):
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
metadata_cmd.extend(["--cookies", cookies_path])
if ytdl_options.get("noplaylist"):
metadata_cmd.append("--no-playlist")
metadata_cmd.append(url)
try:
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
if meta_result.returncode == 0 and meta_result.stdout:
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
debug("Could not parse JSON metadata")
except Exception as e:
debug(f"Error extracting metadata: {e}")
# Build yt-dlp command for downloading this section
cmd = ["yt-dlp"] cmd = ["yt-dlp"]
# Add format # Add format
@@ -212,15 +258,19 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
cmd.append(url)
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
+ debug(f"Command: {' '.join(cmd)}")
- # Run the subprocess
+ # Run the subprocess - don't capture output so progress is shown
try:
- result = subprocess.run(cmd, capture_output=False, text=True)
+ result = subprocess.run(cmd)
if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
+ return session_id, first_section_info or {}
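
The session id gives every temporary section file a collision-free stem before it is later renamed to its content hash. A standalone sketch of that naming scheme (the helper names are illustrative, not from the module):

import hashlib
import random
import string
import time
from pathlib import Path

def make_session_id(url: str) -> str:
    """Short, effectively unique id for one download session (md5 of url + time + random salt)."""
    salt = ''.join(random.choices(string.ascii_letters, k=10))
    return hashlib.md5((url + str(time.time()) + salt).encode()).hexdigest()[:12]

def section_outtmpl(output_dir: Path, session_id: str, section_idx: int) -> str:
    """yt-dlp output template for one section, e.g. '<dir>/ab12cd34ef56_1.%(ext)s'."""
    return str(output_dir / f"{session_id}_{section_idx}.%(ext)s")
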
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
"""Build yt-dlp download options."""
@@ -296,8 +346,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
- # Force keyframes at cuts for accurate section boundaries
- base_options["force_keyframes_at_cuts"] = True
+ # Note: Not using --force-keyframes-at-cuts to avoid re-encoding
+ # This may result in less precise cuts but faster downloads
# Add playlist items selection if provided
if opts.playlist_items:
@@ -751,8 +801,10 @@ def download_media(
debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}") debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")
# Use subprocess when download_sections are present (Python API doesn't support them properly) # Use subprocess when download_sections are present (Python API doesn't support them properly)
session_id = None
first_section_info = {}
if ytdl_options.get("download_sections"): if ytdl_options.get("download_sections"):
_download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", [])) session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
info = None info = None
else: else:
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type] with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
@@ -780,7 +832,7 @@ def download_media(
import re
# Get the expected filename pattern from outtmpl
- # For sections: "C:\path\title.section_1_of_3.ext", "C:\path\title.section_2_of_3.ext", etc.
+ # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc.
# For non-sections: "C:\path\title.ext"
# Wait a moment to ensure files are fully written
@@ -791,10 +843,10 @@ def download_media(
if not files:
raise FileNotFoundError(f"No files found in {opts.output_dir}")
- # If we downloaded sections, look for files with .section_N_of_M pattern
- if opts.clip_sections:
- # Pattern: "title.section_1_of_3.ext", "title.section_2_of_3.ext", etc.
- section_pattern = re.compile(r'\.section_(\d+)_of_(\d+)\.')
+ # If we downloaded sections, look for files with the session_id pattern
+ if opts.clip_sections and session_id:
+ # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
+ section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
matching_files = [f for f in files if section_pattern.search(f.name)]
if matching_files:
@@ -804,13 +856,44 @@ def download_media(
return int(match.group(1)) if match else 999
matching_files.sort(key=extract_section_num)
- media_path = matching_files[0] # First section
- media_paths = matching_files # All sections
- debug(f"✓ Downloaded {len(media_paths)} section file(s)")
+ debug(f"Found {len(matching_files)} section file(s) matching pattern")
+ # Now rename section files to use hash-based names
+ # This ensures unique filenames for each section content
# This ensures unique filenames for each section content
renamed_files = []
for idx, section_file in enumerate(matching_files, 1):
try:
# Calculate hash for the file
file_hash = sha256_file(section_file)
ext = section_file.suffix
new_name = f"{file_hash}{ext}"
new_path = opts.output_dir / new_name
if new_path.exists() and new_path != section_file:
# If file with same hash exists, use it and delete the temp one
debug(f"File with hash {file_hash} already exists, using existing file.")
try:
section_file.unlink()
except OSError:
pass
renamed_files.append(new_path)
else:
section_file.rename(new_path)
debug(f"Renamed section file: {section_file.name}{new_name}")
renamed_files.append(new_path)
except Exception as e:
debug(f"Failed to process section file {section_file.name}: {e}")
renamed_files.append(section_file)
media_path = renamed_files[0]
media_paths = renamed_files
debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
else:
# Fallback to most recent file if pattern not found
media_path = files[0]
media_paths = None
+ debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
else:
# No sections, just take the most recent file
media_path = files[0]
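
After download, each temporary session-named section file is renamed to a name derived from its SHA-256 content hash, reusing an existing file when the same hash is already on disk. A condensed sketch of that rename-with-dedup step (sha256_file mirrors the helper used above):

import hashlib
from pathlib import Path
from typing import List

def sha256_file(path: Path) -> str:
    """Hex SHA-256 of a file, read in chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

def rename_to_hash(section_files: List[Path], output_dir: Path) -> List[Path]:
    """Rename temp section files to '<sha256><ext>', reusing duplicates already on disk."""
    renamed: List[Path] = []
    for section_file in section_files:
        target = output_dir / f"{sha256_file(section_file)}{section_file.suffix}"
        if target.exists() and target != section_file:
            section_file.unlink(missing_ok=True)  # same content already stored
        else:
            section_file.rename(target)
        renamed.append(target)
    return renamed
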
@@ -830,10 +913,30 @@ def download_media(
# Create result with minimal data extracted from filename
file_hash = sha256_file(media_path)
# For section downloads, create tags with the title and build proper info dict
tags = []
title = ''
if first_section_info:
title = first_section_info.get('title', '')
if title:
tags.append(f'title:{title}')
debug(f"Added title tag for section download: {title}")
# Build info dict - always use extracted title if available, not hash
if first_section_info:
info_dict = first_section_info
else:
info_dict = {
"id": media_path.stem,
"title": title or media_path.stem,
"ext": media_path.suffix.lstrip(".")
}
return DownloadMediaResult(
path=media_path,
- info={"id": media_path.stem, "title": media_path.stem, "ext": media_path.suffix.lstrip(".")},
- tags=[],
+ info=info_dict,
+ tags=tags,
source_url=opts.url,
hash_value=file_hash,
paths=media_paths, # Include all section files if present

View File

@@ -137,14 +137,14 @@ class LocalStorageBackend(StorageBackend):
# Check for duplicate files using LocalLibraryDB (fast - uses index)
try:
- db = LocalLibraryDB(dest_dir)
+ with LocalLibraryDB(dest_dir) as db:
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
except Exception as exc:
log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)
@@ -205,247 +205,156 @@ class LocalStorageBackend(StorageBackend):
# Try database search first (much faster than filesystem scan)
try:
- db = LocalLibraryDB(search_dir)
+ with LocalLibraryDB(search_dir) as db:
cursor = db.connection.cursor()
# Check if query is a tag namespace search (format: "namespace:pattern")
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")
# Filter results by pattern match
for file_id, file_path_str, size_bytes in rows:
if not file_path_str:
continue
# Get the file's tags and check if any match the pattern
cursor.execute(""" cursor.execute("""
SELECT DISTINCT tag FROM tags SELECT DISTINCT f.id, f.file_path, f.file_size
WHERE file_id = ? FROM files f
AND LOWER(tag) LIKE ? JOIN tags t ON f.id = t.file_id
""", (file_id, query_pattern)) WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tags = [row[0] for row in cursor.fetchall()] rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")
# Check if any tag matches the pattern (case-insensitive wildcard) # Filter results by pattern match
for tag in tags: for file_id, file_path_str, size_bytes in rows:
tag_lower = tag.lower() if not file_path_str:
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch all tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": all_tags,
})
else:
debug(f"File missing on disk: {file_path}")
break # Don't add same file multiple times
if limit is not None and len(results) >= limit:
return results
elif not match_all:
# Search by filename or simple tags (namespace-agnostic for plain text)
# For plain text search, match:
# 1. Filenames containing the query
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
# Compile regex for whole word matching (only if single term, otherwise skip)
word_regex = None
if len(terms) == 1:
term = terms[0]
# Check if term contains wildcard characters
has_wildcard = '*' in term or '?' in term
if has_wildcard:
# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
# Use custom boundary that treats underscores as separators
# \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
try:
# Match if not preceded or followed by alphanumeric chars
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None
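
The custom boundary pattern above exists because \b treats '_' as a word character, so a plain r'\bterm\b' search misses terms embedded in snake_case filenames. A small standalone illustration of the difference:

import re

def word_match(term: str, text: str) -> bool:
    """Match term as a whole word, treating '_' (and any non-alphanumeric) as a separator."""
    pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
    return re.search(pattern, text, re.IGNORECASE) is not None

# r'\bbar\b' fails on "foo_bar.mkv" because '_' counts as a word character:
assert re.search(r'\bbar\b', "foo_bar.mkv") is None
# The lookaround boundary treats '_' as a separator and matches:
assert word_match("bar", "foo_bar.mkv")
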
seen_files = set()
                for file_id, file_path_str, size_bytes in rows:
                    if not file_path_str or file_path_str in seen_files:
                        continue
                    seen_files.add(file_path_str)

                    # Get the file's tags and check if any match the pattern
                    cursor.execute("""
                        SELECT DISTINCT tag FROM tags
                        WHERE file_id = ?
                        AND LOWER(tag) LIKE ?
                    """, (file_id, query_pattern))
                    tags = [row[0] for row in cursor.fetchall()]

                    # Check if any tag matches the pattern (case-insensitive wildcard)
                    for tag in tags:
                        tag_lower = tag.lower()
                        # Extract the value part after "namespace:"
                        if tag_lower.startswith(f"{namespace}:"):
                            value = tag_lower[len(namespace)+1:]
                            # Use fnmatch for wildcard matching
                            if fnmatch(value, pattern):
                                file_path = Path(file_path_str)
                                if file_path.exists():
                                    path_str = str(file_path)
                                    if size_bytes is None:
                                        size_bytes = file_path.stat().st_size
                                    # Fetch all tags for this file
                                    cursor.execute("""
                                        SELECT tag FROM tags WHERE file_id = ?
                                    """, (file_id,))
                                    all_tags = [row[0] for row in cursor.fetchall()]
                                    # Use title tag if present
                                    title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
                                    results.append({
                                        "name": file_path.stem,
                                        "title": title_tag or file_path.stem,
                                        "ext": file_path.suffix.lstrip('.'),
                                        "path": path_str,
                                        "target": path_str,
                                        "origin": "local",
                                        "size": size_bytes,
                                        "size_bytes": size_bytes,
                                        "tags": all_tags,
                                    })
                                else:
                                    debug(f"File missing on disk: {file_path}")
                                break  # Don't add same file multiple times

                if limit is not None and len(results) >= limit:
                    return results

            elif not match_all:
                # Search by filename or simple tags (namespace-agnostic for plain text)
                # For plain text search, match:
                #   1. Filenames containing the query
                #   2. Simple tags (without namespace) containing the query
                # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
                #       Use explicit namespace search for that (e.g., "channel:joe*")

                # Split query into terms for AND logic
                terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
                if not terms:
                    terms = [query_lower]
                debug(f"Performing filename/tag search for terms: {terms}")

                # Fetch more results than requested to allow for filtering
                fetch_limit = (limit or 45) * 50

                # 1. Filename search (AND logic)
                conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
                params = [f"%{t}%" for t in terms]
                where_clause = " AND ".join(conditions)
                cursor.execute(f"""
                    SELECT DISTINCT f.id, f.file_path, f.file_size
                    FROM files f
                    WHERE {where_clause}
                    ORDER BY f.file_path
                    LIMIT ?
                """, (*params, fetch_limit))
                rows = cursor.fetchall()
                debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")

                # Compile regex for whole word matching (only if single term, otherwise skip)
                word_regex = None
                if len(terms) == 1:
                    term = terms[0]
                    # Check if term contains wildcard characters
                    has_wildcard = '*' in term or '?' in term
                    if has_wildcard:
                        # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
                        try:
                            from fnmatch import translate
                            word_regex = re.compile(translate(term), re.IGNORECASE)
                        except Exception:
                            word_regex = None
                    else:
                        # Use custom boundary that treats underscores as separators
                        # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
                        try:
                            # Match if not preceded or followed by alphanumeric chars
                            pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
                            word_regex = re.compile(pattern, re.IGNORECASE)
                        except Exception:
                            word_regex = None

                seen_files = set()
                for file_id, file_path_str, size_bytes in rows:
                    if not file_path_str or file_path_str in seen_files:
                        continue
                    # Apply whole word filter on filename if single term
                    if word_regex:
                        p = Path(file_path_str)
                        if not word_regex.search(p.name):
                            continue
                    seen_files.add(file_path_str)
                    file_path = Path(file_path_str)
                    if file_path.exists():
                        path_str = str(file_path)
@@ -473,11 +382,102 @@ class LocalStorageBackend(StorageBackend):
"tags": tags, "tags": tags,
}) })
if results: # Also search for simple tags (without namespace) containing the query
debug(f"Returning {len(results)} results from DB") # Only perform tag search if single term, or if we want to support multi-term tag search
else: # For now, fallback to single pattern search for tags if multiple terms
debug("No results found in DB") # (searching for a tag that contains "term1 term2" or "term1,term2")
return results # This is less useful for AND logic across multiple tags, but consistent with previous behavior
query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tag_rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in tag_rows:
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if limit is not None and len(results) >= limit:
return results
else:
# Match all - get all files from database
cursor.execute("""
SELECT id, file_path, file_size
FROM files
ORDER BY file_path
LIMIT ?
""", (limit or 1000,))
rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in rows:
if file_path_str:
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if results:
debug(f"Returning {len(results)} results from DB")
else:
debug("No results found in DB")
return results
except Exception as e: except Exception as e:
log(f"⚠️ Database search failed: {e}", file=sys.stderr) log(f"⚠️ Database search failed: {e}", file=sys.stderr)
@@ -1175,6 +1175,161 @@ class MatrixStorageBackend(StorageBackend):
            raise
class RemoteStorageBackend(StorageBackend):
"""File storage backend for remote Android/network storage servers.
Connects to a remote storage server (e.g., running on Android phone)
via REST API. All operations are proxied to the remote server.
"""
def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None:
"""Initialize remote storage backend.
Args:
server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000)
timeout: Request timeout in seconds
api_key: Optional API key for authentication
"""
try:
import requests
except ImportError:
raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests")
self.server_url = server_url.rstrip('/')
self.timeout = timeout
self.api_key = api_key
self._session = requests.Session()
# Add API key to default headers if provided
if self.api_key:
self._session.headers.update({'X-API-Key': self.api_key})
def get_name(self) -> str:
return "remote"
def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
"""Make HTTP request to remote server."""
import requests
from urllib.parse import urljoin
url = urljoin(self.server_url, endpoint)
try:
response = self._session.request(
method,
url,
timeout=self.timeout,
**kwargs
)
if response.status_code == 404:
raise Exception(f"Remote resource not found: {endpoint}")
if response.status_code >= 400:
try:
error_data = response.json()
error_msg = error_data.get('error', response.text)
                except Exception:
error_msg = response.text
raise Exception(f"Remote server error {response.status_code}: {error_msg}")
return response.json()
except requests.exceptions.RequestException as e:
raise Exception(f"Connection to {self.server_url} failed: {e}")
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to remote storage.
Args:
file_path: Path to the file to upload
tags: Optional list of tags to add
urls: Optional list of known URLs
Returns:
Remote file hash
"""
from helper.utils import sha256_file
if not file_path.exists():
raise ValueError(f"File not found: {file_path}")
try:
# Index the file on remote server
data = {"path": str(file_path)}
tags = kwargs.get("tags", [])
if tags:
data["tags"] = tags
urls = kwargs.get("urls", [])
if urls:
data["urls"] = urls
result = self._request('POST', '/files/index', json=data)
file_hash = result.get('hash')
if file_hash:
log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr)
return file_hash
else:
raise Exception("Remote server did not return file hash")
except Exception as exc:
debug(f"Remote upload failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search files on remote storage.
Args:
query: Search query
limit: Maximum results
Returns:
List of search results
"""
limit = kwargs.get("limit")
try:
limit = int(limit) if limit is not None else 100
except (TypeError, ValueError):
limit = 100
if limit <= 0:
limit = 100
try:
response = self._request('GET', '/files/search', params={
'q': query,
'limit': limit
})
files = response.get('files', [])
# Transform remote format to standard result format
results = []
for f in files:
results.append({
"name": f.get('name', '').split('/')[-1], # Get filename from path
"title": f.get('name', f.get('path', '')).split('/')[-1],
"ext": f.get('ext', ''),
"path": f.get('path', ''),
"target": f.get('path', ''),
"hash": f.get('hash', ''),
"origin": "remote",
"size": f.get('size', 0),
"size_bytes": f.get('size', 0),
"tags": f.get('tags', []),
})
debug(f"Remote search found {len(results)} results", file=sys.stderr)
return results
except Exception as exc:
log(f"❌ Remote search failed: {exc}", file=sys.stderr)
raise
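
    # Illustrative usage sketch (not part of the backend API surface; the URL
    # and API key below are placeholders):
    #
    #   backend = RemoteStorageBackend("http://192.168.1.100:5000", api_key="mysecretkey")
    #   for item in backend.search("zohar", limit=10):
    #       print(item["path"], item["hash"])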
class FileStorage:
    """Unified file storage interface supporting multiple backend services.
@@ -1224,6 +1379,25 @@ class FileStorage:
        # Include Matrix backend
        self._backends["matrix"] = MatrixStorageBackend()
# Include remote storage backends from config (for Android/network servers)
remote_storages = config.get("remote_storages", [])
if isinstance(remote_storages, list):
for remote_config in remote_storages:
if isinstance(remote_config, dict):
name = remote_config.get("name", "remote")
url = remote_config.get("url")
timeout = remote_config.get("timeout", 30)
api_key = remote_config.get("api_key")
if url:
try:
backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key)
self._backends[name] = backend
auth_status = " (with auth)" if api_key else " (no auth)"
log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
except Exception as e:
log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
    def __getitem__(self, backend_name: str) -> StorageBackend:
        """Get a storage backend by name.

View File

@@ -19,6 +19,8 @@ from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List, Tuple, Set
from .utils import sha256_file
logger = logging.getLogger(__name__)

# Try to import optional dependencies
@@ -455,6 +457,18 @@ class LocalLibraryDB:
logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True) logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True)
raise raise
def get_file_id(self, file_path: Path) -> Optional[int]:
"""Get the file ID for a file path, or None if not found."""
try:
str_path = str(file_path.resolve())
cursor = self.connection.cursor()
cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,))
row = cursor.fetchone()
return row[0] if row else None
except Exception as e:
logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True)
return None
    def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """Get metadata for a file."""
        try:
@@ -748,6 +762,177 @@ class LocalLibraryDB:
logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True) logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True)
raise raise
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
"""Set a relationship between two local files.
Args:
file_path: Path to the file being related
related_file_path: Path to the related file
rel_type: Type of relationship ('king', 'alt', 'related')
"""
try:
str_path = str(file_path.resolve())
str_related_path = str(related_file_path.resolve())
file_id = self.get_or_create_file_entry(file_path)
related_file_id = self.get_or_create_file_entry(related_file_path)
cursor = self.connection.cursor()
# Get hashes for both files
file_hash = sha256_file(file_path)
related_file_hash = sha256_file(related_file_path)
if not file_hash or not related_file_hash:
logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}")
return
# Store the hashes in the files table for future lookups
cursor.execute("""
UPDATE files SET file_hash = ? WHERE id = ?
""", (file_hash, file_id))
cursor.execute("""
UPDATE files SET file_hash = ? WHERE id = ?
""", (related_file_hash, related_file_id))
# Get current relationships
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (file_id,))
row = cursor.fetchone()
# Use index access to be safe regardless of row_factory
relationships_str = row[0] if row else None
try:
if relationships_str:
relationships = json.loads(relationships_str)
else:
relationships = {}
except (json.JSONDecodeError, TypeError):
relationships = {}
# Ensure relationships is a dict (handle case where DB has a list)
if not isinstance(relationships, dict):
relationships = {}
# Ensure rel_type key exists
if rel_type not in relationships:
relationships[rel_type] = []
# Add the relationship (store as hash string)
if related_file_hash not in relationships[rel_type]:
relationships[rel_type].append(related_file_hash)
# Save the updated relationships for the main file
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (file_id, json.dumps(relationships)))
logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})")
# Set reverse relationship (bidirectional)
# For 'alt' and 'related', the reverse is the same
# For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does)
# Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates'
reverse_type = rel_type
# Update the related file
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (related_file_id,))
row = cursor.fetchone()
relationships_str = row[0] if row else None
try:
if relationships_str:
reverse_relationships = json.loads(relationships_str)
else:
reverse_relationships = {}
except (json.JSONDecodeError, TypeError):
reverse_relationships = {}
if not isinstance(reverse_relationships, dict):
reverse_relationships = {}
if reverse_type not in reverse_relationships:
reverse_relationships[reverse_type] = []
if file_hash not in reverse_relationships[reverse_type]:
reverse_relationships[reverse_type].append(file_hash)
# Save the updated reverse relationships
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (related_file_id, json.dumps(reverse_relationships)))
self.connection.commit()
except Exception as e:
logger.error(f"Error setting relationship: {e}", exc_info=True)
raise
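
    # The relationships column ends up holding JSON keyed by relationship type,
    # with lists of file hashes as values, and a mirrored entry is written to
    # the related file's metadata row. Shape sketch (hashes shortened for
    # illustration):
    #
    #   {"alt": ["3f2a…"], "king": ["9b0c…"]}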
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
"""Find all files that have a relationship pointing to the target path.
Args:
target_path: The file path to look for in other files' relationships
Returns:
List of dicts with {path, type} for files pointing to target
"""
try:
# Get the hash of the target file
target_hash = sha256_file(target_path)
if not target_hash:
logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash")
return []
cursor = self.connection.cursor()
# Scan all metadata (this might be slow on huge DBs but fine for local library)
# We select file_path and relationships json
cursor.execute("""
SELECT f.file_path, m.relationships
FROM metadata m
JOIN files f ON m.file_id = f.id
WHERE m.relationships LIKE ?
""", (f"%{target_hash}%",))
results = []
for row in cursor.fetchall():
f_path = row[0]
rels_json = row[1]
try:
rels = json.loads(rels_json)
if isinstance(rels, dict):
for r_type, hashes in rels.items():
if isinstance(hashes, list):
# Check if target hash is in this relationship type
if target_hash in hashes:
results.append({
"path": f_path,
"type": r_type
})
except (json.JSONDecodeError, TypeError):
continue
return results
except Exception as e:
logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True)
return []
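
    # Illustrative call and return shape (paths are placeholders):
    #
    #   find_files_pointing_to(Path("/library/clip_v2.mp4"))
    #   -> [{"path": "/library/clip_v1.mp4", "type": "alt"}]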
    def get_note(self, file_path: Path) -> Optional[str]:
        """Get note for a file."""
        try:
@@ -1076,6 +1261,11 @@ class LocalLibraryDB:
        if not text:
            return True
        try:
# Check if connection is valid
if not self.connection:
logger.warning(f"Database connection not available for worker {worker_id}")
return False
            cursor = self.connection.cursor()
            cursor.execute("SELECT stdout FROM worker WHERE worker_id = ?", (worker_id,))
            row = cursor.fetchone()
@@ -1097,6 +1287,13 @@ class LocalLibraryDB:
            self.connection.commit()
            return cursor.rowcount > 0
except sqlite3.ProgrammingError as e:
# Handle "Cannot operate on a closed database" gracefully
if "closed database" in str(e).lower():
logger.warning(f"Database connection closed, cannot append stdout for worker {worker_id}")
return False
logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
return False
        except Exception as e:
            logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
            return False
@@ -1599,3 +1796,23 @@ class LocalLibrarySearchOptimizer:
        if not self.db:
            return None
        return self.db.search_by_hash(file_hash)
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
"""Set a relationship between two files in the database.
Delegates to LocalLibraryDB.set_relationship().
Args:
file_path: Path to the first file
related_file_path: Path to the related file
rel_type: Type of relationship ('king', 'alt', 'related', etc.)
"""
if not self.db:
return
self.db.set_relationship(file_path, related_file_path, rel_type)
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
"""Find all files that have a relationship pointing to the target path."""
if not self.db:
return []
return self.db.find_files_pointing_to(target_path)
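
    # Illustrative usage of the optimizer wrapper (the storage path and file
    # paths are placeholders):
    #
    #   with LocalLibrarySearchOptimizer(Path("/storage/media")) as db:
    #       db.set_relationship(Path("a.mp4"), Path("b.mp4"), "alt")
    #       print(db.find_files_pointing_to(Path("b.mp4")))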

View File

@@ -0,0 +1,523 @@
"""Remote Storage Server - REST API for file management on mobile devices.
This server runs on a mobile device (Android with Termux, iOS with iSH, etc.)
and exposes the local library database as a REST API. Your PC connects to this
server and uses it as a remote storage backend through the RemoteStorageBackend.
## INSTALLATION
### On Android (Termux):
1. Install Termux from Play Store: https://play.google.com/store/apps/details?id=com.termux
2. In Termux:
$ apt update && apt install python
$ pip install flask flask-cors
3. Copy this file to your device
4. Run it (with optional API key):
$ python remote_storage_server.py --storage-path /path/to/storage --port 5000
$ python remote_storage_server.py --storage-path /path/to/storage --api-key mysecretkey
5. Server prints connection info automatically (IP, port, API key)
### On PC:
1. Install requests: pip install requests
2. Add to config.json:
{
"remote_storages": [
{
"name": "phone",
"url": "http://192.168.1.100:5000",
"api_key": "mysecretkey",
"timeout": 30
}
]
}
Note: API key is optional. Works on WiFi or cellular data.
## USAGE
After setup, all cmdlets work with the phone:
$ search-file zohar -store phone
$ @1-3 | add-relationship -king @4 -store phone
$ @1 | get-relationship -store phone
The server exposes REST endpoints that RemoteStorageBackend uses internally.
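
For a quick manual check from the PC, the endpoints can be hit directly
(values are placeholders; send the X-API-Key header only if the server was
started with --api-key):

    $ curl -H "X-API-Key: mysecretkey" http://192.168.1.100:5000/health
    $ curl -H "X-API-Key: mysecretkey" "http://192.168.1.100:5000/files/search?q=zohar&limit=10"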
"""
from __future__ import annotations
import os
import sys
import json
import argparse
import logging
from pathlib import Path
from typing import Optional, Dict, Any
from datetime import datetime
from functools import wraps
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from helper.logger import log
# ============================================================================
# CONFIGURATION
# ============================================================================
logging.basicConfig(
level=logging.INFO,
format='[%(asctime)s] %(levelname)s: %(message)s'
)
logger = logging.getLogger(__name__)
STORAGE_PATH: Optional[Path] = None
API_KEY: Optional[str] = None # API key for authentication (None = no auth required)
# Try importing Flask - will be used in main() only
try:
from flask import Flask, request, jsonify
from flask_cors import CORS
HAS_FLASK = True
except ImportError:
HAS_FLASK = False
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def get_local_ip() -> Optional[str]:
"""Get the local IP address that would be used for external connections."""
import socket
try:
# Create a socket to determine which interface would be used
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80)) # Google DNS
ip = s.getsockname()[0]
s.close()
return ip
except Exception:
return None
# ============================================================================
# FLASK APP FACTORY
# ============================================================================
def create_app():
"""Create and configure Flask app with all routes."""
if not HAS_FLASK:
raise ImportError("Flask not installed. Install with: pip install flask flask-cors")
from flask import Flask, request, jsonify
from flask_cors import CORS
app = Flask(__name__)
CORS(app)
# ========================================================================
# HELPER DECORATORS
# ========================================================================
def require_auth():
"""Decorator to check API key authentication if configured."""
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if API_KEY:
# Get API key from header or query parameter
provided_key = request.headers.get('X-API-Key') or request.args.get('api_key')
if not provided_key or provided_key != API_KEY:
return jsonify({"error": "Unauthorized. Invalid or missing API key."}), 401
return f(*args, **kwargs)
return decorated_function
return decorator
def require_storage():
"""Decorator to ensure storage path is configured."""
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if not STORAGE_PATH:
return jsonify({"error": "Storage path not configured"}), 500
return f(*args, **kwargs)
return decorated_function
return decorator
# ========================================================================
# HEALTH CHECK
# ========================================================================
@app.route('/health', methods=['GET'])
@require_auth()
def health():
"""Check server health and storage availability."""
status = {
"status": "ok",
"storage_configured": STORAGE_PATH is not None,
"timestamp": datetime.now().isoformat()
}
if STORAGE_PATH:
status["storage_path"] = str(STORAGE_PATH)
status["storage_exists"] = STORAGE_PATH.exists()
try:
from helper.local_library import LocalLibraryDB
with LocalLibraryDB(STORAGE_PATH) as db:
status["database_accessible"] = True
except Exception as e:
status["database_accessible"] = False
status["database_error"] = str(e)
return jsonify(status), 200
# ========================================================================
# FILE OPERATIONS
# ========================================================================
@app.route('/files/search', methods=['GET'])
@require_auth()
@require_storage()
def search_files():
"""Search for files by name or tag."""
from helper.local_library import LocalLibrarySearchOptimizer
query = request.args.get('q', '')
limit = request.args.get('limit', 100, type=int)
if not query:
return jsonify({"error": "Search query required"}), 400
try:
with LocalLibrarySearchOptimizer(STORAGE_PATH) as db:
results = db.search_by_name(query, limit)
tag_results = db.search_by_tag(query, limit)
all_results = {r['hash']: r for r in (results + tag_results)}
return jsonify({
"query": query,
"count": len(all_results),
"files": list(all_results.values())
}), 200
except Exception as e:
logger.error(f"Search error: {e}", exc_info=True)
return jsonify({"error": f"Search failed: {str(e)}"}), 500
@app.route('/files/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_file_metadata(file_hash: str):
"""Get metadata for a specific file by hash."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path or not file_path.exists():
return jsonify({"error": "File not found"}), 404
metadata = db.get_metadata(file_path)
tags = db.get_tags(file_path)
return jsonify({
"hash": file_hash,
"path": str(file_path),
"size": file_path.stat().st_size,
"metadata": metadata,
"tags": tags
}), 200
except Exception as e:
logger.error(f"Get metadata error: {e}", exc_info=True)
return jsonify({"error": f"Failed to get metadata: {str(e)}"}), 500
@app.route('/files/index', methods=['POST'])
@require_auth()
@require_storage()
def index_file():
"""Index a new file in the storage."""
from helper.local_library import LocalLibraryDB
from helper.utils import sha256_file
data = request.get_json() or {}
file_path_str = data.get('path')
tags = data.get('tags', [])
urls = data.get('urls', [])
if not file_path_str:
return jsonify({"error": "File path required"}), 400
try:
file_path = Path(file_path_str)
if not file_path.exists():
return jsonify({"error": "File does not exist"}), 404
with LocalLibraryDB(STORAGE_PATH) as db:
db.get_or_create_file_entry(file_path)
if tags:
db.add_tags(file_path, tags)
if urls:
db.add_known_urls(file_path, urls)
file_hash = sha256_file(file_path)
return jsonify({
"hash": file_hash,
"path": str(file_path),
"tags_added": len(tags),
"urls_added": len(urls)
}), 201
except Exception as e:
logger.error(f"Index error: {e}", exc_info=True)
return jsonify({"error": f"Indexing failed: {str(e)}"}), 500
# ========================================================================
# TAG OPERATIONS
# ========================================================================
@app.route('/tags/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_tags(file_hash: str):
"""Get tags for a file."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
tags = db.get_tags(file_path)
return jsonify({"hash": file_hash, "tags": tags}), 200
except Exception as e:
logger.error(f"Get tags error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/tags/<file_hash>', methods=['POST'])
@require_auth()
@require_storage()
def add_tags(file_hash: str):
"""Add tags to a file."""
from helper.local_library import LocalLibraryDB
data = request.get_json() or {}
tags = data.get('tags', [])
mode = data.get('mode', 'add')
if not tags:
return jsonify({"error": "Tags required"}), 400
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
if mode == 'replace':
db.remove_tags(file_path, db.get_tags(file_path))
db.add_tags(file_path, tags)
return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200
except Exception as e:
logger.error(f"Add tags error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/tags/<file_hash>', methods=['DELETE'])
@require_auth()
@require_storage()
def remove_tags(file_hash: str):
"""Remove tags from a file."""
from helper.local_library import LocalLibraryDB
tags_str = request.args.get('tags', '')
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
if tags_str:
tags_to_remove = [t.strip() for t in tags_str.split(',')]
else:
tags_to_remove = db.get_tags(file_path)
db.remove_tags(file_path, tags_to_remove)
return jsonify({"hash": file_hash, "tags_removed": len(tags_to_remove)}), 200
except Exception as e:
logger.error(f"Remove tags error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
# ========================================================================
# RELATIONSHIP OPERATIONS
# ========================================================================
@app.route('/relationships/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_relationships(file_hash: str):
"""Get relationships for a file."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
metadata = db.get_metadata(file_path)
relationships = metadata.get('relationships', {}) if metadata else {}
return jsonify({"hash": file_hash, "relationships": relationships}), 200
except Exception as e:
logger.error(f"Get relationships error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/relationships', methods=['POST'])
@require_auth()
@require_storage()
def set_relationship():
"""Set a relationship between two files."""
from helper.local_library import LocalLibraryDB
data = request.get_json() or {}
from_hash = data.get('from_hash')
to_hash = data.get('to_hash')
rel_type = data.get('type', 'alt')
if not from_hash or not to_hash:
return jsonify({"error": "from_hash and to_hash required"}), 400
try:
with LocalLibraryDB(STORAGE_PATH) as db:
from_path = db.search_by_hash(from_hash)
to_path = db.search_by_hash(to_hash)
if not from_path or not to_path:
return jsonify({"error": "File not found"}), 404
db.set_relationship(from_path, to_path, rel_type)
return jsonify({"from_hash": from_hash, "to_hash": to_hash, "type": rel_type}), 200
except Exception as e:
logger.error(f"Set relationship error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
# ========================================================================
# URL OPERATIONS
# ========================================================================
@app.route('/urls/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_urls(file_hash: str):
"""Get known URLs for a file."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
metadata = db.get_metadata(file_path)
urls = metadata.get('known_urls', []) if metadata else []
return jsonify({"hash": file_hash, "urls": urls}), 200
except Exception as e:
logger.error(f"Get URLs error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/urls/<file_hash>', methods=['POST'])
@require_auth()
@require_storage()
def add_urls(file_hash: str):
"""Add URLs to a file."""
from helper.local_library import LocalLibraryDB
data = request.get_json() or {}
urls = data.get('urls', [])
if not urls:
return jsonify({"error": "URLs required"}), 400
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
db.add_known_urls(file_path, urls)
return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200
except Exception as e:
logger.error(f"Add URLs error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
return app
# ============================================================================
# MAIN
# ============================================================================
def main():
if not HAS_FLASK:
print("ERROR: Flask and flask-cors required")
print("Install with: pip install flask flask-cors")
sys.exit(1)
parser = argparse.ArgumentParser(
description='Remote Storage Server for Medios-Macina',
epilog='Example: python remote_storage_server.py --storage-path /storage/media --port 5000 --api-key mysecretkey'
)
parser.add_argument('--storage-path', type=str, required=True, help='Path to storage directory')
parser.add_argument('--host', type=str, default='0.0.0.0', help='Server host (default: 0.0.0.0)')
parser.add_argument('--port', type=int, default=5000, help='Server port (default: 5000)')
parser.add_argument('--api-key', type=str, default=None, help='API key for authentication (optional)')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
args = parser.parse_args()
global STORAGE_PATH, API_KEY
STORAGE_PATH = Path(args.storage_path).resolve()
API_KEY = args.api_key
if not STORAGE_PATH.exists():
print(f"ERROR: Storage path does not exist: {STORAGE_PATH}")
sys.exit(1)
# Get local IP address
local_ip = get_local_ip()
if not local_ip:
local_ip = "127.0.0.1"
print(f"\n{'='*70}")
print(f"Remote Storage Server - Medios-Macina")
print(f"{'='*70}")
print(f"Storage Path: {STORAGE_PATH}")
print(f"Local IP: {local_ip}")
print(f"Server URL: http://{local_ip}:{args.port}")
print(f"Health URL: http://{local_ip}:{args.port}/health")
print(f"API Key: {'Enabled - ' + ('***' + args.api_key[-4:]) if args.api_key else 'Disabled (no auth)'}")
print(f"Debug Mode: {args.debug}")
print(f"\n📋 Config for config.json:")
config_entry = {
"name": "phone",
"url": f"http://{local_ip}:{args.port}",
"timeout": 30
}
if args.api_key:
config_entry["api_key"] = args.api_key
print(json.dumps(config_entry, indent=2))
print(f"\n{'='*70}\n")
try:
from helper.local_library import LocalLibraryDB
with LocalLibraryDB(STORAGE_PATH) as db:
logger.info("Database initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize database: {e}")
sys.exit(1)
app = create_app()
app.run(host=args.host, port=args.port, debug=args.debug, use_reloader=False)
if __name__ == '__main__':
main()