upk
@@ -68,8 +68,7 @@ def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config:

if db_root:
try:
db = LocalLibraryDB(Path(db_root))
try:
with LocalLibraryDB(Path(db_root)) as db:
# Get tags and metadata from database
tags = db.get_tags(media_path) or []
metadata = db.get_metadata(media_path) or {}
@@ -79,8 +78,6 @@ def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config:
if tags or known_urls or file_hash:
debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
return None, file_hash, tags, known_urls
finally:
db.close()
except Exception as exc:
log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
except Exception:

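The hunk above shows the pattern this commit repeats everywhere: manual open plus try/finally/close becomes a with-block. A minimal sketch of why the two are equivalent, assuming LocalLibraryDB implements the context-manager protocol (the class body here is illustrative, not the repo's implementation):

import sqlite3
from pathlib import Path

class LocalLibraryDBSketch:
    """Stand-in for LocalLibraryDB; opens a resource up front, frees it in close()."""
    def __init__(self, root: Path):
        self.conn = sqlite3.connect(":memory:")  # hypothetical backing store

    def __enter__(self):
        return self  # hand the open handle to the with-block

    def __exit__(self, exc_type, exc, tb):
        self.close()  # runs on every exit path, including exceptions
        return False  # do not swallow exceptions

    def close(self):
        self.conn.close()

# Equivalent to: db = LocalLibraryDBSketch(root); try: ... finally: db.close()
with LocalLibraryDBSketch(Path(".")) as db:
    pass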
@@ -14,22 +14,25 @@ from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input
from helper.local_library import read_sidecar, find_sidecar


CMDLET = Cmdlet(
name="add-relationship",
summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.",
usage="add-relationship OR add-relationship -path <file>",
usage="@1-3 | add-relationship -king @4 OR add-relationship -path <file> OR @1,@2,@3 | add-relationship",
args=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"),
CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"),
],
details=[
"- Reads relationship tags from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>,hash(related)<HASH>')",
"- Calls Hydrus API to associate the hashes as relationships",
"- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)",
"- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4",
"- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king)<HASH>,hash(alt)<HASH>...')",
"- Supports three relationship types: king (primary), alt (alternative), related (other versions)",
"- Works with piped file results or -path argument for direct invocation",
"- When using -king, all piped items become the specified relationship type to the king",
],
)

@@ -67,6 +70,81 @@ def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]:
return result


def _resolve_king_reference(king_arg: str) -> Optional[str]:
"""Resolve a king reference like '@4' to its actual hash or path.

Supports:
- Direct hash: '0123456789abcdef...' (64 chars)
- Selection reference: '@4' (resolves from pipeline context)

Returns:
- For Hydrus items: normalized hash
- For local storage items: file path
- None if not found
"""
if not king_arg:
return None

# Check if it's already a valid hash
normalized = _normalise_hash_hex(king_arg)
if normalized:
return normalized

# Try to resolve as @N selection from pipeline context
if king_arg.startswith('@'):
try:
# Get the result items from the pipeline context
from pipeline import get_last_result_items
items = get_last_result_items()
if not items:
log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr)
return None

# Parse @N to get the index (1-based)
index_str = king_arg[1:] # Remove '@'
index = int(index_str) - 1 # Convert to 0-based

if 0 <= index < len(items):
item = items[index]

# Try to extract hash from the item (could be dict or object)
item_hash = None
if isinstance(item, dict):
# Dictionary: try common hash field names
item_hash = item.get('hash_hex') or item.get('hash') or item.get('file_hash')
else:
# Object: use getattr
item_hash = getattr(item, 'hash_hex', None) or getattr(item, 'hash', None)

if item_hash:
normalized = _normalise_hash_hex(item_hash)
if normalized:
return normalized

# If no hash, try to get file path (for local storage)
file_path = None
if isinstance(item, dict):
# Dictionary: try common path field names
file_path = item.get('file_path') or item.get('path') or item.get('target')
else:
# Object: use getattr
file_path = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'target', None)

if file_path:
return str(file_path)

log(f"Item {king_arg} has no hash or path information", file=sys.stderr)
return None
else:
log(f"Index {king_arg} out of range", file=sys.stderr)
return None
except (ValueError, IndexError) as e:
log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr)
return None

return None


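For a feel of what _resolve_king_reference returns, here is a toy re-implementation of just the @N branch over made-up pipeline items (field names mirror the diff; get_last_result_items and the real hash normalisation are omitted):

def resolve_reference(arg, items):
    if arg.startswith("@"):
        index = int(arg[1:]) - 1  # '@4' is 1-based, items are 0-based
        if 0 <= index < len(items):
            item = items[index]
            # Prefer a Hydrus hash, fall back to a local file path
            return item.get("hash_hex") or item.get("file_path")
    return None

items = [{"hash_hex": "ab" * 32}, {"file_path": "/library/clip.mp4"}]
assert resolve_reference("@1", items) == "ab" * 32
assert resolve_reference("@2", items) == "/library/clip.mp4"
assert resolve_reference("@9", items) is None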
@register(["add-relationship", "add-rel"]) # primary name and alias
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
@@ -88,6 +166,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
king_arg = parsed.get("king") # New: explicit king argument
rel_type = parsed.get("type", "alt") # New: relationship type (default: alt)

if parsed:
# Get the first arg value (e.g., -path)
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
@@ -98,62 +179,160 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
except Exception:
arg_path = Path(str(arg_value))

# Get Hydrus client
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
# Handle @N selection which creates a list
# Use normalize_result_input to handle both single items and lists
items_to_process = normalize_result_input(result)

if not items_to_process and not arg_path:
log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr)
return 1

if client is None:
log("Hydrus client unavailable", file=sys.stderr)
return 1
# If no items from pipeline, just process the -path arg
if not items_to_process and arg_path:
items_to_process = [{"file_path": arg_path}]

# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Import local storage utilities
from helper.local_library import LocalLibrarySearchOptimizer
from config import get_local_storage_path

# Check if we're in pipeline mode (have a hash) or file mode
file_hash = getattr(result, "hash_hex", None)
local_storage_path = get_local_storage_path(config) if config else None

# PIPELINE MODE: Track relationships across multiple items
if file_hash:
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1
# Check if any items have Hydrus hashes (file_hash or hash_hex fields)
has_hydrus_hashes = any(
(isinstance(item, dict) and (item.get('hash_hex') or item.get('hash')))
or (hasattr(item, 'hash_hex') or hasattr(item, 'hash'))
for item in items_to_process
)

# Load or initialize king hash from pipeline context
# Only try to initialize Hydrus if we actually have Hydrus hashes to work with
hydrus_client = None
if has_hydrus_hashes:
try:
king_hash = ctx.load_value("relationship_king")
except Exception:
king_hash = None
hydrus_client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr)

# If this is the first item, make it the king
# Use local storage if it's available and either Hydrus is not available or items are local files
use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists()))

# Resolve the king reference once (if provided)
king_hash = None
if king_arg:
# Resolve the king reference (could be @4 or a direct hash)
king_hash = _resolve_king_reference(king_arg)
if not king_hash:
try:
ctx.store_value("relationship_king", file_hash)
log(f"Established king hash: {file_hash}", file=sys.stderr)
return 0 # First item just becomes the king, no relationships yet
except Exception:
pass
log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr)
return 1
log(f"Using king hash: {king_hash}", file=sys.stderr)

# If we already have a king and this is a different hash, link them
if king_hash and king_hash != file_hash:
try:
client.set_relationship(file_hash, king_hash, "alt")
log(
f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
return 0
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
# Process each item in the list
for item_idx, item in enumerate(items_to_process):
# Extract hash and path from current item
file_hash = None
file_path_from_result = None

if isinstance(item, dict):
file_hash = item.get("hash_hex") or item.get("hash")
file_path_from_result = item.get("file_path") or item.get("path") or item.get("target")
else:
file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None)
file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None)

# PIPELINE MODE with Hydrus: Track relationships using hash
if file_hash and hydrus_client:
file_hash = _normalise_hash_hex(file_hash)
if not file_hash:
log("Invalid file hash format", file=sys.stderr)
return 1

return 0
# If explicit -king provided, use it
if king_hash:
try:
hydrus_client.set_relationship(file_hash, king_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}",
file=sys.stderr
)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
else:
# Original behavior: no explicit king, first becomes king, rest become alts
try:
existing_king = ctx.load_value("relationship_king")
except Exception:
existing_king = None

# FILE MODE: Read relationships from sidecar
# If this is the first item, make it the king
if not existing_king:
try:
ctx.store_value("relationship_king", file_hash)
log(f"Established king hash: {file_hash}", file=sys.stderr)
continue # Move to next item
except Exception:
pass

# If we already have a king and this is a different hash, link them
if existing_king and existing_king != file_hash:
try:
hydrus_client.set_relationship(file_hash, existing_king, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}",
file=sys.stderr
)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1

# LOCAL STORAGE MODE: Handle relationships for local files
elif use_local_storage and file_path_from_result:
try:
file_path_obj = Path(str(file_path_from_result))

if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1

if king_hash:
# king_hash is a file path from _resolve_king_reference (or a Hydrus hash)
king_file_path = Path(str(king_hash)) if king_hash else None
if king_file_path and king_file_path.exists():
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, king_file_path, rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr)
else:
log(f"King file not found or invalid: {king_hash}", file=sys.stderr)
return 1
else:
# Original behavior: first becomes king, rest become alts
try:
king_path = ctx.load_value("relationship_king_path")
except Exception:
king_path = None

if not king_path:
try:
ctx.store_value("relationship_king_path", str(file_path_obj))
log(f"Established king file: {file_path_obj.name}", file=sys.stderr)
continue # Move to next item
except Exception:
pass

if king_path and king_path != str(file_path_obj):
try:
with LocalLibrarySearchOptimizer(local_storage_path) as db:
db.set_relationship(file_path_obj, Path(king_path), rel_type)
log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr)
except Exception as exc:
log(f"Failed to set relationship: {exc}", file=sys.stderr)
return 1
except Exception as exc:
log(f"Local storage error: {exc}", file=sys.stderr)
return 1

return 0

# FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only)
log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr)

# Resolve media path from -path arg or result target
@@ -235,7 +414,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
continue

try:
client.set_relationship(file_hash, related_hash, rel_type)
hydrus_client.set_relationship(file_hash, related_hash, rel_type)
log(
f"[add-relationship] Set {rel_type} relationship: "
f"{file_hash} <-> {related_hash}",

cmdlets/delete_relationship.py (new file, 168 lines)
@@ -0,0 +1,168 @@
"""Delete file relationships."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input
|
||||
from helper.local_library import LocalLibrarySearchOptimizer
|
||||
from config import get_local_storage_path
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Delete relationships from files.
|
||||
|
||||
Args:
|
||||
result: Input result(s) from previous cmdlet
|
||||
args: Command arguments
|
||||
config: CLI configuration
|
||||
|
||||
Returns:
|
||||
Exit code (0 = success)
|
||||
"""
|
||||
try:
|
||||
# Parse arguments
|
||||
parsed_args = parse_cmdlet_args(args, CMDLET)
|
||||
delete_all_flag = parsed_args.get("all", False)
|
||||
rel_type_filter = parsed_args.get("type")
|
||||
|
||||
# Get storage path
|
||||
local_storage_path = get_local_storage_path(config)
|
||||
if not local_storage_path:
|
||||
log("Local storage path not configured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Normalize input
|
||||
results = normalize_result_input(result)
|
||||
|
||||
if not results:
|
||||
log("No results to process", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
deleted_count = 0
|
||||
|
||||
for single_result in results:
try:
# Get file path from result
file_path_from_result = None

if isinstance(single_result, dict):
file_path_from_result = (
single_result.get("file_path") or
single_result.get("path") or
single_result.get("target")
)
else:
file_path_from_result = (
getattr(single_result, "file_path", None) or
getattr(single_result, "path", None) or
getattr(single_result, "target", None) or
str(single_result)
)

if not file_path_from_result:
log("Could not extract file path from result", file=sys.stderr)
return 1

file_path_obj = Path(str(file_path_from_result))

if not file_path_obj.exists():
log(f"File not found: {file_path_obj}", file=sys.stderr)
return 1

with LocalLibrarySearchOptimizer(local_storage_path) as db:
file_id = db.db.get_file_id(file_path_obj)

if not file_id:
log(f"File not in database: {file_path_obj.name}", file=sys.stderr)
continue

# Get current relationships
cursor = db.db.connection.cursor()
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (file_id,))

row = cursor.fetchone()
if not row:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue

relationships_str = row[0]
if not relationships_str:
log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr)
continue

try:
relationships = json.loads(relationships_str)
except json.JSONDecodeError:
log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr)
continue

if not isinstance(relationships, dict):
relationships = {}

# Determine what to delete
if delete_all_flag:
# Delete all relationships
deleted_types = list(relationships.keys())
relationships = {}
log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr)
elif rel_type_filter:
# Delete specific type
if rel_type_filter in relationships:
deleted_count_for_type = len(relationships[rel_type_filter])
del relationships[rel_type_filter]
log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr)
else:
log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr)
continue
else:
log("Specify --all to delete all relationships or -type <type> to delete specific type", file=sys.stderr)
return 1

# Save updated relationships
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (file_id, json.dumps(relationships) if relationships else None))

db.db.connection.commit()
deleted_count += 1

except Exception as exc:
log(f"Error deleting relationship: {exc}", file=sys.stderr)
return 1

log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr)
return 0

except Exception as exc:
log(f"Error in delete-relationship: {exc}", file=sys.stderr)
return 1


CMDLET = Cmdlet(
name="delete-relationship",
summary="Remove relationships from files.",
usage="@1 | delete-relationship --all OR delete-relationship -path <file> --all OR @1-3 | delete-relationship -type alt",
args=[
CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."),
CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."),
CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."),
],
details=[
"- Delete all relationships: pipe files | delete-relationship --all",
"- Delete specific type: pipe files | delete-relationship -type alt",
"- Delete all from file: delete-relationship -path <file> --all",
],
)
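delete-relationship persists its edits with SQLite's upsert; a self-contained sketch of the same ON CONFLICT clause (table layout assumed from the query in the new file):

import json
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE metadata ("
    "file_id INTEGER PRIMARY KEY, "
    "relationships TEXT, "
    "time_modified TEXT DEFAULT CURRENT_TIMESTAMP)"
)

def save_relationships(file_id, relationships):
    # First call inserts; later calls update the same row instead of failing.
    conn.execute("""
        INSERT INTO metadata (file_id, relationships)
        VALUES (?, ?)
        ON CONFLICT(file_id) DO UPDATE SET
            relationships = excluded.relationships,
            time_modified = CURRENT_TIMESTAMP
    """, (file_id, json.dumps(relationships) if relationships else None))
    conn.commit()

save_relationships(1, {"alt": ["deadbeef"]})
save_relationships(1, None)  # clears the column rather than violating the key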
@@ -243,8 +243,8 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No
# Fallback: assume file is in a library root or use its parent
local_root = path_obj.parent

db = LocalLibraryDB(local_root)
db.remove_tags(path_obj, tags)
with LocalLibraryDB(local_root) as db:
db.remove_tags(path_obj, tags)
debug(f"Removed {len(tags)} tag(s) from {path_obj.name} (local)")
return True


@@ -1224,12 +1224,31 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path

# Define LazyDB proxy to avoid keeping DB connection open for long duration
class LazyDB:
def __init__(self, root):
self.root = root

def _op(self, func_name, *args, **kwargs):
try:
with LocalLibraryDB(self.root) as db:
func = getattr(db, func_name)
return func(*args, **kwargs)
except Exception as e:
# Log error but don't crash
pass

def insert_worker(self, *args, **kwargs): self._op('insert_worker', *args, **kwargs)
def update_worker_status(self, *args, **kwargs): self._op('update_worker_status', *args, **kwargs)
def append_worker_stdout(self, *args, **kwargs): self._op('append_worker_stdout', *args, **kwargs)
def close(self): pass

worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
db = None
if library_root:
try:
db = LocalLibraryDB(library_root)
db = LazyDB(library_root)
db.insert_worker(
worker_id,
"download",
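With the LazyDB swap above, each worker update opens and closes its own connection instead of holding one for the worker's lifetime. A generic sketch of that open-use-close proxy, with a dummy resource standing in for LocalLibraryDB:

class _Resource:
    """Stand-in for LocalLibraryDB(root); pretend __enter__ opens a connection."""
    def __enter__(self): return self
    def __exit__(self, *exc): return False
    def insert_worker(self, worker_id, kind): return worker_id

class LazyProxy:
    def __init__(self, root):
        self.root = root  # only the path is held long-term, never a connection

    def _op(self, name, *args, **kwargs):
        try:
            with _Resource() as db:  # fresh connection per call
                return getattr(db, name)(*args, **kwargs)
        except Exception:
            return None  # degrade silently, as the diff's LazyDB does

proxy = LazyProxy("/library")
assert proxy._op("insert_worker", "id-1", "download") == "id-1"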
@@ -1812,12 +1831,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
current_format_selector = format_id

# If it's a video-only format (has vcodec but no acodec), add bestaudio
# BUT: Skip this for -section downloads because combining formats causes re-encoding
# For -section, use formats that already have audio (muxed) to avoid FFmpeg re-encoding
vcodec = url.get('vcodec', '')
acodec = url.get('acodec', '')
if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
# Video-only format, add bestaudio automatically
current_format_selector = f"{format_id}+bestaudio"
debug(f" ℹ️ Video-only format detected, automatically adding bestaudio")
if not clip_range and not section_ranges:
# Only add bestaudio if NOT doing -section or -clip
# For section downloads, we need muxed formats to avoid re-encoding
current_format_selector = f"{format_id}+bestaudio"
debug(f" ℹ️ Video-only format detected, automatically adding bestaudio")
else:
debug(f" ℹ️ Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)")

actual_url = url.get('source_url')
url = actual_url # Use the actual URL for further processing
@@ -2488,11 +2513,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
current_format_selector = fmt.get("format_id")

# If video-only format is selected, append +bestaudio to merge with best audio
# BUT: Skip this for -section downloads because combining formats causes re-encoding
vcodec = fmt.get("vcodec")
acodec = fmt.get("acodec")
if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
current_format_selector = f"{current_format_selector}+bestaudio"
debug(f"Video-only format selected, appending bestaudio: {current_format_selector}")
if not clip_range and not section_ranges:
# Only add bestaudio if NOT doing -section or -clip
current_format_selector = f"{current_format_selector}+bestaudio"
debug(f"Video-only format selected, appending bestaudio: {current_format_selector}")
else:
debug(f"Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)")

debug(f"Selected format #{idx}: {current_format_selector}")
playlist_items = None # Clear so it doesn't affect download options

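Both format hunks above apply one rule; condensed into a plain function (not the repo's code) it reads:

def pick_format_selector(fmt, format_id, doing_sections):
    """Append '+bestaudio' to a video-only format, except for -section/-clip
    downloads, where merging separate streams would force an FFmpeg re-encode."""
    vcodec = fmt.get("vcodec") or ""
    acodec = fmt.get("acodec") or ""
    video_only = vcodec and vcodec != "none" and (not acodec or acodec == "none")
    if video_only and not doing_sections:
        return f"{format_id}+bestaudio"
    return format_id

assert pick_format_selector({"vcodec": "vp9", "acodec": "none"}, "248", False) == "248+bestaudio"
assert pick_format_selector({"vcodec": "vp9", "acodec": "none"}, "248", True) == "248"
assert pick_format_selector({"vcodec": "avc1", "acodec": "mp4a"}, "22", False) == "22"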
@@ -75,23 +75,32 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if not source_title or source_title == "Unknown":
source_title = path_obj.name

print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)

if path_obj.exists():
storage_path = get_local_storage_path(config)
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with LocalLibraryDB(storage_path) as db:
metadata = db.get_metadata(path_obj)
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
# Try to resolve hash to filename if possible
# h is now a file hash (not a path)
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
# Resolve hash to file path
resolved_path = db.search_by_hash(h)
title = h
title = h[:16] + "..."
path = None
if resolved_path:
if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
@@ -114,6 +123,177 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"path": path,
"origin": "local"
})

# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)

# Also add the king/parent itself if not already in results
if not any(str(r['hash']).lower() == str(path).lower() for r in found_relationships):
parent_title = parent_path_obj.stem
try:
parent_tags = db.get_tags(parent_path_obj)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass

print(f"[DEBUG] ➕ Adding king/parent to results: {parent_title}", file=sys.stderr)
found_relationships.append({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"origin": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
for r in found_relationships:
if str(r['hash']).lower() == str(path).lower():
if rel_type.lower() == "alt":
r['type'] = "king"
break

# 1. Check forward relationships from parent (siblings)
parent_metadata = db.get_metadata(parent_path_obj)
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_by_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)

if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue

# Skip the current file we're querying
if str(child_path_obj).lower() == str(path_obj).lower():
print(f"[DEBUG] ⏭️ Skipping current file: {child_path_obj}", file=sys.stderr)
continue

# Check if already added (case-insensitive hash check)
if any(str(r['hash']).lower() == str(child_h).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue

# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass

print(f"[DEBUG] ➕ Adding sibling: {child_title}", file=sys.stderr)
found_relationships.append({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"origin": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)

# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to parent but parent doesn't point to siblings
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)

# Skip the current file
if str(child_path).lower() == str(path_obj).lower():
print(f"[DEBUG] ⏭️ Skipping self", file=sys.stderr)
continue

# Skip if already added (check by path, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue

child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass

print(f"[DEBUG] ➕ Adding reverse sibling: {child_title}", file=sys.stderr)
found_relationships.append({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"origin": "local"
})

except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)

except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)


# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "origin": "local"
# })

except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)


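The recursive branch above turns "this file is an alt of some king" into "list the king plus the king's other alts". A toy traversal over an in-memory relationship map shows the shape of that lookup (data and helper names are invented):

rels = {
    "alt2.mp4": {"alt": ["k0"]},                # this file points at its king
    "king.mp4": {"alt": ["a1", "a2", "a3"]},    # the king points at all alts
}
hash_to_path = {"k0": "king.mp4", "a1": "alt1.mp4", "a2": "alt2.mp4", "a3": "alt3.mp4"}

def siblings_of(path):
    found = []
    for king_hash in rels.get(path, {}).get("alt", []):
        king_path = hash_to_path[king_hash]          # step 1: resolve our king
        found.append(king_path)
        for sib_hash in rels.get(king_path, {}).get("alt", []):
            sib_path = hash_to_path[sib_hash]        # step 2: the king's other alts
            if sib_path != path and sib_path not in found:
                found.append(sib_path)
    return found

assert siblings_of("alt2.mp4") == ["king.mp4", "alt1.mp4", "alt3.mp4"]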
@@ -223,6 +223,42 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pause_mode = parsed.get("pause")
save_mode = parsed.get("save")
load_mode = parsed.get("load")
current_mode = parsed.get("current")

# Handle --current flag: emit currently playing item to pipeline
if current_mode:
items = _get_playlist()
if items is None:
debug("MPV is not running or not accessible.", file=sys.stderr)
return 1

# Find the currently playing item
current_item = None
for item in items:
if item.get("current", False):
current_item = item
break

if current_item is None:
debug("No item is currently playing.", file=sys.stderr)
return 1

# Build result object with file info
title = _extract_title_from_item(current_item)
filename = current_item.get("filename", "")

# Emit the current item to pipeline
result_obj = {
'file_path': filename,
'title': title,
'cmdlet_name': '.pipe',
'source': 'pipe',
'__pipe_index': items.index(current_item),
}

ctx.emit(result_obj)
debug(f"Emitted current item: {title}")
return 0

# Handle URL queuing
mpv_started = False
@@ -599,7 +635,7 @@ CMDLET = Cmdlet(
name=".pipe",
aliases=["pipe", "playlist", "queue", "ls-pipe"],
summary="Manage and play items in the MPV playlist via IPC",
usage=".pipe [index|url] [-clear] [-url URL]",
usage=".pipe [index|url] [-current] [-clear] [-list] [-url URL]",
args=[
CmdletArg(
name="index",
@@ -643,6 +679,11 @@ CMDLET = Cmdlet(
type="flag",
description="List saved playlists"
),
CmdletArg(
name="current",
type="flag",
description="Emit the currently playing item to pipeline for further processing"
),
],
exec=_run
)

@@ -88,9 +88,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

try:
from helper.local_library import LocalLibraryDB
db: LocalLibraryDB | None = None
try:
db = LocalLibraryDB(library_root)
with LocalLibraryDB(library_root) as db:
if clear_requested:
count = db.clear_finished_workers()
log(f"Cleared {count} finished workers.")
@@ -115,9 +113,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if selection_requested:
return _render_worker_selection(db, result)
return _render_worker_list(db, status_filter, limit)
finally:
if db:
db.close()
except Exception as exc:
log(f"Workers query failed: {exc}", file=sys.stderr)
import traceback

@@ -8,7 +8,12 @@ Lean, focused downloader without event infrastructure overhead.
"""
from __future__ import annotations

import re # noqa: F401
import glob # noqa: F401
import hashlib
import json # noqa: F401
import random
import re
import string
import subprocess
import sys
import time
@@ -157,31 +162,72 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None


def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> None:
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.

yt-dlp with multiple --download-sections args merges them into one file.
We need separate files for merge-file, so download each section individually.

Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
thinking sections are already downloaded. The title is extracted and stored in tags.

Returns:
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
"""

sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
return
return "", {}

# Download each section separately with unique output template
# Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]

first_section_info = None
title_from_first = None

# Download each section separately with unique output template using session ID
for section_idx, section in enumerate(sections_list, 1):
# Build unique output template for this section
# e.g., "title.section_1_of_3.ext" for the first section
# Build unique output template for this section using session-based filename
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
output_dir_path = Path(base_outtmpl).parent

# Insert section number before extension
# e.g., "/path/title.hash.webm" → "/path/title.hash.section_1_of_3.webm"
# Use session_id + section index for temp filename
# e.g., "/path/{session_id}_1.%(ext)s"
filename_tmpl = f"{session_id}_{section_idx}"
if base_outtmpl.endswith(".%(ext)s"):
section_outtmpl = base_outtmpl.replace(".%(ext)s", f".section_{section_idx}_of_{len(sections_list)}.%(ext)s")
else:
section_outtmpl = base_outtmpl + f".section_{section_idx}_of_{len(sections_list)}"
filename_tmpl += ".%(ext)s"

# Build yt-dlp command for this section
# Use Path to handle separators correctly for the OS
section_outtmpl = str(output_dir_path / filename_tmpl)

# For the first section, extract metadata first (separate call)
if section_idx == 1:
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
if ytdl_options.get("cookiefile"):
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
metadata_cmd.extend(["--cookies", cookies_path])
if ytdl_options.get("noplaylist"):
metadata_cmd.append("--no-playlist")
metadata_cmd.append(url)

try:
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
if meta_result.returncode == 0 and meta_result.stdout:
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
debug("Could not parse JSON metadata")
except Exception as e:
debug(f"Error extracting metadata: {e}")

# Build yt-dlp command for downloading this section
cmd = ["yt-dlp"]

# Add format
@@ -212,15 +258,19 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
cmd.append(url)

debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
debug(f"Command: {' '.join(cmd)}")

# Run the subprocess
# Run the subprocess - don't capture output so progress is shown
try:
result = subprocess.run(cmd, capture_output=False, text=True)
result = subprocess.run(cmd)

if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc

return session_id, first_section_info or {}


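The session ID above exists purely to give each section a unique temp name; a short sketch of what the function generates before the hash-rename pass (same hashlib recipe as the diff, arbitrary example URL):

import hashlib
import random
import string
import time

url = "https://example.com/watch?v=abc"
session_id = hashlib.md5(
    (url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]

# One output template per section: "<session_id>_1.%(ext)s", "<session_id>_2.%(ext)s", ...
templates = [f"{session_id}_{i}.%(ext)s" for i in range(1, 4)]
# Unique, title-free names keep yt-dlp from treating a later section as an
# already-downloaded file; titles are recovered from the --dump-json pass.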
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
"""Build yt-dlp download options."""
@@ -296,8 +346,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
# Force keyframes at cuts for accurate section boundaries
base_options["force_keyframes_at_cuts"] = True
# Note: Not using --force-keyframes-at-cuts to avoid re-encoding
# This may result in less precise cuts but faster downloads

# Add playlist items selection if provided
if opts.playlist_items:
@@ -751,8 +801,10 @@ def download_media(
debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")

# Use subprocess when download_sections are present (Python API doesn't support them properly)
session_id = None
first_section_info = {}
if ytdl_options.get("download_sections"):
_download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
info = None
else:
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
@@ -780,7 +832,7 @@ def download_media(
import re

# Get the expected filename pattern from outtmpl
# For sections: "C:\path\title.section_1_of_3.ext", "C:\path\title.section_2_of_3.ext", etc.
# For sections: "C:\path\{session_id}.section_1_of_3.ext", etc.
# For non-sections: "C:\path\title.ext"

# Wait a moment to ensure files are fully written
@@ -791,10 +843,10 @@ def download_media(
if not files:
raise FileNotFoundError(f"No files found in {opts.output_dir}")

# If we downloaded sections, look for files with .section_N_of_M pattern
if opts.clip_sections:
# Pattern: "title.section_1_of_3.ext", "title.section_2_of_3.ext", etc.
section_pattern = re.compile(r'\.section_(\d+)_of_(\d+)\.')
# If we downloaded sections, look for files with the session_id pattern
if opts.clip_sections and session_id:
# Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
matching_files = [f for f in files if section_pattern.search(f.name)]

if matching_files:
@@ -804,13 +856,44 @@ def download_media(
return int(match.group(1)) if match else 999

matching_files.sort(key=extract_section_num)
media_path = matching_files[0] # First section
media_paths = matching_files # All sections
debug(f"✓ Downloaded {len(media_paths)} section file(s)")
debug(f"Found {len(matching_files)} section file(s) matching pattern")

# Now rename section files to use hash-based names
# This ensures unique filenames for each section content
renamed_files = []

for idx, section_file in enumerate(matching_files, 1):
try:
# Calculate hash for the file
file_hash = sha256_file(section_file)
ext = section_file.suffix
new_name = f"{file_hash}{ext}"
new_path = opts.output_dir / new_name

if new_path.exists() and new_path != section_file:
# If file with same hash exists, use it and delete the temp one
debug(f"File with hash {file_hash} already exists, using existing file.")
try:
section_file.unlink()
except OSError:
pass
renamed_files.append(new_path)
else:
section_file.rename(new_path)
debug(f"Renamed section file: {section_file.name} → {new_name}")
renamed_files.append(new_path)
except Exception as e:
debug(f"Failed to process section file {section_file.name}: {e}")
renamed_files.append(section_file)

media_path = renamed_files[0]
media_paths = renamed_files
debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
else:
# Fallback to most recent file if pattern not found
media_path = files[0]
media_paths = None
debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
else:
# No sections, just take the most recent file
media_path = files[0]
@@ -830,10 +913,30 @@ def download_media(

# Create result with minimal data extracted from filename
file_hash = sha256_file(media_path)

# For section downloads, create tags with the title and build proper info dict
tags = []
title = ''
if first_section_info:
title = first_section_info.get('title', '')
if title:
tags.append(f'title:{title}')
debug(f"Added title tag for section download: {title}")

# Build info dict - always use extracted title if available, not hash
if first_section_info:
info_dict = first_section_info
else:
info_dict = {
"id": media_path.stem,
"title": title or media_path.stem,
"ext": media_path.suffix.lstrip(".")
}

return DownloadMediaResult(
path=media_path,
info={"id": media_path.stem, "title": media_path.stem, "ext": media_path.suffix.lstrip(".")},
tags=[],
info=info_dict,
tags=tags,
source_url=opts.url,
hash_value=file_hash,
paths=media_paths, # Include all section files if present

@@ -137,14 +137,14 @@ class LocalStorageBackend(StorageBackend):

# Check for duplicate files using LocalLibraryDB (fast - uses index)
try:
db = LocalLibraryDB(dest_dir)
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
with LocalLibraryDB(dest_dir) as db:
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
except Exception as exc:
log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)

@@ -205,247 +205,156 @@ class LocalStorageBackend(StorageBackend):

# Try database search first (much faster than filesystem scan)
try:
db = LocalLibraryDB(search_dir)
cursor = db.connection.cursor()
with LocalLibraryDB(search_dir) as db:
cursor = db.connection.cursor()

# Check if query is a tag namespace search (format: "namespace:pattern")
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
# Check if query is a tag namespace search (format: "namespace:pattern")
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")

# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"
# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"

cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))

rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")

# Filter results by pattern match
for file_id, file_path_str, size_bytes in rows:
if not file_path_str:
continue

# Get the file's tags and check if any match the pattern
cursor.execute("""
SELECT DISTINCT tag FROM tags
WHERE file_id = ?
AND LOWER(tag) LIKE ?
""", (file_id, query_pattern))
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))

tags = [row[0] for row in cursor.fetchall()]
rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")

# Check if any tag matches the pattern (case-insensitive wildcard)
for tag in tags:
tag_lower = tag.lower()
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size

# Fetch all tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]

# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)

results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": all_tags,
})
else:
debug(f"File missing on disk: {file_path}")
break # Don't add same file multiple times

if limit is not None and len(results) >= limit:
return results

elif not match_all:
# Search by filename or simple tags (namespace-agnostic for plain text)
# For plain text search, match:
# 1. Filenames containing the query
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")

# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]

debug(f"Performing filename/tag search for terms: {terms}")

# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50

# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)

cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (*params, fetch_limit))

rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")

# Compile regex for whole word matching (only if single term, otherwise skip)
word_regex = None
if len(terms) == 1:
term = terms[0]
# Check if term contains wildcard characters
has_wildcard = '*' in term or '?' in term

if has_wildcard:
# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
# Use custom boundary that treats underscores as separators
# \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
try:
# Match if not preceded or followed by alphanumeric chars
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None

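The custom boundary exists because \b counts '_' as a word character; the difference is easy to check with plain re (runnable as-is, applied in the scan loop below):

import re

term = "bar"
std = re.compile(rf"\b{re.escape(term)}\b", re.IGNORECASE)
custom = re.compile(rf"(?<![a-zA-Z0-9]){re.escape(term)}(?![a-zA-Z0-9])", re.IGNORECASE)

assert std.search("foo_bar") is None          # '_' is a word char, so \b sees no edge
assert custom.search("foo_bar") is not None   # underscore treated as a separator
assert custom.search("rebar") is None         # mid-word hits are still rejected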
seen_files = set()
for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files:
continue

# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
# Filter results by pattern match
for file_id, file_path_str, size_bytes in rows:
if not file_path_str:
continue
seen_files.add(file_path_str)

file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size

# Fetch tags for this file
# Get the file's tags and check if any match the pattern
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
SELECT DISTINCT tag FROM tags
WHERE file_id = ?
AND LOWER(tag) LIKE ?
""", (file_id, query_pattern))

tags = [row[0] for row in cursor.fetchall()]

# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
# Check if any tag matches the pattern (case-insensitive wildcard)
for tag in tags:
tag_lower = tag.lower()
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size

results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
# Fetch all tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]

# Also search for simple tags (without namespace) containing the query
|
||||
# Only perform tag search if single term, or if we want to support multi-term tag search
|
||||
# For now, fallback to single pattern search for tags if multiple terms
|
||||
# (searching for a tag that contains "term1 term2" or "term1,term2")
|
||||
# This is less useful for AND logic across multiple tags, but consistent with previous behavior
|
||||
query_pattern = f"%{query_lower}%"
|
||||
# Use title tag if present
|
||||
title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
|
||||
|
||||
cursor.execute("""
|
||||
SELECT DISTINCT f.id, f.file_path, f.file_size
|
||||
FROM files f
|
||||
JOIN tags t ON f.id = t.file_id
|
||||
WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
|
||||
ORDER BY f.file_path
|
||||
LIMIT ?
|
||||
""", (query_pattern, limit or 1000))
|
||||
|
||||
tag_rows = cursor.fetchall()
|
||||
for file_id, file_path_str, size_bytes in tag_rows:
|
||||
if not file_path_str or file_path_str in seen_files:
|
||||
continue
|
||||
seen_files.add(file_path_str)
|
||||
|
||||
file_path = Path(file_path_str)
|
||||
if file_path.exists():
|
||||
path_str = str(file_path)
|
||||
if size_bytes is None:
|
||||
size_bytes = file_path.stat().st_size
|
||||
|
||||
# Fetch tags for this file
|
||||
cursor.execute("""
|
||||
SELECT tag FROM tags WHERE file_id = ?
|
||||
""", (file_id,))
|
||||
tags = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
# Use title tag if present
|
||||
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
|
||||
|
||||
results.append({
|
||||
"name": file_path.stem,
|
||||
"title": title_tag or file_path.stem,
|
||||
"ext": file_path.suffix.lstrip('.'),
|
||||
"path": path_str,
|
||||
"target": path_str,
|
||||
"origin": "local",
|
||||
"size": size_bytes,
|
||||
"size_bytes": size_bytes,
|
||||
"tags": tags,
|
||||
})
|
||||
results.append({
|
||||
"name": file_path.stem,
|
||||
"title": title_tag or file_path.stem,
|
||||
"ext": file_path.suffix.lstrip('.'),
|
||||
"path": path_str,
|
||||
"target": path_str,
|
||||
"origin": "local",
|
||||
"size": size_bytes,
|
||||
"size_bytes": size_bytes,
|
||||
"tags": all_tags,
|
||||
})
|
||||
else:
|
||||
debug(f"File missing on disk: {file_path}")
|
||||
break # Don't add same file multiple times
|
||||
|
||||
if limit is not None and len(results) >= limit:
|
||||
return results
|
||||
|
||||
else:
|
||||
# Match all - get all files from database
|
||||
cursor.execute("""
|
||||
SELECT id, file_path, file_size
|
||||
FROM files
|
||||
ORDER BY file_path
|
||||
LIMIT ?
|
||||
""", (limit or 1000,))
|
||||
            elif not match_all:
                # Search by filename or simple tags (namespace-agnostic for plain text)
                # For plain text search, match:
                # 1. Filenames containing the query
                # 2. Simple tags (without namespace) containing the query
                # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
                # Use explicit namespace search for that (e.g., "channel:joe*")

                # Split query into terms for AND logic
                terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
                if not terms:
                    terms = [query_lower]

                debug(f"Performing filename/tag search for terms: {terms}")

                # Fetch more results than requested to allow for filtering
                fetch_limit = (limit or 45) * 50

                # 1. Filename search (AND logic)
                conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
                params = [f"%{t}%" for t in terms]
                where_clause = " AND ".join(conditions)

                cursor.execute(f"""
                    SELECT DISTINCT f.id, f.file_path, f.file_size
                    FROM files f
                    WHERE {where_clause}
                    ORDER BY f.file_path
                    LIMIT ?
                """, (*params, fetch_limit))

                rows = cursor.fetchall()
                debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")

                # Compile regex for whole-word matching (only if single term; otherwise skip)
                word_regex = None
                if len(terms) == 1:
                    term = terms[0]
                    # Check if the term contains wildcard characters
                    has_wildcard = '*' in term or '?' in term

                    if has_wildcard:
                        # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
                        try:
                            from fnmatch import translate
                            word_regex = re.compile(translate(term), re.IGNORECASE)
                        except Exception:
                            word_regex = None
                    else:
                        # Use a custom boundary that treats underscores as separators:
                        # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
                        try:
                            # Match if not preceded or followed by alphanumeric chars
                            pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
                            word_regex = re.compile(pattern, re.IGNORECASE)
                        except Exception:
                            word_regex = None

                seen_files = set()
                for file_id, file_path_str, size_bytes in rows:
                    if not file_path_str or file_path_str in seen_files:
                        continue

                    # Apply the whole-word filter on the filename if single term
                    if word_regex:
                        p = Path(file_path_str)
                        if not word_regex.search(p.name):
                            continue
                    seen_files.add(file_path_str)

                    file_path = Path(file_path_str)
                    if file_path.exists():
                        path_str = str(file_path)
@@ -473,11 +382,102 @@ class LocalStorageBackend(StorageBackend):
                            "tags": tags,
                        })

                # 2. Also search for simple tags (without namespace) containing the query.
                # We fall back to a single-pattern search when there are multiple terms
                # (matching a tag that contains "term1 term2" or "term1,term2"); this is
                # less useful for AND logic across multiple tags, but consistent with
                # previous behavior.
                query_pattern = f"%{query_lower}%"

                cursor.execute("""
                    SELECT DISTINCT f.id, f.file_path, f.file_size
                    FROM files f
                    JOIN tags t ON f.id = t.file_id
                    WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
                    ORDER BY f.file_path
                    LIMIT ?
                """, (query_pattern, limit or 1000))

                tag_rows = cursor.fetchall()
                for file_id, file_path_str, size_bytes in tag_rows:
                    if not file_path_str or file_path_str in seen_files:
                        continue
                    seen_files.add(file_path_str)

                    file_path = Path(file_path_str)
                    if file_path.exists():
                        path_str = str(file_path)
                        if size_bytes is None:
                            size_bytes = file_path.stat().st_size

                        # Fetch tags for this file
                        cursor.execute("""
                            SELECT tag FROM tags WHERE file_id = ?
                        """, (file_id,))
                        tags = [row[0] for row in cursor.fetchall()]

                        # Use title tag if present
                        title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)

                        results.append({
                            "name": file_path.stem,
                            "title": title_tag or file_path.stem,
                            "ext": file_path.suffix.lstrip('.'),
                            "path": path_str,
                            "target": path_str,
                            "origin": "local",
                            "size": size_bytes,
                            "size_bytes": size_bytes,
                            "tags": tags,
                        })

                    if limit is not None and len(results) >= limit:
                        return results

            else:
                # Match all - get all files from the database
                cursor.execute("""
                    SELECT id, file_path, file_size
                    FROM files
                    ORDER BY file_path
                    LIMIT ?
                """, (limit or 1000,))

                rows = cursor.fetchall()
                for file_id, file_path_str, size_bytes in rows:
                    if file_path_str:
                        file_path = Path(file_path_str)
                        if file_path.exists():
                            path_str = str(file_path)
                            if size_bytes is None:
                                size_bytes = file_path.stat().st_size

                            # Fetch tags for this file
                            cursor.execute("""
                                SELECT tag FROM tags WHERE file_id = ?
                            """, (file_id,))
                            tags = [row[0] for row in cursor.fetchall()]

                            # Use title tag if present
                            title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)

                            results.append({
                                "name": file_path.stem,
                                "title": title_tag or file_path.stem,
                                "ext": file_path.suffix.lstrip('.'),
                                "path": path_str,
                                "target": path_str,
                                "origin": "local",
                                "size": size_bytes,
                                "size_bytes": size_bytes,
                                "tags": tags,
                            })

            if results:
                debug(f"Returning {len(results)} results from DB")
            else:
                debug("No results found in DB")
            return results

        except Exception as e:
            log(f"⚠️ Database search failed: {e}", file=sys.stderr)
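To make the custom word boundary above concrete, a quick standalone sketch of it versus `\b` (the term and filenames are illustrative, not part of the commit):

import re

term = "bar"
custom = re.compile(r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])', re.IGNORECASE)

print(bool(custom.search("foo_bar_baz.mp4")))          # True: '_' counts as a separator
print(bool(re.search(r'\bbar\b', "foo_bar_baz.mp4")))  # False: \b treats '_' as a word char
print(bool(custom.search("rebar.mkv")))                # False: no match inside a larger word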
@@ -1175,6 +1175,161 @@ class MatrixStorageBackend(StorageBackend):
            raise


class RemoteStorageBackend(StorageBackend):
    """File storage backend for remote Android/network storage servers.

    Connects to a remote storage server (e.g., running on an Android phone)
    via REST API. All operations are proxied to the remote server.
    """

    def __init__(self, server_url: str, timeout: int = 30, api_key: Optional[str] = None) -> None:
        """Initialize the remote storage backend.

        Args:
            server_url: Base URL of the remote storage server (e.g., http://192.168.1.100:5000)
            timeout: Request timeout in seconds
            api_key: Optional API key for authentication
        """
        try:
            import requests
        except ImportError:
            raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests")

        self.server_url = server_url.rstrip('/')
        self.timeout = timeout
        self.api_key = api_key
        self._session = requests.Session()

        # Add the API key to default headers if provided
        if self.api_key:
            self._session.headers.update({'X-API-Key': self.api_key})

    def get_name(self) -> str:
        return "remote"

    def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
        """Make an HTTP request to the remote server."""
        import requests
        from urllib.parse import urljoin

        url = urljoin(self.server_url, endpoint)

        try:
            response = self._session.request(
                method,
                url,
                timeout=self.timeout,
                **kwargs
            )

            if response.status_code == 404:
                raise Exception(f"Remote resource not found: {endpoint}")

            if response.status_code >= 400:
                try:
                    error_data = response.json()
                    error_msg = error_data.get('error', response.text)
                except Exception:
                    error_msg = response.text
                raise Exception(f"Remote server error {response.status_code}: {error_msg}")

            return response.json()

        except requests.exceptions.RequestException as e:
            raise Exception(f"Connection to {self.server_url} failed: {e}")

    def upload(self, file_path: Path, **kwargs: Any) -> str:
        """Upload (index) a file on remote storage.

        Args:
            file_path: Path to the file to upload
            tags: Optional list of tags to add
            urls: Optional list of known URLs

        Returns:
            Remote file hash
        """
        from helper.utils import sha256_file

        if not file_path.exists():
            raise ValueError(f"File not found: {file_path}")

        try:
            # Index the file on the remote server
            data = {"path": str(file_path)}

            tags = kwargs.get("tags", [])
            if tags:
                data["tags"] = tags

            urls = kwargs.get("urls", [])
            if urls:
                data["urls"] = urls

            result = self._request('POST', '/files/index', json=data)
            file_hash = result.get('hash')

            if file_hash:
                log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr)
                return file_hash
            else:
                raise Exception("Remote server did not return file hash")

        except Exception as exc:
            debug(f"Remote upload failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search files on remote storage.

        Args:
            query: Search query
            limit: Maximum results

        Returns:
            List of search results
        """
        limit = kwargs.get("limit")
        try:
            limit = int(limit) if limit is not None else 100
        except (TypeError, ValueError):
            limit = 100

        if limit <= 0:
            limit = 100

        try:
            response = self._request('GET', '/files/search', params={
                'q': query,
                'limit': limit
            })

            files = response.get('files', [])

            # Transform the remote format to the standard result format
            results = []
            for f in files:
                results.append({
                    "name": f.get('name', '').split('/')[-1],  # Get filename from path
                    "title": f.get('name', f.get('path', '')).split('/')[-1],
                    "ext": f.get('ext', ''),
                    "path": f.get('path', ''),
                    "target": f.get('path', ''),
                    "hash": f.get('hash', ''),
                    "origin": "remote",
                    "size": f.get('size', 0),
                    "size_bytes": f.get('size', 0),
                    "tags": f.get('tags', []),
                })

            debug(f"Remote search found {len(results)} results", file=sys.stderr)
            return results

        except Exception as exc:
            log(f"❌ Remote search failed: {exc}", file=sys.stderr)
            raise

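A client-side sketch of the backend above in use (URL, key, and path are illustrative; assumes a server like the one added later in this commit is reachable):

from pathlib import Path

backend = RemoteStorageBackend("http://192.168.1.100:5000", api_key="mysecretkey")

# upload() checks the path locally, then asks the server to index that same path,
# so it is meant for storage that the server's device can also see.
file_hash = backend.upload(Path("/storage/media/zohar.pdf"), tags=["title:Zohar"])

for item in backend.search("zohar", limit=10):
    print(item["hash"], item["path"], item["tags"])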
class FileStorage:
    """Unified file storage interface supporting multiple backend services.

@@ -1224,6 +1379,25 @@ class FileStorage:
        # Include Matrix backend
        self._backends["matrix"] = MatrixStorageBackend()

        # Include remote storage backends from config (for Android/network servers)
        remote_storages = config.get("remote_storages", [])
        if isinstance(remote_storages, list):
            for remote_config in remote_storages:
                if isinstance(remote_config, dict):
                    name = remote_config.get("name", "remote")
                    url = remote_config.get("url")
                    timeout = remote_config.get("timeout", 30)
                    api_key = remote_config.get("api_key")

                    if url:
                        try:
                            backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key)
                            self._backends[name] = backend
                            auth_status = " (with auth)" if api_key else " (no auth)"
                            log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
                        except Exception as e:
                            log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)

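For reference, the shape of config this loop consumes, as a sketch (the keys match the `.get()` calls above; the values are illustrative):

config = {
    "remote_storages": [
        {
            "name": "phone",
            "url": "http://192.168.1.100:5000",
            "timeout": 30,
            "api_key": "mysecretkey",   # optional; omit it and no auth header is sent
        },
    ],
}
# Each valid entry becomes addressable by name, e.g. storage["phone"].search("zohar")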
    def __getitem__(self, backend_name: str) -> StorageBackend:
        """Get a storage backend by name.

@@ -19,6 +19,8 @@ from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List, Tuple, Set

from .utils import sha256_file

logger = logging.getLogger(__name__)

# Try to import optional dependencies
@@ -455,6 +457,18 @@ class LocalLibraryDB:
            logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True)
            raise

    def get_file_id(self, file_path: Path) -> Optional[int]:
        """Get the file ID for a file path, or None if not found."""
        try:
            str_path = str(file_path.resolve())
            cursor = self.connection.cursor()
            cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,))
            row = cursor.fetchone()
            return row[0] if row else None
        except Exception as e:
            logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True)
            return None

    def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """Get metadata for a file."""
        try:
@@ -748,6 +762,177 @@ class LocalLibraryDB:
            logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True)
            raise

    def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
        """Set a relationship between two local files.

        Args:
            file_path: Path to the file being related
            related_file_path: Path to the related file
            rel_type: Type of relationship ('king', 'alt', 'related')
        """
        try:
            str_path = str(file_path.resolve())
            str_related_path = str(related_file_path.resolve())

            file_id = self.get_or_create_file_entry(file_path)
            related_file_id = self.get_or_create_file_entry(related_file_path)

            cursor = self.connection.cursor()

            # Get hashes for both files
            file_hash = sha256_file(file_path)
            related_file_hash = sha256_file(related_file_path)

            if not file_hash or not related_file_hash:
                logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}")
                return

            # Store the hashes in the files table for future lookups
            cursor.execute("""
                UPDATE files SET file_hash = ? WHERE id = ?
            """, (file_hash, file_id))
            cursor.execute("""
                UPDATE files SET file_hash = ? WHERE id = ?
            """, (related_file_hash, related_file_id))

            # Get current relationships
            cursor.execute("""
                SELECT relationships FROM metadata WHERE file_id = ?
            """, (file_id,))

            row = cursor.fetchone()
            # Use index access to be safe regardless of row_factory
            relationships_str = row[0] if row else None

            try:
                if relationships_str:
                    relationships = json.loads(relationships_str)
                else:
                    relationships = {}
            except (json.JSONDecodeError, TypeError):
                relationships = {}

            # Ensure relationships is a dict (handle the case where the DB has a list)
            if not isinstance(relationships, dict):
                relationships = {}

            # Ensure the rel_type key exists
            if rel_type not in relationships:
                relationships[rel_type] = []

            # Add the relationship (stored as a hash string)
            if related_file_hash not in relationships[rel_type]:
                relationships[rel_type].append(related_file_hash)

            # Save the updated relationships for the main file
            cursor.execute("""
                INSERT INTO metadata (file_id, relationships)
                VALUES (?, ?)
                ON CONFLICT(file_id) DO UPDATE SET
                    relationships = excluded.relationships,
                    time_modified = CURRENT_TIMESTAMP
            """, (file_id, json.dumps(relationships)))

            logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})")

            # Set the reverse relationship (bidirectional).
            # For 'alt' and 'related', the reverse is the same type. For 'king', the
            # reverse could be 'subject', but we use the same type for now to keep it
            # simple and consistent with Hydrus 'alternates'.
            reverse_type = rel_type

            # Update the related file
            cursor.execute("""
                SELECT relationships FROM metadata WHERE file_id = ?
            """, (related_file_id,))

            row = cursor.fetchone()
            relationships_str = row[0] if row else None

            try:
                if relationships_str:
                    reverse_relationships = json.loads(relationships_str)
                else:
                    reverse_relationships = {}
            except (json.JSONDecodeError, TypeError):
                reverse_relationships = {}

            if not isinstance(reverse_relationships, dict):
                reverse_relationships = {}

            if reverse_type not in reverse_relationships:
                reverse_relationships[reverse_type] = []

            if file_hash not in reverse_relationships[reverse_type]:
                reverse_relationships[reverse_type].append(file_hash)

            # Save the updated reverse relationships
            cursor.execute("""
                INSERT INTO metadata (file_id, relationships)
                VALUES (?, ?)
                ON CONFLICT(file_id) DO UPDATE SET
                    relationships = excluded.relationships,
                    time_modified = CURRENT_TIMESTAMP
            """, (related_file_id, json.dumps(reverse_relationships)))

            self.connection.commit()

        except Exception as e:
            logger.error(f"Error setting relationship: {e}", exc_info=True)
            raise

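For illustration, after set_relationship(a, b, "alt") both files' metadata rows carry mirrored JSON of this shape (hashes abbreviated; a sketch based on the code above, not output from a real run):

import json

relationships_for_a = {"alt": ["4e07...8d"]}  # b's hash, stored on a's metadata row
relationships_for_b = {"alt": ["9f2c...b1"]}  # a's hash, the mirror entry on b's row
print(json.dumps(relationships_for_a), json.dumps(relationships_for_b))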
    def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
        """Find all files that have a relationship pointing to the target path.

        Args:
            target_path: The file path to look for in other files' relationships

        Returns:
            List of dicts with {path, type} for files pointing to the target
        """
        try:
            # Get the hash of the target file
            target_hash = sha256_file(target_path)
            if not target_hash:
                logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash")
                return []

            cursor = self.connection.cursor()

            # Scan all metadata (this might be slow on huge DBs but is fine for a local
            # library). The LIKE pre-filter narrows candidates; the JSON check below
            # confirms real matches.
            cursor.execute("""
                SELECT f.file_path, m.relationships
                FROM metadata m
                JOIN files f ON m.file_id = f.id
                WHERE m.relationships LIKE ?
            """, (f"%{target_hash}%",))

            results = []

            for row in cursor.fetchall():
                f_path = row[0]
                rels_json = row[1]

                try:
                    rels = json.loads(rels_json)
                    if isinstance(rels, dict):
                        for r_type, hashes in rels.items():
                            if isinstance(hashes, list):
                                # Check if the target hash is in this relationship type
                                if target_hash in hashes:
                                    results.append({
                                        "path": f_path,
                                        "type": r_type
                                    })
                except (json.JSONDecodeError, TypeError):
                    continue

            return results
        except Exception as e:
            logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True)
            return []

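A round-trip usage sketch for the two methods above (the library path and filenames are illustrative):

from pathlib import Path

with LocalLibraryDB(Path("/data/library")) as db:
    db.set_relationship(Path("/data/library/a.mkv"), Path("/data/library/b.mkv"), "alt")
    for rel in db.find_files_pointing_to(Path("/data/library/a.mkv")):
        print(rel["path"], rel["type"])   # e.g. /data/library/b.mkv alt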
    def get_note(self, file_path: Path) -> Optional[str]:
        """Get note for a file."""
        try:
@@ -1076,6 +1261,11 @@ class LocalLibraryDB:
        if not text:
            return True
        try:
            # Check if the connection is valid
            if not self.connection:
                logger.warning(f"Database connection not available for worker {worker_id}")
                return False

            cursor = self.connection.cursor()
            cursor.execute("SELECT stdout FROM worker WHERE worker_id = ?", (worker_id,))
            row = cursor.fetchone()
@@ -1097,6 +1287,13 @@ class LocalLibraryDB:

            self.connection.commit()
            return cursor.rowcount > 0
        except sqlite3.ProgrammingError as e:
            # Handle "Cannot operate on a closed database" gracefully
            if "closed database" in str(e).lower():
                logger.warning(f"Database connection closed, cannot append stdout for worker {worker_id}")
                return False
            logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
            return False
        except Exception as e:
            logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
            return False
@@ -1599,3 +1796,23 @@ class LocalLibrarySearchOptimizer:
        if not self.db:
            return None
        return self.db.search_by_hash(file_hash)

    def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
        """Set a relationship between two files in the database.

        Delegates to LocalLibraryDB.set_relationship().

        Args:
            file_path: Path to the first file
            related_file_path: Path to the related file
            rel_type: Type of relationship ('king', 'alt', 'related', etc.)
        """
        if not self.db:
            return
        self.db.set_relationship(file_path, related_file_path, rel_type)

    def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
        """Find all files that have a relationship pointing to the target path."""
        if not self.db:
            return []
        return self.db.find_files_pointing_to(target_path)

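Because these wrappers no-op when no database is attached, callers need no guards of their own; a sketch using the optimizer as a context manager, as the server below does (path illustrative):

from pathlib import Path

with LocalLibrarySearchOptimizer(Path("/data/library")) as db:
    db.set_relationship(Path("/data/library/a.mkv"), Path("/data/library/b.mkv"), "alt")
    print(db.find_files_pointing_to(Path("/data/library/a.mkv")))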
helper/remote_storage_server.py (new file, 523 lines)
@@ -0,0 +1,523 @@
"""Remote Storage Server - REST API for file management on mobile devices.

This server runs on a mobile device (Android with Termux, iOS with iSH, etc.)
and exposes the local library database as a REST API. Your PC connects to this
server and uses it as a remote storage backend through the RemoteStorageBackend.

## INSTALLATION

### On Android (Termux):
1. Install Termux from the Play Store: https://play.google.com/store/apps/details?id=com.termux
2. In Termux:
   $ apt update && apt install python
   $ pip install flask flask-cors
3. Copy this file to your device
4. Run it (with an optional API key):
   $ python remote_storage_server.py --storage-path /path/to/storage --port 5000
   $ python remote_storage_server.py --storage-path /path/to/storage --api-key mysecretkey
5. The server prints connection info automatically (IP, port, API key)

### On PC:
1. Install requests: pip install requests
2. Add to config.json:
   {
     "remote_storages": [
       {
         "name": "phone",
         "url": "http://192.168.1.100:5000",
         "api_key": "mysecretkey",
         "timeout": 30
       }
     ]
   }
   Note: the API key is optional. Works on WiFi or cellular data.

## USAGE

After setup, all cmdlets work with the phone:
  $ search-file zohar -store phone
  $ @1-3 | add-relationship -king @4 -store phone
  $ @1 | get-relationship -store phone

The server exposes the REST endpoints that RemoteStorageBackend uses internally.
"""

from __future__ import annotations

import os
import sys
import json
import argparse
import logging
from pathlib import Path
from typing import Optional, Dict, Any
from datetime import datetime
from functools import wraps

# Add the parent directory to the path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from helper.logger import log

# ============================================================================
# CONFIGURATION
# ============================================================================

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s'
)
logger = logging.getLogger(__name__)

STORAGE_PATH: Optional[Path] = None
API_KEY: Optional[str] = None  # API key for authentication (None = no auth required)

# Try importing Flask - will be used in main() only
try:
    from flask import Flask, request, jsonify
    from flask_cors import CORS
    HAS_FLASK = True
except ImportError:
    HAS_FLASK = False

# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def get_local_ip() -> Optional[str]:
    """Get the local IP address that would be used for external connections."""
    import socket
    try:
        # Create a socket to determine which interface would be used
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(("8.8.8.8", 80))  # Google DNS
        ip = s.getsockname()[0]
        s.close()
        return ip
    except Exception:
        return None

# ============================================================================
# FLASK APP FACTORY
# ============================================================================

def create_app():
    """Create and configure the Flask app with all routes."""
    if not HAS_FLASK:
        raise ImportError("Flask not installed. Install with: pip install flask flask-cors")

    from flask import Flask, request, jsonify
    from flask_cors import CORS

    app = Flask(__name__)
    CORS(app)

    # ========================================================================
    # HELPER DECORATORS
    # ========================================================================

    def require_auth():
        """Decorator to check API key authentication if configured."""
        def decorator(f):
            @wraps(f)
            def decorated_function(*args, **kwargs):
                if API_KEY:
                    # Get the API key from a header or query parameter
                    provided_key = request.headers.get('X-API-Key') or request.args.get('api_key')
                    if not provided_key or provided_key != API_KEY:
                        return jsonify({"error": "Unauthorized. Invalid or missing API key."}), 401
                return f(*args, **kwargs)
            return decorated_function
        return decorator

    def require_storage():
        """Decorator to ensure the storage path is configured."""
        def decorator(f):
            @wraps(f)
            def decorated_function(*args, **kwargs):
                if not STORAGE_PATH:
                    return jsonify({"error": "Storage path not configured"}), 500
                return f(*args, **kwargs)
            return decorated_function
        return decorator

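From the client side, the key travels in the X-API-Key header (or an api_key query parameter); a sketch with requests, with the URL and key illustrative:

import requests

resp = requests.get(
    "http://192.168.1.100:5000/health",
    headers={"X-API-Key": "mysecretkey"},  # omit when the server runs without --api-key
    timeout=10,
)
print(resp.status_code, resp.json())       # 401 if the key is wrong or missing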
    # ========================================================================
    # HEALTH CHECK
    # ========================================================================

    @app.route('/health', methods=['GET'])
    @require_auth()
    def health():
        """Check server health and storage availability."""
        status = {
            "status": "ok",
            "storage_configured": STORAGE_PATH is not None,
            "timestamp": datetime.now().isoformat()
        }

        if STORAGE_PATH:
            status["storage_path"] = str(STORAGE_PATH)
            status["storage_exists"] = STORAGE_PATH.exists()
            try:
                from helper.local_library import LocalLibraryDB
                with LocalLibraryDB(STORAGE_PATH) as db:
                    status["database_accessible"] = True
            except Exception as e:
                status["database_accessible"] = False
                status["database_error"] = str(e)

        return jsonify(status), 200

    # ========================================================================
    # FILE OPERATIONS
    # ========================================================================

    @app.route('/files/search', methods=['GET'])
    @require_auth()
    @require_storage()
    def search_files():
        """Search for files by name or tag."""
        from helper.local_library import LocalLibrarySearchOptimizer

        query = request.args.get('q', '')
        limit = request.args.get('limit', 100, type=int)

        if not query:
            return jsonify({"error": "Search query required"}), 400

        try:
            with LocalLibrarySearchOptimizer(STORAGE_PATH) as db:
                results = db.search_by_name(query, limit)
                tag_results = db.search_by_tag(query, limit)
                # Merge name and tag hits, deduplicating by hash
                all_results = {r['hash']: r for r in (results + tag_results)}

                return jsonify({
                    "query": query,
                    "count": len(all_results),
                    "files": list(all_results.values())
                }), 200
        except Exception as e:
            logger.error(f"Search error: {e}", exc_info=True)
            return jsonify({"error": f"Search failed: {str(e)}"}), 500

    @app.route('/files/<file_hash>', methods=['GET'])
    @require_auth()
    @require_storage()
    def get_file_metadata(file_hash: str):
        """Get metadata for a specific file by hash."""
        from helper.local_library import LocalLibraryDB

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)

                if not file_path or not file_path.exists():
                    return jsonify({"error": "File not found"}), 404

                metadata = db.get_metadata(file_path)
                tags = db.get_tags(file_path)

                return jsonify({
                    "hash": file_hash,
                    "path": str(file_path),
                    "size": file_path.stat().st_size,
                    "metadata": metadata,
                    "tags": tags
                }), 200
        except Exception as e:
            logger.error(f"Get metadata error: {e}", exc_info=True)
            return jsonify({"error": f"Failed to get metadata: {str(e)}"}), 500

    @app.route('/files/index', methods=['POST'])
    @require_auth()
    @require_storage()
    def index_file():
        """Index a new file in the storage."""
        from helper.local_library import LocalLibraryDB
        from helper.utils import sha256_file

        data = request.get_json() or {}
        file_path_str = data.get('path')
        tags = data.get('tags', [])
        urls = data.get('urls', [])

        if not file_path_str:
            return jsonify({"error": "File path required"}), 400

        try:
            file_path = Path(file_path_str)

            if not file_path.exists():
                return jsonify({"error": "File does not exist"}), 404

            with LocalLibraryDB(STORAGE_PATH) as db:
                db.get_or_create_file_entry(file_path)

                if tags:
                    db.add_tags(file_path, tags)

                if urls:
                    db.add_known_urls(file_path, urls)

            file_hash = sha256_file(file_path)

            return jsonify({
                "hash": file_hash,
                "path": str(file_path),
                "tags_added": len(tags),
                "urls_added": len(urls)
            }), 201
        except Exception as e:
            logger.error(f"Index error: {e}", exc_info=True)
            return jsonify({"error": f"Indexing failed: {str(e)}"}), 500

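What RemoteStorageBackend.upload() posts to this endpoint, shown standalone (URL, key, and path are illustrative; the path must exist on the server's device):

import requests

resp = requests.post(
    "http://192.168.1.100:5000/files/index",
    headers={"X-API-Key": "mysecretkey"},
    json={
        "path": "/storage/media/zohar.pdf",
        "tags": ["title:Zohar"],
        "urls": ["https://example.org/zohar"],
    },
    timeout=30,
)
print(resp.status_code, resp.json())  # 201 with {"hash": ..., "tags_added": 1, "urls_added": 1}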
    # ========================================================================
    # TAG OPERATIONS
    # ========================================================================

    @app.route('/tags/<file_hash>', methods=['GET'])
    @require_auth()
    @require_storage()
    def get_tags(file_hash: str):
        """Get tags for a file."""
        from helper.local_library import LocalLibraryDB

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                tags = db.get_tags(file_path)
                return jsonify({"hash": file_hash, "tags": tags}), 200
        except Exception as e:
            logger.error(f"Get tags error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/tags/<file_hash>', methods=['POST'])
    @require_auth()
    @require_storage()
    def add_tags(file_hash: str):
        """Add tags to a file."""
        from helper.local_library import LocalLibraryDB

        data = request.get_json() or {}
        tags = data.get('tags', [])
        mode = data.get('mode', 'add')

        if not tags:
            return jsonify({"error": "Tags required"}), 400

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                if mode == 'replace':
                    db.remove_tags(file_path, db.get_tags(file_path))

                db.add_tags(file_path, tags)
                return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200
        except Exception as e:
            logger.error(f"Add tags error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/tags/<file_hash>', methods=['DELETE'])
    @require_auth()
    @require_storage()
    def remove_tags(file_hash: str):
        """Remove tags from a file."""
        from helper.local_library import LocalLibraryDB

        tags_str = request.args.get('tags', '')

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                if tags_str:
                    tags_to_remove = [t.strip() for t in tags_str.split(',')]
                else:
                    tags_to_remove = db.get_tags(file_path)

                db.remove_tags(file_path, tags_to_remove)
                return jsonify({"hash": file_hash, "tags_removed": len(tags_to_remove)}), 200
        except Exception as e:
            logger.error(f"Remove tags error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    # ========================================================================
    # RELATIONSHIP OPERATIONS
    # ========================================================================

    @app.route('/relationships/<file_hash>', methods=['GET'])
    @require_auth()
    @require_storage()
    def get_relationships(file_hash: str):
        """Get relationships for a file."""
        from helper.local_library import LocalLibraryDB

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                metadata = db.get_metadata(file_path)
                relationships = metadata.get('relationships', {}) if metadata else {}
                return jsonify({"hash": file_hash, "relationships": relationships}), 200
        except Exception as e:
            logger.error(f"Get relationships error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/relationships', methods=['POST'])
    @require_auth()
    @require_storage()
    def set_relationship():
        """Set a relationship between two files."""
        from helper.local_library import LocalLibraryDB

        data = request.get_json() or {}
        from_hash = data.get('from_hash')
        to_hash = data.get('to_hash')
        rel_type = data.get('type', 'alt')

        if not from_hash or not to_hash:
            return jsonify({"error": "from_hash and to_hash required"}), 400

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                from_path = db.search_by_hash(from_hash)
                to_path = db.search_by_hash(to_hash)

                if not from_path or not to_path:
                    return jsonify({"error": "File not found"}), 404

                db.set_relationship(from_path, to_path, rel_type)
                return jsonify({"from_hash": from_hash, "to_hash": to_hash, "type": rel_type}), 200
        except Exception as e:
            logger.error(f"Set relationship error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

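And the matching client call for the relationship endpoint (hashes abbreviated and illustrative; both files must already be indexed so the hash lookups succeed):

import requests

resp = requests.post(
    "http://192.168.1.100:5000/relationships",
    headers={"X-API-Key": "mysecretkey"},
    json={"from_hash": "9f2c...b1", "to_hash": "4e07...8d", "type": "alt"},
    timeout=30,
)
print(resp.status_code, resp.json())  # 200, echoing from_hash, to_hash, and type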
    # ========================================================================
    # URL OPERATIONS
    # ========================================================================

    @app.route('/urls/<file_hash>', methods=['GET'])
    @require_auth()
    @require_storage()
    def get_urls(file_hash: str):
        """Get known URLs for a file."""
        from helper.local_library import LocalLibraryDB

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                metadata = db.get_metadata(file_path)
                urls = metadata.get('known_urls', []) if metadata else []
                return jsonify({"hash": file_hash, "urls": urls}), 200
        except Exception as e:
            logger.error(f"Get URLs error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/urls/<file_hash>', methods=['POST'])
    @require_auth()
    @require_storage()
    def add_urls(file_hash: str):
        """Add URLs to a file."""
        from helper.local_library import LocalLibraryDB

        data = request.get_json() or {}
        urls = data.get('urls', [])

        if not urls:
            return jsonify({"error": "URLs required"}), 400

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                db.add_known_urls(file_path, urls)
                return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200
        except Exception as e:
            logger.error(f"Add URLs error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    return app

# ============================================================================
# MAIN
# ============================================================================

def main():
    if not HAS_FLASK:
        print("ERROR: Flask and flask-cors required")
        print("Install with: pip install flask flask-cors")
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description='Remote Storage Server for Medios-Macina',
        epilog='Example: python remote_storage_server.py --storage-path /storage/media --port 5000 --api-key mysecretkey'
    )
    parser.add_argument('--storage-path', type=str, required=True, help='Path to storage directory')
    parser.add_argument('--host', type=str, default='0.0.0.0', help='Server host (default: 0.0.0.0)')
    parser.add_argument('--port', type=int, default=5000, help='Server port (default: 5000)')
    parser.add_argument('--api-key', type=str, default=None, help='API key for authentication (optional)')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode')

    args = parser.parse_args()

    global STORAGE_PATH, API_KEY
    STORAGE_PATH = Path(args.storage_path).resolve()
    API_KEY = args.api_key

    if not STORAGE_PATH.exists():
        print(f"ERROR: Storage path does not exist: {STORAGE_PATH}")
        sys.exit(1)

    # Get the local IP address
    local_ip = get_local_ip()
    if not local_ip:
        local_ip = "127.0.0.1"

    print(f"\n{'='*70}")
    print("Remote Storage Server - Medios-Macina")
    print(f"{'='*70}")
    print(f"Storage Path: {STORAGE_PATH}")
    print(f"Local IP:     {local_ip}")
    print(f"Server URL:   http://{local_ip}:{args.port}")
    print(f"Health URL:   http://{local_ip}:{args.port}/health")
    print(f"API Key:      {'Enabled - ' + ('***' + args.api_key[-4:]) if args.api_key else 'Disabled (no auth)'}")
    print(f"Debug Mode:   {args.debug}")
    print("\n📋 Config for config.json:")
    config_entry = {
        "name": "phone",
        "url": f"http://{local_ip}:{args.port}",
        "timeout": 30
    }
    if args.api_key:
        config_entry["api_key"] = args.api_key
    print(json.dumps(config_entry, indent=2))
    print(f"\n{'='*70}\n")

    try:
        from helper.local_library import LocalLibraryDB
        with LocalLibraryDB(STORAGE_PATH) as db:
            logger.info("Database initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize database: {e}")
        sys.exit(1)

    app = create_app()
    app.run(host=args.host, port=args.port, debug=args.debug, use_reloader=False)

if __name__ == '__main__':
    main()