upk
@@ -8,7 +8,12 @@ Lean, focused downloader without event infrastructure overhead.
 """

 from __future__ import annotations

 import re  # noqa: F401
 import glob  # noqa: F401
+import hashlib
 import json  # noqa: F401
+import random
 import re
+import string
+import subprocess
 import sys
+import time
@@ -157,31 +162,72 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
     return None


-def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> None:
+def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]:
     """Download each section separately so merge-file can combine them.

     yt-dlp with multiple --download-sections args merges them into one file.
     We need separate files for merge-file, so download each section individually.

+    Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
+    thinking sections are already downloaded. The title is extracted and stored in tags.
+
+    Returns:
+        (session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
     """
     sections_list = ytdl_options.get("download_sections", [])
     if not sections_list:
-        return
+        return "", {}

-    # Download each section separately with unique output template
+    # Generate a unique hash-based ID for this download session
+    # This ensures different videos/downloads don't have filename collisions
+    session_id = hashlib.md5(
+        (url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
+    ).hexdigest()[:12]
+
+    first_section_info = None
+    title_from_first = None
+
+    # Download each section separately with unique output template using session ID
     for section_idx, section in enumerate(sections_list, 1):
-        # Build unique output template for this section
-        # e.g., "title.section_1_of_3.ext" for the first section
+        # Build unique output template for this section using session-based filename
+        # e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
         base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
+        output_dir_path = Path(base_outtmpl).parent

-        # Insert section number before extension
-        # e.g., "/path/title.hash.webm" → "/path/title.hash.section_1_of_3.webm"
-        if base_outtmpl.endswith(".%(ext)s"):
-            section_outtmpl = base_outtmpl.replace(".%(ext)s", f".section_{section_idx}_of_{len(sections_list)}.%(ext)s")
-        else:
-            section_outtmpl = base_outtmpl + f".section_{section_idx}_of_{len(sections_list)}"
+        # Use session_id + section index for temp filename
+        # e.g., "/path/{session_id}_1.%(ext)s"
+        filename_tmpl = f"{session_id}_{section_idx}"
+        filename_tmpl += ".%(ext)s"
+
+        # Use Path to handle separators correctly for the OS
+        section_outtmpl = str(output_dir_path / filename_tmpl)

-        # Build yt-dlp command for this section
+        # For the first section, extract metadata first (separate call)
+        if section_idx == 1:
+            metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
+            if ytdl_options.get("cookiefile"):
+                cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
+                metadata_cmd.extend(["--cookies", cookies_path])
+            if ytdl_options.get("noplaylist"):
+                metadata_cmd.append("--no-playlist")
+            metadata_cmd.append(url)
+
+            try:
+                meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
+                if meta_result.returncode == 0 and meta_result.stdout:
+                    try:
+                        info_dict = json.loads(meta_result.stdout.strip())
+                        first_section_info = info_dict
+                        title_from_first = info_dict.get('title')
+                        debug(f"Extracted title from metadata: {title_from_first}")
+                    except json.JSONDecodeError:
+                        debug("Could not parse JSON metadata")
+            except Exception as e:
+                debug(f"Error extracting metadata: {e}")
+
+        # Build yt-dlp command for downloading this section
         cmd = ["yt-dlp"]

         # Add format
@@ -212,14 +258,18 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
         cmd.append(url)

         debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
         debug(f"Command: {' '.join(cmd)}")

-        # Run the subprocess
+        # Run the subprocess - don't capture output so progress is shown
         try:
-            result = subprocess.run(cmd, capture_output=False, text=True)
+            result = subprocess.run(cmd)

             if result.returncode != 0:
                 raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
         except Exception as exc:
             raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc

+    return session_id, first_section_info or {}
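For orientation, a minimal sketch of what a single per-section invocation looks like once the pieces above are assembled. The URL, section spec, and output directory are placeholders; --download-sections, --dump-json and --skip-download are real yt-dlp flags:

    import subprocess

    session_id = "a1b2c3d4e5f6"          # placeholder for the md5-derived ID above
    section = "*00:01:00-00:02:30"       # yt-dlp section spec: "*start-end"
    outtmpl = f"/downloads/{session_id}_1.%(ext)s"

    cmd = [
        "yt-dlp",
        "--download-sections", section,  # one section per invocation, by design
        "-o", outtmpl,                   # session-based name, so yt-dlp never
                                         # mistakes it for an already-downloaded title
        "https://example.com/watch?v=VIDEO_ID",
    ]
    subprocess.run(cmd)                  # output left uncaptured so progress shows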
@@ -296,8 +346,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
     # Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
     base_options["download_sections"] = sections
     debug(f"Download sections configured: {', '.join(sections)}")
-    # Force keyframes at cuts for accurate section boundaries
-    base_options["force_keyframes_at_cuts"] = True
+    # Note: Not using --force-keyframes-at-cuts to avoid re-encoding
+    # This may result in less precise cuts but faster downloads

     # Add playlist items selection if provided
     if opts.playlist_items:
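A sketch of the options dict this branch now produces, assuming two sections (key names are the ones used above; forcing keyframes is the precision-versus-speed trade-off the comments describe):

    base_options = {
        "outtmpl": "/downloads/%(title)s.%(ext)s",
        # one list element per --download-sections argument
        "download_sections": ["*00:00:10-00:00:40", "*00:05:00-00:06:00"],
        # intentionally omitted after this change: "force_keyframes_at_cuts": True
        # (keyframe-accurate cuts require re-encoding; omitting trades precision for speed)
    }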
@@ -751,8 +801,10 @@ def download_media(
     debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")

     # Use subprocess when download_sections are present (Python API doesn't support them properly)
+    session_id = None
+    first_section_info = {}
     if ytdl_options.get("download_sections"):
-        _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
+        session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
         info = None
     else:
         with yt_dlp.YoutubeDL(ytdl_options) as ydl:  # type: ignore[arg-type]
@@ -780,7 +832,7 @@ def download_media(
         import re

         # Get the expected filename pattern from outtmpl
-        # For sections: "C:\path\title.section_1_of_3.ext", "C:\path\title.section_2_of_3.ext", etc.
+        # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc.
         # For non-sections: "C:\path\title.ext"

         # Wait a moment to ensure files are fully written
@@ -791,10 +843,10 @@ def download_media(
         if not files:
             raise FileNotFoundError(f"No files found in {opts.output_dir}")

-        # If we downloaded sections, look for files with .section_N_of_M pattern
-        if opts.clip_sections:
-            # Pattern: "title.section_1_of_3.ext", "title.section_2_of_3.ext", etc.
-            section_pattern = re.compile(r'\.section_(\d+)_of_(\d+)\.')
+        # If we downloaded sections, look for files with the session_id pattern
+        if opts.clip_sections and session_id:
+            # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
+            section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
             matching_files = [f for f in files if section_pattern.search(f.name)]

             if matching_files:
@@ -804,13 +856,44 @@ def download_media(
                     return int(match.group(1)) if match else 999

                 matching_files.sort(key=extract_section_num)
-                media_path = matching_files[0]  # First section
-                media_paths = matching_files  # All sections
-                debug(f"✓ Downloaded {len(media_paths)} section file(s)")
+                debug(f"Found {len(matching_files)} section file(s) matching pattern")

+                # Now rename section files to use hash-based names
+                # This ensures unique filenames for each section content
+                renamed_files = []
+                for idx, section_file in enumerate(matching_files, 1):
+                    try:
+                        # Calculate hash for the file
+                        file_hash = sha256_file(section_file)
+                        ext = section_file.suffix
+                        new_name = f"{file_hash}{ext}"
+                        new_path = opts.output_dir / new_name
+
+                        if new_path.exists() and new_path != section_file:
+                            # If file with same hash exists, use it and delete the temp one
+                            debug(f"File with hash {file_hash} already exists, using existing file.")
+                            try:
+                                section_file.unlink()
+                            except OSError:
+                                pass
+                            renamed_files.append(new_path)
+                        else:
+                            section_file.rename(new_path)
+                            debug(f"Renamed section file: {section_file.name} → {new_name}")
+                            renamed_files.append(new_path)
+                    except Exception as e:
+                        debug(f"Failed to process section file {section_file.name}: {e}")
+                        renamed_files.append(section_file)
+
+                media_path = renamed_files[0]
+                media_paths = renamed_files
+                debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
             else:
                 # Fallback to most recent file if pattern not found
                 media_path = files[0]
                 media_paths = None
                 debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
         else:
             # No sections, just take the most recent file
             media_path = files[0]
@@ -830,10 +913,30 @@ def download_media(

     # Create result with minimal data extracted from filename
     file_hash = sha256_file(media_path)

+    # For section downloads, create tags with the title and build proper info dict
+    tags = []
+    title = ''
+    if first_section_info:
+        title = first_section_info.get('title', '')
+        if title:
+            tags.append(f'title:{title}')
+            debug(f"Added title tag for section download: {title}")
+
+    # Build info dict - always use extracted title if available, not hash
+    if first_section_info:
+        info_dict = first_section_info
+    else:
+        info_dict = {
+            "id": media_path.stem,
+            "title": title or media_path.stem,
+            "ext": media_path.suffix.lstrip(".")
+        }

     return DownloadMediaResult(
         path=media_path,
-        info={"id": media_path.stem, "title": media_path.stem, "ext": media_path.suffix.lstrip(".")},
-        tags=[],
+        info=info_dict,
+        tags=tags,
         source_url=opts.url,
         hash_value=file_hash,
+        paths=media_paths,  # Include all section files if present
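The rename step above leans on a sha256_file helper imported elsewhere in the codebase. A minimal sketch of what such a helper presumably looks like, with chunked reads so large media files never sit in memory whole:

    import hashlib
    from pathlib import Path

    def sha256_file(path: Path, chunk_size: int = 1 << 20) -> str:
        """Hex SHA-256 of a file, read in 1 MiB chunks."""
        digest = hashlib.sha256()
        with open(path, "rb") as fh:
            for chunk in iter(lambda: fh.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()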
@@ -137,14 +137,14 @@ class LocalStorageBackend(StorageBackend):
     # Check for duplicate files using LocalLibraryDB (fast - uses index)
     try:
-        db = LocalLibraryDB(dest_dir)
-        existing_path = db.search_by_hash(file_hash)
-        if existing_path and existing_path.exists():
-            log(
-                f"✓ File already in local storage: {existing_path}",
-                file=sys.stderr,
-            )
-            return str(existing_path)
+        with LocalLibraryDB(dest_dir) as db:
+            existing_path = db.search_by_hash(file_hash)
+            if existing_path and existing_path.exists():
+                log(
+                    f"✓ File already in local storage: {existing_path}",
+                    file=sys.stderr,
+                )
+                return str(existing_path)
     except Exception as exc:
         log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)
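The switch from a bare constructor to `with LocalLibraryDB(...) as db:` implies the class is a context manager. A sketch of the protocol it would need, under the assumption that the database filename and connection setup live in the real class:

    import sqlite3
    from pathlib import Path

    class LocalLibraryDB:
        """Sketch of the context-manager protocol the new `with` usage relies on."""
        def __init__(self, root: Path) -> None:
            self.connection = sqlite3.connect(Path(root) / "library.db")  # filename assumed

        def __enter__(self) -> "LocalLibraryDB":
            return self                  # binds db in `with LocalLibraryDB(dest_dir) as db:`

        def __exit__(self, exc_type, exc, tb) -> bool:
            self.connection.close()      # the handle is released on success *and* on error
            return False                 # don't swallow exceptions

The payoff is that every early return or exception inside the block still closes the connection, which the old straight-line version did not guarantee.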
@@ -205,247 +205,156 @@ class LocalStorageBackend(StorageBackend):

         # Try database search first (much faster than filesystem scan)
         try:
-            db = LocalLibraryDB(search_dir)
-            cursor = db.connection.cursor()
-
-            # Check if query is a tag namespace search (format: "namespace:pattern")
-            if ":" in query and not query.startswith(":"):
-                namespace, pattern = query.split(":", 1)
-                namespace = namespace.strip().lower()
-                pattern = pattern.strip().lower()
-                debug(f"Performing namespace search: {namespace}:{pattern}")
-
-                # Search for tags matching the namespace and pattern
-                query_pattern = f"{namespace}:%"
-
-                cursor.execute("""
-                    SELECT DISTINCT f.id, f.file_path, f.file_size
-                    FROM files f
-                    JOIN tags t ON f.id = t.file_id
-                    WHERE LOWER(t.tag) LIKE ?
-                    ORDER BY f.file_path
-                    LIMIT ?
-                """, (query_pattern, limit or 1000))
-
-                rows = cursor.fetchall()
-                debug(f"Found {len(rows)} potential matches in DB")
-
-                # Filter results by pattern match
-                for file_id, file_path_str, size_bytes in rows:
-                    if not file_path_str:
-                        continue
-
-                    # Get the file's tags and check if any match the pattern
-                    cursor.execute("""
-                        SELECT DISTINCT tag FROM tags
-                        WHERE file_id = ?
-                        AND LOWER(tag) LIKE ?
-                    """, (file_id, query_pattern))
-
-                    tags = [row[0] for row in cursor.fetchall()]
-
-                    # Check if any tag matches the pattern (case-insensitive wildcard)
-                    for tag in tags:
-                        tag_lower = tag.lower()
-                        # Extract the value part after "namespace:"
-                        if tag_lower.startswith(f"{namespace}:"):
-                            value = tag_lower[len(namespace)+1:]
-                            # Use fnmatch for wildcard matching
-                            if fnmatch(value, pattern):
-                                file_path = Path(file_path_str)
-                                if file_path.exists():
-                                    path_str = str(file_path)
-                                    if size_bytes is None:
-                                        size_bytes = file_path.stat().st_size
-
-                                    # Fetch all tags for this file
-                                    cursor.execute("""
-                                        SELECT tag FROM tags WHERE file_id = ?
-                                    """, (file_id,))
-                                    all_tags = [row[0] for row in cursor.fetchall()]
-
-                                    # Use title tag if present
-                                    title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
-
-                                    results.append({
-                                        "name": file_path.stem,
-                                        "title": title_tag or file_path.stem,
-                                        "ext": file_path.suffix.lstrip('.'),
-                                        "path": path_str,
-                                        "target": path_str,
-                                        "origin": "local",
-                                        "size": size_bytes,
-                                        "size_bytes": size_bytes,
-                                        "tags": all_tags,
-                                    })
-                                else:
-                                    debug(f"File missing on disk: {file_path}")
-                                break  # Don't add same file multiple times
-
-                    if limit is not None and len(results) >= limit:
-                        return results
-
-            elif not match_all:
-                # Search by filename or simple tags (namespace-agnostic for plain text)
-                # For plain text search, match:
-                # 1. Filenames containing the query
-                # 2. Simple tags (without namespace) containing the query
-                # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
-                # Use explicit namespace search for that (e.g., "channel:joe*")
-
-                # Split query into terms for AND logic
-                terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
-                if not terms:
-                    terms = [query_lower]
-
-                debug(f"Performing filename/tag search for terms: {terms}")
-
-                # Fetch more results than requested to allow for filtering
-                fetch_limit = (limit or 45) * 50
-
-                # 1. Filename search (AND logic)
-                conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
-                params = [f"%{t}%" for t in terms]
-                where_clause = " AND ".join(conditions)
-
-                cursor.execute(f"""
-                    SELECT DISTINCT f.id, f.file_path, f.file_size
-                    FROM files f
-                    WHERE {where_clause}
-                    ORDER BY f.file_path
-                    LIMIT ?
-                """, (*params, fetch_limit))
-
-                rows = cursor.fetchall()
-                debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
-
-                # Compile regex for whole word matching (only if single term, otherwise skip)
-                word_regex = None
-                if len(terms) == 1:
-                    term = terms[0]
-                    # Check if term contains wildcard characters
-                    has_wildcard = '*' in term or '?' in term
-
-                    if has_wildcard:
-                        # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
-                        try:
-                            from fnmatch import translate
-                            word_regex = re.compile(translate(term), re.IGNORECASE)
-                        except Exception:
-                            word_regex = None
-                    else:
-                        # Use custom boundary that treats underscores as separators
-                        # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
-                        try:
-                            # Match if not preceded or followed by alphanumeric chars
-                            pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
-                            word_regex = re.compile(pattern, re.IGNORECASE)
-                        except Exception:
-                            word_regex = None
-
-                seen_files = set()
-                for file_id, file_path_str, size_bytes in rows:
-                    if not file_path_str or file_path_str in seen_files:
-                        continue
-
-                    # Apply whole word filter on filename if single term
-                    if word_regex:
-                        p = Path(file_path_str)
-                        if not word_regex.search(p.name):
-                            continue
-                    seen_files.add(file_path_str)
-
-                    file_path = Path(file_path_str)
-                    if file_path.exists():
-                        path_str = str(file_path)
-                        if size_bytes is None:
-                            size_bytes = file_path.stat().st_size
-
-                        # Fetch tags for this file
-                        cursor.execute("""
-                            SELECT tag FROM tags WHERE file_id = ?
-                        """, (file_id,))
-                        tags = [row[0] for row in cursor.fetchall()]
-
-                        # Use title tag if present
-                        title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
-
-                        results.append({
-                            "name": file_path.stem,
-                            "title": title_tag or file_path.stem,
-                            "ext": file_path.suffix.lstrip('.'),
-                            "path": path_str,
-                            "target": path_str,
-                            "origin": "local",
-                            "size": size_bytes,
-                            "size_bytes": size_bytes,
-                            "tags": tags,
-                        })
-
-                # Also search for simple tags (without namespace) containing the query
-                # Only perform tag search if single term, or if we want to support multi-term tag search
-                # For now, fallback to single pattern search for tags if multiple terms
-                # (searching for a tag that contains "term1 term2" or "term1,term2")
-                # This is less useful for AND logic across multiple tags, but consistent with previous behavior
-                query_pattern = f"%{query_lower}%"
-
-                cursor.execute("""
-                    SELECT DISTINCT f.id, f.file_path, f.file_size
-                    FROM files f
-                    JOIN tags t ON f.id = t.file_id
-                    WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
-                    ORDER BY f.file_path
-                    LIMIT ?
-                """, (query_pattern, limit or 1000))
-
-                tag_rows = cursor.fetchall()
-                for file_id, file_path_str, size_bytes in tag_rows:
-                    if not file_path_str or file_path_str in seen_files:
-                        continue
-                    seen_files.add(file_path_str)
-
-                    file_path = Path(file_path_str)
-                    if file_path.exists():
-                        path_str = str(file_path)
-                        if size_bytes is None:
-                            size_bytes = file_path.stat().st_size
-
-                        # Fetch tags for this file
-                        cursor.execute("""
-                            SELECT tag FROM tags WHERE file_id = ?
-                        """, (file_id,))
-                        tags = [row[0] for row in cursor.fetchall()]
-
-                        # Use title tag if present
-                        title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
-
-                        results.append({
-                            "name": file_path.stem,
-                            "title": title_tag or file_path.stem,
-                            "ext": file_path.suffix.lstrip('.'),
-                            "path": path_str,
-                            "target": path_str,
-                            "origin": "local",
-                            "size": size_bytes,
-                            "size_bytes": size_bytes,
-                            "tags": tags,
-                        })
-
-            else:
-                # Match all - get all files from database
-                cursor.execute("""
-                    SELECT id, file_path, file_size
-                    FROM files
-                    ORDER BY file_path
-                    LIMIT ?
-                """, (limit or 1000,))
-
-                rows = cursor.fetchall()
-                for file_id, file_path_str, size_bytes in rows:
-                    if file_path_str:
+            with LocalLibraryDB(search_dir) as db:
+                cursor = db.connection.cursor()
+
+                # Check if query is a tag namespace search (format: "namespace:pattern")
+                if ":" in query and not query.startswith(":"):
+                    namespace, pattern = query.split(":", 1)
+                    namespace = namespace.strip().lower()
+                    pattern = pattern.strip().lower()
+                    debug(f"Performing namespace search: {namespace}:{pattern}")
+
+                    # Search for tags matching the namespace and pattern
+                    query_pattern = f"{namespace}:%"
+
+                    cursor.execute("""
+                        SELECT DISTINCT f.id, f.file_path, f.file_size
+                        FROM files f
+                        JOIN tags t ON f.id = t.file_id
+                        WHERE LOWER(t.tag) LIKE ?
+                        ORDER BY f.file_path
+                        LIMIT ?
+                    """, (query_pattern, limit or 1000))
+
+                    rows = cursor.fetchall()
+                    debug(f"Found {len(rows)} potential matches in DB")
+
+                    # Filter results by pattern match
+                    for file_id, file_path_str, size_bytes in rows:
+                        if not file_path_str:
+                            continue
+
+                        # Get the file's tags and check if any match the pattern
+                        cursor.execute("""
+                            SELECT DISTINCT tag FROM tags
+                            WHERE file_id = ?
+                            AND LOWER(tag) LIKE ?
+                        """, (file_id, query_pattern))
+
+                        tags = [row[0] for row in cursor.fetchall()]
+
+                        # Check if any tag matches the pattern (case-insensitive wildcard)
+                        for tag in tags:
+                            tag_lower = tag.lower()
+                            # Extract the value part after "namespace:"
+                            if tag_lower.startswith(f"{namespace}:"):
+                                value = tag_lower[len(namespace)+1:]
+                                # Use fnmatch for wildcard matching
+                                if fnmatch(value, pattern):
+                                    file_path = Path(file_path_str)
+                                    if file_path.exists():
+                                        path_str = str(file_path)
+                                        if size_bytes is None:
+                                            size_bytes = file_path.stat().st_size
+
+                                        # Fetch all tags for this file
+                                        cursor.execute("""
+                                            SELECT tag FROM tags WHERE file_id = ?
+                                        """, (file_id,))
+                                        all_tags = [row[0] for row in cursor.fetchall()]
+
+                                        # Use title tag if present
+                                        title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
+
+                                        results.append({
+                                            "name": file_path.stem,
+                                            "title": title_tag or file_path.stem,
+                                            "ext": file_path.suffix.lstrip('.'),
+                                            "path": path_str,
+                                            "target": path_str,
+                                            "origin": "local",
+                                            "size": size_bytes,
+                                            "size_bytes": size_bytes,
+                                            "tags": all_tags,
+                                        })
+                                    else:
+                                        debug(f"File missing on disk: {file_path}")
+                                    break  # Don't add same file multiple times
+
+                        if limit is not None and len(results) >= limit:
+                            return results
+
+                elif not match_all:
+                    # Search by filename or simple tags (namespace-agnostic for plain text)
+                    # For plain text search, match:
+                    # 1. Filenames containing the query
+                    # 2. Simple tags (without namespace) containing the query
+                    # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
+                    # Use explicit namespace search for that (e.g., "channel:joe*")
+
+                    # Split query into terms for AND logic
+                    terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+                    if not terms:
+                        terms = [query_lower]
+
+                    debug(f"Performing filename/tag search for terms: {terms}")
+
+                    # Fetch more results than requested to allow for filtering
+                    fetch_limit = (limit or 45) * 50
+
+                    # 1. Filename search (AND logic)
+                    conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
+                    params = [f"%{t}%" for t in terms]
+                    where_clause = " AND ".join(conditions)
+
+                    cursor.execute(f"""
+                        SELECT DISTINCT f.id, f.file_path, f.file_size
+                        FROM files f
+                        WHERE {where_clause}
+                        ORDER BY f.file_path
+                        LIMIT ?
+                    """, (*params, fetch_limit))
+
+                    rows = cursor.fetchall()
+                    debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
+
+                    # Compile regex for whole word matching (only if single term, otherwise skip)
+                    word_regex = None
+                    if len(terms) == 1:
+                        term = terms[0]
+                        # Check if term contains wildcard characters
+                        has_wildcard = '*' in term or '?' in term
+
+                        if has_wildcard:
+                            # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
+                            try:
+                                from fnmatch import translate
+                                word_regex = re.compile(translate(term), re.IGNORECASE)
+                            except Exception:
+                                word_regex = None
+                        else:
+                            # Use custom boundary that treats underscores as separators
+                            # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
+                            try:
+                                # Match if not preceded or followed by alphanumeric chars
+                                pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
+                                word_regex = re.compile(pattern, re.IGNORECASE)
+                            except Exception:
+                                word_regex = None
+
+                    seen_files = set()
+                    for file_id, file_path_str, size_bytes in rows:
+                        if not file_path_str or file_path_str in seen_files:
+                            continue
+
+                        # Apply whole word filter on filename if single term
+                        if word_regex:
+                            p = Path(file_path_str)
+                            if not word_regex.search(p.name):
+                                continue
+                        seen_files.add(file_path_str)
+
+                        file_path = Path(file_path_str)
+                        if file_path.exists():
+                            path_str = str(file_path)
@@ -472,12 +381,103 @@ class LocalStorageBackend(StorageBackend):
-                            "size_bytes": size_bytes,
-                            "tags": tags,
-                        })
-
-            if results:
-                debug(f"Returning {len(results)} results from DB")
-            else:
-                debug("No results found in DB")
-            return results
+                                "size_bytes": size_bytes,
+                                "tags": tags,
+                            })
+
+                    # Also search for simple tags (without namespace) containing the query
+                    # Only perform tag search if single term, or if we want to support multi-term tag search
+                    # For now, fallback to single pattern search for tags if multiple terms
+                    # (searching for a tag that contains "term1 term2" or "term1,term2")
+                    # This is less useful for AND logic across multiple tags, but consistent with previous behavior
+                    query_pattern = f"%{query_lower}%"
+
+                    cursor.execute("""
+                        SELECT DISTINCT f.id, f.file_path, f.file_size
+                        FROM files f
+                        JOIN tags t ON f.id = t.file_id
+                        WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
+                        ORDER BY f.file_path
+                        LIMIT ?
+                    """, (query_pattern, limit or 1000))
+
+                    tag_rows = cursor.fetchall()
+                    for file_id, file_path_str, size_bytes in tag_rows:
+                        if not file_path_str or file_path_str in seen_files:
+                            continue
+                        seen_files.add(file_path_str)
+
+                        file_path = Path(file_path_str)
+                        if file_path.exists():
+                            path_str = str(file_path)
+                            if size_bytes is None:
+                                size_bytes = file_path.stat().st_size
+
+                            # Fetch tags for this file
+                            cursor.execute("""
+                                SELECT tag FROM tags WHERE file_id = ?
+                            """, (file_id,))
+                            tags = [row[0] for row in cursor.fetchall()]
+
+                            # Use title tag if present
+                            title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
+
+                            results.append({
+                                "name": file_path.stem,
+                                "title": title_tag or file_path.stem,
+                                "ext": file_path.suffix.lstrip('.'),
+                                "path": path_str,
+                                "target": path_str,
+                                "origin": "local",
+                                "size": size_bytes,
+                                "size_bytes": size_bytes,
+                                "tags": tags,
+                            })
+
+                    if limit is not None and len(results) >= limit:
+                        return results
+
+                else:
+                    # Match all - get all files from database
+                    cursor.execute("""
+                        SELECT id, file_path, file_size
+                        FROM files
+                        ORDER BY file_path
+                        LIMIT ?
+                    """, (limit or 1000,))
+
+                    rows = cursor.fetchall()
+                    for file_id, file_path_str, size_bytes in rows:
+                        if file_path_str:
+                            file_path = Path(file_path_str)
+                            if file_path.exists():
+                                path_str = str(file_path)
+                                if size_bytes is None:
+                                    size_bytes = file_path.stat().st_size
+
+                                # Fetch tags for this file
+                                cursor.execute("""
+                                    SELECT tag FROM tags WHERE file_id = ?
+                                """, (file_id,))
+                                tags = [row[0] for row in cursor.fetchall()]
+
+                                # Use title tag if present
+                                title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
+
+                                results.append({
+                                    "name": file_path.stem,
+                                    "title": title_tag or file_path.stem,
+                                    "ext": file_path.suffix.lstrip('.'),
+                                    "path": path_str,
+                                    "target": path_str,
+                                    "origin": "local",
+                                    "size": size_bytes,
+                                    "size_bytes": size_bytes,
+                                    "tags": tags,
+                                })
+
+                if results:
+                    debug(f"Returning {len(results)} results from DB")
+                else:
+                    debug("No results found in DB")
+                return results

         except Exception as e:
             log(f"⚠️ Database search failed: {e}", file=sys.stderr)
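Two details of the search logic above are easy to misread, so a small demonstration may help: the custom lookaround boundary (unlike \b, it treats '_' as a separator) and fnmatch-based namespace matching. All strings here are made-up test data:

    import re
    from fnmatch import fnmatch

    # \b fails on "foo_bar" because '_' counts as a word character...
    assert re.search(r"\bbar\b", "foo_bar.mkv") is None
    # ...while the lookaround boundary used above treats it as a separator:
    boundary = r"(?<![a-zA-Z0-9])" + re.escape("bar") + r"(?![a-zA-Z0-9])"
    assert re.search(boundary, "foo_bar.mkv", re.IGNORECASE) is not None

    # Namespace search "channel:joe*" strips the namespace, then wildcard-matches the value:
    tag = "channel:Joe Mullan"
    value = tag.lower()[len("channel") + 1:]
    assert fnmatch(value, "joe*")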
@@ -1175,6 +1175,161 @@ class MatrixStorageBackend(StorageBackend):
             raise


+class RemoteStorageBackend(StorageBackend):
+    """File storage backend for remote Android/network storage servers.
+
+    Connects to a remote storage server (e.g., running on an Android phone)
+    via REST API. All operations are proxied to the remote server.
+    """
+
+    def __init__(self, server_url: str, timeout: int = 30, api_key: Optional[str] = None) -> None:
+        """Initialize remote storage backend.
+
+        Args:
+            server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000)
+            timeout: Request timeout in seconds
+            api_key: Optional API key for authentication
+        """
+        try:
+            import requests
+        except ImportError:
+            raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests")
+
+        self.server_url = server_url.rstrip('/')
+        self.timeout = timeout
+        self.api_key = api_key
+        self._session = requests.Session()
+
+        # Add API key to default headers if provided
+        if self.api_key:
+            self._session.headers.update({'X-API-Key': self.api_key})
+
+    def get_name(self) -> str:
+        return "remote"
+
+    def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
+        """Make HTTP request to remote server."""
+        import requests
+        from urllib.parse import urljoin
+
+        url = urljoin(self.server_url, endpoint)
+
+        try:
+            response = self._session.request(
+                method,
+                url,
+                timeout=self.timeout,
+                **kwargs
+            )
+
+            if response.status_code == 404:
+                raise Exception(f"Remote resource not found: {endpoint}")
+
+            if response.status_code >= 400:
+                try:
+                    error_data = response.json()
+                    error_msg = error_data.get('error', response.text)
+                except Exception:
+                    error_msg = response.text
+                raise Exception(f"Remote server error {response.status_code}: {error_msg}")
+
+            return response.json()
+
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Connection to {self.server_url} failed: {e}")
+
+    def upload(self, file_path: Path, **kwargs: Any) -> str:
+        """Upload file to remote storage.
+
+        Args:
+            file_path: Path to the file to upload
+            tags: Optional list of tags to add
+            urls: Optional list of known URLs
+
+        Returns:
+            Remote file hash
+        """
+        from helper.utils import sha256_file
+
+        if not file_path.exists():
+            raise ValueError(f"File not found: {file_path}")
+
+        try:
+            # Index the file on remote server
+            data = {"path": str(file_path)}
+
+            tags = kwargs.get("tags", [])
+            if tags:
+                data["tags"] = tags
+
+            urls = kwargs.get("urls", [])
+            if urls:
+                data["urls"] = urls
+
+            result = self._request('POST', '/files/index', json=data)
+            file_hash = result.get('hash')
+
+            if file_hash:
+                log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr)
+                return file_hash
+            else:
+                raise Exception("Remote server did not return file hash")
+
+        except Exception as exc:
+            debug(f"Remote upload failed: {exc}", file=sys.stderr)
+            raise
+
+    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
+        """Search files on remote storage.
+
+        Args:
+            query: Search query
+            limit: Maximum results
+
+        Returns:
+            List of search results
+        """
+        limit = kwargs.get("limit")
+        try:
+            limit = int(limit) if limit is not None else 100
+        except (TypeError, ValueError):
+            limit = 100
+
+        if limit <= 0:
+            limit = 100
+
+        try:
+            response = self._request('GET', '/files/search', params={
+                'q': query,
+                'limit': limit
+            })
+
+            files = response.get('files', [])
+
+            # Transform remote format to standard result format
+            results = []
+            for f in files:
+                results.append({
+                    "name": f.get('name', '').split('/')[-1],  # Get filename from path
+                    "title": f.get('name', f.get('path', '')).split('/')[-1],
+                    "ext": f.get('ext', ''),
+                    "path": f.get('path', ''),
+                    "target": f.get('path', ''),
+                    "hash": f.get('hash', ''),
+                    "origin": "remote",
+                    "size": f.get('size', 0),
+                    "size_bytes": f.get('size', 0),
+                    "tags": f.get('tags', []),
+                })
+
+            debug(f"Remote search found {len(results)} results", file=sys.stderr)
+            return results
+
+        except Exception as exc:
+            log(f"❌ Remote search failed: {exc}", file=sys.stderr)
+            raise
+
+
 class FileStorage:
     """Unified file storage interface supporting multiple backend services.
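A usage sketch for the new backend; URL and key are placeholders, and search()/upload() are the methods defined above:

    from pathlib import Path

    backend = RemoteStorageBackend("http://192.168.1.100:5000", timeout=30, api_key="mysecretkey")

    hits = backend.search("zohar", limit=10)     # GET /files/search?q=zohar&limit=10
    for hit in hits:
        print(hit["title"], hit["hash"], hit["size_bytes"])

    # upload() indexes a file that already exists on the remote device:
    file_hash = backend.upload(Path("/sdcard/books/zohar.pdf"), tags=["title:Zohar"])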
@@ -1223,6 +1378,25 @@ class FileStorage:

         # Include Matrix backend
         self._backends["matrix"] = MatrixStorageBackend()

+        # Include remote storage backends from config (for Android/network servers)
+        remote_storages = config.get("remote_storages", [])
+        if isinstance(remote_storages, list):
+            for remote_config in remote_storages:
+                if isinstance(remote_config, dict):
+                    name = remote_config.get("name", "remote")
+                    url = remote_config.get("url")
+                    timeout = remote_config.get("timeout", 30)
+                    api_key = remote_config.get("api_key")
+
+                    if url:
+                        try:
+                            backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key)
+                            self._backends[name] = backend
+                            auth_status = " (with auth)" if api_key else " (no auth)"
+                            log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
+                        except Exception as e:
+                            log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)

     def __getitem__(self, backend_name: str) -> StorageBackend:
         """Get a storage backend by name.
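For reference, the config shape this loop consumes, written as a Python literal (keys match the .get() calls above; values are placeholders):

    config = {
        "remote_storages": [
            {
                "name": "phone",                     # defaults to "remote" if omitted
                "url": "http://192.168.1.100:5000",  # required; entry skipped without it
                "timeout": 30,                       # optional, seconds
                "api_key": "mysecretkey",            # optional; enables X-API-Key auth
            },
        ],
    }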
@@ -19,6 +19,8 @@ from datetime import datetime
 from pathlib import Path
 from typing import Optional, Dict, Any, List, Tuple, Set

+from .utils import sha256_file
+
 logger = logging.getLogger(__name__)

 # Try to import optional dependencies
@@ -455,6 +457,18 @@ class LocalLibraryDB:
             logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True)
             raise

+    def get_file_id(self, file_path: Path) -> Optional[int]:
+        """Get the file ID for a file path, or None if not found."""
+        try:
+            str_path = str(file_path.resolve())
+            cursor = self.connection.cursor()
+            cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,))
+            row = cursor.fetchone()
+            return row[0] if row else None
+        except Exception as e:
+            logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True)
+            return None
+
     def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]:
         """Get metadata for a file."""
         try:
@@ -748,6 +762,177 @@ class LocalLibraryDB:
             logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True)
             raise

+    def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
+        """Set a relationship between two local files.
+
+        Args:
+            file_path: Path to the file being related
+            related_file_path: Path to the related file
+            rel_type: Type of relationship ('king', 'alt', 'related')
+        """
+        try:
+            str_path = str(file_path.resolve())
+            str_related_path = str(related_file_path.resolve())
+
+            file_id = self.get_or_create_file_entry(file_path)
+            related_file_id = self.get_or_create_file_entry(related_file_path)
+
+            cursor = self.connection.cursor()
+
+            # Get hashes for both files
+            file_hash = sha256_file(file_path)
+            related_file_hash = sha256_file(related_file_path)
+
+            if not file_hash or not related_file_hash:
+                logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}")
+                return
+
+            # Store the hashes in the files table for future lookups
+            cursor.execute("""
+                UPDATE files SET file_hash = ? WHERE id = ?
+            """, (file_hash, file_id))
+            cursor.execute("""
+                UPDATE files SET file_hash = ? WHERE id = ?
+            """, (related_file_hash, related_file_id))
+
+            # Get current relationships
+            cursor.execute("""
+                SELECT relationships FROM metadata WHERE file_id = ?
+            """, (file_id,))
+
+            row = cursor.fetchone()
+            # Use index access to be safe regardless of row_factory
+            relationships_str = row[0] if row else None
+
+            try:
+                if relationships_str:
+                    relationships = json.loads(relationships_str)
+                else:
+                    relationships = {}
+            except (json.JSONDecodeError, TypeError):
+                relationships = {}
+
+            # Ensure relationships is a dict (handle case where DB has a list)
+            if not isinstance(relationships, dict):
+                relationships = {}
+
+            # Ensure rel_type key exists
+            if rel_type not in relationships:
+                relationships[rel_type] = []
+
+            # Add the relationship (store as hash string)
+            if related_file_hash not in relationships[rel_type]:
+                relationships[rel_type].append(related_file_hash)
+
+            # Save the updated relationships for the main file
+            cursor.execute("""
+                INSERT INTO metadata (file_id, relationships)
+                VALUES (?, ?)
+                ON CONFLICT(file_id) DO UPDATE SET
+                    relationships = excluded.relationships,
+                    time_modified = CURRENT_TIMESTAMP
+            """, (file_id, json.dumps(relationships)))
+
+            logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})")
+
+            # Set reverse relationship (bidirectional)
+            # For 'alt' and 'related', the reverse is the same
+            # For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does)
+            # Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates'
+            reverse_type = rel_type
+
+            # Update the related file
+            cursor.execute("""
+                SELECT relationships FROM metadata WHERE file_id = ?
+            """, (related_file_id,))
+
+            row = cursor.fetchone()
+            relationships_str = row[0] if row else None
+
+            try:
+                if relationships_str:
+                    reverse_relationships = json.loads(relationships_str)
+                else:
+                    reverse_relationships = {}
+            except (json.JSONDecodeError, TypeError):
+                reverse_relationships = {}
+
+            if not isinstance(reverse_relationships, dict):
+                reverse_relationships = {}
+
+            if reverse_type not in reverse_relationships:
+                reverse_relationships[reverse_type] = []
+
+            if file_hash not in reverse_relationships[reverse_type]:
+                reverse_relationships[reverse_type].append(file_hash)
+
+            # Save the updated reverse relationships
+            cursor.execute("""
+                INSERT INTO metadata (file_id, relationships)
+                VALUES (?, ?)
+                ON CONFLICT(file_id) DO UPDATE SET
+                    relationships = excluded.relationships,
+                    time_modified = CURRENT_TIMESTAMP
+            """, (related_file_id, json.dumps(reverse_relationships)))
+
+            self.connection.commit()
+
+        except Exception as e:
+            logger.error(f"Error setting relationship: {e}", exc_info=True)
+            raise
+
+    def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
+        """Find all files that have a relationship pointing to the target path.
+
+        Args:
+            target_path: The file path to look for in other files' relationships
+
+        Returns:
+            List of dicts with {path, type} for files pointing to target
+        """
+        try:
+            # Get the hash of the target file
+            target_hash = sha256_file(target_path)
+            if not target_hash:
+                logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash")
+                return []
+
+            cursor = self.connection.cursor()
+
+            # Scan all metadata (this might be slow on huge DBs but fine for local library)
+            # We select file_path and relationships json
+            cursor.execute("""
+                SELECT f.file_path, m.relationships
+                FROM metadata m
+                JOIN files f ON m.file_id = f.id
+                WHERE m.relationships LIKE ?
+            """, (f"%{target_hash}%",))
+
+            results = []
+
+            for row in cursor.fetchall():
+                f_path = row[0]
+                rels_json = row[1]
+
+                try:
+                    rels = json.loads(rels_json)
+                    if isinstance(rels, dict):
+                        for r_type, hashes in rels.items():
+                            if isinstance(hashes, list):
+                                # Check if target hash is in this relationship type
+                                if target_hash in hashes:
+                                    results.append({
+                                        "path": f_path,
+                                        "type": r_type
+                                    })
+                except (json.JSONDecodeError, TypeError):
+                    continue
+
+            return results
+        except Exception as e:
+            logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True)
+            return []
+
     def get_note(self, file_path: Path) -> Optional[str]:
         """Get note for a file."""
         try:
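Concretely, after set_relationship(a, b, "alt") both metadata rows carry a JSON blob of this shape; hashes are shortened placeholders, and the reverse edge uses the same type, as the comment above notes:

    import json

    relationships_for_a = {"alt": ["9f3c...b21"]}   # sha256 of b
    relationships_for_b = {"alt": ["57e0...a4c"]}   # sha256 of a

    # This is what lands in metadata.relationships via json.dumps(...):
    print(json.dumps(relationships_for_a))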
@@ -1076,6 +1261,11 @@ class LocalLibraryDB:
         if not text:
             return True
         try:
+            # Check if connection is valid
+            if not self.connection:
+                logger.warning(f"Database connection not available for worker {worker_id}")
+                return False
+
             cursor = self.connection.cursor()
             cursor.execute("SELECT stdout FROM worker WHERE worker_id = ?", (worker_id,))
             row = cursor.fetchone()
@@ -1097,6 +1287,13 @@ class LocalLibraryDB:

             self.connection.commit()
             return cursor.rowcount > 0
+        except sqlite3.ProgrammingError as e:
+            # Handle "Cannot operate on a closed database" gracefully
+            if "closed database" in str(e).lower():
+                logger.warning(f"Database connection closed, cannot append stdout for worker {worker_id}")
+                return False
+            logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
+            return False
         except Exception as e:
             logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
             return False
@@ -1599,3 +1796,23 @@ class LocalLibrarySearchOptimizer:
         if not self.db:
             return None
         return self.db.search_by_hash(file_hash)

+    def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
+        """Set a relationship between two files in the database.
+
+        Delegates to LocalLibraryDB.set_relationship().
+
+        Args:
+            file_path: Path to the first file
+            related_file_path: Path to the related file
+            rel_type: Type of relationship ('king', 'alt', 'related', etc.)
+        """
+        if not self.db:
+            return
+        self.db.set_relationship(file_path, related_file_path, rel_type)
+
+    def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
+        """Find all files that have a relationship pointing to the target path."""
+        if not self.db:
+            return []
+        return self.db.find_files_pointing_to(target_path)
helper/remote_storage_server.py (new file, 523 lines)
@@ -0,0 +1,523 @@
"""Remote Storage Server - REST API for file management on mobile devices.
|
||||
|
||||
This server runs on a mobile device (Android with Termux, iOS with iSH, etc.)
|
||||
and exposes the local library database as a REST API. Your PC connects to this
|
||||
server and uses it as a remote storage backend through the RemoteStorageBackend.
|
||||
|
||||
## INSTALLATION
|
||||
|
||||
### On Android (Termux):
|
||||
1. Install Termux from Play Store: https://play.google.com/store/apps/details?id=com.termux
|
||||
2. In Termux:
|
||||
$ apt update && apt install python
|
||||
$ pip install flask flask-cors
|
||||
3. Copy this file to your device
|
||||
4. Run it (with optional API key):
|
||||
$ python remote_storage_server.py --storage-path /path/to/storage --port 5000
|
||||
$ python remote_storage_server.py --storage-path /path/to/storage --api-key mysecretkey
|
||||
5. Server prints connection info automatically (IP, port, API key)
|
||||
|
||||
### On PC:
|
||||
1. Install requests: pip install requests
|
||||
2. Add to config.json:
|
||||
{
|
||||
"remote_storages": [
|
||||
{
|
||||
"name": "phone",
|
||||
"url": "http://192.168.1.100:5000",
|
||||
"api_key": "mysecretkey",
|
||||
"timeout": 30
|
||||
}
|
||||
]
|
||||
}
|
||||
Note: API key is optional. Works on WiFi or cellular data.
|
||||
|
||||
## USAGE
|
||||
|
||||
After setup, all cmdlets work with the phone:
|
||||
$ search-file zohar -store phone
|
||||
$ @1-3 | add-relationship -king @4 -store phone
|
||||
$ @1 | get-relationship -store phone
|
||||
|
||||
The server exposes REST endpoints that RemoteStorageBackend uses internally.
|
||||
"""
|
||||
|
||||
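# A quick client-side sanity check against this server, as a sketch (IP, port,
# and key are placeholders; /health and /files/search are defined below):
#
#     import requests
#     base = "http://192.168.1.100:5000"
#     headers = {"X-API-Key": "mysecretkey"}
#     print(requests.get(f"{base}/health", headers=headers).json())
#     print(requests.get(f"{base}/files/search",
#                        params={"q": "zohar", "limit": 5},
#                        headers=headers).json())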
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
from functools import wraps
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from helper.logger import log
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION
|
||||
# ============================================================================
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='[%(asctime)s] %(levelname)s: %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
STORAGE_PATH: Optional[Path] = None
|
||||
API_KEY: Optional[str] = None # API key for authentication (None = no auth required)
|
||||
|
||||
# Try importing Flask - will be used in main() only
|
||||
try:
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
HAS_FLASK = True
|
||||
except ImportError:
|
||||
HAS_FLASK = False
|
||||
|
||||
# ============================================================================
|
||||
# UTILITY FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
def get_local_ip() -> Optional[str]:
|
||||
"""Get the local IP address that would be used for external connections."""
|
||||
import socket
|
||||
try:
|
||||
# Create a socket to determine which interface would be used
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect(("8.8.8.8", 80)) # Google DNS
|
||||
ip = s.getsockname()[0]
|
||||
s.close()
|
||||
return ip
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
# ============================================================================
|
||||
# FLASK APP FACTORY
|
||||
# ============================================================================
|
||||
|
||||
def create_app():
|
||||
"""Create and configure Flask app with all routes."""
|
||||
if not HAS_FLASK:
|
||||
raise ImportError("Flask not installed. Install with: pip install flask flask-cors")
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
# ========================================================================
|
||||
# HELPER DECORATORS
|
||||
# ========================================================================
|
||||
|
||||
def require_auth():
|
||||
"""Decorator to check API key authentication if configured."""
|
||||
def decorator(f):
|
||||
@wraps(f)
|
||||
def decorated_function(*args, **kwargs):
|
||||
if API_KEY:
|
||||
# Get API key from header or query parameter
|
||||
provided_key = request.headers.get('X-API-Key') or request.args.get('api_key')
|
||||
if not provided_key or provided_key != API_KEY:
|
||||
return jsonify({"error": "Unauthorized. Invalid or missing API key."}), 401
|
||||
return f(*args, **kwargs)
|
||||
return decorated_function
|
||||
return decorator
|
||||
|
||||
def require_storage():
|
||||
"""Decorator to ensure storage path is configured."""
|
||||
def decorator(f):
|
||||
@wraps(f)
|
||||
def decorated_function(*args, **kwargs):
|
||||
if not STORAGE_PATH:
|
||||
return jsonify({"error": "Storage path not configured"}), 500
|
||||
return f(*args, **kwargs)
|
||||
return decorated_function
|
||||
return decorator
|
||||
|
||||
# ========================================================================
|
||||
# HEALTH CHECK
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/health', methods=['GET'])
|
||||
@require_auth()
|
||||
def health():
|
||||
"""Check server health and storage availability."""
|
||||
status = {
|
||||
"status": "ok",
|
||||
"storage_configured": STORAGE_PATH is not None,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
if STORAGE_PATH:
|
||||
status["storage_path"] = str(STORAGE_PATH)
|
||||
status["storage_exists"] = STORAGE_PATH.exists()
|
||||
try:
|
||||
from helper.local_library import LocalLibraryDB
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
status["database_accessible"] = True
|
||||
except Exception as e:
|
||||
status["database_accessible"] = False
|
||||
status["database_error"] = str(e)
|
||||
|
||||
return jsonify(status), 200
|
||||
|
||||
# ========================================================================
|
||||
# FILE OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/files/search', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def search_files():
|
||||
"""Search for files by name or tag."""
|
||||
from helper.local_library import LocalLibrarySearchOptimizer
|
||||
|
||||
query = request.args.get('q', '')
|
||||
limit = request.args.get('limit', 100, type=int)
|
||||
|
||||
if not query:
|
||||
return jsonify({"error": "Search query required"}), 400
|
||||
|
||||
try:
|
||||
with LocalLibrarySearchOptimizer(STORAGE_PATH) as db:
|
||||
results = db.search_by_name(query, limit)
|
||||
tag_results = db.search_by_tag(query, limit)
|
||||
all_results = {r['hash']: r for r in (results + tag_results)}
|
||||
|
||||
return jsonify({
|
||||
"query": query,
|
||||
"count": len(all_results),
|
||||
"files": list(all_results.values())
|
||||
}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Search error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Search failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/files/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_file_metadata(file_hash: str):
|
||||
"""Get metadata for a specific file by hash."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
|
||||
if not file_path or not file_path.exists():
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
metadata = db.get_metadata(file_path)
|
||||
tags = db.get_tags(file_path)
|
||||
|
||||
return jsonify({
|
||||
"hash": file_hash,
|
||||
"path": str(file_path),
|
||||
"size": file_path.stat().st_size,
|
||||
"metadata": metadata,
|
||||
"tags": tags
|
||||
}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get metadata error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed to get metadata: {str(e)}"}), 500
|
||||
|
||||
@app.route('/files/index', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def index_file():
|
||||
"""Index a new file in the storage."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
from helper.utils import sha256_file
|
||||
|
||||
data = request.get_json() or {}
|
||||
file_path_str = data.get('path')
|
||||
tags = data.get('tags', [])
|
||||
urls = data.get('urls', [])
|
||||
|
||||
if not file_path_str:
|
||||
return jsonify({"error": "File path required"}), 400
|
||||
|
||||
try:
|
||||
file_path = Path(file_path_str)
|
||||
|
||||
if not file_path.exists():
|
||||
return jsonify({"error": "File does not exist"}), 404
|
||||
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
db.get_or_create_file_entry(file_path)
|
||||
|
||||
if tags:
|
||||
db.add_tags(file_path, tags)
|
||||
|
||||
if urls:
|
||||
db.add_known_urls(file_path, urls)
|
||||
|
||||
file_hash = sha256_file(file_path)
|
||||
|
||||
return jsonify({
|
||||
"hash": file_hash,
|
||||
"path": str(file_path),
|
||||
"tags_added": len(tags),
|
||||
"urls_added": len(urls)
|
||||
}), 201
|
||||
except Exception as e:
|
||||
logger.error(f"Index error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Indexing failed: {str(e)}"}), 500
|
||||
|
||||
# ========================================================================
|
||||
# TAG OPERATIONS
|
||||
# ========================================================================
|
||||
|
||||
@app.route('/tags/<file_hash>', methods=['GET'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def get_tags(file_hash: str):
|
||||
"""Get tags for a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
tags = db.get_tags(file_path)
|
||||
return jsonify({"hash": file_hash, "tags": tags}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Get tags error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500
|
||||
|
||||
@app.route('/tags/<file_hash>', methods=['POST'])
|
||||
@require_auth()
|
||||
@require_storage()
|
||||
def add_tags(file_hash: str):
|
||||
"""Add tags to a file."""
|
||||
from helper.local_library import LocalLibraryDB
|
||||
|
||||
data = request.get_json() or {}
|
||||
tags = data.get('tags', [])
|
||||
mode = data.get('mode', 'add')
|
||||
|
||||
if not tags:
|
||||
return jsonify({"error": "Tags required"}), 400
|
||||
|
||||
try:
|
||||
with LocalLibraryDB(STORAGE_PATH) as db:
|
||||
file_path = db.search_by_hash(file_hash)
|
||||
if not file_path:
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
if mode == 'replace':
|
||||
db.remove_tags(file_path, db.get_tags(file_path))
|
||||
|
||||
db.add_tags(file_path, tags)
|
||||
return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200
|
||||
except Exception as e:
|
||||
logger.error(f"Add tags error: {e}", exc_info=True)
|
||||
return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/tags/<file_hash>', methods=['DELETE'])
    @require_auth()
    @require_storage()
    def remove_tags(file_hash: str):
        """Remove tags from a file."""
        from helper.local_library import LocalLibraryDB

        tags_str = request.args.get('tags', '')

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                if tags_str:
                    # Skip empty segments so stray commas don't become tags
                    tags_to_remove = [t.strip() for t in tags_str.split(',') if t.strip()]
                else:
                    tags_to_remove = db.get_tags(file_path)

                db.remove_tags(file_path, tags_to_remove)
                return jsonify({"hash": file_hash, "tags_removed": len(tags_to_remove)}), 200
        except Exception as e:
            logger.error(f"Remove tags error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500
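
    # Illustrative round trip over the three tag endpoints above (same
    # placeholder assumptions as the /files/index sketch: base URL and the
    # "X-API-Key" header name are not defined by this file):
    #
    #   base, hdrs = "http://127.0.0.1:5000", {"X-API-Key": "mysecretkey"}
    #   requests.post(f"{base}/tags/{file_hash}", json={"tags": ["a", "b"]}, headers=hdrs)
    #   requests.post(f"{base}/tags/{file_hash}", json={"tags": ["c"], "mode": "replace"}, headers=hdrs)
    #   requests.get(f"{base}/tags/{file_hash}", headers=hdrs).json()     # {"hash": ..., "tags": ["c"]}
    #   requests.delete(f"{base}/tags/{file_hash}", params={"tags": "c"}, headers=hdrs)
    #   requests.delete(f"{base}/tags/{file_hash}", headers=hdrs)         # no ?tags= -> remove all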

    # ========================================================================
    # RELATIONSHIP OPERATIONS
    # ========================================================================

    @app.route('/relationships/<file_hash>', methods=['GET'])
    @require_auth()
    @require_storage()
    def get_relationships(file_hash: str):
        """Get relationships for a file."""
        from helper.local_library import LocalLibraryDB

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                metadata = db.get_metadata(file_path)
                relationships = metadata.get('relationships', {}) if metadata else {}
                return jsonify({"hash": file_hash, "relationships": relationships}), 200
        except Exception as e:
            logger.error(f"Get relationships error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/relationships', methods=['POST'])
    @require_auth()
    @require_storage()
    def set_relationship():
        """Set a relationship between two files."""
        from helper.local_library import LocalLibraryDB

        data = request.get_json() or {}
        from_hash = data.get('from_hash')
        to_hash = data.get('to_hash')
        rel_type = data.get('type', 'alt')

        if not from_hash or not to_hash:
            return jsonify({"error": "from_hash and to_hash required"}), 400

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                from_path = db.search_by_hash(from_hash)
                to_path = db.search_by_hash(to_hash)

                if not from_path or not to_path:
                    return jsonify({"error": "File not found"}), 404

                db.set_relationship(from_path, to_path, rel_type)
                return jsonify({"from_hash": from_hash, "to_hash": to_hash, "type": rel_type}), 200
        except Exception as e:
            logger.error(f"Set relationship error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500
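
    # Illustrative calls for the relationship endpoints (placeholder base URL,
    # auth header and hashes, as in the sketches above). "alt" is the default
    # relationship type when "type" is omitted from the body:
    #
    #   requests.post(f"{base}/relationships", headers=hdrs, json={
    #       "from_hash": hash_a,
    #       "to_hash": hash_b,
    #       "type": "alt",
    #   })
    #   requests.get(f"{base}/relationships/{hash_a}", headers=hdrs).json()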

    # ========================================================================
    # URL OPERATIONS
    # ========================================================================

    @app.route('/urls/<file_hash>', methods=['GET'])
    @require_auth()
    @require_storage()
    def get_urls(file_hash: str):
        """Get known URLs for a file."""
        from helper.local_library import LocalLibraryDB

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                metadata = db.get_metadata(file_path)
                urls = metadata.get('known_urls', []) if metadata else []
                return jsonify({"hash": file_hash, "urls": urls}), 200
        except Exception as e:
            logger.error(f"Get URLs error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500

    @app.route('/urls/<file_hash>', methods=['POST'])
    @require_auth()
    @require_storage()
    def add_urls(file_hash: str):
        """Add URLs to a file."""
        from helper.local_library import LocalLibraryDB

        data = request.get_json() or {}
        urls = data.get('urls', [])

        if not urls:
            return jsonify({"error": "URLs required"}), 400

        try:
            with LocalLibraryDB(STORAGE_PATH) as db:
                file_path = db.search_by_hash(file_hash)
                if not file_path:
                    return jsonify({"error": "File not found"}), 404

                db.add_known_urls(file_path, urls)
                return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200
        except Exception as e:
            logger.error(f"Add URLs error: {e}", exc_info=True)
            return jsonify({"error": f"Failed: {str(e)}"}), 500
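
    # Illustrative calls for the URL endpoints (same placeholder assumptions):
    #
    #   requests.post(f"{base}/urls/{file_hash}", headers=hdrs,
    #                 json={"urls": ["https://example.com/mirror"]})
    #   requests.get(f"{base}/urls/{file_hash}", headers=hdrs).json()  # {"hash": ..., "urls": [...]}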

    return app


# ============================================================================
# MAIN
# ============================================================================

def main():
    if not HAS_FLASK:
        print("ERROR: Flask and flask-cors required")
        print("Install with: pip install flask flask-cors")
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description='Remote Storage Server for Medios-Macina',
        epilog='Example: python remote_storage_server.py --storage-path /storage/media --port 5000 --api-key mysecretkey'
    )
    parser.add_argument('--storage-path', type=str, required=True, help='Path to storage directory')
    parser.add_argument('--host', type=str, default='0.0.0.0', help='Server host (default: 0.0.0.0)')
    parser.add_argument('--port', type=int, default=5000, help='Server port (default: 5000)')
    parser.add_argument('--api-key', type=str, default=None, help='API key for authentication (optional)')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode')

    args = parser.parse_args()

    global STORAGE_PATH, API_KEY
    STORAGE_PATH = Path(args.storage_path).resolve()
    API_KEY = args.api_key

    if not STORAGE_PATH.exists():
        print(f"ERROR: Storage path does not exist: {STORAGE_PATH}")
        sys.exit(1)

    # Get local IP address for the connection banner, falling back to loopback
    local_ip = get_local_ip() or "127.0.0.1"

    print(f"\n{'=' * 70}")
    print("Remote Storage Server - Medios-Macina")
    print('=' * 70)
    print(f"Storage Path: {STORAGE_PATH}")
    print(f"Local IP: {local_ip}")
    print(f"Server URL: http://{local_ip}:{args.port}")
    print(f"Health URL: http://{local_ip}:{args.port}/health")
    print(f"API Key: {'Enabled - ' + ('***' + args.api_key[-4:]) if args.api_key else 'Disabled (no auth)'}")
    print(f"Debug Mode: {args.debug}")
    print("\n📋 Config for config.json:")
    config_entry = {
        "name": "phone",
        "url": f"http://{local_ip}:{args.port}",
        "timeout": 30
    }
    if args.api_key:
        config_entry["api_key"] = args.api_key
    print(json.dumps(config_entry, indent=2))
    print(f"\n{'=' * 70}\n")

    try:
        from helper.local_library import LocalLibraryDB
        with LocalLibraryDB(STORAGE_PATH) as db:
            logger.info("Database initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize database: {e}")
        sys.exit(1)

    app = create_app()
    app.run(host=args.host, port=args.port, debug=args.debug, use_reloader=False)


if __name__ == '__main__':
    main()
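
# Quick smoke test once the server is up (illustrative; host, port and storage
# path are whatever you passed on the command line):
#
#   python remote_storage_server.py --storage-path /storage/media --port 5000
#   python -c "import requests; print(requests.get('http://127.0.0.1:5000/health', timeout=5).json())"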