nose
2025-12-03 15:18:57 -08:00
parent 89aa24961b
commit 5e4df11dbf
12 changed files with 1953 additions and 346 deletions

View File

@@ -8,7 +8,12 @@ Lean, focused downloader without event infrastructure overhead.
"""
from __future__ import annotations
import re # noqa: F401
import glob # noqa: F401
import hashlib
import json # noqa: F401
import random
import re
import string
import subprocess
import sys
import time
@@ -157,31 +162,72 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> None:
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]:
"""Download each section separately so merge-file can combine them.
yt-dlp with multiple --download-sections args merges them into one file.
We need separate files for merge-file, so download each section individually.
Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
thinking sections are already downloaded. The title is extracted and stored in tags.
Returns:
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
"""
sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
return
return "", {}
# Download each section separately with unique output template
# Generate a unique hash-based ID for this download session
# This ensures different videos/downloads don't have filename collisions
session_id = hashlib.md5(
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
).hexdigest()[:12]
first_section_info = None
title_from_first = None
# Download each section separately with unique output template using session ID
for section_idx, section in enumerate(sections_list, 1):
# Build unique output template for this section
# e.g., "title.section_1_of_3.ext" for the first section
# Build unique output template for this section using session-based filename
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
output_dir_path = Path(base_outtmpl).parent
# Insert section number before extension
# e.g., "/path/title.hash.webm" → "/path/title.hash.section_1_of_3.webm"
# Use session_id + section index for temp filename
# e.g., "/path/{session_id}_1.%(ext)s"
filename_tmpl = f"{session_id}_{section_idx}"
if base_outtmpl.endswith(".%(ext)s"):
section_outtmpl = base_outtmpl.replace(".%(ext)s", f".section_{section_idx}_of_{len(sections_list)}.%(ext)s")
else:
section_outtmpl = base_outtmpl + f".section_{section_idx}_of_{len(sections_list)}"
filename_tmpl += ".%(ext)s"
# Use Path to handle separators correctly for the OS
section_outtmpl = str(output_dir_path / filename_tmpl)
# Build yt-dlp command for this section
# For the first section, extract metadata first (separate call)
if section_idx == 1:
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
if ytdl_options.get("cookiefile"):
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
metadata_cmd.extend(["--cookies", cookies_path])
if ytdl_options.get("noplaylist"):
metadata_cmd.append("--no-playlist")
metadata_cmd.append(url)
try:
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
if meta_result.returncode == 0 and meta_result.stdout:
try:
info_dict = json.loads(meta_result.stdout.strip())
first_section_info = info_dict
title_from_first = info_dict.get('title')
debug(f"Extracted title from metadata: {title_from_first}")
except json.JSONDecodeError:
debug("Could not parse JSON metadata")
except Exception as e:
debug(f"Error extracting metadata: {e}")
# Build yt-dlp command for downloading this section
cmd = ["yt-dlp"]
# Add format
@@ -212,14 +258,18 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
cmd.append(url)
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
debug(f"Command: {' '.join(cmd)}")
# Run the subprocess
# Run the subprocess - don't capture output so progress is shown
try:
result = subprocess.run(cmd, capture_output=False, text=True)
result = subprocess.run(cmd)
if result.returncode != 0:
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
except Exception as exc:
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
return session_id, first_section_info or {}
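# For orientation, a minimal sketch of the kind of per-section command this helper
# assembles; the elided hunk above also contributes format/cookie/playlist flags, and
# every value below is a placeholder rather than something taken from this diff. The
# "*START-END" form is yt-dlp's standard --download-sections syntax.
#
#   cmd = [
#       "yt-dlp",
#       "--download-sections", "*00:01:00-00:02:30",   # one section per invocation
#       "-o", "/downloads/a1b2c3d4e5f6_1.%(ext)s",     # {session_id}_{section_idx}.%(ext)s
#       "https://example.com/watch?v=example",
#   ]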
def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
@@ -296,8 +346,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
# Force keyframes at cuts for accurate section boundaries
base_options["force_keyframes_at_cuts"] = True
# Note: Not using --force-keyframes-at-cuts to avoid re-encoding
# This may result in less precise cuts but faster downloads
# Add playlist items selection if provided
if opts.playlist_items:
@@ -751,8 +801,10 @@ def download_media(
debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")
# Use subprocess when download_sections are present (Python API doesn't support them properly)
session_id = None
first_section_info = {}
if ytdl_options.get("download_sections"):
_download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []))
info = None
else:
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
@@ -780,7 +832,7 @@ def download_media(
import re
# Get the expected filename pattern from outtmpl
# For sections: "C:\path\title.section_1_of_3.ext", "C:\path\title.section_2_of_3.ext", etc.
# For sections: "C:\path\{session_id}.section_1_of_3.ext", etc.
# For non-sections: "C:\path\title.ext"
# Wait a moment to ensure files are fully written
@@ -791,10 +843,10 @@ def download_media(
if not files:
raise FileNotFoundError(f"No files found in {opts.output_dir}")
# If we downloaded sections, look for files with .section_N_of_M pattern
if opts.clip_sections:
# Pattern: "title.section_1_of_3.ext", "title.section_2_of_3.ext", etc.
section_pattern = re.compile(r'\.section_(\d+)_of_(\d+)\.')
# If we downloaded sections, look for files with the session_id pattern
if opts.clip_sections and session_id:
# Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
matching_files = [f for f in files if section_pattern.search(f.name)]
if matching_files:
@@ -804,13 +856,44 @@ def download_media(
return int(match.group(1)) if match else 999
matching_files.sort(key=extract_section_num)
media_path = matching_files[0] # First section
media_paths = matching_files # All sections
debug(f"✓ Downloaded {len(media_paths)} section file(s)")
debug(f"Found {len(matching_files)} section file(s) matching pattern")
# Now rename the section files to hash-based names
# so each section's content gets a unique filename
renamed_files = []
for idx, section_file in enumerate(matching_files, 1):
try:
# Calculate hash for the file
file_hash = sha256_file(section_file)
ext = section_file.suffix
new_name = f"{file_hash}{ext}"
new_path = opts.output_dir / new_name
if new_path.exists() and new_path != section_file:
# If file with same hash exists, use it and delete the temp one
debug(f"File with hash {file_hash} already exists, using existing file.")
try:
section_file.unlink()
except OSError:
pass
renamed_files.append(new_path)
else:
section_file.rename(new_path)
debug(f"Renamed section file: {section_file.name}{new_name}")
renamed_files.append(new_path)
except Exception as e:
debug(f"Failed to process section file {section_file.name}: {e}")
renamed_files.append(section_file)
media_path = renamed_files[0]
media_paths = renamed_files
debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
else:
# Fallback to most recent file if pattern not found
media_path = files[0]
media_paths = None
debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
else:
# No sections, just take the most recent file
media_path = files[0]
@@ -830,10 +913,30 @@ def download_media(
# Create result with minimal data extracted from filename
file_hash = sha256_file(media_path)
# For section downloads, create tags with the title and build proper info dict
tags = []
title = ''
if first_section_info:
title = first_section_info.get('title', '')
if title:
tags.append(f'title:{title}')
debug(f"Added title tag for section download: {title}")
# Build info dict - always use extracted title if available, not hash
if first_section_info:
info_dict = first_section_info
else:
info_dict = {
"id": media_path.stem,
"title": title or media_path.stem,
"ext": media_path.suffix.lstrip(".")
}
return DownloadMediaResult(
path=media_path,
info={"id": media_path.stem, "title": media_path.stem, "ext": media_path.suffix.lstrip(".")},
tags=[],
info=info_dict,
tags=tags,
source_url=opts.url,
hash_value=file_hash,
paths=media_paths, # Include all section files if present

View File

@@ -137,14 +137,14 @@ class LocalStorageBackend(StorageBackend):
# Check for duplicate files using LocalLibraryDB (fast - uses index)
try:
db = LocalLibraryDB(dest_dir)
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
with LocalLibraryDB(dest_dir) as db:
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
except Exception as exc:
log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)
@@ -205,247 +205,156 @@ class LocalStorageBackend(StorageBackend):
# Try database search first (much faster than filesystem scan)
try:
db = LocalLibraryDB(search_dir)
cursor = db.connection.cursor()
# Check if query is a tag namespace search (format: "namespace:pattern")
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
with LocalLibraryDB(search_dir) as db:
cursor = db.connection.cursor()
# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")
# Filter results by pattern match
for file_id, file_path_str, size_bytes in rows:
if not file_path_str:
continue
# Check if query is a tag namespace search (format: "namespace:pattern")
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"
# Get the file's tags and check if any match the pattern
cursor.execute("""
SELECT DISTINCT tag FROM tags
WHERE file_id = ?
AND LOWER(tag) LIKE ?
""", (file_id, query_pattern))
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tags = [row[0] for row in cursor.fetchall()]
rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")
# Check if any tag matches the pattern (case-insensitive wildcard)
for tag in tags:
tag_lower = tag.lower()
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch all tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": all_tags,
})
else:
debug(f"File missing on disk: {file_path}")
break # Don't add same file multiple times
if limit is not None and len(results) >= limit:
return results
elif not match_all:
# Search by filename or simple tags (namespace-agnostic for plain text)
# For plain text search, match:
# 1. Filenames containing the query
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
# Compile regex for whole word matching (only if single term, otherwise skip)
word_regex = None
if len(terms) == 1:
term = terms[0]
# Check if term contains wildcard characters
has_wildcard = '*' in term or '?' in term
if has_wildcard:
# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
# Use custom boundary that treats underscores as separators
# \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
try:
# Match if not preceded or followed by alphanumeric chars
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files:
continue
# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
# Filter results by pattern match
for file_id, file_path_str, size_bytes in rows:
if not file_path_str:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
# Get the file's tags and check if any match the pattern
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
SELECT DISTINCT tag FROM tags
WHERE file_id = ?
AND LOWER(tag) LIKE ?
""", (file_id, query_pattern))
tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
# Also search for simple tags (without namespace) containing the query
# Only perform tag search if single term, or if we want to support multi-term tag search
# For now, fallback to single pattern search for tags if multiple terms
# (searching for a tag that contains "term1 term2" or "term1,term2")
# This is less useful for AND logic across multiple tags, but consistent with previous behavior
query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tag_rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in tag_rows:
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
# Check if any tag matches the pattern (case-insensitive wildcard)
for tag in tags:
tag_lower = tag.lower()
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch all tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": all_tags,
})
else:
debug(f"File missing on disk: {file_path}")
break # Don't add same file multiple times
if limit is not None and len(results) >= limit:
return results
else:
# Match all - get all files from database
cursor.execute("""
SELECT id, file_path, file_size
FROM files
ORDER BY file_path
LIMIT ?
""", (limit or 1000,))
rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in rows:
if file_path_str:
elif not match_all:
# Search by filename or simple tags (namespace-agnostic for plain text)
# For plain text search, match:
# 1. Filenames containing the query
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
# Compile regex for whole word matching (only if single term, otherwise skip)
word_regex = None
if len(terms) == 1:
term = terms[0]
# Check if term contains wildcard characters
has_wildcard = '*' in term or '?' in term
if has_wildcard:
# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
# Use custom boundary that treats underscores as separators
# \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
try:
# Match if not preceded or followed by alphanumeric chars
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files:
continue
# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
@@ -472,12 +381,103 @@ class LocalStorageBackend(StorageBackend):
"size_bytes": size_bytes,
"tags": tags,
})
if results:
debug(f"Returning {len(results)} results from DB")
else:
debug("No results found in DB")
return results
# Also search for simple tags (without namespace) containing the query
# Only perform tag search if single term, or if we want to support multi-term tag search
# For now, fallback to single pattern search for tags if multiple terms
# (searching for a tag that contains "term1 term2" or "term1,term2")
# This is less useful for AND logic across multiple tags, but consistent with previous behavior
query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tag_rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in tag_rows:
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if limit is not None and len(results) >= limit:
return results
else:
# Match all - get all files from database
cursor.execute("""
SELECT id, file_path, file_size
FROM files
ORDER BY file_path
LIMIT ?
""", (limit or 1000,))
rows = cursor.fetchall()
for file_id, file_path_str, size_bytes in rows:
if file_path_str:
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
# Use title tag if present
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
results.append({
"name": file_path.stem,
"title": title_tag or file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
})
if results:
debug(f"Returning {len(results)} results from DB")
else:
debug("No results found in DB")
return results
except Exception as e:
log(f"⚠️ Database search failed: {e}", file=sys.stderr)
@@ -1175,6 +1175,161 @@ class MatrixStorageBackend(StorageBackend):
raise
class RemoteStorageBackend(StorageBackend):
"""File storage backend for remote Android/network storage servers.
Connects to a remote storage server (e.g., running on Android phone)
via REST API. All operations are proxied to the remote server.
"""
def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None:
"""Initialize remote storage backend.
Args:
server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000)
timeout: Request timeout in seconds
api_key: Optional API key for authentication
"""
try:
import requests
except ImportError:
raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests")
self.server_url = server_url.rstrip('/')
self.timeout = timeout
self.api_key = api_key
self._session = requests.Session()
# Add API key to default headers if provided
if self.api_key:
self._session.headers.update({'X-API-Key': self.api_key})
def get_name(self) -> str:
return "remote"
def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
"""Make HTTP request to remote server."""
import requests
from urllib.parse import urljoin
url = urljoin(self.server_url, endpoint)
try:
response = self._session.request(
method,
url,
timeout=self.timeout,
**kwargs
)
if response.status_code == 404:
raise Exception(f"Remote resource not found: {endpoint}")
if response.status_code >= 400:
try:
error_data = response.json()
error_msg = error_data.get('error', response.text)
except Exception:
error_msg = response.text
raise Exception(f"Remote server error {response.status_code}: {error_msg}")
return response.json()
except requests.exceptions.RequestException as e:
raise Exception(f"Connection to {self.server_url} failed: {e}")
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to remote storage.
Args:
file_path: Path to the file to upload
tags: Optional list of tags to add
urls: Optional list of known URLs
Returns:
Remote file hash
"""
from helper.utils import sha256_file
if not file_path.exists():
raise ValueError(f"File not found: {file_path}")
try:
# Index the file on remote server
data = {"path": str(file_path)}
tags = kwargs.get("tags", [])
if tags:
data["tags"] = tags
urls = kwargs.get("urls", [])
if urls:
data["urls"] = urls
result = self._request('POST', '/files/index', json=data)
file_hash = result.get('hash')
if file_hash:
log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr)
return file_hash
else:
raise Exception("Remote server did not return file hash")
except Exception as exc:
debug(f"Remote upload failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search files on remote storage.
Args:
query: Search query
limit: Maximum results
Returns:
List of search results
"""
limit = kwargs.get("limit")
try:
limit = int(limit) if limit is not None else 100
except (TypeError, ValueError):
limit = 100
if limit <= 0:
limit = 100
try:
response = self._request('GET', '/files/search', params={
'q': query,
'limit': limit
})
files = response.get('files', [])
# Transform remote format to standard result format
results = []
for f in files:
results.append({
"name": f.get('name', '').split('/')[-1], # Get filename from path
"title": f.get('name', f.get('path', '')).split('/')[-1],
"ext": f.get('ext', ''),
"path": f.get('path', ''),
"target": f.get('path', ''),
"hash": f.get('hash', ''),
"origin": "remote",
"size": f.get('size', 0),
"size_bytes": f.get('size', 0),
"tags": f.get('tags', []),
})
debug(f"Remote search found {len(results)} results", file=sys.stderr)
return results
except Exception as exc:
log(f"❌ Remote search failed: {exc}", file=sys.stderr)
raise
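# A brief usage sketch for this backend (URL, key, and query are placeholders and
# assume a reachable server started as described in remote_storage_server.py):
#
#   backend = RemoteStorageBackend("http://192.168.1.100:5000", timeout=30, api_key="mysecretkey")
#   for item in backend.search("zohar", limit=10):
#       print(item["title"], item["size_bytes"], item["hash"])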
class FileStorage:
"""Unified file storage interface supporting multiple backend services.
@@ -1223,6 +1378,25 @@ class FileStorage:
# Include Matrix backend
self._backends["matrix"] = MatrixStorageBackend()
# Include remote storage backends from config (for Android/network servers)
remote_storages = config.get("remote_storages", [])
if isinstance(remote_storages, list):
for remote_config in remote_storages:
if isinstance(remote_config, dict):
name = remote_config.get("name", "remote")
url = remote_config.get("url")
timeout = remote_config.get("timeout", 30)
api_key = remote_config.get("api_key")
if url:
try:
backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key)
self._backends[name] = backend
auth_status = " (with auth)" if api_key else " (no auth)"
log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
except Exception as e:
log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
def __getitem__(self, backend_name: str) -> StorageBackend:
"""Get a storage backend by name.

View File

@@ -19,6 +19,8 @@ from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List, Tuple, Set
from .utils import sha256_file
logger = logging.getLogger(__name__)
# Try to import optional dependencies
@@ -455,6 +457,18 @@ class LocalLibraryDB:
logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True)
raise
def get_file_id(self, file_path: Path) -> Optional[int]:
"""Get the file ID for a file path, or None if not found."""
try:
str_path = str(file_path.resolve())
cursor = self.connection.cursor()
cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,))
row = cursor.fetchone()
return row[0] if row else None
except Exception as e:
logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True)
return None
def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]:
"""Get metadata for a file."""
try:
@@ -748,6 +762,177 @@ class LocalLibraryDB:
logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True)
raise
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
"""Set a relationship between two local files.
Args:
file_path: Path to the file being related
related_file_path: Path to the related file
rel_type: Type of relationship ('king', 'alt', 'related')
"""
try:
str_path = str(file_path.resolve())
str_related_path = str(related_file_path.resolve())
file_id = self.get_or_create_file_entry(file_path)
related_file_id = self.get_or_create_file_entry(related_file_path)
cursor = self.connection.cursor()
# Get hashes for both files
file_hash = sha256_file(file_path)
related_file_hash = sha256_file(related_file_path)
if not file_hash or not related_file_hash:
logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}")
return
# Store the hashes in the files table for future lookups
cursor.execute("""
UPDATE files SET file_hash = ? WHERE id = ?
""", (file_hash, file_id))
cursor.execute("""
UPDATE files SET file_hash = ? WHERE id = ?
""", (related_file_hash, related_file_id))
# Get current relationships
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (file_id,))
row = cursor.fetchone()
# Use index access to be safe regardless of row_factory
relationships_str = row[0] if row else None
try:
if relationships_str:
relationships = json.loads(relationships_str)
else:
relationships = {}
except (json.JSONDecodeError, TypeError):
relationships = {}
# Ensure relationships is a dict (handle case where DB has a list)
if not isinstance(relationships, dict):
relationships = {}
# Ensure rel_type key exists
if rel_type not in relationships:
relationships[rel_type] = []
# Add the relationship (store as hash string)
if related_file_hash not in relationships[rel_type]:
relationships[rel_type].append(related_file_hash)
# Save the updated relationships for the main file
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (file_id, json.dumps(relationships)))
logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})")
# Set reverse relationship (bidirectional)
# For 'alt' and 'related' the reverse is naturally the same type.
# For 'king' the reverse would strictly be something like 'subject', but we mirror the
# same type here for simplicity, consistent with Hydrus-style 'alternates'.
reverse_type = rel_type
# Update the related file
cursor.execute("""
SELECT relationships FROM metadata WHERE file_id = ?
""", (related_file_id,))
row = cursor.fetchone()
relationships_str = row[0] if row else None
try:
if relationships_str:
reverse_relationships = json.loads(relationships_str)
else:
reverse_relationships = {}
except (json.JSONDecodeError, TypeError):
reverse_relationships = {}
if not isinstance(reverse_relationships, dict):
reverse_relationships = {}
if reverse_type not in reverse_relationships:
reverse_relationships[reverse_type] = []
if file_hash not in reverse_relationships[reverse_type]:
reverse_relationships[reverse_type].append(file_hash)
# Save the updated reverse relationships
cursor.execute("""
INSERT INTO metadata (file_id, relationships)
VALUES (?, ?)
ON CONFLICT(file_id) DO UPDATE SET
relationships = excluded.relationships,
time_modified = CURRENT_TIMESTAMP
""", (related_file_id, json.dumps(reverse_relationships)))
self.connection.commit()
except Exception as e:
logger.error(f"Error setting relationship: {e}", exc_info=True)
raise
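# Illustration of what the stored metadata.relationships JSON ends up looking like
# after a couple of calls (hash values below are shortened placeholders):
#
#   {"alt":  ["9f2b...c41a", "77de...0b3f"],   # sha256 hashes of related files
#    "king": ["5a10...e9d2"]}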
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
"""Find all files that have a relationship pointing to the target path.
Args:
target_path: The file path to look for in other files' relationships
Returns:
List of dicts with {path, type} for files pointing to target
"""
try:
# Get the hash of the target file
target_hash = sha256_file(target_path)
if not target_hash:
logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash")
return []
cursor = self.connection.cursor()
# Scan all metadata rows (may be slow on huge DBs, but fine for a local library);
# we select file_path and the relationships JSON
cursor.execute("""
SELECT f.file_path, m.relationships
FROM metadata m
JOIN files f ON m.file_id = f.id
WHERE m.relationships LIKE ?
""", (f"%{target_hash}%",))
results = []
for row in cursor.fetchall():
f_path = row[0]
rels_json = row[1]
try:
rels = json.loads(rels_json)
if isinstance(rels, dict):
for r_type, hashes in rels.items():
if isinstance(hashes, list):
# Check if target hash is in this relationship type
if target_hash in hashes:
results.append({
"path": f_path,
"type": r_type
})
except (json.JSONDecodeError, TypeError):
continue
return results
except Exception as e:
logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True)
return []
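# Hypothetical example of the return shape (paths and types are placeholders):
#
#   db.find_files_pointing_to(Path("/library/clip_0001.webm"))
#   -> [{"path": "/library/clip_0002.webm", "type": "alt"},
#       {"path": "/library/master.webm",    "type": "king"}]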
def get_note(self, file_path: Path) -> Optional[str]:
"""Get note for a file."""
try:
@@ -1076,6 +1261,11 @@ class LocalLibraryDB:
if not text:
return True
try:
# Check if connection is valid
if not self.connection:
logger.warning(f"Database connection not available for worker {worker_id}")
return False
cursor = self.connection.cursor()
cursor.execute("SELECT stdout FROM worker WHERE worker_id = ?", (worker_id,))
row = cursor.fetchone()
@@ -1097,6 +1287,13 @@ class LocalLibraryDB:
self.connection.commit()
return cursor.rowcount > 0
except sqlite3.ProgrammingError as e:
# Handle "Cannot operate on a closed database" gracefully
if "closed database" in str(e).lower():
logger.warning(f"Database connection closed, cannot append stdout for worker {worker_id}")
return False
logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
return False
except Exception as e:
logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True)
return False
@@ -1599,3 +1796,23 @@ class LocalLibrarySearchOptimizer:
if not self.db:
return None
return self.db.search_by_hash(file_hash)
def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None:
"""Set a relationship between two files in the database.
Delegates to LocalLibraryDB.set_relationship().
Args:
file_path: Path to the first file
related_file_path: Path to the related file
rel_type: Type of relationship ('king', 'alt', 'related', etc.)
"""
if not self.db:
return
self.db.set_relationship(file_path, related_file_path, rel_type)
def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]:
"""Find all files that have a relationship pointing to the target path."""
if not self.db:
return []
return self.db.find_files_pointing_to(target_path)

View File

@@ -0,0 +1,523 @@
"""Remote Storage Server - REST API for file management on mobile devices.
This server runs on a mobile device (Android with Termux, iOS with iSH, etc.)
and exposes the local library database as a REST API. Your PC connects to this
server and uses it as a remote storage backend through the RemoteStorageBackend.
## INSTALLATION
### On Android (Termux):
1. Install Termux from Play Store: https://play.google.com/store/apps/details?id=com.termux
2. In Termux:
$ apt update && apt install python
$ pip install flask flask-cors
3. Copy this file to your device
4. Run it (with optional API key):
$ python remote_storage_server.py --storage-path /path/to/storage --port 5000
$ python remote_storage_server.py --storage-path /path/to/storage --api-key mysecretkey
5. Server prints connection info automatically (IP, port, API key)
### On PC:
1. Install requests: pip install requests
2. Add to config.json:
{
"remote_storages": [
{
"name": "phone",
"url": "http://192.168.1.100:5000",
"api_key": "mysecretkey",
"timeout": 30
}
]
}
Note: the API key is optional. The server works over Wi-Fi or cellular data.
## USAGE
After setup, all cmdlets work with the phone:
$ search-file zohar -store phone
$ @1-3 | add-relationship -king @4 -store phone
$ @1 | get-relationship -store phone
The server exposes REST endpoints that RemoteStorageBackend uses internally.
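You can also exercise the endpoints directly; a minimal check against /files/search
using the requests library (URL and API key below are placeholders):
    import requests
    resp = requests.get(
        "http://192.168.1.100:5000/files/search",
        params={"q": "zohar", "limit": 10},
        headers={"X-API-Key": "mysecretkey"},  # only needed if --api-key was set
        timeout=30,
    )
    print(resp.json()["count"], "results")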
"""
from __future__ import annotations
import os
import sys
import json
import argparse
import logging
from pathlib import Path
from typing import Optional, Dict, Any
from datetime import datetime
from functools import wraps
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from helper.logger import log
# ============================================================================
# CONFIGURATION
# ============================================================================
logging.basicConfig(
level=logging.INFO,
format='[%(asctime)s] %(levelname)s: %(message)s'
)
logger = logging.getLogger(__name__)
STORAGE_PATH: Optional[Path] = None
API_KEY: Optional[str] = None # API key for authentication (None = no auth required)
# Try importing Flask - will be used in main() only
try:
from flask import Flask, request, jsonify
from flask_cors import CORS
HAS_FLASK = True
except ImportError:
HAS_FLASK = False
# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================
def get_local_ip() -> Optional[str]:
"""Get the local IP address that would be used for external connections."""
import socket
try:
# Create a socket to determine which interface would be used
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80)) # Google DNS
ip = s.getsockname()[0]
s.close()
return ip
except Exception:
return None
# ============================================================================
# FLASK APP FACTORY
# ============================================================================
def create_app():
"""Create and configure Flask app with all routes."""
if not HAS_FLASK:
raise ImportError("Flask not installed. Install with: pip install flask flask-cors")
from flask import Flask, request, jsonify
from flask_cors import CORS
app = Flask(__name__)
CORS(app)
# ========================================================================
# HELPER DECORATORS
# ========================================================================
def require_auth():
"""Decorator to check API key authentication if configured."""
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if API_KEY:
# Get API key from header or query parameter
provided_key = request.headers.get('X-API-Key') or request.args.get('api_key')
if not provided_key or provided_key != API_KEY:
return jsonify({"error": "Unauthorized. Invalid or missing API key."}), 401
return f(*args, **kwargs)
return decorated_function
return decorator
def require_storage():
"""Decorator to ensure storage path is configured."""
def decorator(f):
@wraps(f)
def decorated_function(*args, **kwargs):
if not STORAGE_PATH:
return jsonify({"error": "Storage path not configured"}), 500
return f(*args, **kwargs)
return decorated_function
return decorator
# ========================================================================
# HEALTH CHECK
# ========================================================================
@app.route('/health', methods=['GET'])
@require_auth()
def health():
"""Check server health and storage availability."""
status = {
"status": "ok",
"storage_configured": STORAGE_PATH is not None,
"timestamp": datetime.now().isoformat()
}
if STORAGE_PATH:
status["storage_path"] = str(STORAGE_PATH)
status["storage_exists"] = STORAGE_PATH.exists()
try:
from helper.local_library import LocalLibraryDB
with LocalLibraryDB(STORAGE_PATH) as db:
status["database_accessible"] = True
except Exception as e:
status["database_accessible"] = False
status["database_error"] = str(e)
return jsonify(status), 200
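# For reference, a successful GET /health response looks roughly like this
# (path and timestamp are placeholders, not captured output):
#
#   {"status": "ok",
#    "storage_configured": true,
#    "timestamp": "2025-12-03T15:18:57",
#    "storage_path": "/storage/media",
#    "storage_exists": true,
#    "database_accessible": true}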
# ========================================================================
# FILE OPERATIONS
# ========================================================================
@app.route('/files/search', methods=['GET'])
@require_auth()
@require_storage()
def search_files():
"""Search for files by name or tag."""
from helper.local_library import LocalLibrarySearchOptimizer
query = request.args.get('q', '')
limit = request.args.get('limit', 100, type=int)
if not query:
return jsonify({"error": "Search query required"}), 400
try:
with LocalLibrarySearchOptimizer(STORAGE_PATH) as db:
results = db.search_by_name(query, limit)
tag_results = db.search_by_tag(query, limit)
all_results = {r['hash']: r for r in (results + tag_results)}
return jsonify({
"query": query,
"count": len(all_results),
"files": list(all_results.values())
}), 200
except Exception as e:
logger.error(f"Search error: {e}", exc_info=True)
return jsonify({"error": f"Search failed: {str(e)}"}), 500
@app.route('/files/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_file_metadata(file_hash: str):
"""Get metadata for a specific file by hash."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path or not file_path.exists():
return jsonify({"error": "File not found"}), 404
metadata = db.get_metadata(file_path)
tags = db.get_tags(file_path)
return jsonify({
"hash": file_hash,
"path": str(file_path),
"size": file_path.stat().st_size,
"metadata": metadata,
"tags": tags
}), 200
except Exception as e:
logger.error(f"Get metadata error: {e}", exc_info=True)
return jsonify({"error": f"Failed to get metadata: {str(e)}"}), 500
@app.route('/files/index', methods=['POST'])
@require_auth()
@require_storage()
def index_file():
"""Index a new file in the storage."""
from helper.local_library import LocalLibraryDB
from helper.utils import sha256_file
data = request.get_json() or {}
file_path_str = data.get('path')
tags = data.get('tags', [])
urls = data.get('urls', [])
if not file_path_str:
return jsonify({"error": "File path required"}), 400
try:
file_path = Path(file_path_str)
if not file_path.exists():
return jsonify({"error": "File does not exist"}), 404
with LocalLibraryDB(STORAGE_PATH) as db:
db.get_or_create_file_entry(file_path)
if tags:
db.add_tags(file_path, tags)
if urls:
db.add_known_urls(file_path, urls)
file_hash = sha256_file(file_path)
return jsonify({
"hash": file_hash,
"path": str(file_path),
"tags_added": len(tags),
"urls_added": len(urls)
}), 201
except Exception as e:
logger.error(f"Index error: {e}", exc_info=True)
return jsonify({"error": f"Indexing failed: {str(e)}"}), 500
# ========================================================================
# TAG OPERATIONS
# ========================================================================
@app.route('/tags/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_tags(file_hash: str):
"""Get tags for a file."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
tags = db.get_tags(file_path)
return jsonify({"hash": file_hash, "tags": tags}), 200
except Exception as e:
logger.error(f"Get tags error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/tags/<file_hash>', methods=['POST'])
@require_auth()
@require_storage()
def add_tags(file_hash: str):
"""Add tags to a file."""
from helper.local_library import LocalLibraryDB
data = request.get_json() or {}
tags = data.get('tags', [])
mode = data.get('mode', 'add')
if not tags:
return jsonify({"error": "Tags required"}), 400
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
if mode == 'replace':
db.remove_tags(file_path, db.get_tags(file_path))
db.add_tags(file_path, tags)
return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200
except Exception as e:
logger.error(f"Add tags error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/tags/<file_hash>', methods=['DELETE'])
@require_auth()
@require_storage()
def remove_tags(file_hash: str):
"""Remove tags from a file."""
from helper.local_library import LocalLibraryDB
tags_str = request.args.get('tags', '')
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
if tags_str:
tags_to_remove = [t.strip() for t in tags_str.split(',')]
else:
tags_to_remove = db.get_tags(file_path)
db.remove_tags(file_path, tags_to_remove)
return jsonify({"hash": file_hash, "tags_removed": len(tags_to_remove)}), 200
except Exception as e:
logger.error(f"Remove tags error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
# ========================================================================
# RELATIONSHIP OPERATIONS
# ========================================================================
@app.route('/relationships/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_relationships(file_hash: str):
"""Get relationships for a file."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
metadata = db.get_metadata(file_path)
relationships = metadata.get('relationships', {}) if metadata else {}
return jsonify({"hash": file_hash, "relationships": relationships}), 200
except Exception as e:
logger.error(f"Get relationships error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/relationships', methods=['POST'])
@require_auth()
@require_storage()
def set_relationship():
"""Set a relationship between two files."""
from helper.local_library import LocalLibraryDB
data = request.get_json() or {}
from_hash = data.get('from_hash')
to_hash = data.get('to_hash')
rel_type = data.get('type', 'alt')
if not from_hash or not to_hash:
return jsonify({"error": "from_hash and to_hash required"}), 400
try:
with LocalLibraryDB(STORAGE_PATH) as db:
from_path = db.search_by_hash(from_hash)
to_path = db.search_by_hash(to_hash)
if not from_path or not to_path:
return jsonify({"error": "File not found"}), 404
db.set_relationship(from_path, to_path, rel_type)
return jsonify({"from_hash": from_hash, "to_hash": to_hash, "type": rel_type}), 200
except Exception as e:
logger.error(f"Set relationship error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
# ========================================================================
# URL OPERATIONS
# ========================================================================
@app.route('/urls/<file_hash>', methods=['GET'])
@require_auth()
@require_storage()
def get_urls(file_hash: str):
"""Get known URLs for a file."""
from helper.local_library import LocalLibraryDB
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
metadata = db.get_metadata(file_path)
urls = metadata.get('known_urls', []) if metadata else []
return jsonify({"hash": file_hash, "urls": urls}), 200
except Exception as e:
logger.error(f"Get URLs error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
@app.route('/urls/<file_hash>', methods=['POST'])
@require_auth()
@require_storage()
def add_urls(file_hash: str):
"""Add URLs to a file."""
from helper.local_library import LocalLibraryDB
data = request.get_json() or {}
urls = data.get('urls', [])
if not urls:
return jsonify({"error": "URLs required"}), 400
try:
with LocalLibraryDB(STORAGE_PATH) as db:
file_path = db.search_by_hash(file_hash)
if not file_path:
return jsonify({"error": "File not found"}), 404
db.add_known_urls(file_path, urls)
return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200
except Exception as e:
logger.error(f"Add URLs error: {e}", exc_info=True)
return jsonify({"error": f"Failed: {str(e)}"}), 500
return app
# ============================================================================
# MAIN
# ============================================================================
def main():
if not HAS_FLASK:
print("ERROR: Flask and flask-cors required")
print("Install with: pip install flask flask-cors")
sys.exit(1)
parser = argparse.ArgumentParser(
description='Remote Storage Server for Medios-Macina',
epilog='Example: python remote_storage_server.py --storage-path /storage/media --port 5000 --api-key mysecretkey'
)
parser.add_argument('--storage-path', type=str, required=True, help='Path to storage directory')
parser.add_argument('--host', type=str, default='0.0.0.0', help='Server host (default: 0.0.0.0)')
parser.add_argument('--port', type=int, default=5000, help='Server port (default: 5000)')
parser.add_argument('--api-key', type=str, default=None, help='API key for authentication (optional)')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
args = parser.parse_args()
global STORAGE_PATH, API_KEY
STORAGE_PATH = Path(args.storage_path).resolve()
API_KEY = args.api_key
if not STORAGE_PATH.exists():
print(f"ERROR: Storage path does not exist: {STORAGE_PATH}")
sys.exit(1)
# Get local IP address
local_ip = get_local_ip()
if not local_ip:
local_ip = "127.0.0.1"
print(f"\n{'='*70}")
print(f"Remote Storage Server - Medios-Macina")
print(f"{'='*70}")
print(f"Storage Path: {STORAGE_PATH}")
print(f"Local IP: {local_ip}")
print(f"Server URL: http://{local_ip}:{args.port}")
print(f"Health URL: http://{local_ip}:{args.port}/health")
print(f"API Key: {'Enabled - ' + ('***' + args.api_key[-4:]) if args.api_key else 'Disabled (no auth)'}")
print(f"Debug Mode: {args.debug}")
print(f"\n📋 Config for config.json:")
config_entry = {
"name": "phone",
"url": f"http://{local_ip}:{args.port}",
"timeout": 30
}
if args.api_key:
config_entry["api_key"] = args.api_key
print(json.dumps(config_entry, indent=2))
print(f"\n{'='*70}\n")
try:
from helper.local_library import LocalLibraryDB
with LocalLibraryDB(STORAGE_PATH) as db:
logger.info("Database initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize database: {e}")
sys.exit(1)
app = create_app()
app.run(host=args.host, port=args.port, debug=args.debug, use_reloader=False)
if __name__ == '__main__':
main()