# Medios-Macina/cmdlets/add_file.py
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys
import shutil  # needed by the export-mode move below; missing from the original imports
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
    Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
    extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
    merge_sequences, extract_relationships, extract_duration
)
from ._shared import collapse_namespace_tags
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file

# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS

# Initialize the file storage system
storage = FileStorage()
def _guess_media_kind_from_suffix(media_path: Path) -> str:
    suffix = media_path.suffix.lower()
    if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
        return 'audio'
    if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
        return 'video'
    if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
        return 'image'
    if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
        return 'document'
    return 'other'
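
# Illustrative behavior (file names invented for the example):
#   _guess_media_kind_from_suffix(Path("creep.flac"))  -> 'audio'
#   _guess_media_kind_from_suffix(Path("clip.webm"))   -> 'video'
#   _guess_media_kind_from_suffix(Path("scan.xyz"))    -> 'other'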
def _resolve_media_kind(result: Any, media_path: Path) -> str:
    if isinstance(result, models.PipeObject):
        if getattr(result, 'media_kind', None):
            return str(result.media_kind)
    elif isinstance(result, dict):
        media_kind = result.get('media_kind')
        if media_kind:
            return str(media_kind)
        metadata = result.get('metadata')
        if isinstance(metadata, dict) and metadata.get('media_kind'):
            return str(metadata['media_kind'])
    return _guess_media_kind_from_suffix(media_path)
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
    # For local origin, try to read from the local database first
    if origin and origin.lower() == "local" and config:
        try:
            from helper.local_library import LocalLibraryDB
            from config import get_local_storage_path
            try:
                db_root = get_local_storage_path(config)
            except Exception:
                db_root = None
            if db_root:
                try:
                    with LocalLibraryDB(Path(db_root)) as db:
                        # Get tags and metadata from the database
                        tags = db.get_tags(media_path) or []
                        metadata = db.get_metadata(media_path) or {}
                        known_urls = metadata.get("known_urls") or []
                        file_hash = metadata.get("hash")
                        if tags or known_urls or file_hash:
                            debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
                            return None, file_hash, tags, known_urls
                except Exception as exc:
                    log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
        except Exception:
            pass
    # Fall back to sidecar file lookup
    try:
        sidecar_path = find_sidecar(media_path)
    except Exception:
        sidecar_path = None
    if not sidecar_path or not sidecar_path.exists():
        return None, None, [], []
    try:
        hash_value, tags, known_urls = read_sidecar(sidecar_path)
        return sidecar_path, hash_value, tags or [], known_urls or []
    except Exception as exc:
        log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
        return sidecar_path, None, [], []
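
# Return shape, for reference: (sidecar_path, hash, tags, known_urls).
# A database hit returns sidecar_path=None (the DB, not a file, was the source);
# a missing sidecar returns (None, None, [], []).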
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
    candidate = None
    if isinstance(result, models.PipeObject):
        candidate = result.file_hash
    elif isinstance(result, dict):
        candidate = result.get('file_hash') or result.get('hash')
    candidate = candidate or fallback_hash
    if candidate:
        return str(candidate)
    try:
        return sha256_file(file_path)
    except Exception as exc:
        log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
        return None
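
# Fallback order: hash carried on the piped result -> hash from the sidecar/DB
# bundle -> a fresh SHA-256 of the file on disk (or None if even that fails).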
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
    targets = [
        media_path.parent / (media_path.name + '.metadata'),
        media_path.parent / (media_path.name + '.notes'),
        media_path.parent / (media_path.name + '.tags'),
        media_path.parent / (media_path.name + '.tags.txt'),
    ]
    targets.extend(extra_paths)
    for target in targets:
        if not target:
            continue
        try:
            path_obj = Path(target)
            if path_obj.exists():
                path_obj.unlink()
        except Exception:
            continue
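
# For example, _cleanup_sidecar_files(Path("song.mp3")) removes song.mp3.metadata,
# song.mp3.notes, song.mp3.tags, and song.mp3.tags.txt when present.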
def _show_local_result_table(file_hash: Optional[str], config: Dict[str, Any]) -> None:
    """Run search-file by hash to display the newly added local file in a table."""
    if not file_hash:
        return
    try:
        from cmdlets import search_file as search_cmd
        temp_ctx = models.PipelineStageContext(0, 1)
        saved_ctx = ctx.get_stage_context()
        ctx.set_stage_context(temp_ctx)
        try:
            # Call the cmdlet exactly as the user would type it: search-file "hash:...,store:local"
            search_cmd._run(None, [f"hash:{file_hash},store:local"], config)
            try:
                table = ctx.get_last_result_table()
                if table is not None:
                    log("")
                    log(table.format_plain())
            except Exception:
                pass
        finally:
            ctx.set_stage_context(saved_ctx)
    except Exception as exc:
        debug(f"[add-file] Skipped search-file display: {exc}")
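
# After a successful local import this prints the same table the user would get
# by typing: search-file "hash:<sha256>,store:local"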
def _persist_local_metadata(
    library_root: Path,
    dest_path: Path,
    tags: list[str],
    known_urls: list[str],
    file_hash: Optional[str],
    relationships: Optional[Dict[str, Any]],
    duration: Optional[float],
    media_kind: str,
) -> None:
    payload = {
        'hash': file_hash,
        'known_urls': known_urls,
        'relationships': relationships or [],
        'duration': duration,
        'size': None,
        'ext': dest_path.suffix.lower(),
        'media_type': media_kind,
        'media_kind': media_kind,
    }
    try:
        payload['size'] = dest_path.stat().st_size
    except OSError:
        payload['size'] = None
    try:
        debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
        db_path = Path(library_root) / ".downlow_library.db"
        debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
        debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
        debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
        with LocalLibraryDB(library_root) as db:
            # Use the optimized single-transaction save
            debug(f"[_persist_local_metadata] Saving metadata and {len(tags)} tags to DB")
            try:
                db.save_file_info(dest_path, payload, tags)
                debug(f"[_persist_local_metadata] ✅ File info saved to DB")
            except Exception as exc:
                log(f"[_persist_local_metadata] ❌ Failed to save file info: {exc}", file=sys.stderr)
                raise
        # NOTE: Sidecar files are intentionally NOT created for local storage.
        # Local storage uses the database as the primary source, not sidecar files.
        debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
    except Exception as exc:
        log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
        import traceback
        log(traceback.format_exc(), file=sys.stderr)
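
# A typical payload (values invented for illustration):
#   {'hash': 'ab12...', 'known_urls': ['https://example.org/track'],
#    'relationships': [], 'duration': 241.3, 'size': 8273941,
#    'ext': '.mp3', 'media_type': 'audio', 'media_kind': 'audio'}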
def _handle_local_transfer(
    media_path: Path,
    destination_root: Path,
    result: Any,
    config: Optional[Dict[str, Any]] = None,
    export_mode: bool = False,
) -> Tuple[int, Optional[Path]]:
    """Transfer a file to local storage and return (exit_code, destination_path).

    Args:
        media_path: Path to the source file
        destination_root: Destination directory
        result: Result object with metadata
        config: Configuration dictionary
        export_mode: If True, rename by title and skip database persistence (export);
            if False, rename to the file's hash and persist metadata (library import)

    Returns:
        Tuple of (exit_code, destination_path)
        - exit_code: 0 on success, 1 on failure
        - destination_path: Path to the moved file on success, None on failure
    """
    destination_root = destination_root.expanduser()
    try:
        destination_root.mkdir(parents=True, exist_ok=True)
    except Exception as exc:
        log(f"Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    tags_from_result = extract_tags_from_result(result)
    urls_from_result = extract_known_urls_from_result(result)

    # Get origin from the result if available
    result_origin = None
    if hasattr(result, "origin"):
        result_origin = result.origin
    elif isinstance(result, dict):
        result_origin = result.get("origin") or result.get("source")
    sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)

    # Normalize all title tags to use spaces instead of underscores BEFORE merging,
    # so that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title.
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag

    tags_from_result = collapse_namespace_tags([normalize_title_tag(t) for t in tags_from_result], "title", prefer="last")
    sidecar_tags = collapse_namespace_tags([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last")

    # Merge tags carefully: if the result already carries a title tag, drop the
    # sidecar's title tags so the merge cannot produce duplicate title: entries.
    has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
    if has_url_title:
        sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
        merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
    else:
        # No title from the result, so use all sidecar tags
        merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
    merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
    relationships = extract_relationships(result)
    duration = extract_duration(result)
    # Skip title-based renaming in library mode (hash-based names); export mode renames by title.
    resolved_hash: Optional[str] = None  # also referenced after the try block, so define it up front
    try:
        if export_mode:
            title_tag = next((t for t in merged_tags if str(t).strip().lower().startswith("title:")), None)
            title_value = ""
            if title_tag:
                title_value = title_tag.split(":", 1)[1].strip()
            if not title_value:
                title_value = media_path.stem.replace("_", " ").strip()
            # Sanitize the filename
            safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
            base_name = safe_title or media_path.stem
            new_name = base_name + media_path.suffix
            target_path = destination_root / new_name
            destination_root.mkdir(parents=True, exist_ok=True)
            if target_path.exists():
                from helper.utils import unique_path
                target_path = unique_path(target_path)
            shutil.move(str(media_path), target_path)
            # Move any sidecar files alongside
            possible_sidecars = [
                media_path.with_suffix(media_path.suffix + ".json"),
                media_path.with_name(media_path.name + ".tags"),
                media_path.with_name(media_path.name + ".tags.txt"),
                media_path.with_name(media_path.name + ".metadata"),
                media_path.with_name(media_path.name + ".notes"),
            ]
            for sc in possible_sidecars:
                try:
                    if sc.exists():
                        suffix_part = sc.name.replace(media_path.name, "", 1)
                        dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
                        dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
                        shutil.move(str(sc), dest_sidecar)
                except Exception:
                    pass
            media_path = target_path
            dest_file = str(target_path)
        else:
            # Ensure the filename is the hash when adding to local storage
            resolved_hash = _resolve_file_hash(result, sidecar_hash, media_path)
            hashed_move_done = False
            if resolved_hash:
                hashed_name = resolved_hash + media_path.suffix
                target_path = destination_root / hashed_name
                try:
                    if target_path.exists():
                        target_path.unlink()
                except Exception:
                    pass
                if media_path != target_path:
                    media_path = media_path.rename(target_path)
                    hashed_move_done = True
            if hashed_move_done and media_path.parent.samefile(destination_root):
                # Already placed at the final destination with the hash name; skip the extra upload/move
                dest_file = str(media_path)
            else:
                dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
    except Exception as exc:
        log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    dest_path = Path(dest_file)
    file_hash = _resolve_file_hash(result, resolved_hash, dest_path)
    media_kind = _resolve_media_kind(result, dest_path)

    # If we have a title tag, keep it. Otherwise, derive one from the filename.
    has_title = any(str(t).strip().lower().startswith("title:") for t in merged_tags)
    final_tags = collapse_namespace_tags(merged_tags, "title", prefer="last")
    if not has_title:
        filename_title = dest_path.stem.replace("_", " ").strip()
        if filename_title:
            final_tags.insert(0, f"title:{filename_title}")

    if not export_mode:
        _persist_local_metadata(destination_root, dest_path, final_tags, merged_urls, file_hash, relationships, duration, media_kind)
        _cleanup_sidecar_files(media_path, sidecar_path)
        _show_local_result_table(file_hash, config or {})
    else:
        debug(f"✅ Exported to destination: {dest_path}")
    return 0, dest_path
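
# Sketch of the two modes (paths are illustrative):
#   _handle_local_transfer(Path("creep.mp3"), Path("~/library"), result)
#       -> library import: file is renamed to <sha256>.mp3, metadata is saved to the DB
#   _handle_local_transfer(Path("creep.mp3"), Path("D:/export"), result, export_mode=True)
#       -> export: file is renamed to its title (e.g. "Radiohead - Creep.mp3"),
#          sidecars move alongside, nothing is written to the DB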
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Upload/copy a file to the specified location.

    Returns 0 on success, non-zero on failure.
    """
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    debug("Starting add-file cmdlet")

    # Handle a list of results (from piped commands that emit multiple items)
    if isinstance(result, list):
        debug(f"Processing {len(result)} piped files")
        success_count = 0
        for item in result:
            exit_code = _run(item, _args, config)
            if exit_code == 0:
                success_count += 1
        return 0 if success_count > 0 else 1
    # Parse arguments using the CMDLET spec
    parsed = parse_cmdlet_args(_args, CMDLET)
    location: Optional[str] = None
    provider_name: Optional[str] = None
    delete_after_upload = False

    # Check whether a -path argument was provided
    path_arg = parsed.get("path")
    if path_arg:
        path_value = Path(str(path_arg).strip())
        # If there is no piped result, treat -path as the source file (existing behavior)
        if result is None:
            if not path_value.exists():
                log(f"❌ File not found: {path_value}")
                return 1
            result = {"target": str(path_value), "origin": "wild"}
            log(f"Using direct file path: {path_value}")
        else:
            # Piped result present: treat -path as the destination (export)
            if not path_value.exists():
                try:
                    path_value.mkdir(parents=True, exist_ok=True)
                except Exception as exc:
                    log(f"❌ Cannot create destination directory {path_value}: {exc}", file=sys.stderr)
                    return 1
            location = str(path_value)

    # Get location from parsed args - uses SharedArgs.STORAGE, so the key is "storage"
    storage_arg = parsed.get("storage")
    if location is None:
        location = storage_arg
        if location:
            location = str(location).lower().strip()
    elif storage_arg:
        # User provided both -path (as destination) and -storage; error unless they agree
        storage_str = str(storage_arg).lower().strip()
        if storage_str != str(location).lower():
            log(f"❌ Conflicting destinations: -path '{location}' vs -storage '{storage_str}'", file=sys.stderr)
            return 1
    # Get the file provider from parsed args
    provider_name = parsed.get("provider")
    if provider_name:
        provider_name = str(provider_name).lower().strip()

    # Check for the delete flag (presence in the parsed dict means it was provided)
    delete_after_upload = "delete" in parsed

    # Either storage or provider must be specified, but not both
    if location is None and provider_name is None:
        log("Either -storage or -provider must be specified")
        log(" -storage options: 'hydrus', 'local', 'matrix', or a directory path")
        log(" -provider options: '0x0'")
        return 1
    if location is not None and provider_name is not None:
        log("❌ Cannot specify both -storage and -provider")
        return 1

    # Validate location (storage backends); 'matrix' must pass this early check
    # so the matrix branch further below is reachable
    is_valid_location = False
    if location is not None:
        valid_locations = {'hydrus', 'local', 'matrix'}
        is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
    if location is not None and not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path (e.g., C:\\Music or /home/user/music)")
        return 1
    # Extract tags/known URLs from pipeline objects if available
    pipe_object_tags = extract_tags_from_result(result)
    if pipe_object_tags:
        debug(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
    pipe_known_urls = extract_known_urls_from_result(result)

    # Resolve the media path from the piped result.
    # Support both object attributes (getattr) and dict keys (get).
    target = None
    origin = None
    # Try object attributes first
    if hasattr(result, "target"):
        target = result.target
    elif hasattr(result, "path"):
        target = result.path
    elif hasattr(result, "file_path"):
        target = result.file_path
    # Fall back to dict keys if object attributes failed
    elif isinstance(result, dict):
        target = (result.get("target") or result.get("path") or result.get("file_path") or
                  result.get("__file_path") or result.get("__path") or result.get("__target"))
    # Get origin to detect Hydrus files
    if hasattr(result, "origin"):
        origin = result.origin
    elif hasattr(result, "source"):
        origin = result.source
    elif isinstance(result, dict):
        origin = result.get("origin") or result.get("source") or result.get("__source")

    # Convert target to a string and preserve URLs (don't let Path() mangle them)
    target_str = str(target) if target else None
    # Check whether this is a playlist item that needs to be downloaded first
    is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
    if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
        # This is a playlist item URL - download it first
        log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
        # Extract the item number if available
        item_num = None
        if "__action" in result and result["__action"].startswith("playlist-item:"):
            item_num = result["__action"].split(":")[1]
        elif "index" in result:
            item_num = result["index"]
        # Call download-data to download this specific item,
        # passing the item number so it knows which track to download
        from cmdlets import download_data as dl_module
        # Capture emissions from download-data so we can process them
        captured_results = []
        original_emit = ctx.emit
        def capture_emit(obj):
            captured_results.append(obj)
            # Also emit to original so the user sees progress/output if needed.
            # Since add-file is usually terminal, we might not need to:
            # original_emit(obj)
        # Temporarily hook the pipeline emit function
        ctx.emit = capture_emit
        try:
            if item_num:
                # Pass a marker dict to tell download-data which item to get
                download_result = dl_module._run(
                    {
                        "__playlist_url": str(target_str),
                        "__playlist_item": int(item_num)
                    },
                    [],
                    config
                )
            else:
                # Fallback: just download the URL (will show all items)
                download_result = dl_module._run(None, [str(target_str)], config)
        finally:
            # Restore the original emit function
            ctx.emit = original_emit
        if download_result != 0:
            log(f"❌ Failed to download playlist item", file=sys.stderr)
            return 1
        log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
        # Process the downloaded files recursively; recursing through add-file
        # ensures tags and metadata from download-data are applied
        success_count = 0
        for res in captured_results:
            if _run(res, _args, config) == 0:
                success_count += 1
        return 0 if success_count > 0 else 1
    # Determine media_path from the result
    media_path: Optional[Path] = None
    is_hydrus_file = origin and origin.lower() == "hydrus"
    if target_str:
        # Check whether it is a URL or a Hydrus hash
        if target_str.lower().startswith(("http://", "https://")):
            media_path = None  # Will handle as Hydrus file below
        elif not is_hydrus_file:
            # Only treat as a local path if not a Hydrus file
            media_path = Path(target_str)
    if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
        # Check whether this is a format object from download-data
        if isinstance(result, dict) and result.get('format_id') is not None:
            log("❌ Format object received, but add-file expects a downloaded file")
            log(f" Tip: Use @N to automatically select and download the format")
            log(f" Streamlined workflow:")
            log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
            log(f" (The @N automatically expands to download-data \"URL\" -item N)")
            return 1
        log("❌ File not found: provide a piped file result or local file path")
        return 1
    # If this is a Hydrus file, fetch the actual file path from Hydrus
    if is_hydrus_file and target_str:
        log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
        try:
            from helper import hydrus
            # Get the Hydrus client
            client = hydrus.get_client(config)
            if not client:
                log(f"❌ Hydrus client unavailable", file=sys.stderr)
                return 1
            # target_str is the hash - ask Hydrus for the actual file path
            file_hash = target_str
            # Call the /get_files/file_path endpoint to get the actual file path
            response = client.get_file_path(file_hash)
            if not response or not isinstance(response, dict):
                log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
                return 1
            file_path_str = response.get("path")
            if not file_path_str:
                log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
                return 1
            media_path = Path(file_path_str)
            if not media_path.exists():
                log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
                return 1
            log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
        except Exception as exc:
            log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
            import traceback
            log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
            return 1
    # Generic URL handler: the target is a URL and no local path has been resolved yet.
    # This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
    if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
        log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
        from cmdlets import download_data as dl_module
        dl_args = []
        if location:
            dl_args.extend(["-storage", location])
        # Map provider 0x0 to storage 0x0 for download-data
        if provider_name == "0x0":
            dl_args.extend(["-storage", "0x0"])
        # Capture results from download-data so we can add them to the DB
        captured_results = []
        original_emit = ctx.emit
        def capture_emit(obj):
            captured_results.append(obj)
            original_emit(obj)
        ctx.emit = capture_emit
        try:
            ret_code = dl_module._run(result, dl_args, config)
        finally:
            ctx.emit = original_emit
        if ret_code != 0:
            return ret_code
        # Process the downloaded files recursively to add them to the DB
        if captured_results:
            log(f"Processing {len(captured_results)} downloaded file(s)...", file=sys.stderr)
            success_count = 0
            for res in captured_results:
                # Recursively call add-file with the downloaded result
                if _run(res, _args, config) == 0:
                    success_count += 1
            return 0 if success_count > 0 else 1
        return 0
    if media_path is None:
        log("File path could not be resolved")
        return 1
    if not media_path.exists() or not media_path.is_file():
        log(f"File not found: {media_path}")
        return 1

    # Validate the file type - only accept Hydrus-supported files
    file_extension = media_path.suffix.lower()
    if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
        log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
        log(f"Hydrus supports the following file types:", file=sys.stderr)
        # Display by category from hydrus_wrapper
        for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
            ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
            log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
        log(f"Skipping this file: {media_path.name}", file=sys.stderr)
        return 1
    # Handle based on provider or storage
    if provider_name is not None:
        # Use a file provider (e.g., 0x0.st)
        from helper.search_provider import get_file_provider
        log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
        try:
            file_provider = get_file_provider(provider_name, config)
            if file_provider is None:
                log(f"File provider '{provider_name}' not available", file=sys.stderr)
                return 1
            hoster_url = file_provider.upload(media_path)
            log(f"File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
            # Associate the URL with the file in Hydrus if possible
            current_hash = locals().get('file_hash')
            if not current_hash:
                current_hash = _resolve_file_hash(result, None, media_path)
            if current_hash:
                try:
                    client = hydrus_wrapper.get_client(config)
                    if client:
                        client.associate_url(current_hash, hoster_url)
                        debug(f"Associated URL with file hash {current_hash}", file=sys.stderr)
                except Exception as exc:
                    log(f"Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
        except Exception as exc:
            log(f"{provider_name} upload failed: {exc}", file=sys.stderr)
            return 1
        if delete_after_upload:
            try:
                media_path.unlink()
                _cleanup_sidecar_files(media_path)
                log(f"✅ Deleted file and sidecar", file=sys.stderr)
            except Exception as exc:
                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
        return 0
    # Handle storage-based operations (location is not None here)
    valid_locations = {'hydrus', 'local', 'matrix'}
    is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
    if not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path")
        return 1
    if location == 'local':
        try:
            from config import get_local_storage_path
            resolved_dir = get_local_storage_path(config)
        except Exception:
            resolved_dir = None
        if not resolved_dir:
            resolved_dir = config.get("LocalDir") or config.get("OutputDir")
        if not resolved_dir:
            log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
            return 1
        debug(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
        # After a successful local transfer, emit a result for pipeline continuation.
        # This allows downstream commands like add-tags to chain automatically.
        if exit_code == 0 and dest_path:
            # Extract tags from the result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract the title from the original result, falling back to the filename
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit a result for local files, even with no tags,
            # so @N selection and piping to downstream commands still work
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so a downstream @N doesn't try to re-run download-data;
            # the next stage will use these local file results, not format objects
            ctx.set_current_stage_table(None)
        return exit_code
    elif is_local_path:
        try:
            destination_root = Path(location)
        except Exception as exc:
            log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
            return 1
        log(f"Moving to local path: {destination_root}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config, export_mode=True)
        # After a successful local transfer, emit a result for pipeline continuation
        if exit_code == 0 and dest_path:
            # Extract tags from the result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract the title from the original result, falling back to the filename
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit a result for local files, even with no tags,
            # so @N selection and piping to downstream commands still work
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so a downstream @N doesn't try to re-run download-data
            ctx.set_current_stage_table(None)
        return exit_code
    elif location == 'matrix':
        log(f"Uploading to Matrix: {media_path.name}", file=sys.stderr)
        try:
            result_url = storage["matrix"].upload(media_path, config=config)
            log(f"Matrix: {result_url}", file=sys.stderr)
            result_dict = create_pipe_object_result(
                source='matrix',
                identifier=result_url,
                file_path=str(media_path),
                cmdlet_name='add-file',
                title=media_path.name,
                target=result_url
            )
            ctx.emit(result_dict)
        except Exception as exc:
            log(f"Failed: {exc}", file=sys.stderr)
            return 1
        if delete_after_upload:
            try:
                media_path.unlink()
                _cleanup_sidecar_files(media_path)
                log(f"✅ Deleted file and sidecar", file=sys.stderr)
            except Exception as exc:
                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
        return 0
    # location == 'hydrus'
    # Compute the file hash to check whether it is already in Hydrus
    log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
    log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
    try:
        file_hash = sha256_file(media_path)
    except Exception as exc:
        log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
        return 1
    debug(f"File hash: {file_hash}", file=sys.stderr)

    # Read sidecar tags and known URLs first (for tagging)
    sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
    if sidecar_path:
        log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
        log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
        if sidecar_tags:
            log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
        if sidecar_urls:
            log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
    else:
        log(f"No sidecar found for {media_path.name}", file=sys.stderr)

    # Normalize all title tags to use spaces instead of underscores BEFORE merging,
    # so that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag

    sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
    pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]

    # Merge tags from the PipeObject with tags from the sidecar.
    # NOTE: Remove ALL existing title tags and use only a filename-based title;
    # the filename is the source of truth for the title.
    tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
                           if not str(t).strip().lower().startswith("title:")]
    # Ensure ONE title tag based on the actual filename
    filename_title = media_path.stem.replace("_", " ").strip()
    if filename_title:
        tags = [f"title:{filename_title}"] + tags_without_titles
    else:
        tags = tags_without_titles
    known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
    if pipe_object_tags:
        log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
    # Write metadata to the file before uploading (only for local storage, not for Hydrus;
    # Hydrus stores tags separately, so the file itself need not be modified)
    if location != 'hydrus':
        try:
            if tags:
                # Determine the file kind from the extension
                file_kind = ''
                sfx = media_path.suffix.lower()
                if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
                    file_kind = 'audio'
                elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
                    file_kind = 'video'
                if embed_metadata_in_file(media_path, tags, file_kind):
                    log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
                else:
                    log(f"Note: Could not embed metadata in file (may not be a supported format)", file=sys.stderr)
        except Exception as exc:
            log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
    else:
        log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)

    # Use the FileStorage backend to upload to Hydrus
    try:
        file_hash = storage["hydrus"].upload(
            media_path,
            config=config,
            tags=tags,
        )
        log(f"Hydrus: {file_hash}", file=sys.stderr)
    except Exception as exc:
        log(f"Failed: {exc}", file=sys.stderr)
        return 1
    # Associate known URLs in Hydrus metadata
    url_count = 0
    if known_urls:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                for url in known_urls:
                    u = str(url or "").strip()
                    if not u:
                        continue
                    try:
                        client.associate_url(file_hash, u)
                    except Exception as exc:
                        log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
                        continue
                    url_count += 1
        except Exception as exc:
            log(f"Failed to associate URLs: {exc}", file=sys.stderr)
    if url_count:
        log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
    else:
        log(f"No URLs to associate", file=sys.stderr)
    _cleanup_sidecar_files(media_path, sidecar_path)

    # Update the in-memory result for downstream pipes
    try:
        # Only update piped result objects; direct -path usage may have a dummy result
        setattr(result, "hash_hex", file_hash)
        # Preserve media_kind for downstream commands (e.g., open)
        if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
            # Try to infer media_kind from the file extension, or keep the existing one
            suffix = media_path.suffix.lower()
            if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
                setattr(result, "media_kind", "document")
        if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
            cols = list(getattr(result, "columns"))
            if ("Hash", file_hash) not in cols:
                cols.append(("Hash", file_hash))
            setattr(result, "columns", cols)
    except Exception:
        pass
    # If the -delete flag is set, delete the file and .tags after a successful upload.
    # Also delete if the file is a temporary file from merge-file (contains .dlhx_ or "(merged)")
    is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name
    if delete_after_upload or is_temp_merge:
        log(f"Deleting local files (as requested or temp file)...", file=sys.stderr)
        try:
            media_path.unlink()
            log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
        except OSError as exc:
            log(f"Failed to delete file: {exc}", file=sys.stderr)
        # Delete the .tags sidecar if it exists
        if sidecar_path is not None:
            try:
                sidecar_path.unlink()
                log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
            except OSError as exc:
                log(f"Failed to delete sidecar: {exc}", file=sys.stderr)

    # Decide whether to surface search-file results at the end of the pipeline
    stage_ctx = ctx.get_stage_context()
    is_storage_target = location is not None
    should_display = is_storage_target and (stage_ctx is None or stage_ctx.is_last_stage)
    if (not should_display) or not file_hash:
        log(f"Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
    # Emit a result for Hydrus uploads so downstream commands know about it
    if location == 'hydrus':
        # Extract the title from the original result, falling back to the filename
        result_title = extract_title_from_result(result) or media_path.name
        result_dict = create_pipe_object_result(
            source='hydrus',
            identifier=file_hash,
            file_path=f"hydrus:{file_hash}",
            cmdlet_name='add-file',
            title=result_title,
            file_hash=file_hash,
            extra={
                'storage_source': 'hydrus',
                'hydrus_hash': file_hash,
                'tags': tags,
                'known_urls': known_urls,
            }
        )
        ctx.emit(result_dict)
        # Clear the stage table so a downstream @N doesn't try to re-run download-data;
        # the next stage will use these Hydrus file results, not format objects
        ctx.set_current_stage_table(None)

    # If this is the last stage (or not in a pipeline), show the file via search-file
    if should_display and file_hash:
        try:
            from cmdlets import search_file as search_cmdlet
            search_cmdlet._run(None, [f"hash:{file_hash}"], config)
        except Exception:
            debug("search-file lookup after add-file failed", file=sys.stderr)
    elif file_hash:
        # Not displaying search results here, so report completion normally
        log(f"Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)

    return 0
CMDLET = Cmdlet(
    name="add-file",
    summary="Upload a media file to a specified location (Hydrus, a file provider, or a local directory).",
    usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
    args=[
        CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
        SharedArgs.STORAGE,  # For hydrus, local, matrix, or directory paths
        CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
        CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
    ],
    details=[
        "- Storage location options (use -storage):",
        " hydrus: Upload to the Hydrus database with metadata tagging",
        " local: Copy the file to the configured local library",
        " matrix: Upload via the Matrix storage backend",
        " <path>: Copy the file to the specified directory",
        "- File provider options (use -provider):",
        " 0x0: Upload to 0x0.st for temporary hosting with a public URL",
        "- Accepts files of the official Hydrus-supported types: images, animations, videos, audio, applications, projects, and archives.",
        "- When uploading to Hydrus: adds tags from the .tags sidecar and associates known_urls",
        "- When using a file provider: uploads to the service and adds the URL to the sidecar",
        "- When copying locally: copies the file with its original metadata preserved",
        "- Use the -delete flag to automatically delete the file and .tags after a successful operation.",
    ],
)
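
# Example invocations (illustrative pipelines; the URL and paths are made up):
#   add-file -path C:\Music\creep.mp3 -storage hydrus
#   download-data "https://example.org/video" | @1 | add-file -storage local
#   search-file -provider openlibrary "dune" | @1 | add-file -storage local
#   download-data "https://example.org/video" | @1 | add-file -provider 0x0 -delete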