# Medios-Macina/cmdlets/add_file.py
# (source snapshot: 2025-11-25 20:09:33 -08:00 — 910 lines, 39 KiB, Python)

from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Iterable, Tuple
from collections.abc import Iterable as IterableABC
import json
from pathlib import Path
import sys
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from helper.logger import log, debug
from helper.file_storage import FileStorage
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result,
extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result,
merge_sequences, extract_relationships, extract_duration
)
from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB
from helper.utils import sha256_file
from metadata import embed_metadata_in_file
# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
# Initialize file storage system (module-level singleton; backends are looked
# up by key, e.g. storage["local"] / storage["hydrus"])
storage = FileStorage()
def _guess_media_kind_from_suffix(media_path: Path) -> str:
suffix = media_path.suffix.lower()
if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
return 'audio'
if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
return 'video'
if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
return 'image'
if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
return 'document'
return 'other'
def _resolve_media_kind(result: Any, media_path: Path) -> str:
    """Return the media kind for a result, preferring explicit metadata.

    Order of precedence: a PipeObject's media_kind attribute, a dict's
    'media_kind' key, a dict's nested metadata['media_kind'], and finally
    a guess from the file extension.
    """
    if isinstance(result, models.PipeObject):
        kind = getattr(result, 'media_kind', None)
        if kind:
            return str(kind)
    elif isinstance(result, dict):
        kind = result.get('media_kind')
        if not kind:
            nested = result.get('metadata')
            if isinstance(nested, dict):
                kind = nested.get('media_kind')
        if kind:
            return str(kind)
    return _guess_media_kind_from_suffix(media_path)
def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]:
    """Collect stored metadata for *media_path*.

    Returns a tuple ``(sidecar_path, file_hash, tags, known_urls)``.  For
    files with a ``local`` origin the local-library database is consulted
    first (in which case ``sidecar_path`` is None); otherwise, or if the DB
    yields nothing, a sidecar file next to the media file is read.
    All failures are swallowed or logged — this is best-effort lookup.
    """
    # For local origin, try to read from local database first
    if origin and origin.lower() == "local" and config:
        try:
            from helper.local_library import LocalLibraryDB
            from config import get_local_storage_path
            try:
                db_root = get_local_storage_path(config)
            except Exception:
                db_root = None
            if db_root:
                try:
                    db = LocalLibraryDB(Path(db_root))
                    try:
                        # Get tags and metadata from database
                        tags = db.get_tags(media_path) or []
                        metadata = db.get_metadata(media_path) or {}
                        known_urls = metadata.get("known_urls") or []
                        file_hash = metadata.get("hash")
                        # Only short-circuit when the DB actually had something;
                        # otherwise fall through to the sidecar lookup below
                        if tags or known_urls or file_hash:
                            debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)")
                            return None, file_hash, tags, known_urls
                    finally:
                        db.close()
                except Exception as exc:
                    log(f"⚠️ Could not query local database: {exc}", file=sys.stderr)
        except Exception:
            # Import/config errors are non-fatal: sidecar lookup still applies
            pass
    # Fall back to sidecar file lookup
    try:
        sidecar_path = find_sidecar(media_path)
    except Exception:
        sidecar_path = None
    if not sidecar_path or not sidecar_path.exists():
        return None, None, [], []
    try:
        hash_value, tags, known_urls = read_sidecar(sidecar_path)
        return sidecar_path, hash_value, tags or [], known_urls or []
    except Exception as exc:
        # Sidecar exists but is unreadable: still report its path so callers
        # can clean it up later
        log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr)
        return sidecar_path, None, [], []
def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]:
    """Pick the best available file hash.

    Precedence: hash carried on the result (PipeObject attribute or dict
    key), then *fallback_hash*, then a freshly computed SHA-256 of
    *file_path*.  Returns None only when computing the digest fails.
    """
    chosen: Optional[str] = None
    if isinstance(result, models.PipeObject):
        chosen = result.file_hash
    elif isinstance(result, dict):
        chosen = result.get('file_hash') or result.get('hash')
    if not chosen:
        chosen = fallback_hash
    if chosen:
        return str(chosen)
    try:
        return sha256_file(file_path)
    except Exception as exc:
        log(f"⚠️ Could not compute SHA-256 for {file_path.name}: {exc}", file=sys.stderr)
        return None
def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None:
targets = [
media_path.parent / (media_path.name + '.metadata'),
media_path.parent / (media_path.name + '.notes'),
media_path.parent / (media_path.name + '.tags'),
media_path.parent / (media_path.name + '.tags.txt'),
]
targets.extend(extra_paths)
for target in targets:
if not target:
continue
try:
path_obj = Path(target)
if path_obj.exists():
path_obj.unlink()
except Exception:
continue
def _persist_local_metadata(
    library_root: Path,
    dest_path: Path,
    tags: list[str],
    known_urls: list[str],
    file_hash: Optional[str],
    relationships: Optional[Dict[str, Any]],
    duration: Optional[float],
    media_kind: str,
) -> None:
    """Record metadata and tags for *dest_path* in the local-library DB.

    Best-effort: any failure is logged with a traceback and swallowed so the
    file transfer that preceded this call is not undone.
    """
    payload = {
        'hash': file_hash,
        'known_urls': known_urls,
        # NOTE(review): defaults to [] although the parameter is typed as a
        # dict — confirm which shape LocalLibraryDB.save_metadata expects
        'relationships': relationships or [],
        'duration': duration,
        'size': None,  # filled in below from stat(); stays None if stat fails
        'ext': dest_path.suffix.lower(),
        'media_type': media_kind,
        'media_kind': media_kind,
    }
    try:
        payload['size'] = dest_path.stat().st_size
    except OSError:
        payload['size'] = None
    try:
        debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}")
        db_path = Path(library_root) / ".downlow_library.db"
        debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}")
        debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}")
        debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}")
        with LocalLibraryDB(library_root) as db:
            # Save metadata FIRST to ensure file entry is created in DB
            if any(payload.values()):
                debug(f"[_persist_local_metadata] Saving metadata payload first")
                try:
                    db.save_metadata(dest_path, payload)
                    debug(f"[_persist_local_metadata] ✅ Metadata saved")
                except Exception as meta_exc:
                    # Re-raise so the outer handler logs a full traceback
                    log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr)
                    raise
            # Save tags to DB synchronously in same transaction
            # For local storage, DB is the primary source of truth
            if tags:
                try:
                    debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB")
                    db.save_tags(dest_path, tags)
                    debug(f"[_persist_local_metadata] ✅ Tags saved to DB")
                except Exception as tag_exc:
                    log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr)
                    raise
            # NOTE: Sidecar files are intentionally NOT created for local storage
            # Local storage uses database as primary source, not sidecar files
            debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully")
    except Exception as exc:
        log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr)
        import traceback
        log(traceback.format_exc(), file=sys.stderr)
def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any, config: Optional[Dict[str, Any]] = None) -> Tuple[int, Optional[Path]]:
    """Transfer a file to local storage and return (exit_code, destination_path).

    Merges tags/URLs from the piped result with sidecar/DB metadata, moves
    the file via the 'local' storage backend, persists the merged metadata,
    and removes leftover sidecar files.

    Args:
        media_path: Path to source file
        destination_root: Destination directory
        result: Result object with metadata
        config: Configuration dictionary
    Returns:
        Tuple of (exit_code, destination_path)
        - exit_code: 0 on success, 1 on failure
        - destination_path: Path to moved file on success, None on failure
    """
    destination_root = destination_root.expanduser()
    try:
        destination_root.mkdir(parents=True, exist_ok=True)
    except Exception as exc:
        log(f"❌ Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    tags_from_result = extract_tags_from_result(result)
    urls_from_result = extract_known_urls_from_result(result)
    # Get origin from result if available
    result_origin = None
    if hasattr(result, "origin"):
        result_origin = result.origin
    elif isinstance(result, dict):
        result_origin = result.get("origin") or result.get("source")
    sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config)
    # Normalize all title tags to use spaces instead of underscores BEFORE merging
    # This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag
    tags_from_result = [normalize_title_tag(t) for t in tags_from_result]
    sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
    # Merge tags carefully: if URL has title tag, don't include sidecar title tags
    # This prevents duplicate title: tags when URL provides a title
    has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result)
    if has_url_title:
        # URL has a title, filter out any sidecar title tags to avoid duplication
        sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
        merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True)
    else:
        # No URL title, use all sidecar tags
        merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True)
    merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False)
    relationships = extract_relationships(result)
    duration = extract_duration(result)
    try:
        # move=True: the source file is relocated, not copied
        dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
    except Exception as exc:
        log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
        return 1, None
    dest_path = Path(dest_file)
    file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
    media_kind = _resolve_media_kind(result, dest_path)
    # Ensure only ONE title tag that matches the actual filename
    # Remove all existing title tags and add one based on the saved filename
    merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")]
    filename_title = dest_path.stem.replace("_", " ").strip()
    if filename_title:
        merged_tags_no_titles.insert(0, f"title:{filename_title}")
    _persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind)
    _cleanup_sidecar_files(media_path, sidecar_path)
    debug(f"✅ Moved to local library: {dest_path}")
    return 0, dest_path
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Upload/copy a file to specified location.

    Resolves a media path from the piped result (or -path), then routes to
    one of: a file provider (-provider), Hydrus, the configured local
    library, or an arbitrary directory (-storage).  List results and
    playlist items are handled by recursing into this function per item.

    Returns 0 on success, non-zero on failure.
    """
    import sys  # For stderr output (re-import shadows the module-level sys; harmless)
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    debug("Starting add-file cmdlet")
    # Handle list of results (from piped commands that emit multiple items)
    if isinstance(result, list):
        debug(f"Processing {len(result)} piped files")
        success_count = 0
        for item in result:
            exit_code = _run(item, _args, config)
            if exit_code == 0:
                success_count += 1
        # Overall success if at least one item was added
        return 0 if success_count > 0 else 1
    # Parse arguments using CMDLET spec
    parsed = parse_cmdlet_args(_args, CMDLET)
    location: Optional[str] = None
    provider_name: Optional[str] = None
    delete_after_upload = False
    # Check if -path argument was provided to use direct file path instead of piped result
    path_arg = parsed.get("path")
    if path_arg:
        # Create a pseudo-result object from the file path
        media_path = Path(str(path_arg).strip())
        if not media_path.exists():
            log(f"❌ File not found: {media_path}")
            return 1
        # Create result dict with the file path and origin 'wild' for direct path inputs
        result = {"target": str(media_path), "origin": "wild"}
        log(f"Using direct file path: {media_path}")
    # Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage"
    location = parsed.get("storage")
    if location:
        location = str(location).lower().strip()
    # Get file provider from parsed args
    provider_name = parsed.get("provider")
    if provider_name:
        provider_name = str(provider_name).lower().strip()
    # Check for delete flag (presence in parsed dict means it was provided)
    delete_after_upload = "delete" in parsed
    # Either storage or provider must be specified, but not both
    if location is None and provider_name is None:
        log("Either -storage or -provider must be specified")
        log(" -storage options: 'hydrus', 'local', or a directory path")
        log(" -provider options: '0x0'")
        return 1
    if location is not None and provider_name is not None:
        log("❌ Cannot specify both -storage and -provider")
        return 1
    # Validate location (storage backends)
    is_valid_location = False
    if location is not None:
        valid_locations = {'hydrus', 'local'}
        is_valid_location = location in valid_locations
    # A slash/backslash/drive-colon marks the value as a directory path
    is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location)
    if location is not None and not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', '0x0', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
        return 1
    # Extract tags/known URLs from pipeline objects if available
    pipe_object_tags = extract_tags_from_result(result)
    if pipe_object_tags:
        log(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', '.join(pipe_object_tags[:5])}", file=sys.stderr)
    pipe_known_urls = extract_known_urls_from_result(result)
    # Resolve media path: get from piped result
    # Support both object attributes (getattr) and dict keys (get)
    target = None
    origin = None
    # Try object attributes first
    if hasattr(result, "target"):
        target = result.target
    elif hasattr(result, "path"):
        target = result.path
    elif hasattr(result, "file_path"):
        target = result.file_path
    # Try dict keys if object attributes failed
    elif isinstance(result, dict):
        target = (result.get("target") or result.get("path") or result.get("file_path") or
                  result.get("__file_path") or result.get("__path") or result.get("__target"))
    # Get origin to detect Hydrus files
    if hasattr(result, "origin"):
        origin = result.origin
    elif hasattr(result, "source"):
        origin = result.source
    elif isinstance(result, dict):
        origin = result.get("origin") or result.get("source") or result.get("__source")
    # Convert target to string and preserve URLs (don't let Path() mangle them)
    target_str = str(target) if target else None
    # Check if this is a playlist item that needs to be downloaded first
    is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe"
    if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")):
        # This is a playlist item URL - we need to download it first
        log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr)
        # Extract item number if available
        item_num = None
        if "__action" in result and result["__action"].startswith("playlist-item:"):
            item_num = result["__action"].split(":")[1]
        elif "index" in result:
            item_num = result["index"]
        # Call download-data to download this specific item
        # Pass the item number so it knows which track to download
        from cmdlets import download_data as dl_module
        # Capture emissions from download-data to process them
        captured_results = []
        original_emit = ctx.emit
        def capture_emit(obj):
            captured_results.append(obj)
            # Also emit to original so user sees progress/output if needed
            # But since add-file is usually terminal, we might not need to
            # original_emit(obj)
        # Temporarily hook the pipeline emit function
        ctx.emit = capture_emit
        try:
            if item_num:
                # Pass a marker dict to tell download-data which item to get
                download_result = dl_module._run(
                    {
                        "__playlist_url": str(target_str),
                        "__playlist_item": int(item_num)
                    },
                    [],
                    config
                )
            else:
                # Fallback: just download the URL (will show all items)
                download_result = dl_module._run(None, [str(target_str)], config)
        finally:
            # Restore original emit function
            ctx.emit = original_emit
        if download_result != 0:
            log(f"❌ Failed to download playlist item", file=sys.stderr)
            return 1
        log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr)
        # Process the downloaded files recursively
        success_count = 0
        for res in captured_results:
            # Recursively call add-file with the downloaded result
            # This ensures tags and metadata from download-data are applied
            if _run(res, _args, config) == 0:
                success_count += 1
        return 0 if success_count > 0 else 1
    # Determine media_path from result
    media_path: Optional[Path] = None
    is_hydrus_file = origin and origin.lower() == "hydrus"
    if target_str:
        # Check if it's a URL or Hydrus hash
        if target_str.lower().startswith(("http://", "https://")):
            media_path = None  # Will handle as Hydrus file below
        elif not is_hydrus_file:
            # Only treat as local path if not a Hydrus file
            media_path = Path(target_str)
    if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))):
        # Check if this is a format object from download-data
        if isinstance(result, dict) and result.get('format_id') is not None:
            log("❌ Format object received, but add-file expects a downloaded file")
            log(f" Tip: Use @N to automatically select and download the format")
            log(f" Streamlined workflow:")
            log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local")
            log(f" (The @N automatically expands to download-data \"URL\" -item N)")
            return 1
        log("❌ File not found: provide a piped file result or local file path")
        return 1
    # Check if this is a Hydrus file - fetch the actual file path from Hydrus
    if is_hydrus_file and target_str:
        log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr)
        try:
            from helper import hydrus
            # Get the Hydrus client
            client = hydrus.get_client(config)
            if not client:
                log(f"❌ Hydrus client unavailable", file=sys.stderr)
                return 1
            # target_str is the hash - need to get the actual file path from Hydrus
            file_hash = target_str
            # Call the /get_files/file_path endpoint to get the actual file path
            response = client.get_file_path(file_hash)
            if not response or not isinstance(response, dict):
                log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr)
                return 1
            file_path_str = response.get("path")
            if not file_path_str:
                log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
                return 1
            media_path = Path(file_path_str)
            if not media_path.exists():
                log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
                return 1
            log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
        except Exception as exc:
            log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
            import traceback
            log(f"Traceback: {traceback.format_exc()}", file=sys.stderr)
            return 1
    # Generic URL handler: if target is a URL and we haven't resolved a local path yet
    # This handles cases like "search-file -provider openlibrary ... | add-file -storage local"
    if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None:
        log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr)
        from cmdlets import download_data as dl_module
        dl_args = []
        if location:
            dl_args.extend(["-storage", location])
        # Map provider 0x0 to storage 0x0 for download-data
        if provider_name == "0x0":
            dl_args.extend(["-storage", "0x0"])
        return dl_module._run(result, dl_args, config)
    if media_path is None:
        log("File path could not be resolved")
        return 1
    if not media_path.exists() or not media_path.is_file():
        log(f"File not found: {media_path}")
        return 1
    # Validate file type - only accept Hydrus-supported files
    file_extension = media_path.suffix.lower()
    if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
        log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
        log(f"Hydrus supports the following file types:", file=sys.stderr)
        # Display by category from hydrus_wrapper
        for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()):
            ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys()))
            log(f"{category.capitalize()}: {ext_list}", file=sys.stderr)
        log(f"Skipping this file: {media_path.name}", file=sys.stderr)
        return 1
    # Handle based on provider or storage
    if provider_name is not None:
        # Use file provider (e.g., 0x0.st)
        from helper.search_provider import get_file_provider
        log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr)
        try:
            file_provider = get_file_provider(provider_name, config)
            if file_provider is None:
                log(f"❌ File provider '{provider_name}' not available", file=sys.stderr)
                return 1
            hoster_url = file_provider.upload(media_path)
            log(f"✅ File uploaded to {provider_name}: {hoster_url}", file=sys.stderr)
            # Associate the URL with the file in Hydrus if possible.
            # file_hash is only bound when the Hydrus-origin branch above ran;
            # locals().get avoids a NameError in the common case
            current_hash = locals().get('file_hash')
            if not current_hash:
                current_hash = _resolve_file_hash(result, None, media_path)
            if current_hash:
                try:
                    client = hydrus_wrapper.get_client(config)
                    if client:
                        client.associate_url(current_hash, hoster_url)
                        log(f"✅ Associated URL with file hash {current_hash}", file=sys.stderr)
                except Exception as exc:
                    log(f"⚠️ Could not associate URL with Hydrus file: {exc}", file=sys.stderr)
        except Exception as exc:
            log(f"{provider_name} upload failed: {exc}", file=sys.stderr)
            return 1
        if delete_after_upload:
            try:
                media_path.unlink()
                _cleanup_sidecar_files(media_path)
                log(f"✅ Deleted file and sidecar", file=sys.stderr)
            except Exception as exc:
                log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
        return 0
    # Handle storage-based operations (location is not None here)
    valid_locations = {'hydrus', 'local'}
    is_valid_location = location in valid_locations
    is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location)
    if not (is_valid_location or is_local_path):
        log(f"❌ Invalid location: {location}")
        log(f"Valid options: 'hydrus', 'local', or a directory path (e.g., C:\\Music or /home/user/music)")
        return 1
    if location == 'local':
        try:
            from config import get_local_storage_path
            resolved_dir = get_local_storage_path(config)
        except Exception:
            resolved_dir = None
        if not resolved_dir:
            resolved_dir = config.get("LocalDir") or config.get("OutputDir")
        if not resolved_dir:
            log("❌ No local storage path configured. Set 'storage.local.path' in config.json", file=sys.stderr)
            return 1
        log(f"Moving into configured local library: {resolved_dir}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config)
        # After successful local transfer, emit result for pipeline continuation
        # This allows downstream commands like add-tags to chain automatically
        if exit_code == 0 and dest_path:
            # Extract tags from result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract title from original result, fallback to filename if not available
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit result for local files, even if no tags
            # This allows @N selection and piping to downstream commands
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so downstream @N doesn't try to re-run download-data
            # Next stage will use these local file results, not format objects
            ctx.set_current_stage_table(None)
        return exit_code
    elif is_local_path:
        try:
            destination_root = Path(location)
        except Exception as exc:
            log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
            return 1
        log(f"Moving to local path: {destination_root}", file=sys.stderr)
        exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config)
        # After successful local transfer, emit result for pipeline continuation
        if exit_code == 0 and dest_path:
            # Extract tags from result for emission
            emit_tags = extract_tags_from_result(result)
            file_hash = _resolve_file_hash(result, None, dest_path)
            # Extract title from original result, fallback to filename if not available
            result_title = extract_title_from_result(result) or dest_path.name
            # Always emit result for local files, even if no tags
            # This allows @N selection and piping to downstream commands
            result_dict = create_pipe_object_result(
                source='local',
                identifier=str(dest_path),
                file_path=str(dest_path),
                cmdlet_name='add-file',
                title=result_title,
                file_hash=file_hash,
                tags=emit_tags if emit_tags else [],
                target=str(dest_path)  # Explicit target for get-file
            )
            ctx.emit(result_dict)
            # Clear the stage table so downstream @N doesn't try to re-run download-data
            # Next stage will use these local file results, not format objects
            ctx.set_current_stage_table(None)
        return exit_code
    # location == 'hydrus'
    # Compute file hash to check if already in Hydrus
    log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr)
    log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr)
    try:
        file_hash = sha256_file(media_path)
    except Exception as exc:
        log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr)
        return 1
    log(f"File hash: {file_hash}", file=sys.stderr)
    # Read sidecar tags and known URLs first (for tagging)
    sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config)
    if sidecar_path:
        log(f"Found sidecar at: {sidecar_path}", file=sys.stderr)
        log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr)
        if sidecar_tags:
            log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr)
        if sidecar_urls:
            log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr)
    else:
        log(f"No sidecar found for {media_path.name}", file=sys.stderr)
    # Normalize all title tags to use spaces instead of underscores BEFORE merging
    # This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title
    # NOTE(review): duplicates the helper inside _handle_local_transfer —
    # candidate for extraction to module level
    def normalize_title_tag(tag: str) -> str:
        """Normalize a title tag by replacing underscores with spaces."""
        if str(tag).strip().lower().startswith("title:"):
            parts = tag.split(":", 1)
            if len(parts) == 2:
                value = parts[1].replace("_", " ").strip()
                return f"title:{value}"
        return tag
    sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags]
    pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags]
    # Merge tags from PipeObject with tags from sidecar
    # NOTE: Remove ALL existing title tags and use only filename-based title
    # The filename is the source of truth for the title
    tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True)
                           if not str(t).strip().lower().startswith("title:")]
    # Ensure ONE title tag based on the actual filename
    filename_title = media_path.stem.replace("_", " ").strip()
    if filename_title:
        tags = [f"title:{filename_title}"] + tags_without_titles
    else:
        tags = tags_without_titles
    known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False)
    if pipe_object_tags:
        log(f"Merged pipeline tags. Total tags now: {len(tags)}", file=sys.stderr)
    # Write metadata to file before uploading (only for local storage, not for Hydrus)
    # Hydrus stores tags separately, so we don't need to modify the file
    # NOTE(review): on this code path location == 'hydrus' always holds, so the
    # embed branch below appears unreachable — confirm before removing
    if location != 'hydrus':
        try:
            if tags:
                # Determine file kind from extension
                file_kind = ''
                sfx = media_path.suffix.lower()
                if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
                    file_kind = 'audio'
                elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
                    file_kind = 'video'
                if embed_metadata_in_file(media_path, tags, file_kind):
                    log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr)
                else:
                    log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr)
        except Exception as exc:
            log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr)
    else:
        log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr)
    # Use FileStorage backend to upload to Hydrus
    try:
        file_hash = storage["hydrus"].upload(
            media_path,
            config=config,
            tags=tags,
        )
        log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
    except Exception as exc:
        log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
        return 1
    # Associate known URLs in Hydrus metadata
    url_count = 0
    if known_urls:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                for url in known_urls:
                    u = str(url or "").strip()
                    if not u:
                        continue
                    try:
                        client.associate_url(file_hash, u)
                    except Exception as exc:
                        log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr)
                        continue
                    url_count += 1
        except Exception as exc:
            log(f"Failed to associate URLs: {exc}", file=sys.stderr)
    if url_count:
        log(f"✅ Associated {url_count} URL(s)", file=sys.stderr)
    else:
        log(f"No URLs to associate", file=sys.stderr)
    _cleanup_sidecar_files(media_path, sidecar_path)
    # Update in-memory result for downstream pipes
    try:
        # Only update piped result objects; direct -path usage may have a dummy result
        setattr(result, "hash_hex", file_hash)
        # Preserve media_kind for downstream commands (e.g., open)
        if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other":
            # Try to infer media_kind from file extension or keep existing
            suffix = media_path.suffix.lower()
            if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}:
                setattr(result, "media_kind", "document")
        if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list):
            cols = list(getattr(result, "columns"))
            if ("Hash", file_hash) not in cols:
                cols.append(("Hash", file_hash))
            setattr(result, "columns", cols)
    except Exception:
        # setattr on a dict result raises; intentionally ignored
        pass
    # If -delete flag is set, delete the file and .tags after successful upload
    if delete_after_upload:
        log(f"Deleting local files (as requested)...", file=sys.stderr)
        try:
            media_path.unlink()
            log(f"✅ Deleted: {media_path.name}", file=sys.stderr)
        except OSError as exc:
            log(f"Failed to delete file: {exc}", file=sys.stderr)
        # Delete .tags sidecar if it exists
        if sidecar_path is not None:
            try:
                sidecar_path.unlink()
                log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr)
            except OSError as exc:
                log(f"Failed to delete sidecar: {exc}", file=sys.stderr)
    log(f"✅ Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr)
    # Emit result for Hydrus uploads so downstream commands know about it
    if location == 'hydrus':
        # Extract title from original result, fallback to filename if not available
        result_title = extract_title_from_result(result) or media_path.name
        result_dict = create_pipe_object_result(
            source='hydrus',
            identifier=file_hash,
            file_path=f"hydrus:{file_hash}",
            cmdlet_name='add-file',
            title=result_title,
            file_hash=file_hash,
            extra={
                'storage_source': 'hydrus',
                'hydrus_hash': file_hash,
                'tags': tags,
                'known_urls': known_urls,
            }
        )
        ctx.emit(result_dict)
        # Clear the stage table so downstream @N doesn't try to re-run download-data
        # Next stage will use these Hydrus file results, not format objects
        ctx.set_current_stage_table(None)
    return 0
# Cmdlet registration spec; also serialized verbatim as the --help output
CMDLET = Cmdlet(
    name="add-file",
    summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
    usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
    args=[
        CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"),
        SharedArgs.STORAGE,  # For hydrus, local, or directory paths
        CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"),
        CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"),
    ],
    details=[
        "- Storage location options (use -storage):",
        " hydrus: Upload to Hydrus database with metadata tagging",
        " local: Copy file to local directory",
        " <path>: Copy file to specified directory",
        "- File provider options (use -provider):",
        " 0x0: Upload to 0x0.st for temporary hosting with public URL",
        "- Accepts files from official Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.",
        "- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls",
        "- When using file provider: uploads to service, adds URL to sidecar",
        "- When copying locally: copies file with original metadata preserved",
        "- Use -delete flag to automatically delete the file and .tags after successful operation.",
    ],
)