# Medios-Macina/cmdlet/add_file.py
from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, Tuple, List
from pathlib import Path
import sys
import shutil

import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from Store import Store
from ._shared import (
    Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
    extract_tag_from_result, extract_title_from_result, extract_url_from_result,
    merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object,
)
from ._shared import collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata

# Use the official Hydrus supported filetypes exposed by hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
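
# SUPPORTED_MEDIA_EXTENSIONS is assumed to be a set of lowercase, dotted
# extensions (e.g. ".mp4", ".flac"); _validate_source below checks a file's
# suffix.lower() for membership in it.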

class Add_File(Cmdlet):
    """Add a file into the DB."""

    def __init__(self) -> None:
        """Initialize the add-file cmdlet."""
        super().__init__(
            name="add-file",
            summary="Upload a media file to a specified location (Hydrus, file provider, or local directory).",
            usage="add-file (-path <filepath> | <piped>) (-store <location> | -provider <fileprovider>) [-delete]",
            arg=[
                SharedArgs.PATH,
                SharedArgs.STORE,
                SharedArgs.HASH,
                CmdletArg(name="provider", type="string", required=False, description="File hosting provider (e.g., 0x0)", alias="prov"),
                CmdletArg(name="delete", type="flag", required=False, description="Delete file after successful upload", alias="del"),
            ],
            detail=[
                "- Storage location options (use -store):",
                "  hydrus: Upload to Hydrus database with metadata tagging",
                "  local: Copy file to local directory",
                "  <path>: Copy file to specified directory",
                "- File provider options (use -provider):",
                "  0x0: Upload to 0x0.st for temporary hosting",
            ],
            exec=self.run,
        )
        self.register()
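
    # Illustrative invocations (file and store names here are assumptions for
    # the example, not fixtures from this repo):
    #   add-file -path ./clip.mp4 -store hydrus
    #   add-file -path ./clip.mp4 -store D:/exports -delete
    #   @1 | add-file -provider 0x0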

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution entry point."""
        # Parse arguments
        parsed = parse_cmdlet_args(args, self)

        # Initialize state
        path_arg = parsed.get("path")
        location = parsed.get("store")
        provider_name = parsed.get("provider")
        delete_after = parsed.get("delete", False)

        # Coerce result to a PipeObject; if result is a list, prefer the first element
        effective_result = result
        if isinstance(result, list) and result:
            first_item = result[0]
            # Prefer the first item if it is a dict or PipeObject
            if isinstance(first_item, (dict, models.PipeObject)):
                effective_result = first_item
        pipe_obj = coerce_to_pipe_object(effective_result, path_arg)

        # Debug: log input result details
        debug(f"[add-file] INPUT result type={type(result).__name__}")
        if isinstance(result, list):
            debug(f"[add-file] INPUT result is list with {len(result)} items")
            if result and isinstance(result[0], dict):
                first = result[0]
                hash_val = first.get('hash')
                hash_str = hash_val[:12] + "..." if hash_val else "N/A"
                debug(f"[add-file] First item details: title={first.get('title')}, hash={hash_str}, store={first.get('store', 'N/A')}")
        elif isinstance(result, dict):
            hash_val = result.get('hash')
            hash_str = hash_val[:12] + "..." if hash_val else "N/A"
            debug(f"[add-file] INPUT result is dict: title={result.get('title')}, hash={hash_str}, store={result.get('store', 'N/A')}")

        # Debug: log parsed arguments
        debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")

        # Resolve source - returns (media_path_or_url, file_hash)
        media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config)
        debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
        if not media_path_or_url:
            debug("[add-file] ERROR: Could not resolve source file/URL")
            return 1

        # Update pipe_obj with the resolved path
        pipe_obj.path = str(media_path_or_url)

        # Check whether it is a URL before validating as a file
        if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
            debug(f"Detected URL target, delegating to download-media: {media_path_or_url}")
            return self._delegate_to_download_data(result, media_path_or_url, location, provider_name, args, config)

        # Convert to Path and validate
        media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url

        # Validate source
        if not self._validate_source(media_path):
            debug(f"[add-file] ERROR: Source validation failed for {media_path}")
            return 1

        # Debug: log execution path decision
        debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}")
        debug(f"  media_path={media_path}, exists={media_path.exists()}")

        # Execute transfer based on destination (using the Store registry)
        if provider_name:
            debug("[add-file] ROUTE: file provider upload")
            return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after)
        elif location:
            # Check whether location is a registered backend name
            try:
                store = Store(config)
                backends = store.list_backends()
                if location in backends:
                    debug(f"[add-file] ROUTE: storage backend '{location}'")
                    return self._handle_storage_backend(media_path, location, pipe_obj, config, delete_after)
                else:
                    # Treat as a local export path
                    debug(f"[add-file] ROUTE: local export to path '{location}'")
                    return self._handle_local_export(media_path, location, pipe_obj, config, delete_after)
            except Exception as exc:
                debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
                log(f"Invalid location: {location}", file=sys.stderr)
                return 1
        else:
            debug("[add-file] ERROR: No location or provider specified")
            log("No storage location or provider specified", file=sys.stderr)
            return 1

    @staticmethod
    def _resolve_source(
        result: Any,
        path_arg: Optional[str],
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
    ) -> Tuple[Optional[Path | str], Optional[str]]:
        """Resolve the source file path from args or the pipeline result.

        PRIORITY: the hash+store pattern is preferred over path-based resolution.
        This ensures consistency when @N selections pass hash+store identifiers.

        Returns (media_path_or_url, file_hash), where media_path_or_url can be
        a Path object or a URL string.
        """
        # PRIORITY 1: hash+store from the result dict (most reliable for @N selections)
        if isinstance(result, dict):
            result_hash = result.get("hash")
            result_store = result.get("store")
            if result_hash and result_store:
                debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}")
                # Use get_file to retrieve the file from the specific store
                try:
                    store = Store(config)
                    if result_store in store.list_backends():
                        backend = store[result_store]
                        media_path = backend.get_file(result_hash)
                        if isinstance(media_path, Path) and media_path.exists():
                            pipe_obj.path = str(media_path)
                            debug(f"[add-file] Retrieved file from {result_store}: {media_path}")
                            return media_path, result_hash
                        if isinstance(media_path, str) and media_path.lower().startswith(("http://", "https://")):
                            pipe_obj.path = media_path
                            debug(f"[add-file] Retrieved URL from {result_store}: {media_path}")
                            return media_path, result_hash
                except Exception as exc:
                    debug(f"[add-file] Failed to retrieve via hash+store: {exc}")

        # PRIORITY 2: explicit path argument
        if path_arg:
            media_path = Path(path_arg)
            pipe_obj.path = str(media_path)
            debug(f"[add-file] Using explicit path argument: {media_path}")
            return media_path, None

        # PRIORITY 3: pipe_obj.path (check for a file before treating it as a URL)
        pipe_path = getattr(pipe_obj, "path", None)
        if pipe_path:
            pipe_path_str = str(pipe_path)
            debug(f"Resolved pipe_path: {pipe_path_str}")
            if pipe_path_str.startswith("hydrus:"):
                file_hash = pipe_path_str.split(":", 1)[1]
                media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
                return media_path, file_hash if success else None
            # If pipe_path is a URL, fall through to the URL handling below
            if not pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                media_path = Path(pipe_path_str)
                return media_path, None

        # PRIORITY 4: pipe_obj.url (streaming URL without a downloaded file)
        pipe_url = getattr(pipe_obj, "url", None)
        if pipe_url and isinstance(pipe_url, str):
            # Check whether it is a URL
            if pipe_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                debug(f"Detected URL in pipe_obj.url: {pipe_url}")
                return pipe_url, None

        # Try a hydrus hash from pipe_obj.extra or pipe_obj.hash
        hydrus_hash = None
        if isinstance(pipe_obj.extra, dict):
            hydrus_hash = pipe_obj.extra.get("hydrus_hash") or pipe_obj.extra.get("hash")
        hydrus_hash = hydrus_hash or pipe_obj.hash
        if hydrus_hash and hydrus_hash != "unknown":
            media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config)
            return media_path, str(hydrus_hash) if success else None

        # Try the result itself (a string path or URL)
        if isinstance(result, str):
            debug(f"Checking result string: {result}")
            # Check whether result is a URL before treating it as a file path
            if result.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                debug(f"Detected URL in result string: {result}")
                return result, None  # Return the URL string directly
            media_path = Path(result)
            pipe_obj.path = str(media_path)
            return media_path, None

        # Try the result if it is a list (pipelines can emit multiple results)
        if isinstance(result, list) and result:
            first_item = result[0]
            # If the first item is a string, it is either a URL or a file path
            if isinstance(first_item, str):
                debug(f"Checking result list[0]: {first_item}")
                if first_item.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                    debug(f"Detected URL in result list: {first_item}")
                    return first_item, None  # Return the URL string directly
                media_path = Path(first_item)
                pipe_obj.path = str(media_path)
                return media_path, None
            # If the first item is a dict, interpret it as a PipeObject-style result
            if isinstance(first_item, dict):
                # Look for path or path-like keys
                path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
                # If the dict includes a 'paths' list (multi-part/section download), prefer the first file
                if not path_candidate and isinstance(first_item.get("paths"), (list, tuple)) and first_item.get("paths"):
                    path_candidate = first_item.get("paths")[0]
                if path_candidate:
                    debug(f"Resolved path from result dict: {path_candidate}")
                    try:
                        media_path = Path(path_candidate)
                        pipe_obj.path = str(media_path)
                        return media_path, first_item.get("hash")
                    except Exception:
                        # Fall back to returning the string if it is not a valid path
                        return str(path_candidate), first_item.get("hash")
            # If the first item is a PipeObject instance
            try:
                if isinstance(first_item, models.PipeObject):
                    path_candidate = getattr(first_item, "path", None)
                    if path_candidate:
                        debug(f"Resolved path from PipeObject: {path_candidate}")
                        media_path = Path(path_candidate)
                        pipe_obj.path = str(media_path)
                        return media_path, getattr(first_item, "hash", None)
            except Exception:
                pass

        debug(f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}")
        log("File path could not be resolved")
        return None, None

    @staticmethod
    def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]:
        """Fetch the physical path of a file from Hydrus using its hash."""
        if not file_hash:
            return None, False
        try:
            client = hydrus_wrapper.get_client(config)
            if not client:
                log("❌ Hydrus client not available", file=sys.stderr)
                return None, False
            response = client.get_file_path(file_hash)
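            # Illustrative response shape (the concrete path is an assumption):
            #   {"path": "/hydrus/client_files/fxx/<hash>.mp4"}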
            file_path_str = response.get("path")
            if not file_path_str:
                log("❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
                return None, False
            media_path = Path(file_path_str)
            if not media_path.exists():
                log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
                return None, False
            log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
            return media_path, True
        except Exception as exc:
            log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
            return None, False

    @staticmethod
    def _validate_source(media_path: Optional[Path]) -> bool:
        """Validate that the source file exists and is supported."""
        if media_path is None:
            return False
        target_str = str(media_path)
        # URL targets skip the file-existence checks
        if target_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
            return True
        if not media_path.exists() or not media_path.is_file():
            log(f"File not found: {media_path}")
            return False
        # Validate the file type
        file_extension = media_path.suffix.lower()
        if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
            log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
            return False
        return True

    @staticmethod
    def _is_url_target(media_path: Optional[Path]) -> bool:
        """Check whether the target is a URL that needs downloading."""
        return bool(media_path and str(media_path).lower().startswith(("http://", "https://")))

    def _delegate_to_download_data(
        self,
        result: Any,
        url_str: str,
        location: Optional[str],
        provider_name: Optional[str],
        args: Sequence[str],
        config: Dict[str, Any],
    ) -> int:
        """Delegate URL handling to the download-media cmdlet."""
        log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)
        # Reuse the globally registered cmdlet instance to avoid duplicate registration
        from cmdlet.download_media import CMDLET as dl_cmdlet

        dl_args = list(args) if args else []
        # Put the URL at the front of the argument list for download-media
        dl_args.insert(0, url_str)
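        # Illustrative: url_str "https://example.com/v.mp4" with args ["-delete"]
        # yields dl_args ["https://example.com/v.mp4", "-delete"] before any
        # forwarded selection args.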
        # If the result carries selection args (e.g., -item from an @N selection), forward them
        if isinstance(result, dict) and "_selection_args" in result:
            selection_args = result["_selection_args"]
            if selection_args:
                dl_args.extend(selection_args)
        elif hasattr(result, 'extra') and isinstance(result.extra, dict) and "_selection_args" in result.extra:
            selection_args = result.extra["_selection_args"]
            if selection_args:
                dl_args.extend(selection_args)
        # download-media does not support a -storage flag: it downloads to the
        # configured directory, and add-file then handles storage.
        # Note: provider uploads (0x0) are not supported via this path.
        return dl_cmdlet.run(None, dl_args, config)

    @staticmethod
    def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
        url: List[str] = []
        try:
            if isinstance(pipe_obj.extra, dict):
                url = list(pipe_obj.extra.get("url") or [])
        except Exception:
            pass
        if not url and isinstance(result, dict):
            url = list(result.get("url") or [])
        if not url:
            url = list(extract_url_from_result(result) or [])
        return url

    @staticmethod
    def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
        try:
            rels = pipe_obj.get_relationships()
            if rels:
                return rels
        except Exception:
            pass
        if isinstance(result, dict) and result.get("relationships"):
            return result.get("relationships")
        try:
            return extract_relationships(result)
        except Exception:
            return None

    @staticmethod
    def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
        if getattr(pipe_obj, "duration", None) is not None:
            return pipe_obj.duration
        try:
            return extract_duration(result)
        except Exception:
            return None

    @staticmethod
    def _update_pipe_object_destination(
        pipe_obj: models.PipeObject,
        *,
        hash_value: str,
        store: str,
        path: Optional[str],
        tag: List[str],
        title: Optional[str],
        extra_updates: Optional[Dict[str, Any]] = None,
    ) -> None:
        pipe_obj.hash = hash_value
        pipe_obj.store = store
        pipe_obj.path = path
        pipe_obj.tag = tag
        if title:
            pipe_obj.title = title
        if isinstance(pipe_obj.extra, dict):
            pipe_obj.extra.update(extra_updates or {})
        else:
            pipe_obj.extra = dict(extra_updates or {})

    @staticmethod
    def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
        from result_table import format_result
        log(format_result(pipe_obj, title="Result"), file=sys.stderr)
        ctx.emit(pipe_obj.to_dict())
        ctx.set_current_stage_table(None)

    @staticmethod
    def _emit_storage_result(payload: Dict[str, Any]) -> None:
        """Emit a storage-style result payload.

        - Always emits the dict downstream (when in a pipeline).
        - If this is the last stage (or not in a pipeline), prints a
          search-store-like table and sets an overlay table/items for @N selection.
        """
        # Always emit for downstream commands (no-op if not in a pipeline)
        ctx.emit(payload)
        stage_ctx = ctx.get_stage_context()
        is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
        if not is_last:
            return
        try:
            from result_table import ResultTable
            table = ResultTable("Result")
            table.add_result(payload)
            # Overlay so @1 refers to this add-file result without overwriting search history
            ctx.set_last_result_table_overlay(table, [payload], subject=payload)
        except Exception:
            # If table rendering fails, still keep the @N selection items
            try:
                ctx.set_last_result_items_only([payload])
            except Exception:
                pass

    @staticmethod
    def _prepare_metadata(
        result: Any,
        media_path: Path,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
    ) -> Tuple[List[str], List[str], Optional[str], Optional[str]]:
        """Prepare tags, url, and title for the file.

        Returns (tags, url, preferred_title, file_hash).
        """
        tags_from_result = list(pipe_obj.tag or [])
        if not tags_from_result:
            try:
                tags_from_result = list(extract_tag_from_result(result) or [])
            except Exception:
                tags_from_result = []
        url_from_result = Add_File._get_url(result, pipe_obj)

        preferred_title = pipe_obj.title
        if not preferred_title:
            for t in tags_from_result:
                if str(t).strip().lower().startswith("title:"):
                    candidate = t.split(":", 1)[1].strip().replace("_", " ").strip()
                    if candidate:
                        preferred_title = candidate
                        break
        if not preferred_title:
            preferred_title = extract_title_from_result(result)
        if preferred_title:
            preferred_title = preferred_title.replace("_", " ").strip()

        store = getattr(pipe_obj, "store", None)
        _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
            media_path, store, config
        )

        def normalize_title_tag(tag: str) -> str:
            # e.g. "title:Some_Video_Name" -> "title:Some Video Name"
            if str(tag).strip().lower().startswith("title:"):
                parts = tag.split(":", 1)
                if len(parts) == 2:
                    value = parts[1].replace("_", " ").strip()
                    return f"title:{value}"
            return tag

        tags_from_result_no_title = [t for t in tags_from_result if not str(t).strip().lower().startswith("title:")]
        sidecar_tags = collapse_namespace_tag([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last")
        sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")]
        merged_tags = merge_sequences(tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True)
        if preferred_title:
            merged_tags.append(f"title:{preferred_title}")
        merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
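        # Illustrative merge (values assumed): result tags ["artist:foo", "title:My_Clip"]
        # with sidecar tags ["title:Old_Name", "source:web"] produce
        # ["artist:foo", "source:web", "title:My Clip"]: the result's title wins,
        # underscores in the title become spaces, and url lists merge
        # case-insensitively.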
        file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)

        # Persist back to the PipeObject
        pipe_obj.tag = merged_tags
        if preferred_title and not pipe_obj.title:
            pipe_obj.title = preferred_title
        if file_hash and not pipe_obj.hash:
            pipe_obj.hash = file_hash
        if isinstance(pipe_obj.extra, dict):
            pipe_obj.extra.setdefault("url", merged_url)
        return merged_tags, merged_url, preferred_title, file_hash

    @staticmethod
    def _handle_local_export(
        media_path: Path,
        location: str,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
        delete_after: bool,
    ) -> int:
        """Handle exporting to a specific local path (copy)."""
        try:
            destination_root = Path(location)
        except Exception as exc:
            log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
            return 1

        log(f"Exporting to local path: {destination_root}", file=sys.stderr)
        result = None
        tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)

        # Determine the filename (title-based)
        title_value = title
        if not title_value:
            # Try to find a title in the tags
            title_tag = next((t for t in tags if str(t).strip().lower().startswith("title:")), None)
            if title_tag:
                title_value = title_tag.split(":", 1)[1].strip()
        if not title_value:
            title_value = media_path.stem.replace("_", " ").strip()
        safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
        base_name = safe_title or media_path.stem
        new_name = base_name + media_path.suffix
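        # Illustrative (title assumed): "AC/DC: Back In Black" sanitizes to
        # "ACDC Back In Black" and keeps the original suffix; name collisions
        # are resolved below via unique_path.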
        destination_root.mkdir(parents=True, exist_ok=True)
        target_path = destination_root / new_name
        if target_path.exists():
            target_path = unique_path(target_path)

        # COPY operation (safe export)
        try:
            shutil.copy2(str(media_path), target_path)
        except Exception as exc:
            log(f"❌ Failed to export file: {exc}", file=sys.stderr)
            return 1

        # Copy sidecars
        Add_File._copy_sidecars(media_path, target_path)

        # Ensure a hash for the exported copy
        if not f_hash:
            try:
                f_hash = sha256_file(target_path)
            except Exception:
                f_hash = None

        # Write metadata sidecars (since this is an export)
        relationships = Add_File._get_relationships(result, pipe_obj)
        try:
            write_sidecar(target_path, tags, url, f_hash)
            write_metadata(target_path, hash_value=f_hash, url=url, relationships=relationships or [])
        except Exception:
            pass

        # Update the PipeObject and emit
        extra_updates = {
            "url": url,
            "export_path": str(destination_root),
        }
        if relationships:
            extra_updates["relationships"] = relationships
        chosen_title = title or title_value or pipe_obj.title or target_path.name
        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=f_hash or "unknown",
            store="local",
            path=str(target_path),
            tag=tags,
            title=chosen_title,
            extra_updates=extra_updates,
        )
        Add_File._emit_pipe_object(pipe_obj)

        # Cleanup: only delete the source if explicitly requested
        Add_File._cleanup_after_success(media_path, delete_source=delete_after)
        return 0

    @staticmethod
    def _download_soulseek_file(
        result: Any,
        config: Dict[str, Any],
    ) -> Optional[Path]:
        """Download a file from a Soulseek peer.

        Extracts username and filename from the soulseek result metadata and
        initiates the download.
        """
        try:
            import asyncio
            from ProviderCore.registry import download_soulseek_file

            # Extract metadata from the result; expected shape (illustrative):
            #   {"username": "<peer>", "filename": "<remote path>"}
            full_metadata = {}
            if isinstance(result, dict):
                full_metadata = result.get("full_metadata", {})
            elif hasattr(result, "extra") and isinstance(result.extra, dict) and "full_metadata" in result.extra:
                full_metadata = result.extra.get("full_metadata", {})
            elif hasattr(result, "full_metadata"):
                # Direct attribute access (fallback)
                val = getattr(result, "full_metadata", {})
                if isinstance(val, dict):
                    full_metadata = val

            username = full_metadata.get("username")
            filename = full_metadata.get("filename")
            if not username or not filename:
                debug(f"[add-file] ERROR: Missing soulseek metadata (username={username}, filename={filename}) in result (type={type(result).__name__})")
                if hasattr(result, "extra") and isinstance(result.extra, dict):
                    debug(f"[add-file] Result extra keys: {list(result.extra.keys())}")
                return None

            debug(f"[add-file] Starting soulseek download: {username} -> {filename}")

            # Determine the output directory (prefer the downloads folder in config)
            output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads")
            output_dir.mkdir(parents=True, exist_ok=True)

            # Run the async download in an event loop
            try:
                loop = asyncio.get_event_loop()
                if loop.is_closed():
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
            except RuntimeError:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
            downloaded_path = loop.run_until_complete(
                download_soulseek_file(
                    username=username,
                    filename=filename,
                    output_dir=output_dir,
                    timeout=1200,  # 20 minutes
                )
            )
            return downloaded_path
        except Exception as e:
            log(f"[add-file] Soulseek download error: {type(e).__name__}: {e}", file=sys.stderr)
            debug(f"[add-file] Soulseek download traceback: {e}")
            return None

    @staticmethod
    def _handle_provider_upload(
        media_path: Path,
        provider_name: str,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
        delete_after: bool,
    ) -> int:
        """Handle uploading to a file provider (e.g., 0x0)."""
        from ProviderCore.registry import get_file_provider

        log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)
        try:
            file_provider = get_file_provider(provider_name, config)
            if not file_provider:
                log(f"File provider '{provider_name}' not available", file=sys.stderr)
                return 1
            hoster_url = file_provider.upload(str(media_path))
            log(f"File uploaded: {hoster_url}", file=sys.stderr)
            # Associate the URL with Hydrus if possible
            f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
            if f_hash:
                try:
                    client = hydrus_wrapper.get_client(config)
                    if client:
                        client.associate_url(f_hash, hoster_url)
                except Exception:
                    pass
        except Exception as exc:
            log(f"Upload failed: {exc}", file=sys.stderr)
            return 1

        # Update the PipeObject and emit
        extra_updates: Dict[str, Any] = {
            "provider": provider_name,
            "provider_url": hoster_url,
        }
        if isinstance(pipe_obj.extra, dict):
            # Also track the hoster URL as a url for downstream steps
            existing_known = list(pipe_obj.extra.get("url") or [])
            if hoster_url and hoster_url not in existing_known:
                existing_known.append(hoster_url)
            extra_updates["url"] = existing_known
        file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
        Add_File._update_pipe_object_destination(
            pipe_obj,
            hash_value=f_hash or "unknown",
            store=provider_name or "provider",
            path=file_path,
            tag=pipe_obj.tag,
            title=pipe_obj.title or (media_path.name if media_path else None),
            extra_updates=extra_updates,
        )
        Add_File._emit_pipe_object(pipe_obj)

        Add_File._cleanup_after_success(media_path, delete_source=delete_after)
        return 0

    @staticmethod
    def _handle_storage_backend(
        media_path: Path,
        backend_name: str,
        pipe_obj: models.PipeObject,
        config: Dict[str, Any],
        delete_after: bool,
    ) -> int:
        """Handle uploading to a registered storage backend (e.g., a 'test' folder store, 'hydrus', etc.)."""
        log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
        try:
            store = Store(config)
            backend = store[backend_name]

            # Prepare metadata from pipe_obj and sidecars
            tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config)

            # Call the backend's add_file with full metadata.
            # The backend returns a hash as the identifier.
            file_identifier = backend.add_file(
                media_path,
                title=title,
                tags=tags,
                url=url,
            )
            log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)

            stored_path: Optional[str] = None
            try:
                maybe_path = backend.get_file(file_identifier)
                if isinstance(maybe_path, Path):
                    stored_path = str(maybe_path)
                elif isinstance(maybe_path, str) and maybe_path:
                    # Some backends may return a browser URL
                    stored_path = maybe_path
            except Exception:
                stored_path = None

            # A 64-character identifier is assumed to be the file's SHA-256 hex digest
            Add_File._update_pipe_object_destination(
                pipe_obj,
                hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown",
                store=backend_name,
                path=stored_path,
                tag=tags,
                title=title or pipe_obj.title or media_path.name,
                extra_updates={
                    "url": url,
                },
            )

            # Emit a search-store-like payload for consistent tables and natural piping.
            # Keep hash/store for downstream commands (get-tag, get-file, etc.).
            resolved_hash = file_identifier if len(file_identifier) == 64 else (f_hash or file_identifier or "unknown")
            meta: Dict[str, Any] = {}
            try:
                meta = backend.get_metadata(resolved_hash) or {}
            except Exception:
                meta = {}

            # Determine the size in bytes
            size_bytes: Optional[int] = None
            for key in ("size_bytes", "size", "filesize", "file_size"):
                try:
                    raw_size = meta.get(key)
                    if raw_size is not None:
                        size_bytes = int(raw_size)
                        break
                except Exception:
                    pass
            if size_bytes is None:
                try:
                    size_bytes = int(media_path.stat().st_size)
                except Exception:
                    size_bytes = None

            # Determine the title and extension
            title_out = (
                meta.get("title")
                or title
                or pipe_obj.title
                or media_path.stem
                or media_path.name
            )
            ext_out = (meta.get("ext") or media_path.suffix.lstrip("."))
            payload: Dict[str, Any] = {
                "title": title_out,
                "ext": str(ext_out or ""),
                "size_bytes": size_bytes,
                "store": backend_name,
                "hash": resolved_hash,
                # Preserve extra fields for downstream commands (kept hidden by default table rules)
                "path": stored_path,
                "tag": list(tags or []),
                "url": list(url or []),
            }
            Add_File._emit_storage_result(payload)

            Add_File._cleanup_after_success(media_path, delete_source=delete_after)
            return 0
        except Exception as exc:
            log(f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            return 1

    # --- Helpers ---

    @staticmethod
    def _load_sidecar_bundle(
        media_path: Path,
        store: Optional[str],
        config: Dict[str, Any],
    ) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]:
        """Load sidecar metadata.

        Returns (sidecar_path, file_hash, tags, url).
        """
        if store and store.lower() == "local":
            try:
                from config import get_local_storage_path
                db_root = get_local_storage_path(config)
                if db_root:
                    with API_folder_store(Path(db_root)) as db:
                        file_hash = db.get_file_hash(media_path)
                        if file_hash:
                            tags = db.get_tags(file_hash) or []
                            metadata = db.get_metadata(file_hash) or {}
                            url = metadata.get("url") or []
                            f_hash = metadata.get("hash") or file_hash
                            if tags or url or f_hash:
                                return None, f_hash, tags, url
            except Exception:
                pass

        try:
            sidecar_path = find_sidecar(media_path)
            if sidecar_path and sidecar_path.exists():
                h, t, u = read_sidecar(sidecar_path)
                return sidecar_path, h, t or [], u or []
        except Exception:
            pass
        return None, None, [], []

    @staticmethod
    def _resolve_file_hash(
        result: Any,
        media_path: Path,
        pipe_obj: models.PipeObject,
        fallback_hash: Optional[str],
    ) -> Optional[str]:
        if pipe_obj.hash and pipe_obj.hash != "unknown":
            return pipe_obj.hash
        if fallback_hash:
            return fallback_hash
        if isinstance(result, dict):
            candidate = result.get('hash')
            if candidate:
                return str(candidate)
        try:
            return sha256_file(media_path)
        except Exception:
            return None

    @staticmethod
    def _resolve_media_kind(path: Path) -> str:
        """Classify a file by extension into a coarse media kind."""
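        # e.g. Path("track.flac") -> 'audio'; Path("book.epub") -> 'document'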
        suffix = path.suffix.lower()
        if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}:
            return 'audio'
        if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
            return 'video'
        if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}:
            return 'image'
        if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}:
            return 'document'
        return 'other'

    @staticmethod
    def _persist_local_metadata(
        library_root: Path,
        dest_path: Path,
        tags: List[str],
        url: List[str],
        f_hash: Optional[str],
        relationships: Any,
        duration: Any,
        media_kind: str,
    ) -> None:
        payload = {
            'hash': f_hash,
            'url': url,
            'relationships': relationships or [],
            'duration': duration,
            'size': None,
            'ext': dest_path.suffix.lower(),
            'media_type': media_kind,
            'media_kind': media_kind,
        }
        try:
            payload['size'] = dest_path.stat().st_size
        except OSError:
            payload['size'] = None
        with API_folder_store(library_root) as db:
            try:
                db.save_file_info(dest_path, payload, tags)
            except Exception as exc:
                log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr)

    @staticmethod
    def _copy_sidecars(source_path: Path, target_path: Path) -> None:
        possible_sidecars = [
            source_path.with_suffix(source_path.suffix + ".json"),
            source_path.with_name(source_path.name + ".tag"),
            source_path.with_name(source_path.name + ".metadata"),
            source_path.with_name(source_path.name + ".notes"),
        ]
        for sc in possible_sidecars:
            try:
                if sc.exists():
                    # Preserve the sidecar suffix while renaming to match the target
                    # (e.g. "clip.mp4.tag" -> "<new name>.mp4.tag")
                    suffix_part = sc.name.replace(source_path.name, "", 1)
                    dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
                    dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(str(sc), dest_sidecar)
            except Exception:
                pass

    @staticmethod
    def _cleanup_after_success(media_path: Path, delete_source: bool) -> None:
        # Temp merge artifacts should always be deleted, even without -delete
        is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name
        if not (delete_source or is_temp_merge):
            return
        log("Deleting source file...", file=sys.stderr)
        try:
            media_path.unlink()
            Add_File._cleanup_sidecar_files(media_path)
        except Exception as exc:
            log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)

    @staticmethod
    def _cleanup_sidecar_files(media_path: Path) -> None:
        targets = [
            media_path.parent / (media_path.name + '.metadata'),
            media_path.parent / (media_path.name + '.notes'),
            media_path.parent / (media_path.name + '.tag'),
        ]
        for target in targets:
            try:
                if target.exists():
                    target.unlink()
            except Exception:
                pass


# Create and register the cmdlet
CMDLET = Add_File()
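
# Importing this module instantiates and registers the cmdlet: Add_File.__init__
# calls self.register(), and callers can also import CMDLET directly (the same
# pattern this file uses for cmdlet.download_media).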