Medios-Macina/cmdlet/add_file.py

from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Tuple, List
from pathlib import Path
import sys
import shutil
import tempfile
import re
from urllib.parse import urlsplit, parse_qs
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from . import _shared as sh
from result_table import ResultTable
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
SharedArgs = sh.SharedArgs
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
merge_sequences = sh.merge_sequences
extract_relationships = sh.extract_relationships
extract_duration = sh.extract_duration
coerce_to_pipe_object = sh.coerce_to_pipe_object
collapse_namespace_tag = sh.collapse_namespace_tag
from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata
# Canonical supported filetypes for all stores/cmdlets
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
class Add_File(Cmdlet):
"""Add file into the DB"""
def __init__(self) -> None:
"""Initialize add-file cmdlet."""
super().__init__(
name="add-file",
summary="Upload a media file to specified location (Hydrus, file provider, or local directory).",
usage="add-file (-path <filepath> | <piped>) (-storage <location> | -provider <fileprovider>) [-delete]",
arg=[
SharedArgs.PATH,
SharedArgs.STORE,
CmdletArg(
name="provider",
type="string",
required=False,
description="File hosting provider (e.g., 0x0, file.io, internetarchive)",
alias="prov",
),
CmdletArg(
name="room",
type="string",
required=False,
description="Matrix room_id (when -provider matrix). If omitted, a room picker table is shown.",
alias="room_id",
),
CmdletArg(
name="delete",
type="flag",
required=False,
description="Delete file after successful upload",
alias="del",
),
],
detail=[
"- Storage location options (use -storage):",
" hydrus: Upload to Hydrus database with metadata tagging",
" local: Copy file to local directory",
" <path>: Copy file to specified directory",
"- File provider options (use -provider):",
" 0x0: Upload to 0x0.st for temporary hosting",
" file.io: Upload to file.io for temporary hosting",
" matrix: Upload to a Matrix room (requires Matrix config)",
" internetarchive: Upload to archive.org (optional tag: ia:<identifier> to upload into an existing item)",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution entry point."""
parsed = parse_cmdlet_args(args, self)
progress = PipelineProgress(ctx)
path_arg = parsed.get("path")
location = parsed.get("store")
provider_name = parsed.get("provider")
provider_room = parsed.get("room")
delete_after = parsed.get("delete", False)
# Convenience: when piping a file into add-file, allow `-path <existing dir>`
# to act as the destination export directory.
# Example: screen-shot "https://..." | add-file -path "C:\Users\Admin\Desktop"
if path_arg and not location and not provider_name:
try:
candidate_dir = Path(str(path_arg))
if candidate_dir.exists() and candidate_dir.is_dir():
piped_items = result if isinstance(result, list) else [result]
has_local_source = False
for it in piped_items:
try:
po = coerce_to_pipe_object(it, None)
src = str(getattr(po, "path", "") or "").strip()
if not src:
continue
if src.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
continue
if Path(src).is_file():
has_local_source = True
break
except Exception:
continue
if has_local_source:
debug(
f"[add-file] Treating -path directory as destination: {candidate_dir}"
)
location = str(candidate_dir)
path_arg = None
except Exception:
pass
stage_ctx = ctx.get_stage_context()
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
# Directory-mode selector:
# - First pass: `add-file -store X -path <DIR>` should ONLY show a selectable table.
# - Second pass (triggered by @ selection expansion): re-run add-file with `-path file1,file2,...`
# and actually ingest/copy.
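        # Example flow (illustrative paths):
        #   add-file -store hydrus -path "C:\incoming"   -> first pass: selectable table only
        #   @1-3                                          -> second pass: replays add-file with -path "file1,file2,..." and ingests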
dir_scan_mode = False
dir_scan_results: Optional[List[Dict[str, Any]]] = None
explicit_path_list_results: Optional[List[Dict[str, Any]]] = None
if path_arg and location and not provider_name:
# Support comma-separated path lists: -path "file1,file2,file3"
# This is the mechanism used by @N expansion for directory tables.
try:
path_text = str(path_arg)
except Exception:
path_text = ""
if "," in path_text:
parts = [p.strip().strip('"') for p in path_text.split(",")]
parts = [p for p in parts if p]
batch: List[Dict[str, Any]] = []
for p in parts:
try:
file_path = Path(p)
except Exception:
continue
if not file_path.exists() or not file_path.is_file():
continue
ext = file_path.suffix.lower()
if ext not in SUPPORTED_MEDIA_EXTENSIONS:
continue
try:
hv = sha256_file(file_path)
except Exception:
continue
try:
size = file_path.stat().st_size
except Exception:
size = 0
batch.append(
{
"path": file_path,
"name": file_path.name,
"hash": hv,
"size": size,
"ext": ext,
}
)
if batch:
explicit_path_list_results = batch
# Clear path_arg so add-file doesn't treat it as a single path.
path_arg = None
else:
# Directory scan (selector table, no ingest yet)
try:
candidate_dir = Path(str(path_arg))
if candidate_dir.exists() and candidate_dir.is_dir():
dir_scan_mode = True
debug(f"[add-file] Scanning directory for batch add: {candidate_dir}")
dir_scan_results = Add_File._scan_directory_for_files(candidate_dir)
if dir_scan_results:
debug(
f"[add-file] Found {len(dir_scan_results)} supported files in directory"
)
# Clear path_arg so it doesn't trigger single-item mode.
path_arg = None
except Exception as exc:
debug(f"[add-file] Directory scan failed: {exc}")
# Determine if -store targets a registered backend (vs a filesystem export path).
is_storage_backend_location = False
if location:
try:
store_probe = Store(config)
is_storage_backend_location = location in (store_probe.list_backends() or [])
except Exception:
is_storage_backend_location = False
# Decide which items to process.
# - If directory scan was performed, use those results
# - If user provided -path (and it was not reinterpreted as destination), treat this invocation as single-item.
# - Otherwise, if piped input is a list, ingest each item.
if explicit_path_list_results:
items_to_process = explicit_path_list_results
debug(f"[add-file] Using {len(items_to_process)} files from -path list")
elif dir_scan_results:
items_to_process = dir_scan_results
debug(f"[add-file] Using {len(items_to_process)} files from directory scan")
elif path_arg:
items_to_process: List[Any] = [result]
elif isinstance(result, list) and result:
items_to_process = list(result)
else:
items_to_process = [result]
# Minimal step-based progress for single-item runs.
# Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
        use_steps = False
        steps_started = False
        step2_done = False
        # Capture the UI handle and pipe index up-front; they are reused later to
        # mark per-pipe progress after provider downloads.
        ui = None
        pipe_idx = 0
        try:
            ui, pipe_idx = progress.ui_and_pipe_index()
            use_steps = (ui is not None) and (len(items_to_process) == 1)
        except Exception:
            use_steps = False
debug(f"[add-file] INPUT result type={type(result).__name__}")
if isinstance(result, list):
debug(f"[add-file] INPUT result is list with {len(result)} items")
debug(
f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}"
)
# If this invocation was directory selector mode, show a selectable table and stop.
# The user then runs @N (optionally piped), which replays add-file with selected paths.
if dir_scan_mode:
try:
from result_table import ResultTable
from pathlib import Path as _Path
# Build base args to replay: keep everything except the directory -path.
base_args: List[str] = []
skip_next = False
for tok in list(args or []):
if skip_next:
skip_next = False
continue
t = str(tok)
if t in {"-path", "--path", "-p"}:
skip_next = True
continue
base_args.append(t)
table = ResultTable(title="Files in Directory", preserve_order=True)
table.set_table("add-file.directory")
table.set_source_command("add-file", base_args)
rows: List[Dict[str, Any]] = []
for file_info in dir_scan_results or []:
p = file_info.get("path")
hp = str(file_info.get("hash") or "")
name = str(file_info.get("name") or "unknown")
try:
clean_title = _Path(name).stem
except Exception:
clean_title = name
ext = str(file_info.get("ext") or "").lstrip(".")
size = file_info.get("size", 0)
row_item = {
"path": str(p) if p is not None else "",
"hash": hp,
"title": clean_title,
"columns": [
("Title", clean_title),
("Hash", hp),
("Size", size),
("Ext", ext),
],
# Used by @N replay (CLI will combine selected rows into -path file1,file2,...)
"_selection_args": ["-path", str(p) if p is not None else ""],
}
rows.append(row_item)
table.add_result(row_item)
ctx.set_current_stage_table(table)
ctx.set_last_result_table(table, rows, subject={"table": "add-file.directory"})
log(f"✓ Found {len(rows)} files. Select with @N (e.g., @1 or @1-3).")
return 0
except Exception as exc:
debug(f"[add-file] Failed to display directory scan result table: {exc}")
collected_payloads: List[Dict[str, Any]] = []
pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {}
successes = 0
failures = 0
# When add-file -store is the last stage, always show a final search-store table.
# This is especially important for multi-item ingests (e.g., multi-clip downloads)
# so the user always gets a selectable ResultTable.
want_final_search_store = (
bool(is_last_stage) and bool(is_storage_backend_location) and bool(location)
)
auto_search_store_after_add = False
# When ingesting multiple items into a backend store, defer URL association and
# apply it once at the end (bulk) to avoid per-item URL API calls.
defer_url_association = (
bool(is_storage_backend_location) and bool(location) and len(items_to_process) > 1
)
# If we are going to persist results (-store / -provider) and the piped input contains
# URL download targets (e.g. playlist rows), preflight URL duplicates once up-front.
# IMPORTANT: Do not treat a *source URL* on an already-local file (e.g. screen-shot)
# as a download target; that would trigger yt-dlp preflights for non-yt-dlp URLs.
skip_url_downloads: set[str] = set()
download_mode_hint: Optional[str] = None
forced_ytdl_format: Optional[str] = None
if (provider_name or location) and isinstance(items_to_process, list) and items_to_process:
url_candidates: List[str] = []
for it in items_to_process:
try:
po_probe = coerce_to_pipe_object(it, path_arg)
except Exception:
continue
# If the piped item already points at a local file, we are *ingesting* it,
# not downloading it. Skip URL-preflight and yt-dlp probing for those.
try:
po_path = getattr(po_probe, "path", None)
po_path_s = str(po_path or "").strip()
if po_path_s and not po_path_s.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
continue
except Exception:
pass
try:
for u in self._get_url(it, po_probe) or []:
s = str(u or "").strip()
if not s:
continue
if s.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
url_candidates.append(s)
except Exception:
continue
# Only meaningful when targeting a registered backend store.
if url_candidates and is_storage_backend_location and location:
# De-dupe in-order to keep logs stable.
seen: set[str] = set()
unique_urls: List[str] = []
for u in url_candidates:
if u in seen:
continue
seen.add(u)
unique_urls.append(u)
try:
skip_url_downloads = self._preflight_url_duplicates_bulk(unique_urls, config)
except Exception:
skip_url_downloads = set()
# Batch-level format preflight:
# - If the sample URL only has one available format, force it for the batch.
# - If the sample URL appears audio-only (no video codecs), prefer audio mode.
try:
from cmdlet.download_media import is_url_supported_by_ytdlp, list_formats
from tool.ytdlp import YtDlpTool
sample_url = unique_urls[0] if unique_urls else None
if sample_url and is_url_supported_by_ytdlp(str(sample_url)):
cf = None
try:
cookie_path = YtDlpTool(config).resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
fmts = list_formats(
str(sample_url),
no_playlist=False,
playlist_items=None,
cookiefile=cf,
)
if isinstance(fmts, list) and fmts:
has_video = False
try:
for f in fmts:
if not isinstance(f, dict):
continue
vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
if vcodec and vcodec != "none":
has_video = True
break
except Exception:
has_video = False
download_mode_hint = "video" if has_video else "audio"
if len(fmts) == 1 and isinstance(fmts[0], dict):
fid = str(fmts[0].get("format_id") or "").strip()
if fid:
forced_ytdl_format = fid
                except Exception:
                    # Format preflight is best-effort; keep whatever hints were already set.
                    pass
processed_url_items: set[str] = set()
for item in items_to_process:
pipe_obj = coerce_to_pipe_object(item, path_arg)
temp_dir_to_cleanup: Optional[Path] = None
delete_after_item = delete_after
try:
media_path_or_url, file_hash = self._resolve_source(
item, path_arg, pipe_obj, config
)
debug(
f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}..."
)
if not media_path_or_url:
failures += 1
continue
is_url_target = isinstance(media_path_or_url, str) and str(
media_path_or_url
).lower().startswith(("http://", "https://", "magnet:", "torrent:"))
if use_steps and (not steps_started) and (not is_url_target):
progress.begin_steps(3)
progress.step("resolving source")
steps_started = True
# Update pipe_obj with resolved path
pipe_obj.path = str(media_path_or_url)
# URL targets: prefer provider-aware download for OpenLibrary selections.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
table = None
full_metadata = None
if isinstance(pipe_obj.extra, dict):
table = pipe_obj.extra.get("table")
full_metadata = pipe_obj.extra.get("full_metadata")
is_openlibrary = (str(table or "").lower() == "openlibrary") or (
"openlibrary.org/books/" in media_path_or_url.lower()
)
if is_openlibrary:
# Enrich tags from OpenLibrary metadata so the stored file has book tags (author/pages/etc).
try:
from Provider.openlibrary import OpenLibrary as _OpenLibrary
olid = None
archive_id = None
if isinstance(full_metadata, dict):
olid = full_metadata.get("openlibrary_id") or full_metadata.get(
"openlibrary"
)
archive_id = full_metadata.get("archive_id")
if not olid:
import re
m = re.search(
r"/books/(OL\d+M)", str(media_path_or_url), flags=re.IGNORECASE
)
if m:
olid = m.group(1)
scraped_tags: List[str] = []
if olid:
scraped_tags.extend(
_OpenLibrary.scrape_openlibrary_metadata(str(olid)) or []
)
if archive_id:
scraped_tags.append(f"internet_archive:{archive_id}")
if scraped_tags:
existing = list(pipe_obj.tag or [])
pipe_obj.tag = merge_sequences(
existing, scraped_tags, case_sensitive=False
)
except Exception:
pass
from ProviderCore.registry import get_search_provider
from ProviderCore.base import SearchResult
provider = get_search_provider("openlibrary", config)
if provider is None:
log("[add-file] OpenLibrary provider not available", file=sys.stderr)
failures += 1
continue
# Lean, non-debug status output (the ResultTable search follows after ingest).
try:
title_text = str(getattr(pipe_obj, "title", "") or "").strip()
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
tags_list = list(getattr(pipe_obj, "tag", None) or [])
tags_text = ", ".join(
str(t).strip() for t in tags_list if str(t).strip()
)
log(f"Title: {title_text or 'Unknown'}")
log(f"Tags: {tags_text}")
except Exception:
pass
temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))
# Wire OpenLibrary download progress into pipeline Live UI (no tqdm spam).
def _ol_progress(
kind: str, completed: int, total: Optional[int], label: str
) -> None:
try:
if kind == "pages" and total:
progress.set_status(f"downloading pages {completed}/{total}")
progress.set_percent(
int(round((completed / max(1, total)) * 100.0))
)
elif kind == "bytes" and total:
progress.set_status(
f"downloading {label} {completed}/{total} bytes"
)
progress.set_percent(
int(round((completed / max(1, total)) * 100.0))
)
else:
progress.set_status("downloading")
except Exception:
return
try:
progress.set_percent(0)
progress.set_status("downloading openlibrary")
except Exception:
pass
sr = SearchResult(
table="openlibrary",
title=str(getattr(pipe_obj, "title", None) or "Unknown"),
path=str(media_path_or_url),
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
)
downloaded = provider.download(
sr, temp_dir_to_cleanup, progress_callback=_ol_progress
)
if downloaded is None:
log("[add-file] OpenLibrary download failed", file=sys.stderr)
failures += 1
continue
downloaded_path = Path(downloaded)
if downloaded_path.exists() and downloaded_path.is_dir():
log(
"[add-file] OpenLibrary download produced a directory (PDF conversion failed). Cannot ingest.",
file=sys.stderr,
)
failures += 1
continue
media_path_or_url = str(downloaded_path)
pipe_obj.path = str(downloaded_path)
delete_after_item = True
try:
if ui is not None:
ui.set_pipe_percent(int(pipe_idx), 100)
ui.set_pipe_status_text(int(pipe_idx), "downloaded")
except Exception:
pass
# For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
# Hydrus file URLs are direct file downloads and may require Hydrus auth headers.
# If the user provided a destination (-provider or -store), download now and continue.
if (
(provider_name or location)
and isinstance(media_path_or_url, str)
and media_path_or_url.lower().startswith(("http://", "https://"))
):
downloaded = self._try_download_hydrus_file_url(
file_url=str(media_path_or_url),
pipe_obj=pipe_obj,
config=config,
)
if downloaded is not None:
downloaded_path, downloaded_temp_dir = downloaded
temp_dir_to_cleanup = downloaded_temp_dir
media_path_or_url = str(downloaded_path)
pipe_obj.path = str(downloaded_path)
pipe_obj.is_temp = True
delete_after_item = True
# If it's still a URL target, fall back to the legacy delegate.
if isinstance(
media_path_or_url, str
) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
# If the user provided a destination (-store / -provider), download here and then
# continue normal add-file logic so the downloaded file is actually ingested.
url_str = str(media_path_or_url)
if provider_name or location:
# Avoid re-processing the same URL multiple times in a batch.
if url_str in processed_url_items:
successes += 1
continue
processed_url_items.add(url_str)
# If bulk preflight found this URL already stored, skip downloading.
if url_str in skip_url_downloads:
log(
f"Skipping download (already stored): {url_str}",
file=sys.stderr,
)
successes += 1
continue
downloaded_pipe_dicts = (
self._download_streaming_url_as_pipe_objects(
url_str,
config,
mode_hint=download_mode_hint,
ytdl_format_hint=forced_ytdl_format,
)
)
if not downloaded_pipe_dicts:
failures += 1
continue
# Merge original tags/notes/relationships into each downloaded item and ingest.
for dl_item in downloaded_pipe_dicts:
try:
if isinstance(dl_item, dict):
# Merge tags
base_tags = list(getattr(pipe_obj, "tag", None) or [])
if base_tags:
dl_tags = list(dl_item.get("tag") or [])
dl_item["tag"] = merge_sequences(
dl_tags, base_tags, case_sensitive=False
)
# Carry notes/relationships forward when present on the original.
base_notes = getattr(pipe_obj, "notes", None)
if base_notes and ("notes" not in dl_item):
dl_item["notes"] = base_notes
base_rels = getattr(pipe_obj, "relationships", None)
if base_rels and ("relationships" not in dl_item):
dl_item["relationships"] = base_rels
except Exception:
pass
dl_pipe_obj = coerce_to_pipe_object(dl_item, None)
try:
dl_media_path = Path(
str(getattr(dl_pipe_obj, "path", "") or "")
)
except Exception:
dl_media_path = None
if dl_media_path is None or not self._validate_source(
dl_media_path
):
failures += 1
continue
if provider_name:
if str(provider_name).strip().lower() == "matrix":
room_id = None
if provider_room:
room_id = str(provider_room).strip()
if not room_id:
try:
matrix_conf = (
config.get("provider", {}).get("matrix", {})
if isinstance(config, dict)
else {}
)
room_id = (
str(
matrix_conf.get("room_id") or ""
).strip()
or None
)
except Exception:
room_id = None
if not room_id:
pending = [
{
"path": str(dl_media_path),
"pipe_obj": dl_pipe_obj,
"delete_after": bool(delete_after_item),
}
]
return self._matrix_prompt_room_selection(
pending, config, list(args)
)
code = self._handle_matrix_upload(
dl_media_path,
dl_pipe_obj,
config,
delete_after_item,
room_id=room_id,
)
else:
code = self._handle_provider_upload(
dl_media_path,
provider_name,
dl_pipe_obj,
config,
delete_after_item,
)
if code == 0:
successes += 1
else:
failures += 1
continue
if location:
try:
store = Store(config)
backends = store.list_backends()
if location in backends:
code = self._handle_storage_backend(
dl_item,
dl_media_path,
location,
dl_pipe_obj,
config,
delete_after_item,
collect_payloads=collected_payloads,
collect_relationship_pairs=pending_relationship_pairs,
defer_url_association=defer_url_association,
pending_url_associations=pending_url_associations,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
)
else:
code = self._handle_local_export(
dl_media_path,
location,
dl_pipe_obj,
config,
delete_after_item,
)
except Exception as exc:
debug(
f"[add-file] ERROR: Failed to resolve location: {exc}"
)
log(f"Invalid location: {location}", file=sys.stderr)
failures += 1
continue
if code == 0:
successes += 1
else:
failures += 1
continue
# Finished processing all downloaded items for this URL.
continue
# No destination specified: keep legacy behavior (download-media only).
code = self._delegate_to_download_media(
item, url_str, location, provider_name, args, config
)
if code == 0:
successes += 1
else:
failures += 1
continue
media_path = (
Path(media_path_or_url)
if isinstance(media_path_or_url, str)
else media_path_or_url
)
if not self._validate_source(media_path):
failures += 1
continue
if provider_name:
# Matrix provider can prompt for a room selection if one is not configured.
if str(provider_name).strip().lower() == "matrix":
room_id = None
if provider_room:
room_id = str(provider_room).strip()
if not room_id:
try:
matrix_conf = (
config.get("provider", {}).get("matrix", {})
if isinstance(config, dict)
else {}
)
room_id = str(matrix_conf.get("room_id") or "").strip() or None
except Exception:
room_id = None
if not room_id:
pending = [
{
"path": str(media_path),
"pipe_obj": pipe_obj,
"delete_after": bool(delete_after_item),
}
]
return self._matrix_prompt_room_selection(pending, config, list(args))
code = self._handle_matrix_upload(
media_path, pipe_obj, config, delete_after_item, room_id=room_id
)
else:
code = self._handle_provider_upload(
media_path, provider_name, pipe_obj, config, delete_after_item
)
if code == 0:
successes += 1
else:
failures += 1
continue
if location:
try:
store = Store(config)
backends = store.list_backends()
if location in backends:
code = self._handle_storage_backend(
item,
media_path,
location,
pipe_obj,
config,
delete_after_item,
collect_payloads=collected_payloads,
collect_relationship_pairs=pending_relationship_pairs,
defer_url_association=defer_url_association,
pending_url_associations=pending_url_associations,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
)
else:
code = self._handle_local_export(
media_path, location, pipe_obj, config, delete_after_item
)
except Exception as exc:
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
log(f"Invalid location: {location}", file=sys.stderr)
failures += 1
continue
if use_steps and steps_started and (not step2_done):
progress.step("writing destination")
step2_done = True
if code == 0:
successes += 1
else:
failures += 1
continue
log("No destination specified", file=sys.stderr)
failures += 1
finally:
if temp_dir_to_cleanup is not None:
try:
shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
except Exception:
pass
# Apply deferred url associations (bulk) before showing the final store table.
if pending_url_associations:
try:
Add_File._apply_pending_url_associations(pending_url_associations, config)
except Exception:
pass
# Always end add-file -store (when last stage) by showing the canonical store table.
# This keeps output consistent and ensures @N selection works for multi-item ingests.
if want_final_search_store and collected_payloads:
try:
hashes: List[str] = []
for payload in collected_payloads:
h = payload.get("hash") if isinstance(payload, dict) else None
if isinstance(h, str) and len(h) == 64:
hashes.append(h)
# Deduplicate while preserving order
seen: set[str] = set()
hashes = [h for h in hashes if not (h in seen or seen.add(h))]
refreshed_items = Add_File._try_emit_search_store_by_hashes(
store=str(location),
hash_values=hashes,
config=config,
)
if not refreshed_items:
# Fallback: at least show the add-file payloads as a display overlay
from result_table import ResultTable
table = ResultTable("Result")
for payload in collected_payloads:
table.add_result(payload)
ctx.set_last_result_table_overlay(
table, collected_payloads, subject=collected_payloads
)
except Exception:
pass
# Persist relationships into backend DB/API.
if pending_relationship_pairs:
try:
Add_File._apply_pending_relationships(pending_relationship_pairs, config)
except Exception:
pass
if use_steps and steps_started:
progress.step("finalized")
if successes > 0:
return 0
return 1
@staticmethod
def _try_emit_search_store_by_hashes(
*, store: str, hash_values: List[str], config: Dict[str, Any]
) -> Optional[List[Any]]:
"""Run search-store for a list of hashes and promote the table to a display overlay.
Returns the emitted search-store payload items on success, else None.
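        Example query passed to search-store (illustrative): "hash:<sha256-1>,<sha256-2>".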
"""
hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64]
if not store or not hashes:
return None
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
query = "hash:" + ",".join(hashes)
args = ["-store", str(store), query]
debug(f'[add-file] Refresh: search-store -store {store} "{query}"')
# Run search-store under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(
stage_index=0,
total_stages=1,
pipe_index=0,
worker_id=getattr(prev_ctx, "worker_id", None),
)
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return None
# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if is_last:
try:
table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if table is not None and items:
ctx.set_last_result_table_overlay(
table, items, subject={"store": store, "hash": hashes}
)
except Exception:
pass
return emitted_items
except Exception as exc:
debug(
f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}"
)
return None
@staticmethod
def _parse_relationship_tag_king_alts(tag_value: str) -> tuple[Optional[str], List[str]]:
"""Parse a relationship tag into (king_hash, alt_hashes).
Supported formats:
- New: relationship: <KING_HASH>,<ALT_HASH>,<ALT_HASH>
- Old: relationship: hash(king)<KING_HASH>,hash(alt)<ALT_HASH>...
relationship: hash(king)KING,hash(alt)ALT
For the local DB we treat the first hash listed as the king.
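        Example (illustrative; hashes are full 64-char hex in practice):
            "relationship: <KING>,<ALT1>,<ALT2>" -> ("<king>", ["<alt1>", "<alt2>"]), hashes lower-cased.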
"""
if not isinstance(tag_value, str):
return None, []
raw = tag_value.strip()
if not raw:
return None, []
# Normalize input: ensure we only look at the RHS after "relationship:"
rhs = raw
if ":" in raw:
prefix, rest = raw.split(":", 1)
if prefix.strip().lower() == "relationship":
rhs = rest.strip()
# Old typed format: hash(type)HEX
typed = re.findall(r"hash\((\w+)\)<?([a-fA-F0-9]{64})>?", rhs)
if typed:
king: Optional[str] = None
alts: List[str] = []
for rel_type, h in typed:
h_norm = str(h).strip().lower()
if rel_type.strip().lower() == "king":
king = h_norm
elif rel_type.strip().lower() in {"alt", "related"}:
alts.append(h_norm)
# If the tag omitted king but had hashes, fall back to first hash.
if not king:
all_hashes = [str(h).strip().lower() for _, h in typed]
king = all_hashes[0] if all_hashes else None
alts = [h for h in all_hashes[1:] if h]
# Dedupe alts while preserving order
seen: set[str] = set()
alts = [h for h in alts if h and len(h) == 64 and not (h in seen or seen.add(h))]
if king and len(king) == 64:
return king, [h for h in alts if h != king]
return None, []
# New format: a simple list of hashes, first is king.
hashes = re.findall(r"\b[a-fA-F0-9]{64}\b", rhs)
hashes = [h.strip().lower() for h in hashes if isinstance(h, str)]
if not hashes:
return None, []
king = hashes[0]
alts = hashes[1:]
seen2: set[str] = set()
alts = [h for h in alts if h and len(h) == 64 and not (h in seen2 or seen2.add(h))]
return king, [h for h in alts if h != king]
@staticmethod
def _parse_relationships_king_alts(
relationships: Dict[str, Any],
) -> tuple[Optional[str], List[str]]:
"""Parse a PipeObject.relationships dict into (king_hash, alt_hashes).
Supported shapes:
- {"king": [KING], "alt": [ALT1, ALT2]}
- {"king": KING, "alt": ALT} (strings)
- Also treats "related" hashes as alts for persistence purposes.
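        Example (illustrative, where K/A1/A2 stand for 64-char sha256 hex strings):
            {"king": ["K"], "alt": ["A1"], "related": ["A2"]} -> ("k", ["a1", "a2"])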
"""
if not isinstance(relationships, dict) or not relationships:
return None, []
def _first_hash(val: Any) -> Optional[str]:
if isinstance(val, str):
h = val.strip().lower()
return h if len(h) == 64 else None
if isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
return h
return None
def _many_hashes(val: Any) -> List[str]:
out: List[str] = []
if isinstance(val, str):
h = val.strip().lower()
if len(h) == 64:
out.append(h)
elif isinstance(val, list):
for item in val:
if isinstance(item, str):
h = item.strip().lower()
if len(h) == 64:
out.append(h)
return out
king = _first_hash(relationships.get("king"))
if not king:
return None, []
alts = _many_hashes(relationships.get("alt"))
alts.extend(_many_hashes(relationships.get("related")))
seen: set[str] = set()
alts = [h for h in alts if h and h != king and not (h in seen or seen.add(h))]
return king, alts
@staticmethod
def _apply_pending_relationships(
pending: Dict[str, set[tuple[str, str]]], config: Dict[str, Any]
) -> None:
"""Persist relationships into the appropriate backend DB/API.
- Folder stores: write to the per-store SQLite DB (directional alt->king).
- Hydrus stores: call Hydrus relationship API.
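        Expected *pending* shape (illustrative): {"<backend name>": {("<alt sha256>", "<king sha256>"), ...}}.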
"""
if not pending:
return
try:
store = Store(config)
except Exception:
return
for backend_name, pairs in pending.items():
if not pairs:
continue
try:
backend = store[str(backend_name)]
except Exception:
continue
backend_type = type(backend).__name__.lower()
# Folder-backed local DB
location_fn = getattr(backend, "location", None)
is_folder = type(backend).__name__ == "Folder" and callable(location_fn)
if is_folder and location_fn is not None:
try:
root = Path(str(location_fn())).expanduser()
with API_folder_store(root) as db:
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
# Hash-first store DB write; skips if either hash isn't in this store DB.
try:
db.set_relationship_by_hash(
str(alt_hash), str(king_hash), "alt", bidirectional=False
)
except Exception:
continue
processed_pairs.add((alt_hash, king_hash))
except Exception:
pass
continue
# Hydrus
if "hydrus" in backend_type or hasattr(backend, "_client"):
client: Any = getattr(backend, "_client", None)
# Do not fall back to a global/default Hydrus client here; relationships must not be cross-store.
if client is None or not hasattr(client, "set_relationship"):
continue
def _hash_exists(hash_hex: str) -> bool:
try:
if not hasattr(client, "fetch_file_metadata"):
return False
payload = client.fetch_file_metadata(
hashes=[hash_hex],
include_service_keys_to_tags=False,
include_file_url=False,
include_duration=False,
include_size=False,
include_mime=False,
include_notes=False,
)
meta = payload.get("metadata") if isinstance(payload, dict) else None
return bool(isinstance(meta, list) and meta)
except Exception:
return False
processed_pairs: set[tuple[str, str]] = set()
for alt_hash, king_hash in sorted(pairs):
if not alt_hash or not king_hash or alt_hash == king_hash:
continue
if (alt_hash, king_hash) in processed_pairs:
continue
try:
alt_norm = str(alt_hash).strip().lower()
king_norm = str(king_hash).strip().lower()
if len(alt_norm) != 64 or len(king_norm) != 64:
continue
if not _hash_exists(alt_norm) or not _hash_exists(king_norm):
continue
client.set_relationship(alt_norm, king_norm, "alt")
processed_pairs.add((alt_hash, king_hash))
except Exception:
pass
continue
# Other backends: no-op
_ = backend_type
@staticmethod
def _resolve_source(
result: Any,
path_arg: Optional[str],
pipe_obj: models.PipeObject,
config: Dict[str, Any],
) -> Tuple[Optional[Path | str], Optional[str]]:
"""Resolve the source file path from args or pipeline result.
Returns (media_path_or_url, file_hash)
where media_path_or_url can be a Path object or a URL string.
"""
# PRIORITY 1a: Try hash+path from directory scan result (has 'path' and 'hash' keys)
if isinstance(result, dict):
result_path = result.get("path")
result_hash = result.get("hash")
# Check if this looks like a directory scan result (has path and hash but no 'store' key)
result_store = result.get("store")
if result_path and result_hash and not result_store:
try:
media_path = (
Path(result_path) if not isinstance(result_path, Path) else result_path
)
if media_path.exists() and media_path.is_file():
debug(f"[add-file] Using path+hash from directory scan: {media_path}")
pipe_obj.path = str(media_path)
return media_path, str(result_hash)
except Exception as exc:
debug(f"[add-file] Failed to use directory scan result: {exc}")
# PRIORITY 1b: Try hash+store from result dict (most reliable for @N selections)
if isinstance(result, dict):
result_hash = result.get("hash")
result_store = result.get("store")
if result_hash and result_store:
debug(
f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}"
)
try:
store = Store(config)
if result_store in store.list_backends():
backend = store[result_store]
media_path = backend.get_file(result_hash)
if isinstance(media_path, Path) and media_path.exists():
pipe_obj.path = str(media_path)
return media_path, str(result_hash)
if isinstance(media_path, str) and media_path.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
pipe_obj.path = media_path
return media_path, str(result_hash)
except Exception as exc:
debug(f"[add-file] Failed to retrieve via hash+store: {exc}")
# PRIORITY 2: Try explicit path argument
if path_arg:
media_path = Path(path_arg)
pipe_obj.path = str(media_path)
debug(f"[add-file] Using explicit path argument: {media_path}")
return media_path, None
# PRIORITY 3: Try from pipe_obj.path (check file first before URL)
pipe_path = getattr(pipe_obj, "path", None)
if pipe_path:
pipe_path_str = str(pipe_path)
debug(f"Resolved pipe_path: {pipe_path_str}")
if pipe_path_str.startswith("hydrus:"):
file_hash = pipe_path_str.split(":", 1)[1]
store_name = getattr(pipe_obj, "store", None)
if not store_name and isinstance(pipe_obj.extra, dict):
store_name = pipe_obj.extra.get("store")
media_path, success = Add_File._fetch_hydrus_path(
file_hash, config, store_name=str(store_name).strip() if store_name else None
)
return media_path, file_hash if success else None
if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
return pipe_path_str, None
return Path(pipe_path_str), None
# PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
pipe_url = getattr(pipe_obj, "url", None)
if pipe_url and isinstance(pipe_url, str):
# Check if it's a URL
if pipe_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL in pipe_obj.url: {pipe_url}")
return pipe_url, None
# Try from hydrus hash in pipe_obj.extra or hash
hydrus_hash = None
if isinstance(pipe_obj.extra, dict):
hydrus_hash = pipe_obj.extra.get("hydrus_hash") or pipe_obj.extra.get("hash")
hydrus_hash = hydrus_hash or pipe_obj.hash
if hydrus_hash and hydrus_hash != "unknown":
store_name = getattr(pipe_obj, "store", None)
if not store_name and isinstance(pipe_obj.extra, dict):
store_name = pipe_obj.extra.get("store")
media_path, success = Add_File._fetch_hydrus_path(
str(hydrus_hash), config, store_name=str(store_name).strip() if store_name else None
)
return media_path, str(hydrus_hash) if success else None
# Try from result (if it's a string path or URL)
if isinstance(result, str):
debug(f"Checking result string: {result}")
# Check if result is a URL before treating as file path
if result.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL in result string: {result}")
return result, None # Return URL string directly
media_path = Path(result)
pipe_obj.path = str(media_path)
return media_path, None
# Try from result if it's a list (pipeline emits multiple results)
if isinstance(result, list) and result:
first_item = result[0]
# If the first item is a string, it's either a URL or a file path
if isinstance(first_item, str):
debug(f"Checking result list[0]: {first_item}")
if first_item.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL in result list: {first_item}")
return first_item, None # Return URL string directly
media_path = Path(first_item)
pipe_obj.path = str(media_path)
return media_path, None
# If the first item is a dict, interpret it as a PipeObject-style result
if isinstance(first_item, dict):
# Look for path or path-like keys
path_candidate = (
first_item.get("path") or first_item.get("filepath") or first_item.get("file")
)
# If the dict includes a 'paths' list (multi-part/section download), prefer the first file
paths_val = first_item.get("paths")
if not path_candidate and isinstance(paths_val, (list, tuple)) and paths_val:
path_candidate = paths_val[0]
if path_candidate:
debug(f"Resolved path from result dict: {path_candidate}")
try:
media_path = Path(path_candidate)
pipe_obj.path = str(media_path)
return media_path, first_item.get("hash")
except Exception:
# Fallback to returning string if not a path
return str(path_candidate), first_item.get("hash")
# If first item is a PipeObject object
try:
# models.PipeObject is an actual class; check attribute presence
import models as _models
if isinstance(first_item, _models.PipeObject):
path_candidate = getattr(first_item, "path", None)
if path_candidate:
debug(f"Resolved path from PipeObject: {path_candidate}")
media_path = Path(path_candidate)
pipe_obj.path = str(media_path)
return media_path, getattr(first_item, "hash", None)
except Exception:
pass
debug(
f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}"
)
log("File path could not be resolved")
return None, None
@staticmethod
def _scan_directory_for_files(directory: Path) -> List[Dict[str, Any]]:
"""Scan a directory for supported media files and return list of file info dicts.
Each dict contains:
- path: Path object
- name: filename
- hash: sha256 hash
- size: file size in bytes
- ext: file extension
"""
if not directory.exists() or not directory.is_dir():
return []
files_info: List[Dict[str, Any]] = []
try:
for item in directory.iterdir():
if not item.is_file():
continue
ext = item.suffix.lower()
if ext not in SUPPORTED_MEDIA_EXTENSIONS:
continue
# Compute hash
try:
file_hash = sha256_file(item)
except Exception as exc:
debug(f"Failed to hash {item}: {exc}")
continue
# Get file size
try:
size = item.stat().st_size
except Exception:
size = 0
files_info.append(
{
"path": item,
"name": item.name,
"hash": file_hash,
"size": size,
"ext": ext,
}
)
except Exception as exc:
debug(f"Error scanning directory {directory}: {exc}")
return files_info
@staticmethod
def _fetch_hydrus_path(
file_hash: str,
config: Dict[str, Any],
store_name: Optional[str] = None,
) -> Tuple[Optional[Path], bool]:
"""Fetch the physical path of a file from Hydrus using its hash."""
if not file_hash:
return None, False
try:
client = None
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_path"):
client = candidate
except Exception:
client = None
if client is None:
log(f"❌ Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return None, False
else:
client = hydrus_wrapper.get_client(config)
if not client:
log("❌ Hydrus client not available", file=sys.stderr)
return None, False
response = client.get_file_path(file_hash)
file_path_str = response.get("path")
if not file_path_str:
log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr)
return None, False
media_path = Path(file_path_str)
if not media_path.exists():
log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr)
return None, False
log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr)
return media_path, True
except Exception as exc:
log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr)
return None, False
@staticmethod
def _validate_source(media_path: Optional[Path]) -> bool:
"""Validate that the source file exists and is supported."""
if media_path is None:
return False
target_str = str(media_path)
# If it's a URL target, we skip file existence checks
if target_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
return True
if not media_path.exists() or not media_path.is_file():
log(f"File not found: {media_path}")
return False
# Validate file type
file_extension = media_path.suffix.lower()
if file_extension not in SUPPORTED_MEDIA_EXTENSIONS:
log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr)
return False
return True
@staticmethod
def _is_url_target(media_path: Optional[Path]) -> bool:
"""Check if the target is a URL that needs downloading."""
if media_path and str(media_path).lower().startswith(("http://", "https://")):
return True
return False
@staticmethod
def _sanitize_filename(value: str) -> str:
# Minimal Windows-safe filename sanitization.
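        # e.g. 'a/b:c' -> 'a_b_c'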
text = str(value or "").strip()
if not text:
return "file"
invalid = '<>:"/\\|?*'
text = "".join("_" if (ch in invalid or ord(ch) < 32) else ch for ch in text)
text = re.sub(r"\s+", " ", text).strip(" .")
return text or "file"
@staticmethod
def _parse_hydrus_file_url(file_url: str) -> Optional[str]:
"""Return the sha256 hash from a Hydrus /get_files/file URL, or None."""
try:
split = urlsplit(str(file_url))
if split.scheme.lower() not in {"http", "https"}:
return None
path_lower = (split.path or "").lower()
if "/get_files/file" not in path_lower:
return None
params = parse_qs(split.query or "")
raw = None
if "hash" in params and params["hash"]:
raw = params["hash"][0]
if not raw:
return None
hash_val = str(raw).strip().lower()
if not re.fullmatch(r"[0-9a-f]{64}", hash_val):
return None
return hash_val
except Exception:
return None
def _try_download_hydrus_file_url(
self,
*,
file_url: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
) -> Optional[tuple[Path, Path]]:
"""If *file_url* is a Hydrus file URL, download it to temp and return (path, temp_dir)."""
file_hash = self._parse_hydrus_file_url(file_url)
if not file_hash:
return None
# Resolve Hydrus backend for auth.
store_name = str(getattr(pipe_obj, "store", "") or "").strip()
if ":" in store_name:
store_name = store_name.split(":", 1)[-1].strip()
backend = None
try:
store_registry = Store(config)
if store_name and store_registry.is_available(store_name):
candidate = store_registry[store_name]
if type(candidate).__name__.lower() == "hydrusnetwork":
backend = candidate
except Exception:
backend = None
if backend is None:
try:
store_registry = Store(config)
target_prefix = str(file_url).split("/get_files/file", 1)[0].rstrip("/")
for backend_name in store_registry.list_backends():
candidate = store_registry[backend_name]
if type(candidate).__name__.lower() != "hydrusnetwork":
continue
base_url = str(getattr(candidate, "URL", "") or "").rstrip("/")
if base_url and (
target_prefix.lower() == base_url.lower()
or target_prefix.lower().startswith(base_url.lower())
):
backend = candidate
break
except Exception:
backend = None
if backend is None:
debug("[add-file] Hydrus file URL detected but no Hydrus backend matched for auth")
return None
api_key = str(getattr(backend, "API", "") or "").strip()
if not api_key:
debug(
f"[add-file] Hydrus backend '{getattr(backend, 'NAME', '') or store_name}' missing API key"
)
return None
# Best-effort filename from title + ext.
ext = ""
try:
if isinstance(pipe_obj.extra, dict):
ext = str(pipe_obj.extra.get("ext") or "").strip().lstrip(".")
except Exception:
ext = ""
if not ext:
ext = "bin"
title_hint = str(getattr(pipe_obj, "title", "") or "").strip()
base_name = (
self._sanitize_filename(title_hint) if title_hint else f"hydrus_{file_hash[:12]}"
)
temp_dir = Path(tempfile.mkdtemp(prefix="medios_hydrus_"))
destination = unique_path(temp_dir / f"{base_name}.{ext}")
headers = {"Hydrus-Client-API-Access-Key": api_key}
timeout = 60.0
try:
client = getattr(backend, "_client", None)
timeout_val = getattr(client, "timeout", None)
if timeout_val is not None:
timeout = float(timeout_val)
except Exception:
timeout = 60.0
try:
log(
f"[add-file] Downloading Hydrus file via API ({getattr(backend, 'NAME', '') or store_name})",
file=sys.stderr,
)
downloaded_bytes = hydrus_wrapper.download_hydrus_file(
str(file_url), headers, destination, timeout
)
if downloaded_bytes <= 0 and not destination.exists():
return None
return destination, temp_dir
except Exception as exc:
log(f"[add-file] Hydrus download failed: {exc}", file=sys.stderr)
try:
shutil.rmtree(temp_dir, ignore_errors=True)
except Exception:
pass
return None
def _delegate_to_download_media(
self,
result: Any,
url_str: str,
location: Optional[str],
provider_name: Optional[str],
args: Sequence[str],
config: Dict[str, Any],
) -> int:
"""Delegate URL handling to download-media cmdlet."""
log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)
# Reuse the globally-registered cmdlet instance to avoid duplicative registration
from cmdlet.download_media import CMDLET as dl_cmdlet
dl_args = list(args) if args else []
# Add the URL to the argument list for download-media
dl_args.insert(0, url_str)
# If result has selection_args (like -item from @N selection), include them
if isinstance(result, dict) and "_selection_args" in result:
selection_args = result["_selection_args"]
if selection_args:
dl_args.extend(selection_args)
else:
extra_val = getattr(result, "extra", None)
if isinstance(extra_val, dict) and "_selection_args" in extra_val:
selection_args = extra_val["_selection_args"]
if selection_args:
dl_args.extend(selection_args)
# download-media doesn't support -storage flag
# It downloads to the configured directory, then add-file will handle storage
# Note: Provider uploads (0x0) are not supported via this path
# Call download-media with the URL in args
return dl_cmdlet.run(None, dl_args, config)
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
from metadata import normalize_urls
# Prefer explicit PipeObject.url if present
urls: List[str] = []
try:
urls = normalize_urls(getattr(pipe_obj, "url", None))
except Exception:
urls = []
# Then check extra.url
if not urls:
try:
if isinstance(pipe_obj.extra, dict):
urls = normalize_urls(pipe_obj.extra.get("url"))
except Exception:
pass
# Then check result dict
if not urls and isinstance(result, dict):
urls = normalize_urls(result.get("url"))
# Finally, try extractor helper
if not urls:
urls = normalize_urls(extract_url_from_result(result))
return urls
@staticmethod
def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
try:
rels = pipe_obj.get_relationships()
if rels:
return rels
except Exception:
pass
if isinstance(result, dict) and result.get("relationships"):
return result.get("relationships")
try:
return extract_relationships(result)
except Exception:
return None
@staticmethod
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
def _parse_duration(value: Any) -> Optional[float]:
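            # Accepts plain seconds or "MM:SS"/"HH:MM:SS" strings; e.g. "1:02:03" -> 3723.0, "90" -> 90.0.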
if value is None:
return None
if isinstance(value, (int, float)):
return float(value) if value > 0 else None
if isinstance(value, str):
s = value.strip()
if not s:
return None
try:
candidate = float(s)
return candidate if candidate > 0 else None
except ValueError:
pass
if ":" in s:
parts = [p.strip() for p in s.split(":") if p.strip()]
if len(parts) in {2, 3} and all(p.isdigit() for p in parts):
nums = [int(p) for p in parts]
if len(nums) == 2:
minutes, seconds = nums
return float(minutes * 60 + seconds)
hours, minutes, seconds = nums
return float(hours * 3600 + minutes * 60 + seconds)
return None
parsed = _parse_duration(getattr(pipe_obj, "duration", None))
if parsed is not None:
return parsed
try:
return _parse_duration(extract_duration(result))
except Exception:
return None
@staticmethod
def _get_note_text(result: Any, pipe_obj: models.PipeObject, note_name: str) -> Optional[str]:
"""Extract a named note text from a piped item.
Supports:
- pipe_obj.extra["notes"][note_name]
- result["notes"][note_name] for dict results
- pipe_obj.extra[note_name] / result[note_name] as fallback
"""
def _normalize(val: Any) -> Optional[str]:
if val is None:
return None
if isinstance(val, bytes):
try:
val = val.decode("utf-8", errors="ignore")
except Exception:
val = str(val)
if isinstance(val, str):
text = val.strip()
return text if text else None
try:
text = str(val).strip()
return text if text else None
except Exception:
return None
note_key = str(note_name or "").strip()
if not note_key:
return None
# Prefer notes dict on PipeObject.extra (common for cmdlet-emitted dicts)
try:
if isinstance(pipe_obj.extra, dict):
notes_val = pipe_obj.extra.get("notes")
if isinstance(notes_val, dict) and note_key in notes_val:
return _normalize(notes_val.get(note_key))
if note_key in pipe_obj.extra:
return _normalize(pipe_obj.extra.get(note_key))
except Exception:
pass
# Fallback to raw result dict
if isinstance(result, dict):
try:
notes_val = result.get("notes")
if isinstance(notes_val, dict) and note_key in notes_val:
return _normalize(notes_val.get(note_key))
if note_key in result:
return _normalize(result.get(note_key))
except Exception:
pass
return None
@staticmethod
def _update_pipe_object_destination(
pipe_obj: models.PipeObject,
*,
hash_value: str,
store: str,
path: Optional[str],
tag: List[str],
title: Optional[str],
extra_updates: Optional[Dict[str, Any]] = None,
) -> None:
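        """Point the PipeObject at its new destination: hash, store, path, tags, and (optionally) title and extra fields."""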
pipe_obj.hash = hash_value
pipe_obj.store = store
pipe_obj.path = path
pipe_obj.tag = tag
if title:
pipe_obj.title = title
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra.update(extra_updates or {})
else:
pipe_obj.extra = dict(extra_updates or {})
@staticmethod
def _emit_pipe_object(pipe_obj: models.PipeObject) -> None:
from result_table import format_result
log(format_result(pipe_obj, title="Result"), file=sys.stderr)
ctx.emit(pipe_obj.to_dict())
ctx.set_current_stage_table(None)
@staticmethod
def _emit_storage_result(
payload: Dict[str, Any], *, overlay: bool = True, emit: bool = True
) -> None:
"""Emit a storage-style result payload.
- Always emits the dict downstream (when in a pipeline).
- If this is the last stage (or not in a pipeline), prints a search-store-like table
and sets an overlay table/items for @N selection.
"""
# Emit for downstream commands (no-op if not in a pipeline)
if emit:
ctx.emit(payload)
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if not is_last or not overlay:
return
try:
from result_table import ResultTable
table = ResultTable("Result")
table.add_result(payload)
# Overlay so @1 refers to this add-file result without overwriting search history
ctx.set_last_result_table_overlay(table, [payload], subject=payload)
except Exception:
# If table rendering fails, still keep @ selection items
try:
ctx.set_last_result_items_only([payload])
except Exception:
pass
@staticmethod
def _try_emit_search_store_by_hash(
*, store: str, hash_value: str, config: Dict[str, Any]
) -> Optional[List[Any]]:
"""Run search-store for a single hash so the final table/payload is consistent.
Important: `add-file` is treated as an action command by the CLI, so the CLI only
prints tables for it when a display overlay exists. After running search-store,
this copies the resulting table into the display overlay (when this is the last
stage) so the canonical store table is what the user sees and can select from.
Returns the emitted search-store payload items on success, else None.
"""
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
args = ["-store", str(store), f"hash:{str(hash_value)}"]
# Run search-store under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(
stage_index=0,
total_stages=1,
pipe_index=0,
worker_id=getattr(prev_ctx, "worker_id", None),
)
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return None
# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if is_last:
try:
table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if table is not None and items:
ctx.set_last_result_table_overlay(
table, items, subject={"store": store, "hash": hash_value}
)
except Exception:
pass
return emitted_items
except Exception as exc:
debug(
f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}"
)
return None
@staticmethod
def _prepare_metadata(
result: Any,
media_path: Path,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
) -> Tuple[List[str], List[str], Optional[str], Optional[str]]:
"""
Prepare tags, url, and title for the file.
Returns (tags, url, preferred_title, file_hash)
"""
tags_from_result = list(pipe_obj.tag or [])
if not tags_from_result:
try:
tags_from_result = list(extract_tag_from_result(result) or [])
except Exception:
tags_from_result = []
url_from_result = Add_File._get_url(result, pipe_obj)
preferred_title = pipe_obj.title
if not preferred_title:
for t in tags_from_result:
if str(t).strip().lower().startswith("title:"):
candidate = t.split(":", 1)[1].strip().replace("_", " ").strip()
if candidate:
preferred_title = candidate
break
if not preferred_title:
preferred_title = extract_title_from_result(result)
if preferred_title:
preferred_title = preferred_title.replace("_", " ").strip()
store = getattr(pipe_obj, "store", None)
_, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle(
media_path, store, config
)
def normalize_title_tag(tag: str) -> str:
if str(tag).strip().lower().startswith("title:"):
parts = tag.split(":", 1)
if len(parts) == 2:
value = parts[1].replace("_", " ").strip()
return f"title:{value}"
return tag
tags_from_result_no_title = [
t for t in tags_from_result if not str(t).strip().lower().startswith("title:")
]
sidecar_tags = collapse_namespace_tag(
[normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last"
)
sidecar_tags_filtered = [
t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")
]
merged_tags = merge_sequences(
tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True
)
if preferred_title:
merged_tags.append(f"title:{preferred_title}")
merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False)
file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash)
# Relationships must not be stored as tags.
# If relationship tags exist (legacy sidecar format), capture them into PipeObject.relationships
# and strip them from the final tag list.
relationship_tags = [
t
for t in merged_tags
if isinstance(t, str) and t.strip().lower().startswith("relationship:")
]
if relationship_tags:
try:
if (
not isinstance(getattr(pipe_obj, "relationships", None), dict)
or not pipe_obj.relationships
):
king: Optional[str] = None
alts: List[str] = []
for rel_tag in relationship_tags:
k, a = Add_File._parse_relationship_tag_king_alts(rel_tag)
if k and not king:
king = k
if a:
alts.extend(a)
if king:
seen_alt: set[str] = set()
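                        # Deduplicate alt hashes in order, dropping the king and anything
                        # that is not a 64-char SHA-256 hex digest.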
alts = [
h
for h in alts
if h
and h != king
and len(h) == 64
and not (h in seen_alt or seen_alt.add(h))
]
payload: Dict[str, Any] = {"king": [king]}
if alts:
payload["alt"] = alts
pipe_obj.relationships = payload
except Exception:
pass
merged_tags = [
t
for t in merged_tags
if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
]
# Persist back to PipeObject
pipe_obj.tag = merged_tags
if preferred_title and not pipe_obj.title:
pipe_obj.title = preferred_title
if file_hash and not pipe_obj.hash:
pipe_obj.hash = file_hash
if isinstance(pipe_obj.extra, dict):
pipe_obj.extra.setdefault("url", merged_url)
return merged_tags, merged_url, preferred_title, file_hash
@staticmethod
def _handle_local_export(
media_path: Path,
location: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
) -> int:
"""Handle exporting to a specific local path (Copy)."""
try:
destination_root = Path(location)
except Exception as exc:
log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr)
return 1
log(f"Exporting to local path: {destination_root}", file=sys.stderr)
result = None
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)
# Determine Filename (Title-based)
title_value = title
if not title_value:
# Try to find title in tags
title_tag = next((t for t in tags if str(t).strip().lower().startswith("title:")), None)
if title_tag:
title_value = title_tag.split(":", 1)[1].strip()
if not title_value:
title_value = media_path.stem.replace("_", " ").strip()
safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip()
base_name = safe_title or media_path.stem
new_name = base_name + media_path.suffix
destination_root.mkdir(parents=True, exist_ok=True)
target_path = destination_root / new_name
if target_path.exists():
target_path = unique_path(target_path)
# COPY Operation (Safe Export)
try:
shutil.copy2(str(media_path), target_path)
except Exception as exc:
log(f"❌ Failed to export file: {exc}", file=sys.stderr)
return 1
# Copy Sidecars
Add_File._copy_sidecars(media_path, target_path)
# Ensure hash for exported copy
if not f_hash:
try:
f_hash = sha256_file(target_path)
except Exception:
f_hash = None
# Write Metadata Sidecars (since it's an export)
relationships = Add_File._get_relationships(result, pipe_obj)
try:
write_sidecar(target_path, tags, url, f_hash)
write_metadata(
target_path, hash_value=f_hash, url=url, relationships=relationships or []
)
except Exception:
pass
# Update PipeObject and emit
extra_updates = {
"url": url,
"export_path": str(destination_root),
}
if relationships:
extra_updates["relationships"] = relationships
chosen_title = title or title_value or pipe_obj.title or target_path.name
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=f_hash or "unknown",
store="local",
path=str(target_path),
tag=tags,
title=chosen_title,
extra_updates=extra_updates,
)
Add_File._emit_pipe_object(pipe_obj)
# Cleanup
# Only delete if explicitly requested!
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
return 0
@staticmethod
def _preflight_url_duplicates_bulk(urls: Sequence[str], config: Dict[str, Any]) -> set[str]:
"""Return a set of URLs that appear to already exist in any searchable backend.
This is a best-effort check used to avoid re-downloading already-stored media when
a batch of URL items is piped into add-file.
"""
skip: set[str] = set()
try:
storage = Store(config)
backend_names = list(storage.list_searchable_backends() or [])
except Exception:
return skip
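        # Probe each searchable backend (skipping the temp store) with an exact url: query;
        # the first backend that reports a hit marks the URL as already stored.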
for raw in urls:
u = str(raw or "").strip()
if not u:
continue
for backend_name in backend_names:
try:
if str(backend_name).strip().lower() == "temp":
continue
except Exception:
pass
try:
backend = storage[backend_name]
except Exception:
continue
try:
hits = backend.search(f"url:{u}", limit=1) or []
except Exception:
hits = []
if hits:
skip.add(u)
break
return skip
@staticmethod
def _download_streaming_url_as_pipe_objects(
url: str,
config: Dict[str, Any],
*,
mode_hint: Optional[str] = None,
ytdl_format_hint: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""Download a yt-dlp-supported URL and return PipeObject-style dict(s).
This does not rely on pipeline stage context and is used so add-file can ingest
URL selections directly (download -> add to store/provider) in one invocation.
"""
url_str = str(url or "").strip()
if not url_str:
return []
try:
from cmdlet.download_media import (
CMDLET as dl_cmdlet,
_download_with_timeout,
is_url_supported_by_ytdlp,
list_formats,
_format_chapters_note,
_best_subtitle_sidecar,
_read_text_file,
)
from models import DownloadOptions
from tool.ytdlp import YtDlpTool
except Exception:
return []
if not is_url_supported_by_ytdlp(url_str):
return []
try:
from config import resolve_output_dir
out_dir = resolve_output_dir(config)
if out_dir is None:
return []
except Exception:
return []
cookies_path = None
try:
cookie_candidate = YtDlpTool(config).resolve_cookiefile()
if cookie_candidate is not None and cookie_candidate.is_file():
cookies_path = cookie_candidate
except Exception:
cookies_path = None
quiet_download = False
try:
quiet_download = bool((config or {}).get("_quiet_background_output"))
except Exception:
quiet_download = False
# Decide download mode.
# Default to video unless we have a hint or the URL appears to be audio-only.
mode = str(mode_hint or "").strip().lower() if mode_hint else ""
if mode not in {"audio", "video"}:
mode = "video"
# Best-effort: infer from formats for this URL (one-time, no playlist probing).
try:
cf = (
str(cookies_path)
if cookies_path is not None and cookies_path.is_file()
else None
)
fmts_probe = list_formats(
url_str, no_playlist=False, playlist_items=None, cookiefile=cf
)
if isinstance(fmts_probe, list) and fmts_probe:
has_video = False
for f in fmts_probe:
if not isinstance(f, dict):
continue
vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
if vcodec and vcodec != "none":
has_video = True
break
mode = "video" if has_video else "audio"
except Exception:
mode = "video"
# Pick a safe initial format selector.
# Important: yt-dlp defaults like "251/140" are YouTube-specific and break Bandcamp.
fmt_hint = str(ytdl_format_hint).strip() if ytdl_format_hint else ""
if fmt_hint:
chosen_format: Optional[str] = fmt_hint
else:
chosen_format = None
if mode == "audio":
# Generic audio selector that works across extractors.
chosen_format = "bestaudio/best"
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format=chosen_format,
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
# Download with a small amount of resilience for format errors.
try:
result_obj = _download_with_timeout(opts, timeout_seconds=300)
except Exception as exc:
msg = str(exc)
# If a format is invalid/unsupported, try:
# - if only one format exists, retry with that id
# - else for audio-only sources, retry with bestaudio/best
try:
format_error = "Requested format is not available" in msg
except Exception:
format_error = False
if format_error:
try:
cf = (
str(cookies_path)
if cookies_path is not None and cookies_path.is_file()
else None
)
fmts = list_formats(
url_str, no_playlist=False, playlist_items=None, cookiefile=cf
)
if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict):
fid = str(fmts[0].get("format_id") or "").strip()
if fid:
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format=fid,
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
result_obj = _download_with_timeout(opts, timeout_seconds=300)
# proceed
else:
raise
elif mode == "audio" and (
not chosen_format or chosen_format != "bestaudio/best"
):
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format="bestaudio/best",
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
result_obj = _download_with_timeout(opts, timeout_seconds=300)
else:
raise
except Exception as exc2:
log(f"[add-file] Download failed for {url_str}: {exc2}", file=sys.stderr)
return []
else:
log(f"[add-file] Download failed for {url_str}: {exc}", file=sys.stderr)
return []
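        # Normalize the download result into a list of per-file results; section downloads
        # can produce several output paths from a single URL.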
results: List[Any]
if isinstance(result_obj, list):
results = list(result_obj)
else:
paths = getattr(result_obj, "paths", None)
if isinstance(paths, list) and paths:
# Section downloads: create one result per file.
from models import DownloadMediaResult
results = []
for p in paths:
try:
p_path = Path(p)
except Exception:
continue
if not p_path.exists() or p_path.is_dir():
continue
try:
hv = sha256_file(p_path)
except Exception:
hv = None
try:
results.append(
DownloadMediaResult(
path=p_path,
info=getattr(result_obj, "info", {}) or {},
tag=list(getattr(result_obj, "tag", []) or []),
source_url=getattr(result_obj, "source_url", None) or url_str,
hash_value=hv,
)
)
except Exception:
continue
else:
results = [result_obj]
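        # Convert each downloaded file into a PipeObject-style dict, attaching chapter and
        # subtitle notes and marking it as a temp artifact so add-file can clean up later.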
out: List[Dict[str, Any]] = []
for downloaded in results:
try:
po = dl_cmdlet._build_pipe_object(downloaded, url_str, opts)
# Attach chapter timestamps note (best-effort).
try:
info = (
downloaded.info
if isinstance(getattr(downloaded, "info", None), dict)
else {}
)
except Exception:
info = {}
try:
chapters_text = _format_chapters_note(info)
except Exception:
chapters_text = None
if chapters_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("chapters", chapters_text)
po["notes"] = notes
# Capture subtitle sidecar into notes and remove it so add-file won't ingest it later.
try:
media_path = Path(str(po.get("path") or ""))
except Exception:
media_path = None
if media_path is not None and media_path.exists() and media_path.is_file():
try:
sub_path = _best_subtitle_sidecar(media_path)
except Exception:
sub_path = None
if sub_path is not None:
sub_text = _read_text_file(sub_path)
if sub_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
try:
sub_path.unlink()
except Exception:
pass
# Mark as temp artifact from download-media so add-file can auto-delete after ingest.
po["action"] = "cmdlet:download-media"
po["is_temp"] = True
out.append(po)
except Exception:
continue
return out
@staticmethod
def _download_soulseek_file(result: Any, config: Dict[str, Any]) -> Optional[Path]:
"""
Download a file from Soulseek peer.
Extracts username and filename from soulseek result metadata and initiates download.
"""
try:
import asyncio
from ProviderCore.registry import download_soulseek_file
from pathlib import Path
# Extract metadata from result
full_metadata = {}
if isinstance(result, dict):
full_metadata = result.get("full_metadata", {})
elif (
hasattr(result, "extra")
and isinstance(result.extra, dict)
and "full_metadata" in result.extra
):
full_metadata = result.extra.get("full_metadata", {})
elif hasattr(result, "full_metadata"):
# Direct attribute access (fallback)
val = getattr(result, "full_metadata", {})
if isinstance(val, dict):
full_metadata = val
username = full_metadata.get("username")
filename = full_metadata.get("filename")
            if not username or not filename:
                debug(
                    f"[add-file] ERROR: Missing soulseek metadata "
                    f"(username={username}, filename={filename}) from result "
                    f"(type={type(result).__name__})"
                )
                extra_val = getattr(result, "extra", None)
                if isinstance(extra_val, dict):
                    debug(f"[add-file] Result extra keys: {list(extra_val.keys())}")
                return None
debug(f"[add-file] Starting soulseek download: {username} -> {filename}")
# Read Soulseek login credentials from config (client credentials), separate from peer username.
try:
from config import get_soulseek_username, get_soulseek_password
client_user = get_soulseek_username(config) or ""
client_pass = get_soulseek_password(config) or ""
except Exception:
client_user = ""
client_pass = ""
if not client_user or not client_pass:
debug(
"[add-file] ERROR: Soulseek credentials missing (set [provider=soulseek] username/password in config.conf)"
)
return None
# Determine output directory (prefer downloads folder in config)
output_dir = (
Path(config.get("output_dir", "./downloads"))
if isinstance(config.get("output_dir"), str)
else Path("./downloads")
)
output_dir.mkdir(parents=True, exist_ok=True)
# Run async download in event loop
try:
loop = asyncio.get_event_loop()
if loop.is_closed():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
downloaded_path = loop.run_until_complete(
download_soulseek_file(
username=username,
filename=filename,
output_dir=output_dir,
client_username=client_user,
client_password=client_pass,
timeout=1200, # 20 minutes
)
)
return downloaded_path
except Exception as e:
log(f"[add-file] Soulseek download error: {type(e).__name__}: {e}", file=sys.stderr)
debug(f"[add-file] Soulseek download traceback: {e}")
return None
@staticmethod
def _handle_matrix_upload(
media_path: Path,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
*,
room_id: str,
) -> int:
"""Upload to Matrix and update the PipeObject.
Matrix needs a room_id. If you don't have one, use the interactive
room picker path which resumes via `-matrix-send`.
"""
from Provider.matrix import Matrix
log(f"Uploading via matrix: {media_path.name}", file=sys.stderr)
try:
provider = Matrix(config)
except Exception as exc:
log(f"Matrix not available: {exc}", file=sys.stderr)
return 1
try:
hoster_url = provider.upload_to_room(str(media_path), str(room_id), pipe_obj=pipe_obj)
log(f"File uploaded: {hoster_url}", file=sys.stderr)
f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
except Exception as exc:
log(f"Upload failed: {exc}", file=sys.stderr)
return 1
# Update PipeObject and emit
extra_updates: Dict[str, Any] = {
"provider": "matrix",
"provider_url": hoster_url,
"room_id": str(room_id),
}
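        # Track the Matrix content URL alongside any previously known URLs.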
if isinstance(pipe_obj.extra, dict):
existing_known = list(pipe_obj.extra.get("url") or [])
if hoster_url and hoster_url not in existing_known:
existing_known.append(hoster_url)
extra_updates["url"] = existing_known
file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=f_hash or "unknown",
store="matrix",
path=file_path,
tag=pipe_obj.tag,
title=pipe_obj.title or (media_path.name if media_path else None),
extra_updates=extra_updates,
)
Add_File._emit_pipe_object(pipe_obj)
Add_File._cleanup_after_success(media_path, delete_source=bool(delete_after))
return 0
@staticmethod
def _matrix_prompt_room_selection(
pending_items: List[Dict[str, Any]],
config: Dict[str, Any],
original_args: List[str],
) -> int:
"""Show rooms table and pause pipeline for @N selection."""
from Provider.matrix import Matrix
# Stash pending uploads so @N on the matrix table can trigger Matrix.upload_to_room.
ctx.store_value("matrix_pending_uploads", pending_items)
try:
provider = Matrix(config)
except Exception as exc:
log(f"Matrix not available: {exc}", file=sys.stderr)
return 1
try:
rooms = provider.list_rooms()
except Exception as exc:
log(f"Failed to list Matrix rooms: {exc}", file=sys.stderr)
return 1
if not rooms:
log("No joined rooms found.", file=sys.stderr)
return 0
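        # Render a selectable rooms table; the parallel room_items list carries the room_id
        # used when a row is chosen with @N.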
table = ResultTable("Matrix Rooms")
table.set_table("matrix")
table.set_source_command("add-file", list(original_args or []))
for room in rooms:
row = table.add_row()
name = str(room.get("name") or "").strip() if isinstance(room, dict) else ""
rid = str(room.get("room_id") or "").strip() if isinstance(room, dict) else ""
row.add_column("Name", name)
row.add_column("Room", rid)
room_items: List[Dict[str, Any]] = []
for room in rooms:
if not isinstance(room, dict):
continue
rid = str(room.get("room_id") or "").strip()
name = str(room.get("name") or "").strip()
room_items.append(
{
**room,
"store": "matrix",
"provider": "matrix",
"title": name or rid or "Matrix Room",
}
)
# Overlay table: user selects @N on this Matrix rooms table to upload.
ctx.set_last_result_table_overlay(table, room_items)
ctx.set_current_stage_table(table)
print()
from rich_display import stdout_console
stdout_console().print(table)
print("\nSelect room(s) with @N (e.g. @1 or @1-3) to upload the selected item(s)")
return 0
@staticmethod
def _handle_provider_upload(
media_path: Path,
provider_name: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
) -> int:
"""Handle uploading to a file provider (e.g. 0x0)."""
from ProviderCore.registry import get_file_provider
log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)
try:
file_provider = get_file_provider(provider_name, config)
if not file_provider:
log(f"File provider '{provider_name}' not available", file=sys.stderr)
return 1
hoster_url = file_provider.upload(str(media_path), pipe_obj=pipe_obj)
log(f"File uploaded: {hoster_url}", file=sys.stderr)
f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None)
except Exception as exc:
log(f"Upload failed: {exc}", file=sys.stderr)
return 1
# Update PipeObject and emit
extra_updates: Dict[str, Any] = {
"provider": provider_name,
"provider_url": hoster_url,
}
if isinstance(pipe_obj.extra, dict):
# Also track hoster URL as a url for downstream steps
existing_known = list(pipe_obj.extra.get("url") or [])
if hoster_url and hoster_url not in existing_known:
existing_known.append(hoster_url)
extra_updates["url"] = existing_known
file_path = pipe_obj.path or (str(media_path) if media_path else None) or ""
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=f_hash or "unknown",
store=provider_name or "provider",
path=file_path,
tag=pipe_obj.tag,
title=pipe_obj.title or (media_path.name if media_path else None),
extra_updates=extra_updates,
)
Add_File._emit_pipe_object(pipe_obj)
Add_File._cleanup_after_success(media_path, delete_source=delete_after)
return 0
@staticmethod
def _handle_storage_backend(
result: Any,
media_path: Path,
backend_name: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
*,
collect_payloads: Optional[List[Dict[str, Any]]] = None,
collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
defer_url_association: bool = False,
pending_url_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_store: bool = True,
) -> int:
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
##log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
delete_after_effective = bool(delete_after)
if not delete_after_effective:
# When download-media is piped into add-file, the downloaded artifact is a temp file.
# After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
try:
if (
str(backend_name or "").strip().lower() != "temp"
and getattr(pipe_obj, "is_temp", False)
and getattr(pipe_obj, "action", None) == "cmdlet:download-media"
):
from config import resolve_output_dir
temp_dir = resolve_output_dir(config)
try:
if media_path.resolve().is_relative_to(temp_dir.expanduser().resolve()):
delete_after_effective = True
debug(f"[add-file] Auto-delete temp source after ingest: {media_path}")
except Exception:
# If path resolution fails, fall back to non-destructive behavior
pass
except Exception:
pass
try:
store = Store(config)
backend = store[backend_name]
# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(
result, media_path, pipe_obj, config
)
# If we're moving/copying from one store to another, also copy the source store's
# existing associated URLs so they aren't lost.
try:
from metadata import normalize_urls
source_store = None
source_hash = None
if isinstance(result, dict):
source_store = result.get("store")
source_hash = result.get("hash")
if not source_store:
source_store = getattr(pipe_obj, "store", None)
if not source_hash:
source_hash = getattr(pipe_obj, "hash", None)
if (not source_hash) and isinstance(pipe_obj.extra, dict):
source_hash = pipe_obj.extra.get("hash")
source_store = str(source_store or "").strip()
source_hash = str(source_hash or "").strip().lower()
if (
source_store
and source_hash
and len(source_hash) == 64
and source_store.lower() != str(backend_name or "").strip().lower()
):
source_backend = None
try:
if source_store in store.list_backends():
source_backend = store[source_store]
except Exception:
source_backend = None
if source_backend is not None:
try:
src_urls = normalize_urls(source_backend.get_url(source_hash) or [])
except Exception:
src_urls = []
try:
dst_urls = normalize_urls(url or [])
except Exception:
dst_urls = []
merged: list[str] = []
seen: set[str] = set()
for u in list(dst_urls or []) + list(src_urls or []):
if not u:
continue
if u in seen:
continue
seen.add(u)
merged.append(u)
url = merged
except Exception:
pass
# Collect relationship pairs for post-ingest DB/API persistence.
if collect_relationship_pairs is not None:
rels = Add_File._get_relationships(result, pipe_obj)
if isinstance(rels, dict) and rels:
king_hash, alt_hashes = Add_File._parse_relationships_king_alts(rels)
if king_hash and alt_hashes:
bucket = collect_relationship_pairs.setdefault(str(backend_name), set())
for alt_hash in alt_hashes:
if alt_hash and alt_hash != king_hash:
bucket.add((alt_hash, king_hash))
# Relationships must never be stored as tags.
if isinstance(tags, list) and tags:
tags = [
t
for t in tags
if not (isinstance(t, str) and t.strip().lower().startswith("relationship:"))
]
# Call backend's add_file with full metadata
# Backend returns hash as identifier
file_identifier = backend.add_file(
media_path,
title=title,
tag=tags,
url=[] if (defer_url_association and url) else url,
)
##log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
stored_path: Optional[str] = None
# IMPORTANT: avoid calling get_file() for remote backends.
# For Hydrus, get_file() returns a browser URL (often with an access key) and should
# only be invoked by explicit user commands (e.g. get-file).
try:
if type(backend).__name__ == "Folder":
maybe_path = backend.get_file(file_identifier)
if isinstance(maybe_path, Path):
stored_path = str(maybe_path)
elif isinstance(maybe_path, str) and maybe_path:
stored_path = maybe_path
except Exception:
stored_path = None
Add_File._update_pipe_object_destination(
pipe_obj,
hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown",
store=backend_name,
path=stored_path,
tag=tags,
title=title or pipe_obj.title or media_path.name,
extra_updates={
"url": url,
},
)
# Emit a search-store-like payload for consistent tables and natural piping.
# Keep hash/store for downstream commands (get-tag, get-file, etc.).
resolved_hash = (
file_identifier
if len(file_identifier) == 64
else (f_hash or file_identifier or "unknown")
)
# If we have url(s), ensure they get associated with the destination file.
# This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
if url:
if defer_url_association and pending_url_associations is not None:
try:
pending_url_associations.setdefault(str(backend_name), []).append(
(str(resolved_hash), list(url))
)
except Exception:
pass
else:
try:
backend.add_url(resolved_hash, list(url))
except Exception:
pass
# If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
# persist it automatically like add-note would.
sub_note = Add_File._get_note_text(result, pipe_obj, "sub")
if sub_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "sub", sub_note)
except Exception:
pass
chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters")
if chapters_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "chapters", chapters_note)
except Exception:
pass
meta: Dict[str, Any] = {}
try:
meta = backend.get_metadata(resolved_hash) or {}
except Exception:
meta = {}
# Determine size bytes
size_bytes: Optional[int] = None
for key in ("size_bytes", "size", "filesize", "file_size"):
try:
raw_size = meta.get(key)
if raw_size is not None:
size_bytes = int(raw_size)
break
except Exception:
pass
if size_bytes is None:
try:
size_bytes = int(media_path.stat().st_size)
except Exception:
size_bytes = None
# Determine title/ext
title_out = (
meta.get("title") or title or pipe_obj.title or media_path.stem or media_path.name
)
ext_out = meta.get("ext") or media_path.suffix.lstrip(".")
payload: Dict[str, Any] = {
"title": title_out,
"ext": str(ext_out or ""),
"size_bytes": size_bytes,
"store": backend_name,
"hash": resolved_hash,
# Preserve extra fields for downstream commands (kept hidden by default table rules)
"path": stored_path,
"tag": list(tags or []),
"url": list(url or []),
}
if collect_payloads is not None:
try:
collect_payloads.append(payload)
except Exception:
pass
# Keep the add-file 1-row summary overlay (when last stage), then emit the
# canonical search-store payload/table for piping/selection consistency.
if auto_search_store and resolved_hash and resolved_hash != "unknown":
# Show the add-file summary (overlay only) but let search-store provide the downstream payload.
Add_File._emit_storage_result(
payload, overlay=not suppress_last_stage_overlay, emit=False
)
refreshed_items = Add_File._try_emit_search_store_by_hash(
store=backend_name,
hash_value=resolved_hash,
config=config,
)
if refreshed_items:
# Re-emit the canonical store rows so downstream stages receive them.
for emitted in refreshed_items:
ctx.emit(emitted)
else:
# Fall back to emitting the add-file payload so downstream stages still receive an item.
ctx.emit(payload)
else:
Add_File._emit_storage_result(
payload, overlay=not suppress_last_stage_overlay, emit=True
)
Add_File._cleanup_after_success(media_path, delete_source=delete_after_effective)
return 0
except Exception as exc:
log(f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
# --- Helpers ---
@staticmethod
def _apply_pending_url_associations(
pending: Dict[str, List[tuple[str, List[str]]]], config: Dict[str, Any]
) -> None:
"""Apply deferred URL associations in bulk, grouped per backend."""
try:
store = Store(config)
except Exception:
return
for backend_name, pairs in (pending or {}).items():
if not pairs:
continue
try:
backend = store[backend_name]
except Exception:
continue
# Merge URLs per hash and de-duplicate.
merged: Dict[str, List[str]] = {}
for file_hash, urls in pairs:
h = str(file_hash or "").strip().lower()
if len(h) != 64:
continue
url_list: List[str] = []
try:
for u in urls or []:
s = str(u or "").strip()
if s:
url_list.append(s)
except Exception:
url_list = []
if not url_list:
continue
bucket = merged.setdefault(h, [])
seen = set(bucket)
for u in url_list:
if u in seen:
continue
seen.add(u)
bucket.append(u)
items: List[tuple[str, List[str]]] = [(h, u) for h, u in merged.items() if u]
if not items:
continue
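            # Prefer the backend's bulk URL-association API when available; otherwise fall
            # back to one add_url call per hash.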
bulk = getattr(backend, "add_url_bulk", None)
if callable(bulk):
try:
bulk(items)
continue
except Exception:
pass
single = getattr(backend, "add_url", None)
if callable(single):
for h, u in items:
try:
single(h, u)
except Exception:
continue
@staticmethod
def _load_sidecar_bundle(
media_path: Path,
store: Optional[str],
config: Dict[str, Any],
) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]:
"""Load sidecar metadata."""
if store and store.lower() == "local":
try:
from config import get_local_storage_path
db_root = get_local_storage_path(config)
if db_root:
with API_folder_store(Path(db_root)) as db:
file_hash = db.get_file_hash(media_path)
if file_hash:
tags = db.get_tags(file_hash) or []
metadata = db.get_metadata(file_hash) or {}
url = metadata.get("url") or []
f_hash = metadata.get("hash") or file_hash
if tags or url or f_hash:
return None, f_hash, tags, url
except Exception:
pass
try:
sidecar_path = find_sidecar(media_path)
if sidecar_path and sidecar_path.exists():
h, t, u = read_sidecar(sidecar_path)
return sidecar_path, h, t or [], u or []
except Exception:
pass
return None, None, [], []
@staticmethod
def _resolve_file_hash(
result: Any,
media_path: Path,
pipe_obj: models.PipeObject,
fallback_hash: Optional[str],
) -> Optional[str]:
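        """Resolve the file hash: PipeObject, then sidecar fallback, then result dict, then hash the file."""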
if pipe_obj.hash and pipe_obj.hash != "unknown":
return pipe_obj.hash
if fallback_hash:
return fallback_hash
if isinstance(result, dict):
candidate = result.get("hash")
if candidate:
return str(candidate)
try:
return sha256_file(media_path)
except Exception:
return None
@staticmethod
def _resolve_media_kind(path: Path) -> str:
        """Classify a file by extension into audio/video/image/document/other."""
suffix = path.suffix.lower()
if suffix in {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka"}:
return "audio"
if suffix in {
".mp4",
".mkv",
".webm",
".mov",
".avi",
".flv",
".mpg",
".mpeg",
".ts",
".m4v",
".wmv",
}:
return "video"
if suffix in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}:
return "image"
if suffix in {".pdf", ".epub", ".txt", ".mobi", ".azw3", ".cbz", ".cbr", ".doc", ".docx"}:
return "document"
return "other"
@staticmethod
def _persist_local_metadata(
library_root: Path,
dest_path: Path,
tags: List[str],
url: List[str],
f_hash: Optional[str],
relationships: Any,
duration: Any,
media_kind: str,
):
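        """Record file metadata and tags in the local folder-store database."""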
payload = {
"hash": f_hash,
"url": url,
"relationships": relationships or [],
"duration": duration,
"size": None,
"ext": dest_path.suffix.lower(),
"media_type": media_kind,
"media_kind": media_kind,
}
try:
payload["size"] = dest_path.stat().st_size
except OSError:
payload["size"] = None
with API_folder_store(library_root) as db:
try:
db.save_file_info(dest_path, payload, tags)
except Exception as exc:
log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr)
@staticmethod
def _copy_sidecars(source_path: Path, target_path: Path):
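        """Copy any known sidecar files (.json/.tag/.metadata/.notes) next to the target file."""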
possible_sidecars = [
source_path.with_suffix(source_path.suffix + ".json"),
source_path.with_name(source_path.name + ".tag"),
source_path.with_name(source_path.name + ".metadata"),
source_path.with_name(source_path.name + ".notes"),
]
for sc in possible_sidecars:
try:
if sc.exists():
suffix_part = sc.name.replace(source_path.name, "", 1)
dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}"
dest_sidecar.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(sc), dest_sidecar)
except Exception:
pass
@staticmethod
def _cleanup_after_success(media_path: Path, delete_source: bool):
        """Delete the source file and its sidecars after a successful add.

        Temporary merge artifacts ("(merged)" / ".dlhx_" files) are always removed;
        other sources are deleted only when explicitly requested.
        """
        is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name
        if not (delete_source or is_temp_merge):
            return
        try:
            media_path.unlink()
            Add_File._cleanup_sidecar_files(media_path)
        except Exception as exc:
            log(f"⚠️ Could not delete file: {exc}", file=sys.stderr)
@staticmethod
def _cleanup_sidecar_files(media_path: Path):
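        """Remove sidecar files (.metadata/.notes/.tag) left behind for a deleted media file."""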
targets = [
media_path.parent / (media_path.name + ".metadata"),
media_path.parent / (media_path.name + ".notes"),
media_path.parent / (media_path.name + ".tag"),
]
for target in targets:
try:
if target.exists():
target.unlink()
except Exception:
pass
# Create and register the cmdlet
CMDLET = Add_File()