dfdfsdd

@@ -5,10 +5,9 @@ from __future__ import annotations

import json
import sys
import inspect
from collections.abc import Iterable as IterableABC

from SYS.logger import log, debug
from SYS.logger import log
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
from dataclasses import dataclass, field
@@ -690,7 +689,9 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
get_field(result, "table", "unknown") # With default
"""
# Handle lists by accessing the first element
if isinstance(obj, list) and obj:
if isinstance(obj, list):
if not obj:
return default
obj = obj[0]

if isinstance(obj, dict):
@@ -702,8 +703,9 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
return value

# For PipeObjects, also check the extra field
if hasattr(obj, 'extra') and isinstance(obj.extra, dict):
return obj.extra.get(field, default)
extra_val = getattr(obj, 'extra', None)
if isinstance(extra_val, dict):
return extra_val.get(field, default)

return default

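A minimal illustrative sketch (not part of this commit) of how the reworked get_field behaves; the sample list/dict below is invented, and the assertions assume dict keys are looked up directly as the surrounding hunk suggests:

sample = [{"table": "openlibrary", "title": "Dune"}]
assert get_field(sample, "table", "unknown") == "openlibrary"   # first list element is used
assert get_field(sample, "missing", "unknown") == "unknown"     # absent key falls back to the default
assert get_field([], "table", "unknown") == "unknown"           # empty lists now return the default instead of indexing
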
@@ -1118,7 +1120,7 @@ def create_pipe_object_result(
Returns:
Dict with all PipeObject fields for emission
"""
result = {
result: Dict[str, Any] = {
'source': source,
'id': identifier,
'path': file_path,
@@ -1546,14 +1548,11 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
extra = {k: v for k, v in value.items() if k not in known_keys}

# Extract URL: prefer direct url field, then url list
url_val = value.get("url")
if not url_val:
url = value.get("url") or value.get("url") or []
if url and isinstance(url, list) and len(url) > 0:
url_val = url[0]
# Preserve url in extra if multiple url exist
if url and len(url) > 1:
extra["url"] = url
from metadata import normalize_urls
url_list = normalize_urls(value.get("url"))
url_val = url_list[0] if url_list else None
if len(url_list) > 1:
extra["url"] = url_list

# Extract relationships
rels = value.get("relationships") or {}

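A short sketch of the new URL extraction path above (not part of the diff), assuming metadata.normalize_urls returns a list of cleaned URL strings; the input dict is made up for illustration:

from metadata import normalize_urls

value = {"url": ["https://example.org/a", "https://example.org/b"], "title": "x"}
extra = {}
url_list = normalize_urls(value.get("url"))
url_val = url_list[0] if url_list else None   # primary URL stored on the PipeObject
if len(url_list) > 1:
    extra["url"] = url_list                   # remaining URLs preserved in the extra dict
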
@@ -1,14 +1,16 @@
from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, Tuple, List, Union
from typing import Any, Dict, Optional, Sequence, Tuple, List
from pathlib import Path
import sys
import shutil
import tempfile

import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from ._shared import (
Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs,
@@ -20,8 +22,8 @@ from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_sto
from SYS.utils import sha256_file, unique_path
from metadata import write_metadata

# Use official Hydrus supported filetypes from hydrus_wrapper
SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS
# Canonical supported filetypes for all stores/cmdlets
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS

class Add_File(Cmdlet):
"""Add file into the DB"""
@@ -53,93 +55,210 @@ class Add_File(Cmdlet):

def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution entry point."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)

# Initialize state
path_arg = parsed.get("path")
location = parsed.get("store") # Fixed: was "storage", should be "store"
location = parsed.get("store")
provider_name = parsed.get("provider")
delete_after = parsed.get("delete", False)

# Coerce result to PipeObject; if result is a list, prefer the first element
effective_result = result
if isinstance(result, list) and result:
first_item = result[0]
# Prefer first item if it's a dict or PipeObject
if isinstance(first_item, (dict, )):
effective_result = first_item
pipe_obj = coerce_to_pipe_object(effective_result, path_arg)
stage_ctx = ctx.get_stage_context()
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))

# Decide which items to process.
# - If user provided -path, treat this invocation as single-item.
# - Otherwise, if piped input is a list, ingest each item.
if path_arg:
items_to_process: List[Any] = [result]
elif isinstance(result, list) and result:
items_to_process = list(result)
else:
items_to_process = [result]

# Debug: Log input result details
debug(f"[add-file] INPUT result type={type(result).__name__}")
if isinstance(result, list):
debug(f"[add-file] INPUT result is list with {len(result)} items")
if result and isinstance(result[0], dict):
first = result[0]
hash_val = first.get('hash')
hash_str = hash_val[:12] + "..." if hash_val else "N/A"
debug(f"[add-file] First item details: title={first.get('title')}, hash={hash_str}, store={first.get('store', 'N/A')}")
elif isinstance(result, dict):
hash_val = result.get('hash')
hash_str = hash_val[:12] + "..." if hash_val else "N/A"
debug(f"[add-file] INPUT result is dict: title={result.get('title')}, hash={hash_str}, store={result.get('store', 'N/A')}")

# Debug: Log parsed arguments
debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}")

# Resolve source - returns (media_path_or_url, file_hash)
media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config)
debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
if not media_path_or_url:
debug(f"[add-file] ERROR: Could not resolve source file/URL")
return 1

# Update pipe_obj with resolved path
pipe_obj.path = str(media_path_or_url) if isinstance(media_path_or_url, (str, Path)) else str(media_path_or_url)

# Check if it's a URL before validating as file
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
debug(f"Detected URL target, delegating to download-data: {media_path_or_url}")
return self._delegate_to_download_data(result, media_path_or_url, location, provider_name, args, config)
collected_payloads: List[Dict[str, Any]] = []
successes = 0
failures = 0

# Convert to Path and validate
media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url

# Validate source
if not self._validate_source(media_path):
debug(f"[add-file] ERROR: Source validation failed for {media_path}")
return 1
# Only run the search-store refresh when add-file is the last stage.
# In the middle of a pipeline, downstream cmdlets should receive the emitted
# storage payload directly (no need to re-search and risk duplicate emits).
auto_search_store_after_add = bool(is_last_stage) and len(items_to_process) == 1

# Debug: Log execution path decision
debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}")
debug(f" media_path={media_path}, exists={media_path.exists()}")
for item in items_to_process:
pipe_obj = coerce_to_pipe_object(item, path_arg)

# Execute transfer based on destination (using Store registry)
if provider_name:
debug(f"[add-file] ROUTE: file provider upload")
return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after)
elif location:
# Check if location is a registered backend name
temp_dir_to_cleanup: Optional[Path] = None
delete_after_item = delete_after
try:
store = Store(config)
backends = store.list_backends()

if location in backends:
debug(f"[add-file] ROUTE: storage backend '{location}'")
return self._handle_storage_backend(media_path, location, pipe_obj, config, delete_after)
else:
# Treat as local export path
debug(f"[add-file] ROUTE: local export to path '{location}'")
return self._handle_local_export(media_path, location, pipe_obj, config, delete_after)
except Exception as exc:
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
log(f"Invalid location: {location}", file=sys.stderr)
return 1
else:
debug(f"[add-file] ERROR: No location or provider specified")
log(f"No storage location or provider specified", file=sys.stderr)
return 1
media_path_or_url, file_hash = self._resolve_source(item, path_arg, pipe_obj, config)
debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...")
if not media_path_or_url:
failures += 1
continue

# Update pipe_obj with resolved path
pipe_obj.path = str(media_path_or_url)

# URL targets: prefer provider-aware download for OpenLibrary selections.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
table = None
full_metadata = None
if isinstance(pipe_obj.extra, dict):
table = pipe_obj.extra.get("table")
full_metadata = pipe_obj.extra.get("full_metadata")

is_openlibrary = (str(table or "").lower() == "openlibrary") or ("openlibrary.org/books/" in media_path_or_url.lower())
if is_openlibrary:
# Enrich tags from OpenLibrary metadata so the stored file has book tags (author/pages/etc).
try:
from Provider.openlibrary import OpenLibrary as _OpenLibrary

olid = None
archive_id = None
if isinstance(full_metadata, dict):
olid = full_metadata.get("openlibrary_id") or full_metadata.get("openlibrary")
archive_id = full_metadata.get("archive_id")

if not olid:
import re
m = re.search(r"/books/(OL\d+M)", str(media_path_or_url), flags=re.IGNORECASE)
if m:
olid = m.group(1)

scraped_tags: List[str] = []
if olid:
scraped_tags.extend(_OpenLibrary.scrape_openlibrary_metadata(str(olid)) or [])
if archive_id:
scraped_tags.append(f"internet_archive:{archive_id}")

if scraped_tags:
existing = list(pipe_obj.tag or [])
pipe_obj.tag = merge_sequences(existing, scraped_tags, case_sensitive=False)
except Exception:
pass

from ProviderCore.registry import get_search_provider
from ProviderCore.base import SearchResult

provider = get_search_provider("openlibrary", config)
if provider is None:
log("[add-file] OpenLibrary provider not available", file=sys.stderr)
failures += 1
continue

temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))
sr = SearchResult(
table="openlibrary",
title=str(getattr(pipe_obj, "title", None) or "Unknown"),
path=str(media_path_or_url),
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
)
downloaded = provider.download(sr, temp_dir_to_cleanup)
if downloaded is None:
log("[add-file] OpenLibrary download failed", file=sys.stderr)
failures += 1
continue

downloaded_path = Path(downloaded)
if downloaded_path.exists() and downloaded_path.is_dir():
log(
"[add-file] OpenLibrary download produced a directory (missing img2pdf?). Cannot ingest.",
file=sys.stderr,
)
failures += 1
continue

media_path_or_url = str(downloaded_path)
pipe_obj.path = str(downloaded_path)
delete_after_item = True

# For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
if code == 0:
successes += 1
else:
failures += 1
continue

media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url

if not self._validate_source(media_path):
failures += 1
continue

if provider_name:
code = self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after_item)
if code == 0:
successes += 1
else:
failures += 1
continue

if location:
try:
store = Store(config)
backends = store.list_backends()
if location in backends:
code = self._handle_storage_backend(
item,
media_path,
location,
pipe_obj,
config,
delete_after_item,
collect_payloads=collected_payloads,
suppress_last_stage_overlay=is_last_stage and len(items_to_process) > 1,
auto_search_store=auto_search_store_after_add,
)
else:
code = self._handle_local_export(media_path, location, pipe_obj, config, delete_after_item)
except Exception as exc:
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
log(f"Invalid location: {location}", file=sys.stderr)
failures += 1
continue

if code == 0:
successes += 1
else:
failures += 1
continue

log("No destination specified", file=sys.stderr)
failures += 1
finally:
if temp_dir_to_cleanup is not None:
try:
shutil.rmtree(temp_dir_to_cleanup, ignore_errors=True)
except Exception:
pass

# If we processed multiple storage ingests, present a single consolidated overlay table.
if is_last_stage and len(items_to_process) > 1 and collected_payloads:
try:
from result_table import ResultTable

table = ResultTable("Result")
for payload in collected_payloads:
table.add_result(payload)
# Make this the active selectable table so @.. returns here (and playlist table is kept in history).
ctx.set_last_result_table(table, collected_payloads, subject=collected_payloads)
except Exception:
pass

if successes > 0:
return 0
return 1

@staticmethod
def _resolve_source(
@@ -149,10 +268,7 @@ class Add_File(Cmdlet):
config: Dict[str, Any],
) -> Tuple[Optional[Path | str], Optional[str]]:
"""Resolve the source file path from args or pipeline result.

PRIORITY: hash+store pattern is preferred over path-based resolution.
This ensures consistency when @N selections pass hash+store identifiers.

Returns (media_path_or_url, file_hash)
where media_path_or_url can be a Path object or a URL string.
"""
@@ -161,8 +277,9 @@ class Add_File(Cmdlet):
result_hash = result.get("hash")
result_store = result.get("store")
if result_hash and result_store:
debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}")
# Use get_file to retrieve from the specific store
debug(
f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}"
)
try:
store = Store(config)
if result_store in store.list_backends():
@@ -170,16 +287,15 @@ class Add_File(Cmdlet):
media_path = backend.get_file(result_hash)
if isinstance(media_path, Path) and media_path.exists():
pipe_obj.path = str(media_path)
debug(f"[add-file] Retrieved file from {result_store}: {media_path}")
return media_path, result_hash

if isinstance(media_path, str) and media_path.lower().startswith(("http://", "https://")):
return media_path, str(result_hash)
if isinstance(media_path, str) and media_path.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
pipe_obj.path = media_path
debug(f"[add-file] Retrieved URL from {result_store}: {media_path}")
return media_path, result_hash
return media_path, str(result_hash)
except Exception as exc:
debug(f"[add-file] Failed to retrieve via hash+store: {exc}")

# PRIORITY 2: Try explicit path argument
if path_arg:
media_path = Path(path_arg)
@@ -196,10 +312,9 @@ class Add_File(Cmdlet):
file_hash = pipe_path_str.split(":", 1)[1]
media_path, success = Add_File._fetch_hydrus_path(file_hash, config)
return media_path, file_hash if success else None
# Check if pipe_path is a URL - skip to URL handling below
if not pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
media_path = Path(pipe_path_str)
return media_path, None
if pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
return pipe_path_str, None
return Path(pipe_path_str), None

# PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file)
pipe_url = getattr(pipe_obj, "url", None)
@@ -248,8 +363,9 @@ class Add_File(Cmdlet):
# Look for path or path-like keys
path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file")
# If the dict includes a 'paths' list (multi-part/section download), prefer the first file
if not path_candidate and isinstance(first_item.get("paths"), (list, tuple)) and first_item.get("paths"):
path_candidate = first_item.get("paths")[0]
paths_val = first_item.get("paths")
if not path_candidate and isinstance(paths_val, (list, tuple)) and paths_val:
path_candidate = paths_val[0]
if path_candidate:
debug(f"Resolved path from result dict: {path_candidate}")
try:
@@ -361,10 +477,12 @@ class Add_File(Cmdlet):
selection_args = result["_selection_args"]
if selection_args:
dl_args.extend(selection_args)
elif hasattr(result, 'extra') and isinstance(result.extra, dict) and "_selection_args" in result.extra:
selection_args = result.extra["_selection_args"]
if selection_args:
dl_args.extend(selection_args)
else:
extra_val = getattr(result, "extra", None)
if isinstance(extra_val, dict) and "_selection_args" in extra_val:
selection_args = extra_val["_selection_args"]
if selection_args:
dl_args.extend(selection_args)

# download-media doesn't support -storage flag
# It downloads to the configured directory, then add-file will handle storage
@@ -375,18 +493,32 @@ class Add_File(Cmdlet):

@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
url: List[str] = []
try:
if isinstance(pipe_obj.extra, dict):
url = list(pipe_obj.extra.get("url") or pipe_obj.extra.get("url") or [])
except Exception:
pass
from metadata import normalize_urls

if not url and isinstance(result, dict):
url = list(result.get("url") or result.get("url") or [])
if not url:
url = list(extract_url_from_result(result) or [])
return url
# Prefer explicit PipeObject.url if present
urls: List[str] = []
try:
urls = normalize_urls(getattr(pipe_obj, "url", None))
except Exception:
urls = []

# Then check extra.url
if not urls:
try:
if isinstance(pipe_obj.extra, dict):
urls = normalize_urls(pipe_obj.extra.get("url"))
except Exception:
pass

# Then check result dict
if not urls and isinstance(result, dict):
urls = normalize_urls(result.get("url"))

# Finally, try extractor helper
if not urls:
urls = normalize_urls(extract_url_from_result(result))

return urls

@staticmethod
def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]:
@@ -405,10 +537,36 @@ class Add_File(Cmdlet):

@staticmethod
def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]:
if getattr(pipe_obj, "duration", None) is not None:
return pipe_obj.duration
def _parse_duration(value: Any) -> Optional[float]:
if value is None:
return None
if isinstance(value, (int, float)):
return float(value) if value > 0 else None
if isinstance(value, str):
s = value.strip()
if not s:
return None
try:
candidate = float(s)
return candidate if candidate > 0 else None
except ValueError:
pass
if ":" in s:
parts = [p.strip() for p in s.split(":") if p.strip()]
if len(parts) in {2, 3} and all(p.isdigit() for p in parts):
nums = [int(p) for p in parts]
if len(nums) == 2:
minutes, seconds = nums
return float(minutes * 60 + seconds)
hours, minutes, seconds = nums
return float(hours * 3600 + minutes * 60 + seconds)
return None

parsed = _parse_duration(getattr(pipe_obj, "duration", None))
if parsed is not None:
return parsed
try:
return extract_duration(result)
return _parse_duration(extract_duration(result))
except Exception:
return None

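Illustrative expected values for the new nested _parse_duration helper (not part of the diff; shown as comments because the helper is local to _get_duration):

# _parse_duration(212)       -> 212.0
# _parse_duration("3:25")    -> 205.0   (MM:SS)
# _parse_duration("1:02:03") -> 3723.0  (HH:MM:SS)
# _parse_duration("")        -> None
# _parse_duration(-5)        -> None    (non-positive values are rejected)
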
@@ -442,19 +600,20 @@ class Add_File(Cmdlet):
ctx.set_current_stage_table(None)

@staticmethod
def _emit_storage_result(payload: Dict[str, Any]) -> None:
def _emit_storage_result(payload: Dict[str, Any], *, overlay: bool = True, emit: bool = True) -> None:
"""Emit a storage-style result payload.

- Always emits the dict downstream (when in a pipeline).
- If this is the last stage (or not in a pipeline), prints a search-store-like table
and sets an overlay table/items for @N selection.
"""
# Always emit for downstream commands (no-op if not in a pipeline)
ctx.emit(payload)
# Emit for downstream commands (no-op if not in a pipeline)
if emit:
ctx.emit(payload)

stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if not is_last:
if not is_last or not overlay:
return

try:
@@ -470,6 +629,53 @@ class Add_File(Cmdlet):
except Exception:
pass

@staticmethod
def _try_emit_search_store_by_hash(*, store: str, hash_value: str, config: Dict[str, Any]) -> bool:
"""Run search-store for a single hash so the final table/payload is consistent.

Important: `add-file` is treated as an action command by the CLI, so the CLI only
prints tables for it when a display overlay exists. After running search-store,
this copies the resulting table into the display overlay (when this is the last
stage) so the canonical store table is what the user sees and can select from.

Returns True if search-store ran successfully, else False.
"""
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet

args = ["-store", str(store), f"hash:{str(hash_value)}"]
log(f"[add-file] Refresh: search-store -store {store} \"hash:{hash_value}\"", file=sys.stderr)

# Run search-store under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1, worker_id=getattr(prev_ctx, "worker_id", None))
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return False

# Promote the search-store result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
if is_last:
try:
table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if table is not None and items:
ctx.set_last_result_table_overlay(table, items, subject={"store": store, "hash": hash_value})
except Exception:
pass

return True
except Exception as exc:
debug(f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}")
return False

@staticmethod
def _prepare_metadata(
result: Any,
@@ -664,8 +870,9 @@ class Add_File(Cmdlet):

if not username or not filename:
debug(f"[add-file] ERROR: Could not extract soulseek metadata from result (type={type(result).__name__})")
if hasattr(result, "extra"):
debug(f"[add-file] Result extra keys: {list(result.extra.keys())}")
extra_val = getattr(result, "extra", None)
if isinstance(extra_val, dict):
debug(f"[add-file] Result extra keys: {list(extra_val.keys())}")
return None

if not username or not filename:
@@ -769,28 +976,55 @@ class Add_File(Cmdlet):

@staticmethod
def _handle_storage_backend(
result: Any,
media_path: Path,
backend_name: str,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
delete_after: bool,
*,
collect_payloads: Optional[List[Dict[str, Any]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_store: bool = True,
) -> int:
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)

delete_after_effective = bool(delete_after)
if not delete_after_effective:
# When download-media is piped into add-file, the downloaded artifact is a temp file.
# After it is persisted to a storage backend, delete the temp copy to avoid duplicates.
try:
if (
str(backend_name or "").strip().lower() != "temp"
and getattr(pipe_obj, "is_temp", False)
and getattr(pipe_obj, "action", None) == "cmdlet:download-media"
):
from config import resolve_output_dir
temp_dir = resolve_output_dir(config)
try:
if media_path.resolve().is_relative_to(temp_dir.expanduser().resolve()):
delete_after_effective = True
debug(f"[add-file] Auto-delete temp source after ingest: {media_path}")
except Exception:
# If path resolution fails, fall back to non-destructive behavior
pass
except Exception:
pass

try:
store = Store(config)
backend = store[backend_name]

# Prepare metadata from pipe_obj and sidecars
tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config)
tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config)

# Call backend's add_file with full metadata
# Backend returns hash as identifier
file_identifier = backend.add_file(
media_path,
title=title,
tags=tags,
tag=tags,
url=url
)
log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
@@ -822,6 +1056,14 @@ class Add_File(Cmdlet):
# Keep hash/store for downstream commands (get-tag, get-file, etc.).
resolved_hash = file_identifier if len(file_identifier) == 64 else (f_hash or file_identifier or "unknown")

# If we have url(s), ensure they get associated with the destination file.
# This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
if url:
try:
backend.add_url(resolved_hash, list(url))
except Exception:
pass

meta: Dict[str, Any] = {}
try:
meta = backend.get_metadata(resolved_hash) or {}
@@ -865,9 +1107,30 @@ class Add_File(Cmdlet):
"tag": list(tags or []),
"url": list(url or []),
}
Add_File._emit_storage_result(payload)
if collect_payloads is not None:
try:
collect_payloads.append(payload)
except Exception:
pass

# Keep the add-file 1-row summary overlay (when last stage), then emit the
# canonical search-store payload/table for piping/selection consistency.
if auto_search_store and resolved_hash and resolved_hash != "unknown":
# Show the add-file summary (overlay only) but let search-store provide the downstream payload.
Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=False)

ok = Add_File._try_emit_search_store_by_hash(
store=backend_name,
hash_value=resolved_hash,
config=config,
)
if not ok:
# Fall back to emitting the add-file payload so downstream stages still receive an item.
ctx.emit(payload)
else:
Add_File._emit_storage_result(payload, overlay=not suppress_last_stage_overlay, emit=True)

Add_File._cleanup_after_success(media_path, delete_source=delete_after)
Add_File._cleanup_after_success(media_path, delete_source=delete_after_effective)
return 0

except Exception as exc:

@@ -3,7 +3,6 @@ from __future__ import annotations
from typing import Any, Dict, Sequence
import sys

from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log
@@ -12,19 +11,24 @@ from Store import Store

class Add_Url(Cmdlet):
"""Add URL associations to files via hash+store."""

NAME = "add-url"
SUMMARY = "Associate a URL with a file"
USAGE = "@1 | add-url <url>"
ARGS = [
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to associate"),
]
DETAIL = [
"- Associates URL with file identified by hash+store",
"- Multiple url can be comma-separated",
]

def __init__(self) -> None:
super().__init__(
name="add-url",
summary="Associate a URL with a file",
usage="@1 | add-url <url>",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to associate"),
],
detail=[
"- Associates URL with file identified by hash+store",
"- Multiple url can be comma-separated",
],
exec=self.run,
)
self.register()

def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add URL to file via hash+store backend."""
@@ -78,8 +82,7 @@ class Add_Url(Cmdlet):
return 1

# Register cmdlet
register(["add-url", "add_url"])(Add_Url)
CMDLET = Add_Url()


@@ -3,7 +3,6 @@ from __future__ import annotations
from typing import Any, Dict, Sequence
import sys

from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log
@@ -12,19 +11,24 @@ from Store import Store

class Delete_Url(Cmdlet):
"""Delete URL associations from files via hash+store."""

NAME = "delete-url"
SUMMARY = "Remove a URL association from a file"
USAGE = "@1 | delete-url <url>"
ARGS = [
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to remove"),
]
DETAIL = [
"- Removes URL association from file identified by hash+store",
"- Multiple url can be comma-separated",
]

def __init__(self) -> None:
super().__init__(
name="delete-url",
summary="Remove a URL association from a file",
usage="@1 | delete-url <url>",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to remove"),
],
detail=[
"- Removes URL association from file identified by hash+store",
"- Multiple url can be comma-separated",
],
exec=self.run,
)
self.register()

def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Delete URL from file via hash+store backend."""
@@ -78,5 +82,4 @@ class Delete_Url(Cmdlet):
return 1

# Register cmdlet
register(["delete-url", "del-url", "delete_url"])(Delete_Url)
CMDLET = Delete_Url()

@@ -190,9 +190,11 @@ class Download_File(Cmdlet):

# If this looks like a provider item and providers are available, prefer provider.download()
downloaded_path: Optional[Path] = None
attempted_provider_download = False
if table and get_search_provider and SearchResult:
provider = get_search_provider(str(table), config)
if provider is not None:
attempted_provider_download = True
sr = SearchResult(
table=str(table),
title=str(title or "Unknown"),
@@ -202,6 +204,19 @@ class Download_File(Cmdlet):
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
downloaded_path = provider.download(sr, final_output_dir)

# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary":
availability = None
reason = None
if isinstance(full_metadata, dict):
availability = full_metadata.get("availability")
reason = full_metadata.get("availability_reason")
msg = "[download-file] OpenLibrary item not downloadable"
if availability or reason:
msg += f" (availability={availability or ''} reason={reason or ''})"
log(msg, file=sys.stderr)
continue

# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")

@@ -693,6 +693,7 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
return

# Extract relevant fields
webpage_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
result_container[0] = {
"extractor": info.get("extractor", ""),
"title": info.get("title", ""),
@@ -700,7 +701,9 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
"duration": info.get("duration"),
"uploader": info.get("uploader"),
"description": info.get("description"),
"url": url,
# Keep both the requested and canonical URL forms; callers should prefer webpage_url.
"requested_url": url,
"webpage_url": webpage_url,
}
except Exception as exc:
log(f"Probe error for {url}: {exc}")
@@ -1220,9 +1223,359 @@ class Download_Media(Cmdlet):
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
return 1

quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False

storage = None
hydrus_available = True
try:
from Store import Store
storage = Store(config=config or {}, suppress_debug=True)
from API.HydrusNetwork import is_hydrus_available
hydrus_available = bool(is_hydrus_available(config or {}))
except Exception:
storage = None

def _preflight_url_duplicate(candidate_url: str, extra_urls: Optional[Sequence[str]] = None) -> bool:
# NOTE: download-media sets _quiet_background_output=True when running in a pipeline to
# reduce background noise. URL de-dup is interactive and must still run in pipelines.
if storage is None:
debug("Preflight URL check skipped: storage unavailable")
return True

debug(f"Preflight URL check: candidate={candidate_url}")

try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore[assignment]

needles: List[str] = []
if normalize_urls is not None:
for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]:
try:
needles.extend(normalize_urls(raw))
except Exception:
continue
# Fallback: always have at least one needle
if not needles:
needles = [str(candidate_url)]

# Deduplicate needles (preserve order)
seen_needles: List[str] = []
for needle in needles:
if needle and needle not in seen_needles:
seen_needles.append(needle)
needles = seen_needles

try:
debug(f"Preflight URL needles: {needles}")
except Exception:
pass

url_matches: List[Dict[str, Any]] = []
try:
from Store.HydrusNetwork import HydrusNetwork

# Avoid searching the temp/download directory backend during dedup.
# We only want to warn about duplicates in real stores.
backend_names_all = storage.list_searchable_backends()
backend_names: List[str] = []
skipped: List[str] = []
for backend_name in backend_names_all:
try:
backend = storage[backend_name]
except Exception:
continue

try:
if str(backend_name).strip().lower() == "temp":
skipped.append(backend_name)
continue
except Exception:
pass

# Heuristic: if a Folder backend points at the configured temp output dir, skip it.
try:
backend_location = getattr(backend, "_location", None)
if backend_location and final_output_dir:
backend_path = Path(str(backend_location)).expanduser().resolve()
temp_path = Path(str(final_output_dir)).expanduser().resolve()
if backend_path == temp_path:
skipped.append(backend_name)
continue
except Exception:
pass

backend_names.append(backend_name)

try:
if skipped:
debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})")
else:
debug(f"Preflight backends: {backend_names}")
except Exception:
pass

for backend_name in backend_names:
backend = storage[backend_name]
if isinstance(backend, HydrusNetwork) and not hydrus_available:
continue

backend_hits: List[Dict[str, Any]] = []
for needle in needles:
try:
backend_hits = backend.search(f"url:{needle}", limit=25) or []
if backend_hits:
break
except Exception:
continue
if backend_hits:
url_matches.extend([dict(x) if isinstance(x, dict) else {"title": str(x)} for x in backend_hits])

if len(url_matches) >= 25:
url_matches = url_matches[:25]
break
except Exception:
url_matches = []

if not url_matches:
debug("Preflight URL check: no matches")
return True

table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
results_list: List[Dict[str, Any]] = []
for item in url_matches:
if "title" not in item:
item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
table.add_result(item)
results_list.append(item)

pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)

print(f"\n{table}")
response = input("Continue anyway? (y/n): ").strip().lower()
if response not in {"y", "yes"}:
return False
return True

def _canonicalize_url_for_storage(requested_url: str) -> str:
# Prefer yt-dlp's canonical webpage URL (e.g. strips timestamps/redirects).
# Fall back to the requested URL if probing fails.
# Important: when playlist item selection is used, avoid probing (can hang on large playlists).
if playlist_items:
return str(requested_url)
try:
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15)
if isinstance(pr, dict):
for key in ("webpage_url", "original_url", "url", "requested_url"):
value = pr.get(key)
if isinstance(value, str) and value.strip():
return value.strip()
except Exception:
pass
return str(requested_url)

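A hedged sketch of how the canonical URL is resolved for de-dup (not part of the diff); the YouTube URL is a made-up example, and the call assumes probing succeeds inside run() where this closure is defined:

requested = "https://www.youtube.com/watch?v=abc123&t=42s"
canonical = _canonicalize_url_for_storage(requested)
# -> yt-dlp's webpage_url for the page (timestamp stripped) when probing works,
#    otherwise the requested URL is returned unchanged.
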
# Check if we need to show format selection
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
ytdl_format = parsed.get("format")
playlist_selection_handled = False

def _parse_at_selection(choice: str, *, max_index: int) -> Optional[List[int]]:
"""Parse @ selection syntax (@2, @2-5, @{1,3,5}, @2,5,7) into 1-based indices."""
raw = str(choice or "").strip()
if not raw:
return None

if raw.lower() in {"q", "quit", "cancel"}:
return None

if raw == "@*" or raw == "*":
return list(range(1, max_index + 1))

if raw.startswith("@"):
raw = raw[1:].strip()

if raw.startswith("{") and raw.endswith("}"):
raw = raw[1:-1].strip()

if not raw:
return None

indices: set[int] = set()
for part in raw.split(","):
part = part.strip()
if not part:
continue
if "-" in part:
left, right = [p.strip() for p in part.split("-", 1)]
if not left or not right:
return None
try:
start = int(left)
end = int(right)
except ValueError:
return None
if start < 1 or end < 1:
return None
if end < start:
start, end = end, start
for i in range(start, end + 1):
if 1 <= i <= max_index:
indices.add(i)
else:
try:
i = int(part)
except ValueError:
return None
if 1 <= i <= max_index:
indices.add(i)
if not indices:
return None
return sorted(indices)

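Illustrative expectations for _parse_at_selection (not part of the diff); indices are 1-based and out-of-range values are dropped:

# _parse_at_selection("@2", max_index=10)      -> [2]
# _parse_at_selection("@2-5,8", max_index=10)  -> [2, 3, 4, 5, 8]
# _parse_at_selection("@{1,3,5}", max_index=4) -> [1, 3]
# _parse_at_selection("@*", max_index=3)       -> [1, 2, 3]
# _parse_at_selection("q", max_index=3)        -> None  (cancel)
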
def _maybe_prompt_playlist_items(url: str) -> Optional[Dict[str, Any]]:
"""If URL appears to be a playlist/channel/collection, prompt user for @ selection.

Returns:
- None if URL is not a playlist-like multi-entry page (or probe fails)
- Dict with keys:
- cancel: bool
- playlist_items: Optional[str] (None means download all)
- selected_urls: Optional[List[str]] (expanded per-entry urls when available)
"""
try:
pr = probe_url(url, no_playlist=False, timeout_seconds=15)
except Exception:
pr = None
if not isinstance(pr, dict):
return None
entries = pr.get("entries")
if not isinstance(entries, list) or len(entries) <= 1:
return None

# Display table (limit rows to keep output reasonable)
max_rows = 200
display_entries = entries[:max_rows]
total = len(entries)

def _entry_to_url(entry: Any) -> Optional[str]:
if not isinstance(entry, dict):
return None
# Prefer explicit absolute URLs when present
for key in ("webpage_url", "original_url", "url"):
v = entry.get(key)
if isinstance(v, str) and v.strip():
s = v.strip()
try:
if urlparse(s).scheme in {"http", "https"}:
return s
except Exception:
return s

# Best-effort YouTube fallback from id
entry_id = entry.get("id")
if isinstance(entry_id, str) and entry_id.strip():
extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").lower()
if "youtube" in extractor_name:
return f"https://www.youtube.com/watch?v={entry_id.strip()}"
return None

table = ResultTable()
table.title = f"Playlist items ({total}{' shown ' + str(len(display_entries)) if total > max_rows else ''})"
table.set_source_command("download-media", [url])
try:
table.set_preserve_order(True)
except Exception:
pass

results_list: List[Dict[str, Any]] = []
for idx, entry in enumerate(display_entries, 1):
title = None
uploader = None
duration = None
try:
if isinstance(entry, dict):
title = entry.get("title")
uploader = entry.get("uploader") or pr.get("uploader")
duration = entry.get("duration")
except Exception:
pass

row: Dict[str, Any] = {
"table": "download-media",
"title": str(title or f"Item {idx}"),
"detail": str(uploader or ""),
"media_kind": "playlist-item",
"playlist_index": idx,
"columns": [
("#", str(idx)),
("Title", str(title or "")),
("Duration", str(duration or "")),
("Uploader", str(uploader or "")),
],
}
results_list.append(row)
table.add_result(row)

pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)

print(f"\n{table}")
choice = input("Select items to download (@N, @2-5, @{1,3}, @*, or 'q' to cancel): ").strip()
if not choice or choice.lower() in {"q", "quit", "cancel"}:
return {"cancel": True, "playlist_items": None, "selected_urls": []}
if choice.strip() == "@*" or choice.strip() == "*":
# @* means all entries, not just displayed rows.
selected_urls: List[str] = []
for entry in entries:
u = _entry_to_url(entry)
if u and u not in selected_urls:
selected_urls.append(u)
# Only expand when we can derive URLs for all entries; otherwise fall back to yt-dlp playlist handling.
if len(selected_urls) == len(entries):
return {"cancel": False, "playlist_items": None, "selected_urls": selected_urls}
return {"cancel": False, "playlist_items": None, "selected_urls": []}

parsed_indices = _parse_at_selection(choice, max_index=len(display_entries))
if not parsed_indices:
log("Invalid selection. Use @N, @2-5, @{1,3}, or @*", file=sys.stderr)
return {"cancel": True, "playlist_items": None, "selected_urls": []}

selected_urls: List[str] = []
for i in parsed_indices:
try:
entry = display_entries[i - 1]
except Exception:
continue
u = _entry_to_url(entry)
if u and u not in selected_urls:
selected_urls.append(u)

# If we can expand per-entry URLs, return them.
if selected_urls and len(selected_urls) == len(parsed_indices):
return {"cancel": False, "playlist_items": None, "selected_urls": selected_urls}

# yt-dlp accepts comma-separated 1-based indices for playlist_items
return {"cancel": False, "playlist_items": ",".join(str(i) for i in parsed_indices), "selected_urls": []}

# Playlist/multi-entry detection: if the URL has multiple items and the user didn't
# specify -item, prompt for @ selection (supports @* for all).
if len(supported_url) == 1 and not playlist_items and not ytdl_format:
candidate_url = supported_url[0]
selection_info = _maybe_prompt_playlist_items(candidate_url)
if selection_info is not None:
playlist_selection_handled = True
if bool(selection_info.get("cancel")):
return 0
selected_urls = selection_info.get("selected_urls")
if isinstance(selected_urls, list) and selected_urls:
# Expand playlist/channel URL into per-entry URLs so that de-dup preflight
# and downloads operate per file.
supported_url = selected_urls
playlist_items = None
else:
playlist_items = selection_info.get("playlist_items")

# If no -item, no explicit -format specified, and single URL, show the format table.
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
@@ -1232,8 +1585,15 @@ class Download_Media(Cmdlet):
and not playlist_items
and not ytdl_format
and len(supported_url) == 1
and not playlist_selection_handled
):
url = supported_url[0]

canonical_url = _canonicalize_url_for_storage(url)
if not _preflight_url_duplicate(canonical_url, extra_urls=[url]):
log(f"Skipping download: {url}", file=sys.stderr)
return 0

formats = list_formats(url, no_playlist=False)

if formats and len(formats) > 1:
@@ -1379,12 +1739,18 @@ class Download_Media(Cmdlet):
# Download each URL
downloaded_count = 0
clip_sections_spec = self._build_clip_sections_spec(clip_range)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False

for url in supported_url:
try:
debug(f"Processing: {url}")

canonical_url = _canonicalize_url_for_storage(url)

# Preflight: warn if URL already exists in storage backends.
if not _preflight_url_duplicate(canonical_url, extra_urls=[url]):
log(f"Skipping download: {url}", file=sys.stderr)
continue

# If playlist_items is specified but looks like a format ID (e.g. from table selection),
# treat it as a format selector instead of playlist items.
# This handles the case where @N selection passes -item <format_id>
@@ -1532,24 +1898,17 @@ class Download_Media(Cmdlet):
if title and f"title:{title}" not in tag:
tag.insert(0, f"title:{title}")

# Build a single canonical URL field; prefer yt-dlp provided webpage_url or info.url,
# but fall back to the original requested URL. If multiple unique urls are available,
# join them into a comma-separated string.
urls_to_consider: List[str] = []
# Store the canonical URL for de-dup/search purposes.
# Prefer yt-dlp's webpage_url, and do not mix in the raw requested URL (which may contain timestamps).
final_url = None
try:
page_url = info.get("webpage_url") or info.get("url")
page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
if page_url:
urls_to_consider.append(str(page_url))
final_url = str(page_url)
except Exception:
pass
if url:
urls_to_consider.append(str(url))

seen_urls: List[str] = []
for u in urls_to_consider:
if u and u not in seen_urls:
seen_urls.append(u)
final_url = ",".join(seen_urls) if seen_urls else None
final_url = None
if not final_url and url:
final_url = str(url)

# Construct canonical PipeObject dict: hash, store, path, url, title, tags
# Prefer explicit backend names (storage_name/storage_location). If none, default to PATH
@@ -1561,6 +1920,7 @@ class Download_Media(Cmdlet):
"url": final_url,
"tag": tag,
"action": "cmdlet:download-media",
"is_temp": True,
# download_mode removed (deprecated), keep media_kind
"store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
"media_kind": "video" if opts.mode == "video" else "audio",

@@ -184,6 +184,32 @@ class Get_Metadata(Cmdlet):
|
||||
mime_type = metadata.get("mime") or metadata.get("ext", "")
|
||||
file_size = metadata.get("size")
|
||||
duration_seconds = metadata.get("duration")
|
||||
if duration_seconds is None:
|
||||
duration_seconds = metadata.get("duration_seconds")
|
||||
if duration_seconds is None:
|
||||
duration_seconds = metadata.get("length")
|
||||
if duration_seconds is None and isinstance(metadata.get("duration_ms"), (int, float)):
|
||||
try:
|
||||
duration_seconds = float(metadata["duration_ms"]) / 1000.0
|
||||
except Exception:
|
||||
duration_seconds = None
|
||||
|
||||
if isinstance(duration_seconds, str):
|
||||
s = duration_seconds.strip()
|
||||
if s:
|
||||
try:
|
||||
duration_seconds = float(s)
|
||||
except ValueError:
|
||||
if ":" in s:
|
||||
parts = [p.strip() for p in s.split(":") if p.strip()]
|
||||
if len(parts) in {2, 3} and all(p.isdigit() for p in parts):
|
||||
nums = [int(p) for p in parts]
|
||||
if len(nums) == 2:
|
||||
duration_seconds = float(nums[0] * 60 + nums[1])
|
||||
else:
|
||||
duration_seconds = float(nums[0] * 3600 + nums[1] * 60 + nums[2])
|
||||
else:
|
||||
duration_seconds = None
|
||||
pages = metadata.get("pages")
|
||||
url = metadata.get("url") or []
|
||||
imported_ts = self._extract_imported_ts(metadata)
|
||||
|
||||
@@ -12,7 +12,13 @@ from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from SYS.logger import log, debug
|
||||
try:
|
||||
from Provider.openlibrary import OpenLibrary
|
||||
_ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
|
||||
_ol_scrape_openlibrary_metadata = OpenLibrary.scrape_openlibrary_metadata
|
||||
except Exception:
|
||||
_ol_scrape_isbn_metadata = None # type: ignore[assignment]
|
||||
_ol_scrape_openlibrary_metadata = None # type: ignore[assignment]
|
||||
from Provider.metadata_provider import get_metadata_provider, list_metadata_providers
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
@@ -31,6 +37,10 @@ except ImportError:
|
||||
extract_title = None
|
||||
|
||||
|
||||
_scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
|
||||
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -691,249 +701,22 @@ def _extract_url_formats(formats: list) -> List[Tuple[str, str]]:


def _scrape_isbn_metadata(isbn: str) -> List[str]:
    """Scrape metadata for an ISBN using Open Library API."""
    new_tags = []
    if _ol_scrape_isbn_metadata is None:
        log("OpenLibrary scraper unavailable", file=sys.stderr)
        return []
    try:
        from ..API.HTTP import HTTPClient
        import json as json_module

        isbn_clean = isbn.replace('-', '').strip()
        url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"

        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode('utf-8'))
        except Exception as e:
            log(f"Failed to fetch ISBN metadata: {e}", file=sys.stderr)
            return []

        if not data:
            log(f"No ISBN metadata found for: {isbn}")
            return []

        book_data = next(iter(data.values()), None)
        if not book_data:
            return []

        if 'title' in book_data:
            new_tags.append(f"title:{book_data['title']}")

        if 'authors' in book_data and isinstance(book_data['authors'], list):
            for author in book_data['authors'][:3]:
                if 'name' in author:
                    new_tags.append(f"author:{author['name']}")

        if 'publish_date' in book_data:
            new_tags.append(f"publish_date:{book_data['publish_date']}")

        if 'publishers' in book_data and isinstance(book_data['publishers'], list):
            for pub in book_data['publishers'][:1]:
                if 'name' in pub:
                    new_tags.append(f"publisher:{pub['name']}")

        if 'description' in book_data:
            desc = book_data['description']
            if isinstance(desc, dict) and 'value' in desc:
                desc = desc['value']
            if desc:
                desc_str = str(desc).strip()
                # Include description if available (limit to 200 chars to keep it manageable)
                if len(desc_str) > 0:
                    new_tags.append(f"description:{desc_str[:200]}")

        if 'number_of_pages' in book_data:
            page_count = book_data['number_of_pages']
            if page_count and isinstance(page_count, int) and page_count > 0:
                new_tags.append(f"pages:{page_count}")

        if 'identifiers' in book_data and isinstance(book_data['identifiers'], dict):
            identifiers = book_data['identifiers']

            if 'openlibrary' in identifiers:
                ol_ids = identifiers['openlibrary']
                if isinstance(ol_ids, list) and ol_ids:
                    new_tags.append(f"openlibrary:{ol_ids[0]}")
                elif isinstance(ol_ids, str):
                    new_tags.append(f"openlibrary:{ol_ids}")

            if 'lccn' in identifiers:
                lccn_list = identifiers['lccn']
                if isinstance(lccn_list, list) and lccn_list:
                    new_tags.append(f"lccn:{lccn_list[0]}")
                elif isinstance(lccn_list, str):
                    new_tags.append(f"lccn:{lccn_list}")

            if 'oclc' in identifiers:
                oclc_list = identifiers['oclc']
                if isinstance(oclc_list, list) and oclc_list:
                    new_tags.append(f"oclc:{oclc_list[0]}")
                elif isinstance(oclc_list, str):
                    new_tags.append(f"oclc:{oclc_list}")

            if 'goodreads' in identifiers:
                goodreads_list = identifiers['goodreads']
                if isinstance(goodreads_list, list) and goodreads_list:
                    new_tags.append(f"goodreads:{goodreads_list[0]}")
                elif isinstance(goodreads_list, str):
                    new_tags.append(f"goodreads:{goodreads_list}")

            if 'librarything' in identifiers:
                lt_list = identifiers['librarything']
                if isinstance(lt_list, list) and lt_list:
                    new_tags.append(f"librarything:{lt_list[0]}")
                elif isinstance(lt_list, str):
                    new_tags.append(f"librarything:{lt_list}")

            if 'doi' in identifiers:
                doi_list = identifiers['doi']
                if isinstance(doi_list, list) and doi_list:
                    new_tags.append(f"doi:{doi_list[0]}")
                elif isinstance(doi_list, str):
                    new_tags.append(f"doi:{doi_list}")

            if 'internet_archive' in identifiers:
                ia_list = identifiers['internet_archive']
                if isinstance(ia_list, list) and ia_list:
                    new_tags.append(f"internet_archive:{ia_list[0]}")
                elif isinstance(ia_list, str):
                    new_tags.append(f"internet_archive:{ia_list}")

        log(f"Found {len(new_tags)} tag(s) from ISBN lookup")
        return new_tags
        return list(_ol_scrape_isbn_metadata(isbn))
    except Exception as e:
        log(f"ISBN scraping error: {e}", file=sys.stderr)
        return []

def _scrape_openlibrary_metadata(olid: str) -> List[str]:
    """Scrape metadata for an OpenLibrary ID using the .json API endpoint.

    Fetches from https://openlibrary.org/books/{OLID}.json and extracts:
    - Title, authors, publish date, publishers
    - Description
    - Subjects as freeform tags (without namespace prefix)
    - Identifiers (ISBN, LCCN, OCLC, etc.)
    """
    new_tags = []
    if _ol_scrape_openlibrary_metadata is None:
        log("OpenLibrary scraper unavailable", file=sys.stderr)
        return []
    try:
        from ..API.HTTP import HTTPClient
        import json as json_module

        # Format: OL9674499M or just 9674499M
        olid_clean = olid.replace('OL', '').replace('M', '')
        if not olid_clean.isdigit():
            olid_clean = olid

        # Ensure we have the full OLID format for the URL
        if not olid.startswith('OL'):
            url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
        else:
            url = f"https://openlibrary.org/books/{olid}.json"

        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode('utf-8'))
        except Exception as e:
            log(f"Failed to fetch OpenLibrary metadata: {e}", file=sys.stderr)
            return []

        if not data:
            log(f"No OpenLibrary metadata found for: {olid}")
            return []

        # Add title
        if 'title' in data:
            new_tags.append(f"title:{data['title']}")

        # Add authors
        if 'authors' in data and isinstance(data['authors'], list):
            for author in data['authors'][:3]:
                if isinstance(author, dict) and 'name' in author:
                    new_tags.append(f"author:{author['name']}")
                elif isinstance(author, str):
                    new_tags.append(f"author:{author}")

        # Add publish date
        if 'publish_date' in data:
            new_tags.append(f"publish_date:{data['publish_date']}")

        # Add publishers
        if 'publishers' in data and isinstance(data['publishers'], list):
            for pub in data['publishers'][:1]:
                if isinstance(pub, dict) and 'name' in pub:
                    new_tags.append(f"publisher:{pub['name']}")
                elif isinstance(pub, str):
                    new_tags.append(f"publisher:{pub}")

        # Add description
        if 'description' in data:
            desc = data['description']
            if isinstance(desc, dict) and 'value' in desc:
                desc = desc['value']
            if desc:
                desc_str = str(desc).strip()
                if len(desc_str) > 0:
                    new_tags.append(f"description:{desc_str[:200]}")

        # Add number of pages
        if 'number_of_pages' in data:
            page_count = data['number_of_pages']
            if page_count and isinstance(page_count, int) and page_count > 0:
                new_tags.append(f"pages:{page_count}")

        # Add subjects as FREEFORM tags (no namespace prefix)
        if 'subjects' in data and isinstance(data['subjects'], list):
            for subject in data['subjects'][:10]:
                if subject and isinstance(subject, str):
                    subject_clean = str(subject).strip()
                    if subject_clean and subject_clean not in new_tags:
                        new_tags.append(subject_clean)

        # Add identifiers
        if 'identifiers' in data and isinstance(data['identifiers'], dict):
            identifiers = data['identifiers']

            if 'isbn_10' in identifiers:
                isbn_10_list = identifiers['isbn_10']
                if isinstance(isbn_10_list, list) and isbn_10_list:
                    new_tags.append(f"isbn_10:{isbn_10_list[0]}")
                elif isinstance(isbn_10_list, str):
                    new_tags.append(f"isbn_10:{isbn_10_list}")

            if 'isbn_13' in identifiers:
                isbn_13_list = identifiers['isbn_13']
                if isinstance(isbn_13_list, list) and isbn_13_list:
                    new_tags.append(f"isbn_13:{isbn_13_list[0]}")
                elif isinstance(isbn_13_list, str):
                    new_tags.append(f"isbn_13:{isbn_13_list}")

            if 'lccn' in identifiers:
                lccn_list = identifiers['lccn']
                if isinstance(lccn_list, list) and lccn_list:
                    new_tags.append(f"lccn:{lccn_list[0]}")
                elif isinstance(lccn_list, str):
                    new_tags.append(f"lccn:{lccn_list}")

            if 'oclc_numbers' in identifiers:
                oclc_list = identifiers['oclc_numbers']
                if isinstance(oclc_list, list) and oclc_list:
                    new_tags.append(f"oclc:{oclc_list[0]}")
                elif isinstance(oclc_list, str):
                    new_tags.append(f"oclc:{oclc_list}")

            if 'goodreads' in identifiers:
                goodreads_list = identifiers['goodreads']
                if isinstance(goodreads_list, list) and goodreads_list:
                    new_tags.append(f"goodreads:{goodreads_list[0]}")
                elif isinstance(goodreads_list, str):
                    new_tags.append(f"goodreads:{goodreads_list}")

        log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
        return new_tags
        return list(_ol_scrape_openlibrary_metadata(olid))
    except Exception as e:
        log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
        return []

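Both wrappers are now thin shims over Provider.openlibrary but keep the old return shape: a flat list of namespaced tag strings (title:, author:, publish_date:, publisher:, plus identifier tags). A hedged usage sketch, assuming the provider imported successfully:

    tags = _scrape_isbn_metadata("9780131103627")  # e.g. ["title:...", "author:...", "publish_date:..."]
    if not tags:
        log("No ISBN metadata found", file=sys.stderr)
    for tag in tags:
        print(tag)
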
@@ -1,28 +1,40 @@
from __future__ import annotations

from typing import Any, Dict, Sequence
from dataclasses import dataclass
from typing import Any, Dict, List, Sequence
import sys

from . import register
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from ._shared import Cmdlet, SharedArgs, parse_cmdlet_args, get_field, normalize_hash
from SYS.logger import log
from Store import Store


@dataclass
class UrlItem:
    url: str
    hash: str
    store: str


class Get_Url(Cmdlet):
    """Get url associated with files via hash+store."""

    NAME = "get-url"
    SUMMARY = "List url associated with a file"
    USAGE = "@1 | get-url"
    ARGS = [
        SharedArgs.HASH,
        SharedArgs.STORE,
    ]
    DETAIL = [
        "- Lists all url associated with file identified by hash+store",
    ]

    def __init__(self) -> None:
        super().__init__(
            name="get-url",
            summary="List url associated with a file",
            usage="@1 | get-url",
            arg=[
                SharedArgs.HASH,
                SharedArgs.STORE,
            ],
            detail=[
                "- Lists all url associated with file identified by hash+store",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Get url for file via hash+store backend."""
@@ -53,18 +65,34 @@ class Get_Url(Cmdlet):

            urls = backend.get_url(file_hash)

            if urls:
                for u in urls:
                    # Emit rich object for pipeline compatibility
                    ctx.emit({
                        "url": u,
                        "hash": file_hash,
                        "store": store_name,
                    })
                return 0
            else:
                ctx.emit("No url found")
                return 0
            from result_table import ResultTable

            title = str(get_field(result, "title") or "").strip()
            table_title = "Title"
            if title:
                table_title = f"Title: {title}"

            table = ResultTable(table_title, max_columns=1).set_preserve_order(True)
            table.set_source_command("get-url", [])

            items: List[UrlItem] = []
            for u in list(urls or []):
                u = str(u or "").strip()
                if not u:
                    continue
                row = table.add_row()
                row.add_column("Url", u)
                item = UrlItem(url=u, hash=file_hash, store=str(store_name))
                items.append(item)
                ctx.emit(item)

            # Make this a real result table so @.. / @,, can navigate it
            ctx.set_last_result_table(table if items else None, items, subject=result)

            if not items:
                log("No url found", file=sys.stderr)

            return 0

        except KeyError:
            log(f"Error: Storage backend '{store_name}' not configured")
@@ -74,7 +102,6 @@ class Get_Url(Cmdlet):
            return 1


# Register cmdlet
register(["get-url", "get_url"])(Get_Url)
CMDLET = Get_Url()

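Typical pipeline use of the rewritten cmdlet follows the USAGE string above: select a file result, then pipe it through get-url. Each row added to the result table is a UrlItem instance, so downstream selection keeps the hash and store context. A small sketch with placeholder field values (not from the commit):

    @1 | get-url

    # each emitted row is equivalent to:
    item = UrlItem(url="https://example.com/page", hash="<sha256-hex>", store="home")
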
@@ -3,7 +3,6 @@ from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
@@ -11,57 +10,9 @@ import sys

from SYS.logger import log, debug

from ._shared import Cmdlet, CmdletArg, get_field, should_show_help
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help, normalize_hash, first_title_tag
import pipeline as ctx

# Optional dependencies
try:
    import mutagen  # type: ignore
except ImportError:  # pragma: no cover
    mutagen = None  # type: ignore

try:
    from config import get_hydrus_url, resolve_output_dir
except Exception:  # pragma: no cover
    get_hydrus_url = None  # type: ignore
    resolve_output_dir = None  # type: ignore

try:
    from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
except ImportError:  # pragma: no cover
    HydrusNetwork = None  # type: ignore
    HydrusRequestError = RuntimeError  # type: ignore

try:
    from SYS.utils import sha256_file
except ImportError:  # pragma: no cover
    sha256_file = None  # type: ignore

try:
    from SYS.utils_constant import mime_maps
except ImportError:  # pragma: no cover
    mime_maps = {}  # type: ignore

@dataclass(slots=True)
class SearchRecord:
    path: str
    size_bytes: int | None = None
    duration_seconds: str | None = None
    tag: str | None = None
    hash: str | None = None

    def as_dict(self) -> dict[str, str]:
        payload: dict[str, str] = {"path": self.path}
        if self.size_bytes is not None:
            payload["size"] = str(self.size_bytes)
        if self.duration_seconds:
            payload["duration"] = self.duration_seconds
        if self.tag:
            payload["tag"] = self.tag
        if self.hash:
            payload["hash"] = self.hash
        return payload


STORAGE_ORIGINS = {"local", "hydrus", "folder"}

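SearchRecord.as_dict() above serializes only the fields that were actually populated, so rows stay compact. A minimal sketch (path and hash values are placeholders):

    record = SearchRecord(path="/media/song.mp3", size_bytes=4096, hash="<sha256-hex>")
    record.as_dict()  # {"path": "/media/song.mp3", "size": "4096", "hash": "<sha256-hex>"}
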
@@ -86,12 +37,15 @@ class Search_Store(Cmdlet):
            detail=[
                "Search across storage backends: Folder stores and Hydrus instances",
                "Use -store to search a specific backend by name",
                "URL search: url:* (any URL) or url:<value> (URL substring)",
                "Filter results by: tag, size, type, duration",
                "Results include hash for downstream commands (get-file, add-tag, etc.)",
                "Examples:",
                "search-store foo # Search all storage backends",
                "search-store -store home '*' # Search 'home' Hydrus instance",
                "search-store -store test 'video' # Search 'test' folder store",
                "search-store 'url:*' # Files that have any URL",
                "search-store 'url:youtube.com' # Files whose URL contains substring",
                "search-store song -type audio # Search for audio files",
                "search-store movie -tag action # Search with tag filter",
            ],
@@ -100,6 +54,40 @@ class Search_Store(Cmdlet):
        self.register()

    # --- Helper methods -------------------------------------------------
    @staticmethod
    def _parse_hash_query(query: str) -> List[str]:
        """Parse a `hash:` query into a list of normalized 64-hex SHA256 hashes.

        Supported examples:
        - hash:<h1>,<h2>,<h3>
        - Hash: <h1> <h2> <h3>
        - hash:{<h1>, <h2>}
        """
        q = str(query or "").strip()
        if not q:
            return []

        m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
        if not m:
            return []

        rest = (m.group(1) or "").strip()
        if rest.startswith("{") and rest.endswith("}"):
            rest = rest[1:-1].strip()
        if rest.startswith("[") and rest.endswith("]"):
            rest = rest[1:-1].strip()

        # Split on commas and whitespace.
        raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
        out: List[str] = []
        for part in raw_parts:
            h = normalize_hash(part)
            if not h:
                continue
            if h not in out:
                out.append(h)
        return out

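    # Illustrative behaviour of _parse_hash_query (hash values are placeholders, not from the commit):
    #   "hash:{<h1>, <H1>, <h2>}"  -> ["<h1>", "<h2>"]  normalized via normalize_hash and de-duplicated
    #   "url:youtube.com"          -> []                not a hash: query, falls through to text search
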
    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
@@ -150,10 +138,10 @@ class Search_Store(Cmdlet):

        # Parse arguments
        query = ""
        tag_filters: List[str] = []
        size_filter: Optional[Tuple[str, int]] = None
        duration_filter: Optional[Tuple[str, float]] = None
        type_filter: Optional[str] = None
        _tag_filters: List[str] = []
        _size_filter: Optional[Tuple[str, int]] = None
        _duration_filter: Optional[Tuple[str, float]] = None
        _type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []
@@ -166,7 +154,7 @@ class Search_Store(Cmdlet):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
                tag_filters.append(args_list[i + 1])
                _tag_filters.append(args_list[i + 1])
                i += 2
            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
                try:
@@ -175,7 +163,7 @@ class Search_Store(Cmdlet):
                    limit = 100
                i += 2
            elif low in {"-type", "--type"} and i + 1 < len(args_list):
                type_filter = args_list[i + 1].lower()
                _type_filter = args_list[i + 1].lower()
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
@@ -195,6 +183,8 @@ class Search_Store(Cmdlet):
        if store_filter and not storage_backend:
            storage_backend = store_filter

        hash_query = self._parse_hash_query(query)

        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1
@@ -230,12 +220,136 @@ class Search_Store(Cmdlet):
            table_title += f" [{storage_backend}]"

        table = ResultTable(table_title)
        if hash_query:
            try:
                table.set_preserve_order(True)
            except Exception:
                pass

        from Store import Store
        storage = Store(config=config or {})
        from Store._base import Store as BaseStore

        backend_to_search = storage_backend or None
        if hash_query:
            # Explicit hash list search: build rows from backend metadata.
            backends_to_try: List[str] = []
            if backend_to_search:
                backends_to_try = [backend_to_search]
            else:
                backends_to_try = list(storage.list_backends())

            found_any = False
            for h in hash_query:
                resolved_backend_name: Optional[str] = None
                resolved_backend = None

                for backend_name in backends_to_try:
                    try:
                        backend = storage[backend_name]
                    except Exception:
                        continue
                    try:
                        # If get_metadata works, consider it a hit; get_file can be optional (e.g. remote URL).
                        meta = backend.get_metadata(h)
                        if meta is None:
                            continue
                        resolved_backend_name = backend_name
                        resolved_backend = backend
                        break
                    except Exception:
                        continue

                if resolved_backend_name is None or resolved_backend is None:
                    continue

                found_any = True
                searched_backends.append(resolved_backend_name)

                # Resolve a path/URL string if possible
                path_str: Optional[str] = None
                try:
                    maybe_path = resolved_backend.get_file(h)
                    if isinstance(maybe_path, Path):
                        path_str = str(maybe_path)
                    elif isinstance(maybe_path, str) and maybe_path:
                        path_str = maybe_path
                except Exception:
                    path_str = None

                meta_obj: Dict[str, Any] = {}
                try:
                    meta_obj = resolved_backend.get_metadata(h) or {}
                except Exception:
                    meta_obj = {}

                tags_list: List[str] = []
                try:
                    tag_result = resolved_backend.get_tag(h)
                    if isinstance(tag_result, tuple) and tag_result:
                        maybe_tags = tag_result[0]
                    else:
                        maybe_tags = tag_result
                    if isinstance(maybe_tags, list):
                        tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
                except Exception:
                    tags_list = []

                title_from_tag: Optional[str] = None
                try:
                    title_tag = first_title_tag(tags_list)
                    if title_tag and ":" in title_tag:
                        title_from_tag = title_tag.split(":", 1)[1].strip()
                except Exception:
                    title_from_tag = None

                title = title_from_tag or meta_obj.get("title") or meta_obj.get("name")
                if not title and path_str:
                    try:
                        title = Path(path_str).stem
                    except Exception:
                        title = path_str

                ext_val = meta_obj.get("ext") or meta_obj.get("extension")
                if not ext_val and path_str:
                    try:
                        ext_val = Path(path_str).suffix
                    except Exception:
                        ext_val = None

                size_bytes = meta_obj.get("size")
                if size_bytes is None:
                    size_bytes = meta_obj.get("size_bytes")
                try:
                    size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
                except Exception:
                    size_bytes_int = None

                payload: Dict[str, Any] = {
                    "title": str(title or h),
                    "hash": h,
                    "store": resolved_backend_name,
                    "path": path_str,
                    "ext": self._normalize_extension(ext_val),
                    "size_bytes": size_bytes_int,
                    "tag": tags_list,
                }

                table.add_result(payload)
                results_list.append(payload)
                ctx.emit(payload)

            if found_any:
                ctx.set_last_result_table(table, results_list)
                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                db.update_worker_status(worker_id, 'completed')
                return 0

            log("No results found", file=sys.stderr)
            db.append_worker_stdout(worker_id, json.dumps([], indent=2))
            db.update_worker_status(worker_id, 'completed')
            return 0

        if backend_to_search:
            searched_backends.append(backend_to_search)
            target_backend = storage[backend_to_search]
@@ -243,7 +357,9 @@ class Search_Store(Cmdlet):
                log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                db.update_worker_status(worker_id, 'error')
                return 1
            debug(f"[search-store] Searching '{backend_to_search}'")
            results = target_backend.search(query, limit=limit)
            debug(f"[search-store] '{backend_to_search}' -> {len(results or [])} result(s)")
        else:
            from API.HydrusNetwork import is_hydrus_available
            hydrus_available = is_hydrus_available(config or {})
@@ -257,7 +373,9 @@ class Search_Store(Cmdlet):
                    continue
                searched_backends.append(backend_name)

                debug(f"[search-store] Searching '{backend_name}'")
                backend_results = backend.search(query, limit=limit - len(all_results))
                debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
                if backend_results:
                    all_results.extend(backend_results)
                    if len(all_results) >= limit:
@@ -317,11 +435,6 @@ class Search_Store(Cmdlet):
                results_list.append(normalized)
                ctx.emit(normalized)

            # Debug: Verify table rows match items list
            debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
            if len(table.rows) != len(results_list):
                debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)

            ctx.set_last_result_table(table, results_list)
            db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
        else: