updating and refactoring codebase for improved performance and maintainability

This commit is contained in:
2026-05-03 17:29:32 -07:00
parent b7d3dc5f2d
commit 77cab1bd27
17 changed files with 590 additions and 294 deletions
+82 -22
View File
@@ -40,11 +40,58 @@ build_pipeline_preview = sh.build_pipeline_preview
get_field = sh.get_field
from SYS.utils import sha256_file, unique_path, sanitize_filename
from SYS.metadata import write_metadata
# Canonical supported filetypes for all stores/cmdlets
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
class _CommandDependencies:
    """Command-scope cache for Store and plugin instances.

    One instance is meant to be created per command invocation and passed to
    helpers so that the ``Store`` and each plugin are looked up at most once.
    Nothing is constructed until it is first requested.
    """

    def __init__(self, config: Dict[str, Any]) -> None:
        """Remember ``config``; it is passed to ``Store`` and the plugin registry."""
        self.config = config
        self._store: Optional[Store] = None
        # True once Store construction has been attempted (success or failure),
        # so a failing constructor is not re-run on every get_store() call.
        self._store_attempted: bool = False
        # Results of plain name lookups, keyed by normalized plugin name.
        self._plugins: Dict[str, Any] = {}
        # Results of capability lookups, keyed by (normalized name, capability).
        # Kept separate from ``_plugins`` so a composed key can never collide
        # with a plain plugin name.
        self._capability_plugins: Dict[Tuple[str, str], Any] = {}

    def get_store(self) -> Optional[Store]:
        """Return the command-scope ``Store``, constructing it on first call.

        Returns:
            The cached ``Store`` instance, or ``None`` when construction raised.
            The outcome of the first attempt (including failure) is cached for
            the lifetime of this object.
        """
        if not self._store_attempted:
            self._store_attempted = True
            try:
                self._store = Store(self.config)
            except Exception:
                # Best effort: callers treat None as "no store available".
                self._store = None
        return self._store

    def get_plugin(self, name: str) -> Optional[Any]:
        """Return the plugin registered under ``name``, cached per normalized name.

        ``name`` is stripped and lower-cased before lookup; an empty name yields
        ``None`` without consulting the registry. Whatever the registry returns
        (including ``None``) is cached and returned on later calls.
        """
        from ProviderCore.registry import get_plugin as registry_get_plugin

        norm_name = str(name or "").strip().lower()
        if not norm_name:
            return None
        if norm_name not in self._plugins:
            self._plugins[norm_name] = registry_get_plugin(norm_name, self.config)
        return self._plugins[norm_name]

    def get_plugin_with_capability(self, name: str, capability: str) -> Optional[Any]:
        """Return the plugin ``name`` if the registry reports it for ``capability``.

        The result is cached per ``(normalized name, capability)`` pair. An empty
        name yields ``None`` without consulting the registry. Whatever the
        registry returns (including ``None``) is cached.
        """
        from ProviderCore.registry import (
            get_plugin_with_capability as registry_get_with_capability,
        )

        norm_name = str(name or "").strip().lower()
        if not norm_name:
            return None
        cache_key = (norm_name, capability)
        if cache_key not in self._capability_plugins:
            self._capability_plugins[cache_key] = registry_get_with_capability(
                norm_name, capability, self.config
            )
        return self._capability_plugins[cache_key]
DEBUG_PIPE_NOTE_PREVIEW_LENGTH = 256
# Protocol schemes that identify a remote resource / not a local file path.
@@ -220,11 +267,9 @@ class Add_File(Cmdlet):
parsed = parse_cmdlet_args(args, self)
progress = PipelineProgress(ctx)
# Initialize Store for backend resolution
try:
storage_registry = Store(config)
except Exception:
storage_registry = None
# Initialize command-scope dependency context (caches Store/plugins)
deps = _CommandDependencies(config)
storage_registry = deps.get_store()
path_arg = parsed.get("path")
location = parsed.get("store")
@@ -348,7 +393,7 @@ class Add_File(Cmdlet):
is_storage_backend_location = False
if location:
try:
store_for_lookup = storage_registry or Store(config)
store_for_lookup = storage_registry or deps.get_store()
is_storage_backend_location = Add_File._resolve_backend_by_name(store_for_lookup, str(location)) is not None
except Exception:
is_storage_backend_location = False
@@ -368,6 +413,7 @@ class Add_File(Cmdlet):
plugin_instance,
config,
store_instance=storage_registry,
deps=deps,
)
effective_storage_backend_name = plugin_storage_backend or (
@@ -629,10 +675,11 @@ class Add_File(Cmdlet):
config,
export_destination=(Path(location) if location and not is_storage_backend_location else None),
store_instance=storage_registry,
deps=deps,
)
if not media_path and plugin_name:
media_path, file_hash, temp_dir_to_cleanup = Add_File._download_piped_source(
pipe_obj, config, storage_registry
pipe_obj, config, storage_registry, deps=deps
)
if media_path:
try:
@@ -702,7 +749,7 @@ class Add_File(Cmdlet):
if location:
try:
store = storage_registry or Store(config)
store = storage_registry or deps.get_store()
resolved_backend = Add_File._resolve_backend_by_name(store, str(location))
if resolved_backend is not None:
code = self._handle_storage_backend(
@@ -833,7 +880,8 @@ class Add_File(Cmdlet):
Add_File._apply_pending_relationships(
pending_relationship_pairs,
config,
store_instance=storage_registry
store_instance=storage_registry,
deps=deps
)
except Exception:
pass
@@ -1063,6 +1111,7 @@ class Add_File(Cmdlet):
config: Dict[str,
Any],
store_instance: Optional[Store] = None,
deps: Optional[_CommandDependencies] = None,
) -> None:
"""Persist relationships to backends that support relationships.
@@ -1071,8 +1120,11 @@ class Add_File(Cmdlet):
if not pending:
return
if deps is None:
deps = _CommandDependencies(config)
try:
store = store_instance if store_instance is not None else Store(config)
store = store_instance if store_instance is not None else deps.get_store()
except Exception:
return
@@ -1343,6 +1395,7 @@ class Add_File(Cmdlet):
Any],
export_destination: Optional[Path] = None,
store_instance: Optional[Any] = None,
deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path],
Optional[str],
Optional[Path]]:
@@ -1371,9 +1424,9 @@ class Add_File(Cmdlet):
if r_hash and r_store:
try:
store = store_instance
if not store:
store = Store(config)
if deps is None:
deps = _CommandDependencies(config)
store = store_instance or deps.get_store()
backend = Add_File._resolve_backend_by_name(store, r_store)
if backend is not None:
@@ -1441,6 +1494,7 @@ class Add_File(Cmdlet):
result,
pipe_obj,
config,
deps=deps,
)
if downloaded_path:
pipe_obj.path = str(downloaded_path)
@@ -1471,14 +1525,16 @@ class Add_File(Cmdlet):
config: Dict[str, Any],
*,
store_instance: Optional[Any] = None,
deps: Optional[_CommandDependencies] = None,
) -> Optional[str]:
plugin_key = Add_File._normalize_provider_key(plugin_name)
if not plugin_key:
return None
from ProviderCore.registry import get_plugin_with_capability
if deps is None:
deps = _CommandDependencies(config)
file_provider = get_plugin_with_capability(plugin_key, "upload", config)
file_provider = deps.get_plugin_with_capability(plugin_key, "upload")
if file_provider is None:
return None
@@ -1528,6 +1584,7 @@ class Add_File(Cmdlet):
result: Any,
pipe_obj: models.PipeObject,
config: Dict[str, Any],
deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
plugin_key = None
for source in (
@@ -1544,9 +1601,10 @@ class Add_File(Cmdlet):
if not plugin_key:
return None, None, None
from ProviderCore.registry import get_plugin
if deps is None:
deps = _CommandDependencies(config)
plugin = get_plugin(plugin_key, config)
plugin = deps.get_plugin(plugin_key)
if plugin is None:
return None, None, None
@@ -1562,16 +1620,17 @@ class Add_File(Cmdlet):
pipe_obj: models.PipeObject,
config: Dict[str, Any],
store_instance: Optional[Any],
deps: Optional[_CommandDependencies] = None,
) -> Tuple[Optional[Path], Optional[str], Optional[Path]]:
r_hash = str(getattr(pipe_obj, "hash", None) or getattr(pipe_obj, "file_hash", None) or "").strip()
r_store = str(getattr(pipe_obj, "store", None) or "").strip()
if not (r_hash and r_store):
return None, None, None
try:
store = store_instance or Store(config)
except Exception:
store = None
if deps is None:
deps = _CommandDependencies(config)
store = store_instance or deps.get_store()
backend = Add_File._resolve_backend_by_name(store, r_store) if store is not None else None
if backend is None:
return None, None, None
@@ -2244,6 +2303,7 @@ class Add_File(Cmdlet):
relationships = Add_File._get_relationships(result, pipe_obj)
try:
write_sidecar(target_path, tags, url, f_hash)
from SYS.metadata import write_metadata # lazy: avoids 1000+ module chain at startup
write_metadata(
target_path,
hash_value=f_hash,
+3 -1
View File
@@ -24,7 +24,8 @@ from SYS.pipeline_progress import PipelineProgress
from SYS.result_table import Table
from SYS.rich_display import stderr_console as get_stderr_console
from SYS import pipeline as pipeline_context
from SYS.metadata import normalize_urls as normalize_url_list
# SYS.metadata import deferred: normalize_urls loaded lazily at call site to avoid
# pulling in Cryptodome (~900ms) at module import time.
from SYS.selection_builder import (
extract_selection_fields,
extract_urls_from_selection_args,
@@ -1226,6 +1227,7 @@ class Download_File(Cmdlet):
and not a.startswith("-")
)
]
from SYS.metadata import normalize_urls as normalize_url_list # lazy: avoids Cryptodome at startup
raw_url = normalize_url_list(url_candidates)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
+28 -31
View File
@@ -14,14 +14,20 @@ import sys
from SYS.logger import log, debug
from plugins.metadata_provider import (
get_default_subject_scrape_plugin,
get_metadata_plugin,
get_metadata_plugin_for_url,
list_metadata_plugins,
scrape_isbn_metadata,
scrape_openlibrary_metadata,
)
# plugins.metadata_provider is deferred: it transitively loads yt_dlp, Cryptodome,
# imdbinfo, musicbrainzngs and ~1400 modules (~1.5s). Import lazily on first use.
# Annotations are quoted on purpose: in the visible part of this module the
# ``typing`` names are imported further down, so unquoted ``Optional``/``Any``
# here would be evaluated before they exist.
_METADATA_PROVIDER_MOD: "Optional[Any]" = None


def _mp() -> "Any":
    """Return the ``plugins.metadata_provider`` module, importing it on first use.

    The module object is stored in ``_METADATA_PROVIDER_MOD`` after the first
    successful import and returned directly on later calls. Any exception raised
    by the import propagates to the caller and nothing is cached, so the next
    call tries again.
    """
    global _METADATA_PROVIDER_MOD
    if _METADATA_PROVIDER_MOD is None:
        import plugins.metadata_provider as _m

        _METADATA_PROVIDER_MOD = _m
    return _METADATA_PROVIDER_MOD
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -41,11 +47,6 @@ CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
try:
from SYS.metadata import extract_title
except ImportError:
extract_title = None
def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
"""Deduplicate tags case-insensitively while preserving order."""
@@ -210,7 +211,7 @@ def _extract_tag_value(tags_list: List[str], namespace: str) -> Optional[str]:
def _scrape_openlibrary_metadata(olid: str) -> List[str]:
try:
return list(scrape_openlibrary_metadata(olid))
return list(_mp().scrape_openlibrary_metadata(olid))
except Exception as e:
log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
return []
@@ -218,7 +219,7 @@ def _scrape_openlibrary_metadata(olid: str) -> List[str]:
def _scrape_isbn_metadata(isbn: str) -> List[str]:
try:
return list(scrape_isbn_metadata(isbn))
return list(_mp().scrape_isbn_metadata(isbn))
except Exception as e:
log(f"ISBN scraping error: {e}", file=sys.stderr)
return []
@@ -400,7 +401,7 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
scrape_target = str(scrape_url or "").strip() if scrape_url is not None else ""
plugin = None
if scrape_target.startswith(("http://", "https://")):
plugin = get_metadata_plugin_for_url(scrape_target, config)
plugin = _mp().get_metadata_plugin_for_url(scrape_target, config)
if plugin is None:
log("No metadata plugin can scrape this URL", file=sys.stderr)
return 1
@@ -412,9 +413,9 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
if scrape_target:
plugin = get_metadata_plugin(scrape_target, config)
plugin = _mp().get_metadata_plugin(scrape_target, config)
else:
plugin = get_default_subject_scrape_plugin(config)
plugin = _mp().get_default_subject_scrape_plugin(config)
if plugin is None:
if scrape_target:
log(f"Unknown metadata plugin: {scrape_target}", file=sys.stderr)
@@ -749,7 +750,7 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
return 0
plugin_for_apply = get_metadata_plugin(str(result_provider), config)
plugin_for_apply = _mp().get_metadata_plugin(str(result_provider), config)
if plugin_for_apply is not None:
apply_tags = plugin_for_apply.filter_tags_for_store_apply(
[str(t) for t in result_tags if t is not None]
@@ -944,18 +945,14 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
_SCRAPE_CHOICES = []
try:
_SCRAPE_CHOICES = sorted(list_metadata_plugins().keys())
except Exception:
_SCRAPE_CHOICES = [
"itunes",
"openlibrary",
"googlebooks",
"google",
"musicbrainz",
"imdb",
]
_SCRAPE_CHOICES = [
"itunes",
"openlibrary",
"googlebooks",
"google",
"musicbrainz",
"imdb",
]
class Get_Tag(Cmdlet):
+33 -33
View File
@@ -40,49 +40,49 @@ except ImportError:
PdfWriter = None
PdfReader = None
try:
from SYS.metadata import (
read_tags_from_file,
merge_multiple_tag_lists,
)
# Stub fallbacks used before SYS.metadata is lazily imported (or if unavailable).
HAS_METADATA_API: bool = False
_metadata_loaded: bool = False
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
def read_tags_from_file(file_path: Path) -> List[str]:
return []
def read_tags_from_file(file_path: Path) -> List[str]:
    """Stub fallback: report no tags for any file.

    ``file_path`` is accepted for signature compatibility and is not read.
    """
    no_tags: List[str] = []
    return no_tags
def write_tags_to_file(
file_path: Path,
tags: List[str],
source_hashes: Optional[List[str]] = None,
url: Optional[List[str]] = None,
append: bool = False,
) -> bool:
return False
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
return tags
def merge_multiple_tag_lists(sources: List[List[str]],
strategy: str = "first") -> List[str]:
out: List[str] = []
seen: set[str] = set()
for src in sources:
for t in src or []:
s = str(t)
if s and s not in seen:
out.append(s)
seen.add(s)
return out
def merge_multiple_tag_lists(sources: List[List[str]],
                             strategy: str = "first") -> List[str]:
    """Stub fallback: flatten ``sources`` into one de-duplicated tag list.

    Every item is converted with ``str()``. Empty strings are dropped and only
    the first occurrence of each distinct string is kept, in encounter order.
    A falsy entry in ``sources`` (``None`` or an empty list) contributes nothing.
    ``strategy`` is accepted for signature compatibility and is not used here.
    """
    as_text = (str(tag) for group in sources for tag in group or [])
    # dict keys keep first-insertion order, giving an ordered de-duplication.
    return [text for text in dict.fromkeys(as_text) if text]
def write_metadata(*_args: Any, **_kwargs: Any) -> None:
return None
def _ensure_metadata_imports() -> None:
    """Replace the stub tag helpers with the real ``SYS.metadata`` ones, once.

    The import is deferred to the first call rather than module import time
    (the original note cites Cryptodome load cost, roughly one second, as the
    reason). On success the module-level ``read_tags_from_file`` and
    ``merge_multiple_tag_lists`` are rebound and ``HAS_METADATA_API`` becomes
    ``True``; on ``ImportError`` the stubs stay in place and the flag is
    ``False``. Later calls do nothing.
    """
    global _metadata_loaded, HAS_METADATA_API, read_tags_from_file, merge_multiple_tag_lists
    if not _metadata_loaded:
        # Mark as done before importing so a failed import is never retried.
        _metadata_loaded = True
        try:
            from SYS.metadata import (  # type: ignore[assignment]
                read_tags_from_file as _real_read,
                merge_multiple_tag_lists as _real_merge,
            )
        except ImportError:
            HAS_METADATA_API = False
        else:
            read_tags_from_file = _real_read  # type: ignore[assignment]
            merge_multiple_tag_lists = _real_merge  # type: ignore[assignment]
            HAS_METADATA_API = True
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Merge multiple files into one."""
_ensure_metadata_imports()
# Parse help
if should_show_help(args):