Harden piped tag-row handling, prefer -path in download-file (with AllDebrid folder expansion), bundle ffmpeg and add Windows rename retries for yt-dlp, and apply scraped tags on get-tag selection

nose
2025-12-18 22:50:21 -08:00
parent 76691dbbf5
commit d637532237
16 changed files with 2587 additions and 299 deletions

View File

@@ -94,15 +94,22 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
has_piped_tag = (result and hasattr(result, '__class__') and
result.__class__.__name__ == 'TagItem' and
hasattr(result, 'tag_name'))
# Check if we have a piped list of TagItems (from @N selection)
has_piped_tag_list = (isinstance(result, list) and result and
hasattr(result[0], '__class__') and
result[0].__class__.__name__ == 'TagItem')
def _looks_like_tag_row(obj: Any) -> bool:
if obj is None:
return False
# TagItem (direct) or PipeObject/dict emitted from get-tag table rows.
try:
if hasattr(obj, '__class__') and obj.__class__.__name__ == 'TagItem' and hasattr(obj, 'tag_name'):
return True
except Exception:
pass
try:
return bool(get_field(obj, 'tag_name'))
except Exception:
return False
has_piped_tag = _looks_like_tag_row(result)
has_piped_tag_list = isinstance(result, list) and bool(result) and _looks_like_tag_row(result[0])
if not args and not has_piped_tag and not has_piped_tag_list:
log("Requires at least one tag argument")
@@ -195,9 +202,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# If we have TagItems and no args, we are deleting the tags themselves
# If we have Files (or other objects) and args, we are deleting tags FROM those files
# Check if we are in "delete selected tags" mode (TagItems)
is_tag_item_mode = (items_to_process and hasattr(items_to_process[0], '__class__') and
items_to_process[0].__class__.__name__ == 'TagItem')
# Check if we are in "delete selected tags" mode (tag rows)
is_tag_item_mode = bool(items_to_process) and _looks_like_tag_row(items_to_process[0])
if is_tag_item_mode:
# Collect all tags to delete from the TagItems
@@ -248,8 +254,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
item_store = override_store or get_field(item, "store")
if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem':
# It's a TagItem
if _looks_like_tag_row(item):
# It's a tag row (TagItem or PipeObject/dict with tag_name)
if tags_arg:
# User provided tags to delete FROM this file (ignoring the tag name in the item?)
# Or maybe they want to delete the tag in the item AND the args?
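A sketch of the two invocation shapes the mode check distinguishes; the first is quoted from the comment above, the second is an assumed file-row pipeline:
@1 | delete-tag                # selected row is a tag row: delete that tag itself
@1 | delete-tag "creator:foo"  # selected row is a file: delete the given tag FROM it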

View File

@@ -36,11 +36,13 @@ class Download_File(Cmdlet):
super().__init__(
name="download-file",
summary="Download files via HTTP or provider handlers",
usage="download-file <url> [options] OR @N | download-file [options]",
usage="download-file <url> [-path DIR] [options] OR @N | download-file [-path DIR] [options]",
alias=["dl-file", "download-http"],
arg=[
CmdletArg(name="output", type="string", alias="o", description="Output directory (overrides defaults)"),
SharedArgs.URL,
SharedArgs.PATH,
# Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
],
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
@@ -50,10 +52,6 @@ class Download_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution method."""
stage_ctx = pipeline_context.get_stage_context()
in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1
if in_pipeline and isinstance(config, dict):
config["_quiet_background_output"] = True
return self._run_impl(result, args, config)
def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
@@ -169,7 +167,47 @@ class Download_File(Cmdlet):
log(f"Error processing {url}: {e}", file=sys.stderr)
# 2) Provider item downloads (piped results)
# Expand provider "folder" rows into their contained files when possible (e.g., AllDebrid magnets).
expanded_items: List[Any] = []
for item in piped_items:
try:
table = get_field(item, "table")
media_kind = get_field(item, "media_kind")
full_metadata = get_field(item, "full_metadata")
target = get_field(item, "path") or get_field(item, "url")
if str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder":
magnet_id = None
if isinstance(full_metadata, dict):
magnet_id = full_metadata.get("magnet_id")
if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"):
try:
magnet_id = int(target.split(":")[-1])
except Exception:
magnet_id = None
if magnet_id is not None and get_search_provider is not None:
provider = get_search_provider("alldebrid", config)
if provider is not None:
try:
files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)})
except Exception:
files = []
# If the magnet isn't ready, provider.search returns a single not-ready folder row.
if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder":
detail = getattr(files[0], "detail", "")
log(f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})", file=sys.stderr)
else:
for sr in files:
expanded_items.append(sr.to_dict() if hasattr(sr, "to_dict") else sr)
continue
expanded_items.append(item)
except Exception:
expanded_items.append(item)
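# Folder-row targets appear to use the "alldebrid:magnet:<id>" form (read off the
# parse above): a ready magnet expands into its contained file rows, while an
# unready one comes back as a single folder row and is logged instead of queued.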
for item in expanded_items:
try:
table = get_field(item, "table")
title = get_field(item, "title")
@@ -226,8 +264,12 @@ class Download_File(Cmdlet):
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
# Use plain title text (LibGen mirrors can be finicky with fielded query prefixes).
fallback_query = title_text
exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
if not callable(exec_fn):
log("[download-file] search-provider cmdlet unavailable; cannot run LibGen fallback search", file=sys.stderr)
continue
ret = _SEARCH_PROVIDER_CMDLET.exec(
ret = exec_fn(
None,
["-provider", "libgen", "-query", fallback_query],
config,
@@ -243,7 +285,10 @@ class Download_File(Cmdlet):
except Exception:
pass
return int(ret)
try:
return int(ret) # type: ignore[arg-type]
except Exception:
return 1
except Exception:
pass
@@ -259,7 +304,14 @@ class Download_File(Cmdlet):
log("[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)", file=sys.stderr)
continue
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
# Use the provider title as a filename hint so multiple items don't all collide on downloaded_file.bin
suggested_name = str(title).strip() if title is not None else None
result_obj = _download_direct_file(
target,
final_output_dir,
quiet=quiet_mode,
suggested_filename=suggested_name,
)
file_path = None
if hasattr(result_obj, "path"):
file_path = getattr(result_obj, "path")
@@ -301,7 +353,7 @@ class Download_File(Cmdlet):
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
"""Resolve the output directory from storage location or config."""
output_dir_arg = parsed.get("output")
output_dir_arg = parsed.get("path") or parsed.get("output")
if output_dir_arg:
try:
out_path = Path(str(output_dir_arg)).expanduser()
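A worked example of the new flag precedence; the storage-location/config fallback below still applies when neither flag is given:
# parsed = {"path": "D:/Downloads", "output": "C:/old"}
# parsed.get("path") or parsed.get("output")  -> "D:/Downloads"  (-path wins over -output)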

View File

@@ -12,6 +12,7 @@ Focused cmdlet for video/audio downloads from yt-dlp-supported sites:
from __future__ import annotations
import sys
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
@@ -430,10 +431,29 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
"fragment_retries": 10,
"http_chunk_size": 10_485_760,
"restrictfilenames": True,
# Always show a progress indicator; do not tie it to debug logging.
"progress_hooks": [_progress_callback],
}
# Prefer the bundled ffmpeg shipped with the repo (used for merges/remux/postproc).
try:
repo_root = Path(__file__).resolve().parents[1]
bundled_ffmpeg_dir = repo_root / "MPV" / "ffmpeg" / "bin"
if bundled_ffmpeg_dir.exists():
base_options.setdefault("ffmpeg_location", str(bundled_ffmpeg_dir))
except Exception:
pass
# On Windows, AV/indexers can transiently lock files at the end of a download.
# yt-dlp uses file_access_retries for renames (e.g. .part -> final). Default is low.
try:
if os.name == "nt":
base_options.setdefault("file_access_retries", 40)
except Exception:
pass
# Avoid writing progress bars when running in quiet/background mode (e.g. mpv detached pipelines).
if not getattr(opts, "quiet", False):
base_options["progress_hooks"] = [_progress_callback]
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)

View File

@@ -12,6 +12,8 @@ from __future__ import annotations
import sys
from SYS.logger import log, debug
try:
from Provider.openlibrary import OpenLibrary
_ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
@@ -94,7 +96,7 @@ def _emit_tags_as_table(
file_hash: Optional[str],
store: str = "hydrus",
service_name: Optional[str] = None,
config: Dict[str, Any] = None,
config: Optional[Dict[str, Any]] = None,
item_title: Optional[str] = None,
path: Optional[str] = None,
subject: Optional[Any] = None,
@@ -107,11 +109,10 @@ def _emit_tags_as_table(
from result_table import ResultTable
# Create ResultTable with just tag column (no title)
table_title = "Tag"
# Keep the title stable and avoid including hash fragments.
table_title = "tag"
if item_title:
table_title = f"Tag: {item_title}"
if file_hash:
table_title += f" [{file_hash[:8]}]"
table_title = f"tag: {item_title}"
table = ResultTable(table_title, max_columns=1)
table.set_source_command("get-tag", [])
@@ -140,6 +141,28 @@ def _emit_tags_as_table(
except AttributeError:
ctx.set_last_result_table(table, tag_items, subject)
# Note: CLI will handle displaying the table via ResultTable formatting
def _filter_scraped_tags(tags: List[str]) -> List[str]:
"""Filter out tags we don't want to import from scraping."""
blocked = {"title", "artist", "source"}
out: List[str] = []
seen: set[str] = set()
for t in tags:
if not t:
continue
s = str(t).strip()
if not s:
continue
ns = s.split(":", 1)[0].strip().lower() if ":" in s else ""
if ns in blocked:
continue
key = s.lower()
if key in seen:
continue
seen.add(key)
out.append(s)
return out
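# Worked example of the filter above:
#   _filter_scraped_tags(["title:Foo", "artist:X", "creator:bar", "Creator:Bar", "", "2019"])
#   -> ["creator:bar", "2019"]   # blocked namespaces, blanks, and case-insensitive dups dropped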
def _summarize_tags(tags_list: List[str], limit: int = 8) -> str:
"""Create a summary of tags for display."""
shown = [t for t in tags_list[:limit] if t]
@@ -865,14 +888,32 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
return 1
# Prefer identifier tags (ISBN/OLID/etc.) when available; fallback to title/filename
# Prefer identifier tags (ISBN/OLID/etc.) when available; fallback to title/filename.
# IMPORTANT: do not rely on `result.tag` for this because it can be stale (cached on
# the piped PipeObject). Always prefer the current store-backed tags when possible.
identifier_tags: List[str] = []
result_tags = get_field(result, "tag", None)
if isinstance(result_tags, list):
identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))]
# Try local sidecar if no tags present on result
file_hash_for_scrape = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_for_scrape = get_field(result, "store", None)
if file_hash_for_scrape and store_for_scrape:
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_for_scrape)]
current_tags, _src = backend.get_tag(file_hash_for_scrape, config=config)
if isinstance(current_tags, (list, tuple, set)) and current_tags:
identifier_tags = [str(t) for t in current_tags if isinstance(t, (str, bytes))]
except Exception:
# Fall back to whatever is present on the piped result if store lookup fails.
pass
# Fall back to tags carried on the result (may be stale).
if not identifier_tags:
result_tags = get_field(result, "tag", None)
if isinstance(result_tags, list):
identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))]
# As a last resort, try local sidecar only when the item is not store-backed.
if not identifier_tags and (not file_hash_for_scrape or not store_for_scrape):
file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "filename", None)
if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")):
try:
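# Net lookup order: (1) store-backed tags via backend.get_tag when hash and store
# are known, (2) tags already carried on the piped result (possibly stale), (3) the
# local sidecar, consulted only when the item is not store-backed.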
@@ -939,8 +980,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
selection_payload = []
hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_for_payload = get_field(result, "store", None)
# Preserve a consistent path field when present so selecting a metadata row
# keeps referring to the original file.
path_for_payload = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
for idx, item in enumerate(items):
tags = provider.to_tags(item)
tags = _filter_scraped_tags(provider.to_tags(item))
row = table.add_row()
row.add_column("Title", item.get("title", ""))
row.add_column("Artist", item.get("artist", ""))
@@ -955,6 +999,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"year": item.get("year"),
"hash": hash_for_payload,
"store": store_for_payload,
"path": path_for_payload,
"extra": {
"tag": tags,
"provider": provider.name,
@@ -967,7 +1012,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
ctx.set_current_stage_table(table)
# Preserve items for @ selection and downstream pipes without emitting duplicates
ctx.set_last_result_items_only(selection_payload)
print(table)
return 0
# If -scrape was requested but no URL, that's an error
@@ -978,6 +1022,70 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# If the current result already carries a tag list (e.g. a selected metadata
# row from get-tag -scrape itunes), APPLY those tags to the file in the store.
result_provider = get_field(result, "provider", None)
result_tags = get_field(result, "tag", None)
if result_provider and isinstance(result_tags, list) and result_tags:
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_name = get_field(result, "store", None)
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
if not file_hash or not store_name:
log("Selected metadata row is missing hash/store; cannot apply tags", file=sys.stderr)
_emit_tags_as_table(
tags_list=[str(t) for t in result_tags if t is not None],
file_hash=file_hash,
store=str(store_name or "local"),
service_name=None,
config=config,
item_title=str(get_field(result, "title", None) or result_provider),
path=str(subject_path) if subject_path else None,
subject=result,
)
return 0
# Apply tags to the store backend (no sidecar writing here).
apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
if not apply_tags:
log("No applicable scraped tags to apply (title:/artist:/source: are skipped)", file=sys.stderr)
return 0
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_name)]
ok = bool(backend.add_tag(file_hash, apply_tags, config=config))
if not ok:
log(f"Failed to apply tags to store '{store_name}'", file=sys.stderr)
except Exception as exc:
log(f"Failed to apply tags: {exc}", file=sys.stderr)
return 1
# Show updated tags after applying.
try:
updated_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
updated_tags = apply_tags
if not updated_tags:
updated_tags = apply_tags
_emit_tags_as_table(
tags_list=list(updated_tags),
file_hash=file_hash,
store=str(store_name),
service_name=None,
config=config,
item_title=str(get_field(result, "title", None) or get_field(result, "name", None) or str(result_provider)),
path=str(subject_path) if subject_path else None,
subject={
"hash": file_hash,
"store": str(store_name),
"path": str(subject_path) if subject_path else None,
"title": get_field(result, "title", None) or get_field(result, "name", None),
"extra": {"applied_provider": str(result_provider)},
},
)
return 0
hash_from_result = normalize_hash(get_field(result, "hash", None))
file_hash = hash_override or hash_from_result
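A sketch of the scrape-then-apply flow this hunk enables (cmdlet and provider names from this file; @N selection as described above):
get-tag -scrape itunes   # on a piped file: emits the scraped-metadata ResultTable
@1 | get-tag             # selecting a row applies its filtered tags via backend.add_tag,
#                          then re-reads the store and displays the updated tag table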
@@ -1022,6 +1130,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Build a subject payload representing the file whose tags are being shown
subject_store = get_field(result, "store", None) or store_name
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
subject_payload: Dict[str, Any] = {
"tag": list(current),
"title": item_title,
@@ -1034,12 +1143,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
}
if file_hash:
subject_payload["hash"] = file_hash
if local_path:
if subject_path:
try:
path_text = str(local_path)
subject_payload.update({
"path": path_text,
})
subject_payload["path"] = str(subject_path)
except Exception:
pass
@@ -1050,7 +1156,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
service_name=service_name if source == "hydrus" else None,
config=config,
item_title=item_title,
path=str(local_path) if local_path else None,
path=str(subject_path) if subject_path else None,
subject=subject_payload,
)
@@ -1116,55 +1222,7 @@ class Get_Tag(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute get-tag cmdlet."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Get hash and store from parsed args or result
hash_override = parsed.get("hash")
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash"))
store_name = parsed.get("store") or get_field(result, "store")
if not file_hash:
log("No hash available in result", file=sys.stderr)
return 1
if not store_name:
log("No store specified in result", file=sys.stderr)
return 1
# Get tags using storage backend
try:
from Store import Store
storage_obj = Store(config)
backend = storage_obj[store_name]
current, source = backend.get_tag(file_hash, config=config)
if not current:
log("No tags found", file=sys.stderr)
return 1
# Build table and emit
item_title = get_field(result, "title") or file_hash[:16]
_emit_tags_as_table(
tags_list=current,
file_hash=file_hash,
store=store_name,
service_name="",
config=config,
item_title=item_title,
path=None,
subject=result,
)
return 0
except KeyError:
log(f"Store '{store_name}' not found", file=sys.stderr)
return 1
except Exception as exc:
log(f"Failed to get tags: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
return _run(result, args, config)
# Create and register the cmdlet