From 9981424397aa978f10d5e6f95b705d74d1dde13e Mon Sep 17 00:00:00 2001 From: Nose Date: Mon, 12 Jan 2026 04:05:52 -0800 Subject: [PATCH] f --- Provider/telegram.py | 12 + SYS/metadata.py | 23 ++ Store/Folder.py | 10 +- Store/HydrusNetwork.py | 11 +- cmdlet/_shared.py | 270 ++++++++++++++++---- cmdlet/add_file.py | 378 +++++++++++----------------- cmdlet/add_url.py | 10 + cmdlet/download_file.py | 529 ++++++++++++++-------------------------- cmdlet/get_url.py | 70 ++---- cmdlet/search_file.py | 1 + readme.md | 14 +- 11 files changed, 646 insertions(+), 682 deletions(-) diff --git a/Provider/telegram.py b/Provider/telegram.py index a71c3ca..23509e7 100644 --- a/Provider/telegram.py +++ b/Provider/telegram.py @@ -1175,6 +1175,18 @@ class Telegram(Provider): raise ValueError("Not a Telegram URL") return self._download_message_media_sync(url=url, output_dir=output_dir) + def handle_url(self, url: str, *, output_dir: Optional[Path] = None) -> Tuple[bool, Optional[Path]]: + """Optional provider override to parse and act on URLs.""" + if not _looks_like_telegram_message_url(url): + return False, None + + try: + path, _info = self.download_url(url, output_dir or Path(".")) + return True, path + except Exception as e: + debug(f"[telegram] handle_url failed for {url}: {e}") + return False, None + def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: url = str(getattr(result, "path", "") or "") if not url: diff --git a/SYS/metadata.py b/SYS/metadata.py index a932837..8971582 100644 --- a/SYS/metadata.py +++ b/SYS/metadata.py @@ -54,6 +54,18 @@ def _extend_namespaced( _append_unique(target, seen, f"{namespace}:{val}") +def _add_tag(tags: List[str], namespace: str, value: str) -> None: + """Add a namespaced tag if not already present.""" + if not namespace or not value: + return + normalized_value = value_normalize(value) + if not normalized_value: + return + candidate = f"{namespace}:{normalized_value}" + if candidate not in tags: + tags.append(candidate) + + def _coerce_duration(metadata: Dict[str, Any]) -> Optional[float]: for key in ("duration", "duration_seconds", "length", "duration_sec"): value = metadata.get(key) @@ -355,6 +367,17 @@ def normalize_urls(value: Any) -> List[str]: if not u: return None + # --- HEURISTIC FILTER --- + # Ensure it actually looks like a URL/identifier to avoid tag leakage. + # This prevents plain tags ("adam22", "10 books") from entering the URL list. + low = u.lower() + has_scheme = low.startswith(( + "http://", "https://", "magnet:", "torrent:", "tidal:", + "hydrus:", "ytdl:", "soulseek:", "matrix:", "file:" + )) + if not (has_scheme or "://" in low): + return None + # IMPORTANT: URLs can be case-sensitive in the path/query on some hosts # (e.g., https://0x0.st/PzGY.webp). Do not lowercase or otherwise rewrite # the URL here; preserve exact casing and percent-encoding. 
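As a sanity check on the heuristic above, here is a standalone sketch (not the project code; the helper name `_looks_like_url` is hypothetical, and only the prefix tuple is copied from the patch). Lowercasing is confined to the scheme test, matching the comment about preserving case-sensitive paths:

```python
# Standalone restatement of the scheme gate added to normalize_urls in SYS/metadata.py.
# The helper name is hypothetical; the prefix tuple mirrors the patch.
_SCHEME_PREFIXES = (
    "http://", "https://", "magnet:", "torrent:", "tidal:",
    "hydrus:", "ytdl:", "soulseek:", "matrix:", "file:",
)

def _looks_like_url(value: str) -> bool:
    low = value.strip().lower()
    # Lowercase only for the prefix test; the stored value keeps its exact casing.
    return bool(low) and (low.startswith(_SCHEME_PREFIXES) or "://" in low)

assert _looks_like_url("https://0x0.st/PzGY.webp")   # real URL: kept, casing untouched
assert _looks_like_url("magnet:?xt=urn:btih:abcd")   # scheme-only identifier: kept
assert not _looks_like_url("adam22")                 # plain tag: rejected
assert not _looks_like_url("10 books")               # free text: rejected
```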
diff --git a/Store/Folder.py b/Store/Folder.py index 23a0157..5b0dbcd 100644 --- a/Store/Folder.py +++ b/Store/Folder.py @@ -558,15 +558,15 @@ class Folder(Store): if url: try: - debug( - f"[Folder.add_file] merging {len(url)} URLs for {file_hash}", - file=sys.stderr, - ) from SYS.metadata import normalize_urls existing_meta = db.get_metadata(file_hash) or {} existing_urls = normalize_urls(existing_meta.get("url")) incoming_urls = normalize_urls(url) + debug( + f"[Folder.add_file] merging {len(incoming_urls)} URLs for {file_hash}: {incoming_urls}", + file=sys.stderr, + ) changed = False for entry in list(incoming_urls or []): if not entry: @@ -580,7 +580,7 @@ class Folder(Store): {"url": existing_urls}, ) debug( - f"[Folder.add_file] URLs merged for {file_hash}", + f"[Folder.add_file] URLs merged for {file_hash}: {existing_urls}", file=sys.stderr, ) except Exception as exc: diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index c7acfbf..411c7a3 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -1670,16 +1670,23 @@ class HydrusNetwork(Store): raw_urls: Any = meta.get("known_urls" ) or meta.get("urls") or meta.get("url") or [] + + def _is_url(s: Any) -> bool: + if not isinstance(s, str): + return False + v = s.strip().lower() + return bool(v and ("://" in v or v.startswith(("magnet:", "torrent:")))) + if isinstance(raw_urls, str): val = raw_urls.strip() - return [val] if val else [] + return [val] if _is_url(val) else [] if isinstance(raw_urls, list): out: list[str] = [] for u in raw_urls: if not isinstance(u, str): continue u = u.strip() - if u: + if u and _is_url(u): out.append(u) return out return [] diff --git a/cmdlet/_shared.py b/cmdlet/_shared.py index 0a47cee..f7f40e2 100644 --- a/cmdlet/_shared.py +++ b/cmdlet/_shared.py @@ -188,13 +188,6 @@ class SharedArgs: query_key="store", ) - PATH = CmdletArg( - name="path", - type="string", - choices=[], # Dynamically populated via get_store_choices() - description="selects store", - ) - URL = CmdletArg( name="url", type="string", @@ -206,7 +199,6 @@ class SharedArgs: description="selects provider", ) - @staticmethod @staticmethod def get_store_choices(config: Optional[Dict[str, Any]] = None, force: bool = False) -> List[str]: """Get list of available store backend names. @@ -765,6 +757,166 @@ def parse_cmdlet_args(args: Sequence[str], return result +def resolve_target_dir( + parsed: Dict[str, Any], + config: Dict[str, Any], + *, + handle_creations: bool = True +) -> Optional[Path]: + """Resolve a target directory from -path, -output, -storage, or config fallback. + + Args: + parsed: Parsed cmdlet arguments dict. + config: System configuration dict. + handle_creations: Whether to create the directory if it doesn't exist. + + Returns: + Path to the resolved directory, or None if invalid. 
+ """ + # Priority 1: Explicit -path or -output + target = parsed.get("path") or parsed.get("output") + if target: + try: + p = Path(str(target)).expanduser().resolve() + if handle_creations: + p.mkdir(parents=True, exist_ok=True) + return p + except Exception as e: + log(f"Cannot use target path {target}: {e}", file=sys.stderr) + return None + + # Priority 2: --storage flag + storage_val = parsed.get("storage") + if storage_val: + try: + return SharedArgs.resolve_storage(storage_val) + except Exception as e: + log(f"Invalid storage location: {e}", file=sys.stderr) + return None + + # Priority 3: Config fallback via single source of truth + try: + from SYS.config import resolve_output_dir + out_dir = resolve_output_dir(config) + if handle_creations: + out_dir.mkdir(parents=True, exist_ok=True) + return out_dir + except Exception: + import tempfile + p = Path(tempfile.gettempdir()) / "Medios-Macina" + if handle_creations: + p.mkdir(parents=True, exist_ok=True) + return p + + +def coerce_to_path(value: Any) -> Path: + """Extract a Path from common provider result shapes (Path, str, dict, object).""" + if isinstance(value, Path): + return value + if isinstance(value, str): + return Path(value) + + # Try attribute + p = getattr(value, "path", None) + if p: + return Path(str(p)) + + # Try dict + if isinstance(value, dict): + p = value.get("path") + if p: + return Path(str(p)) + + raise ValueError(f"Cannot coerce {type(value).__name__} to Path (missing 'path' field)") + + + +def resolve_media_kind_by_extension(path: Path) -> str: + """Resolve media kind (audio, video, image, document, other) from file extension.""" + if not isinstance(path, Path): + try: + path = Path(str(path)) + except Exception: + return "other" + + suffix = path.suffix.lower() + if suffix in {".mp3", + ".flac", + ".wav", + ".m4a", + ".aac", + ".ogg", + ".opus", + ".wma", + ".mka"}: + return "audio" + if suffix in { + ".mp4", + ".mkv", + ".webm", + ".mov", + ".avi", + ".flv", + ".mpg", + ".mpeg", + ".ts", + ".m4v", + ".wmv", + }: + return "video" + if suffix in {".jpg", + ".jpeg", + ".png", + ".gif", + ".webp", + ".bmp", + ".tiff"}: + return "image" + if suffix in {".pdf", + ".epub", + ".txt", + ".mobi", + ".azw3", + ".cbz", + ".cbr", + ".doc", + ".docx"}: + return "document" + return "other" + + +def build_pipeline_preview(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> List[str]: + """Construct a short preview list for pipeline/cmdlet progress UI.""" + preview: List[str] = [] + + # 1. Add raw URLs + try: + for u in (raw_urls or [])[:3]: + if u: + preview.append(str(u)) + except Exception: + pass + + # 2. Add titles from piped items + if len(preview) < 5: + try: + for item in (piped_items or [])[:5]: + if len(preview) >= 5: + break + title = get_field(item, "title") or get_field(item, "target") or "Piped item" + preview.append(str(title)) + except Exception: + pass + + # 3. Handle empty case + if not preview: + total = len(raw_urls or []) + len(piped_items or []) + if total: + preview.append(f"Processing {total} item(s)...") + + return preview + + def normalize_hash(hash_hex: Optional[str]) -> Optional[str]: """Normalize a hash string to lowercase, or return None if invalid. @@ -2034,36 +2186,45 @@ def collapse_namespace_tag( def extract_tag_from_result(result: Any) -> list[str]: + """Extract all tags from a result dict or PipeObject. + + Handles mixed types (lists, sets, strings) and various field names. 
+ """ tag: list[str] = [] + + def _extend(candidate: Any) -> None: + if not candidate: + return + if isinstance(candidate, (list, set, tuple)): + tag.extend(str(t) for t in candidate if t is not None) + elif isinstance(candidate, str): + tag.append(candidate) + if isinstance(result, models.PipeObject): tag.extend(result.tag or []) if isinstance(result.extra, dict): - extra_tag = result.extra.get("tag") - if isinstance(extra_tag, list): - tag.extend(extra_tag) - elif isinstance(extra_tag, str): - tag.append(extra_tag) + _extend(result.extra.get("tag")) + if isinstance(result.metadata, dict): + _extend(result.metadata.get("tag")) + _extend(result.metadata.get("tags")) elif hasattr(result, "tag"): # Handle objects with tag attribute (e.g. SearchResult) - val = getattr(result, "tag") - if isinstance(val, (list, set, tuple)): - tag.extend(val) - elif isinstance(val, str): - tag.append(val) + _extend(getattr(result, "tag")) if isinstance(result, dict): - raw_tag = result.get("tag") - if isinstance(raw_tag, list): - tag.extend(raw_tag) - elif isinstance(raw_tag, str): - tag.append(raw_tag) + _extend(result.get("tag")) + _extend(result.get("tags")) + extra = result.get("extra") if isinstance(extra, dict): - extra_tag = extra.get("tag") - if isinstance(extra_tag, list): - tag.extend(extra_tag) - elif isinstance(extra_tag, str): - tag.append(extra_tag) + _extend(extra.get("tag")) + _extend(extra.get("tags")) + + fm = result.get("full_metadata") or result.get("metadata") + if isinstance(fm, dict): + _extend(fm.get("tag")) + _extend(fm.get("tags")) + return merge_sequences(tag, case_sensitive=True) @@ -2079,6 +2240,11 @@ def extract_title_from_result(result: Any) -> Optional[str]: def extract_url_from_result(result: Any) -> list[str]: + """Extract all unique URLs from a result dict or PipeObject. + + Handles mixed types (lists, strings) and various field names (url, source_url, webpage_url). + Centralizes extraction logic for cmdlets like download-file, add-file, get-url. 
+ """ url: list[str] = [] def _extend(candidate: Any) -> None: @@ -2089,40 +2255,48 @@ def extract_url_from_result(result: Any) -> list[str]: elif isinstance(candidate, str): url.append(candidate) + # Priority 1: PipeObject (structured data) if isinstance(result, models.PipeObject): - _extend(result.extra.get("url")) - _extend(result.extra.get("url")) # Also check singular url + _extend(result.url) + _extend(result.source_url) + # Also check extra and metadata for legacy or rich captures + if isinstance(result.extra, dict): + _extend(result.extra.get("url")) + _extend(result.extra.get("source_url")) if isinstance(result.metadata, dict): _extend(result.metadata.get("url")) - _extend(result.metadata.get("url")) - _extend(result.metadata.get("url")) + _extend(result.metadata.get("source_url")) + _extend(result.metadata.get("webpage_url")) if isinstance(getattr(result, "full_metadata", None), dict): fm = getattr(result, "full_metadata", None) if isinstance(fm, dict): _extend(fm.get("url")) - _extend(fm.get("url")) - _extend(fm.get("url")) - elif hasattr(result, "url") or hasattr(result, "url"): - # Handle objects with url/url attribute - _extend(getattr(result, "url", None)) - _extend(getattr(result, "url", None)) + _extend(fm.get("source_url")) + _extend(fm.get("webpage_url")) + # Priority 2: Generic objects with .url or .source_url attribute + elif hasattr(result, "url") or hasattr(result, "source_url"): + _extend(getattr(result, "url", None)) + _extend(getattr(result, "source_url", None)) + + # Priority 3: Dictionary if isinstance(result, dict): _extend(result.get("url")) - _extend(result.get("url")) - _extend(result.get("url")) - fm = result.get("full_metadata") - if isinstance(fm, dict): - _extend(fm.get("url")) - _extend(fm.get("url")) - _extend(fm.get("url")) + _extend(result.get("source_url")) + _extend(result.get("webpage_url")) + extra = result.get("extra") if isinstance(extra, dict): _extend(extra.get("url")) - _extend(extra.get("url")) - _extend(extra.get("url")) + + fm = result.get("full_metadata") or result.get("metadata") + if isinstance(fm, dict): + _extend(fm.get("url")) + _extend(fm.get("source_url")) + _extend(fm.get("webpage_url")) - return merge_sequences(url, case_sensitive=True) + from SYS.metadata import normalize_urls + return normalize_urls(url) def extract_relationships(result: Any) -> Optional[Dict[str, Any]]: diff --git a/cmdlet/add_file.py b/cmdlet/add_file.py index 3ea7321..bb77a6f 100644 --- a/cmdlet/add_file.py +++ b/cmdlet/add_file.py @@ -28,6 +28,12 @@ extract_relationships = sh.extract_relationships extract_duration = sh.extract_duration coerce_to_pipe_object = sh.coerce_to_pipe_object collapse_namespace_tag = sh.collapse_namespace_tag +resolve_target_dir = sh.resolve_target_dir +resolve_media_kind_by_extension = sh.resolve_media_kind_by_extension +coerce_to_path = sh.coerce_to_path +build_pipeline_preview = sh.build_pipeline_preview +get_field = sh.get_field + from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store from SYS.utils import sha256_file, unique_path from SYS.metadata import write_metadata @@ -198,6 +204,12 @@ class Add_File(Cmdlet): parsed = parse_cmdlet_args(args, self) progress = PipelineProgress(ctx) + # Initialize Store for backend resolution + try: + storage_registry = Store(config) + except Exception: + storage_registry = None + path_arg = parsed.get("path") location = parsed.get("store") provider_name = parsed.get("provider") @@ -508,7 +520,7 @@ class Add_File(Cmdlet): pipe_obj = coerce_to_pipe_object(item, 
path_arg) try: - label = pipe_obj.title or pipe_obj.name + label = pipe_obj.title if not label and pipe_obj.path: try: label = Path(str(pipe_obj.path)).name @@ -527,7 +539,7 @@ class Add_File(Cmdlet): delete_after_item = delete_after try: media_path, file_hash, temp_dir_to_cleanup = self._resolve_source( - item, path_arg, pipe_obj, config + item, path_arg, pipe_obj, config, store_instance=storage_registry ) debug( f"[add-file] RESOLVED source: path={media_path}, hash={file_hash[:12] if file_hash else 'N/A'}..." @@ -575,7 +587,7 @@ class Add_File(Cmdlet): if location: try: - store = Store(config) + store = storage_registry or Store(config) backends = store.list_backends() if location in backends: code = self._handle_storage_backend( @@ -591,6 +603,7 @@ class Add_File(Cmdlet): pending_url_associations=pending_url_associations, suppress_last_stage_overlay=want_final_search_file, auto_search_file=auto_search_file_after_add, + store_instance=storage_registry, ) else: code = self._handle_local_export( @@ -638,7 +651,8 @@ class Add_File(Cmdlet): try: Add_File._apply_pending_url_associations( pending_url_associations, - config + config, + store_instance=storage_registry ) except Exception: pass @@ -660,6 +674,7 @@ class Add_File(Cmdlet): store=str(location), hash_values=hashes, config=config, + store_instance=storage_registry, ) if not refreshed_items: # Fallback: at least show the add-file payloads as a display overlay @@ -681,7 +696,8 @@ class Add_File(Cmdlet): try: Add_File._apply_pending_relationships( pending_relationship_pairs, - config + config, + store_instance=storage_registry ) except Exception: pass @@ -699,7 +715,8 @@ class Add_File(Cmdlet): store: str, hash_values: List[str], config: Dict[str, - Any] + Any], + store_instance: Optional[Store] = None, ) -> Optional[List[Any]]: """Run search-file for a list of hashes and promote the table to a display overlay. @@ -894,7 +911,8 @@ class Add_File(Cmdlet): set[tuple[str, str]]], config: Dict[str, - Any] + Any], + store_instance: Optional[Store] = None, ) -> None: """Persist relationships to backends that support relationships. @@ -904,7 +922,7 @@ class Add_File(Cmdlet): return try: - store = Store(config) + store = store_instance if store_instance is not None else Store(config) except Exception: return @@ -976,6 +994,7 @@ class Add_File(Cmdlet): pipe_obj: models.PipeObject, config: Dict[str, Any], + store_instance: Optional[Any] = None, ) -> Tuple[Optional[Path], Optional[str], Optional[Path]]: @@ -983,162 +1002,79 @@ class Add_File(Cmdlet): Returns (media_path, file_hash, temp_dir_to_cleanup). 
""" - # PRIORITY 1a: Try hash+path from directory scan result (has 'path' and 'hash' keys) + # PRIORITY 1a: Try hash+path from directory scan result if isinstance(result, dict): - result_path = result.get("path") - result_hash = result.get("hash") - # Check if this looks like a directory scan result (has path and hash but no 'store' key) - result_store = result.get("store") - if result_path and result_hash and not result_store: + r_path = result.get("path") + r_hash = result.get("hash") + r_store = result.get("store") + # If we have path+hash but no store, it's likely a dir scan result + if r_path and r_hash and not r_store: try: - media_path = ( - Path(result_path) if not isinstance(result_path, - Path) else result_path - ) - if media_path.exists() and media_path.is_file(): - debug( - f"[add-file] Using path+hash from directory scan: {media_path}" - ) - pipe_obj.path = str(media_path) - return media_path, str(result_hash), None - except Exception as exc: - debug(f"[add-file] Failed to use directory scan result: {exc}") + p = coerce_to_path(r_path) + if p.exists() and p.is_file(): + pipe_obj.path = str(p) + return p, str(r_hash), None + except Exception: + pass - # PRIORITY 1b: Try hash+store from result dict (most reliable for @N selections) + # PRIORITY 1b: Try hash+store from result dict (fetch from backend) if isinstance(result, dict): - result_hash = result.get("hash") - result_store = result.get("store") - if result_hash and result_store: - debug( - f"[add-file] Using hash+store from result: hash={str(result_hash)[:12]}..., store={result_store}" - ) + r_hash = result.get("hash") + r_store = result.get("store") + if r_hash and r_store: try: - store = Store(config) - if result_store in store.list_backends(): - backend = store[result_store] - media_path = backend.get_file(result_hash) - if isinstance(media_path, Path) and media_path.exists(): - pipe_obj.path = str(media_path) - return media_path, str(result_hash), None - - if isinstance(media_path, str) and media_path.strip(): - downloaded, tmp_dir = Add_File._maybe_download_backend_file( - backend, - str(result_hash), - pipe_obj, + store = store_instance + if not store: + store = Store(config) + + if r_store in store.list_backends(): + backend = store[r_store] + # Try direct access (Path) + mp = backend.get_file(r_hash) + if isinstance(mp, Path) and mp.exists(): + pipe_obj.path = str(mp) + return mp, str(r_hash), None + + # Try download to temp + if isinstance(mp, str) and mp.strip(): + dl_path, tmp_dir = Add_File._maybe_download_backend_file( + backend, str(r_hash), pipe_obj ) - if isinstance(downloaded, Path) and downloaded.exists(): - pipe_obj.path = str(downloaded) - return downloaded, str(result_hash), tmp_dir - except Exception as exc: - debug(f"[add-file] Failed to retrieve via hash+store: {exc}") + if dl_path and dl_path.exists(): + pipe_obj.path = str(dl_path) + return dl_path, str(r_hash), tmp_dir + except Exception: + pass - - # PRIORITY 2: Try explicit path argument + # PRIORITY 2: Generic Coercion (Path arg > PipeObject > Result) + candidate: Optional[Path] = None + if path_arg: - media_path = Path(path_arg) - pipe_obj.path = str(media_path) - debug(f"[add-file] Using explicit path argument: {media_path}") - return media_path, None, None + candidate = Path(path_arg) + elif pipe_obj.path: + candidate = Path(pipe_obj.path) + + if not candidate: + # Unwrap list if needed + obj = result[0] if isinstance(result, list) and result else result + if obj: + try: + candidate = coerce_to_path(obj) + except ValueError: + pass - # 
PRIORITY 3: Try from pipe_obj.path (check file first before URL) - pipe_path = getattr(pipe_obj, "path", None) - if pipe_path: - pipe_path_str = str(pipe_path) - debug(f"Resolved pipe_path: {pipe_path_str}") - if pipe_path_str.lower().startswith(("http://", - "https://", - "magnet:", - "torrent:", - "tidal:", - "hydrus:")): - log( - "add-file ingests local files only. Use download-file first.", - file=sys.stderr, - ) - return None, None, None - return Path(pipe_path_str), None, None + if candidate: + s = str(candidate).lower() + if s.startswith(("http://", "https://", "magnet:", "torrent:", "tidal:", "hydrus:")): + log("add-file ingests local files only. Use download-file first.", file=sys.stderr) + return None, None, None + + pipe_obj.path = str(candidate) + # Retain hash from input if available to avoid re-hashing + hash_hint = get_field(result, "hash") or get_field(result, "file_hash") or getattr(pipe_obj, "hash", None) + return candidate, hash_hint, None - # Try from result (if it's a string path or URL) - if isinstance(result, str): - debug(f"Checking result string: {result}") - # Check if result is a URL before treating as file path - if result.lower().startswith(("http://", - "https://", - "magnet:", - "torrent:", - "tidal:", - "hydrus:")): - log( - "add-file ingests local files only. Use download-file first.", - file=sys.stderr, - ) - return None, None, None - media_path = Path(result) - pipe_obj.path = str(media_path) - return media_path, None, None - - # Try from result if it's a list (pipeline emits multiple results) - if isinstance(result, list) and result: - first_item = result[0] - # If the first item is a string, it's either a URL or a file path - if isinstance(first_item, str): - debug(f"Checking result list[0]: {first_item}") - if first_item.lower().startswith(("http://", - "https://", - "magnet:", - "torrent:", - "tidal:", - "hydrus:")): - log( - "add-file ingests local files only. Use download-file first.", - file=sys.stderr, - ) - return None, None, None - media_path = Path(first_item) - pipe_obj.path = str(media_path) - return media_path, None, None - - # If the first item is a dict, interpret it as a PipeObject-style result - if isinstance(first_item, dict): - # Look for path or path-like keys - path_candidate = ( - first_item.get("path") or first_item.get("filepath") - or first_item.get("file") - ) - # If the dict includes a 'paths' list (multi-part/section download), prefer the first file - paths_val = first_item.get("paths") - if not path_candidate and isinstance(paths_val, - (list, - tuple)) and paths_val: - path_candidate = paths_val[0] - if path_candidate: - debug(f"Resolved path from result dict: {path_candidate}") - try: - media_path = Path(path_candidate) - pipe_obj.path = str(media_path) - return media_path, first_item.get("hash"), None - except Exception: - return None, first_item.get("hash"), None - - # If first item is a PipeObject object - try: - # models.PipeObject is an actual class; check attribute presence - from SYS import models as _models - - if isinstance(first_item, _models.PipeObject): - path_candidate = getattr(first_item, "path", None) - if path_candidate: - debug(f"Resolved path from PipeObject: {path_candidate}") - media_path = Path(path_candidate) - pipe_obj.path = str(media_path) - return media_path, getattr(first_item, "hash", None), None - except Exception: - pass - - debug( - f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}" - ) + debug(f"No resolution path matched. 
result type={type(result).__name__}") log("File path could not be resolved") return None, None, None @@ -1207,18 +1143,6 @@ class Add_File(Cmdlet): if media_path is None: return False - target_str = str(media_path) - - # add-file does not accept URL inputs. - if target_str.lower().startswith(("http://", - "https://", - "magnet:", - "torrent:", - "tidal:", - "hydrus:")): - log("add-file ingests local files only.", file=sys.stderr) - return False - if not media_path.exists() or not media_path.is_file(): log(f"File not found: {media_path}") return False @@ -1232,34 +1156,54 @@ class Add_File(Cmdlet): return True + @staticmethod + def _is_probable_url(s: Any) -> bool: + """Check if a string looks like a URL/magnet/identifier (vs a tag/title).""" + if not isinstance(s, str): + return False + val = s.strip().lower() + if not val: + return False + # Obvious schemes + if val.startswith(("http://", "https://", "magnet:", "torrent:", "tidal:", "hydrus:")): + return True + # Domain-like patterns or local file paths (but we want URLs here) + if "://" in val: + return True + # Hydrus hash-like search queries are NOT urls + if val.startswith("hash:"): + return False + return False + @staticmethod def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]: + """Extract valid URLs from pipe object or result dict.""" from SYS.metadata import normalize_urls - # Prefer explicit PipeObject.url if present - urls: List[str] = [] - try: - urls = normalize_urls(getattr(pipe_obj, "url", None)) - except Exception: - urls = [] + candidates: List[str] = [] + + # 1. Prefer explicit PipeObject top-level field + if pipe_obj.url: + candidates.append(pipe_obj.url) + if pipe_obj.source_url: + candidates.append(pipe_obj.source_url) - # Then check extra.url - if not urls: - try: - if isinstance(pipe_obj.extra, dict): - urls = normalize_urls(pipe_obj.extra.get("url")) - except Exception: - pass + # 2. Check extra and metadata fields + if isinstance(pipe_obj.extra, dict): + u = pipe_obj.extra.get("url") + if isinstance(u, list): + candidates.extend(str(x) for x in u if x) + elif isinstance(u, str): + candidates.append(u) - # Then check result dict - if not urls and isinstance(result, dict): - urls = normalize_urls(result.get("url")) + # 3. Check result (which might be a dict or another PipeObject) + raw_from_result = extract_url_from_result(result) + if raw_from_result: + candidates.extend(raw_from_result) - # Finally, try extractor helper - if not urls: - urls = normalize_urls(extract_url_from_result(result)) - - return urls + # 4. 
Normalize and filter: MUST look like a URL to avoid tag leakage + normalized = normalize_urls(candidates) + return [u for u in normalized if Add_File._is_probable_url(u)] @staticmethod def _get_relationships(result: Any, @@ -1588,6 +1532,8 @@ class Add_File(Cmdlet): merged_tags.append(f"title:{preferred_title}") merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False) + # Final safety filter: ensures no tags/titles leaked into URL list + merged_url = [u for u in merged_url if Add_File._is_probable_url(u)] file_hash = Add_File._resolve_file_hash( result, @@ -1645,7 +1591,8 @@ class Add_File(Cmdlet): if file_hash and not pipe_obj.hash: pipe_obj.hash = file_hash if isinstance(pipe_obj.extra, dict): - pipe_obj.extra.setdefault("url", merged_url) + # Update (don't setdefault) to ensure URLs matched from sidecars or source stores are tracked + pipe_obj.extra["url"] = merged_url return merged_tags, merged_url, preferred_title, file_hash @staticmethod @@ -1830,11 +1777,13 @@ class Add_File(Cmdlet): List[str]]]]] = None, suppress_last_stage_overlay: bool = False, auto_search_file: bool = True, + store_instance: Optional[Store] = None, ) -> int: """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.).""" ##log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr) delete_after_effective = bool(delete_after) if not delete_after_effective: # When download-media is piped into add-file, the downloaded artifact is a temp file. # After it is persisted to a storage backend, delete the temp copy to avoid duplicates. @@ -1863,7 +1812,7 @@ class Add_File(Cmdlet): pass try: - store = Store(config) + store = store_instance if store_instance is not None else Store(config) backend = store[backend_name] hydrus_like_backend = False @@ -2202,12 +2151,13 @@ class Add_File(Cmdlet): List[tuple[str, List[str]]]], config: Dict[str, - Any] + Any], + store_instance: Optional[Store] = None, ) -> None: """Apply deferred URL associations in bulk, grouped per backend.""" try: - store = Store(config) + store = store_instance if store_instance is not None else Store(config) except Exception: return @@ -2329,51 +2279,7 @@ class Add_File(Cmdlet): @staticmethod def _resolve_media_kind(path: Path) -> str: - # Reusing logic - suffix = path.suffix.lower() - if suffix in {".mp3", - ".flac", - ".wav", - ".m4a", - ".aac", - ".ogg", - ".opus", - ".wma", - ".mka"}: - return "audio" - if suffix in { - ".mp4", - ".mkv", - ".webm", - ".mov", - ".avi", - ".flv", - ".mpg", - ".mpeg", - ".ts", - ".m4v", - ".wmv", - }: - return "video" - if suffix in {".jpg", - ".jpeg", - ".png", - ".gif", - ".webp", - ".bmp", - ".tiff"}: - return "image" - if suffix in {".pdf", - ".epub", - ".txt", - ".mobi", - ".azw3", - ".cbz", - ".cbr", - ".doc", - ".docx"}: - return "document" - return "other" + return resolve_media_kind_by_extension(path) @staticmethod def _persist_local_metadata( diff --git a/cmdlet/add_url.py b/cmdlet/add_url.py index 8ea5dc4..0bcf90a 100644 --- a/cmdlet/add_url.py +++ b/cmdlet/add_url.py @@ -74,6 +74,16 @@ class Add_Url(sh.Cmdlet): "store") if result is not None else None ) url_arg = parsed.get("url") + if not url_arg: + try: + inferred = sh.extract_url_from_result(result) + if inferred: + candidate = inferred[0] + if isinstance(candidate, str) and candidate.strip(): + url_arg = candidate.strip() + parsed["url"] = url_arg + except Exception: + pass # If we have 
multiple piped items, we will resolve hash/store per item below. if not results: diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py index 4e8a82e..02f44b2 100644 --- a/cmdlet/download_file.py +++ b/cmdlet/download_file.py @@ -52,6 +52,9 @@ parse_cmdlet_args = sh.parse_cmdlet_args register_url_with_local_library = sh.register_url_with_local_library coerce_to_pipe_object = sh.coerce_to_pipe_object get_field = sh.get_field +resolve_target_dir = sh.resolve_target_dir +coerce_to_path = sh.coerce_to_path +build_pipeline_preview = sh.build_pipeline_preview class Download_File(Cmdlet): @@ -168,49 +171,67 @@ class Download_File(Cmdlet): debug(f"Provider {provider_name} claimed {url}") try: # Try generic handle_url + handled = False if hasattr(provider, "handle_url"): - handled, path = provider.handle_url(str(url), output_dir=final_output_dir) - if handled: - if path: - self._emit_local_file( - downloaded_path=Path(str(path)), - source=str(url), - title_hint=Path(str(path)).stem, - tags_hint=None, - media_kind_hint="file", - full_metadata=None, - progress=progress, - config=config, - provider_hint=provider_name - ) - downloaded_count += 1 - continue - - # Try generic download_url - elif hasattr(provider, "download_url"): - downloaded_path = provider.download_url(str(url), final_output_dir) - if downloaded_path: - self._emit_local_file( - downloaded_path=Path(downloaded_path), - source=str(url), - title_hint=Path(str(downloaded_path)).stem, - tags_hint=None, - media_kind_hint="file", - full_metadata=None, - provider_hint=provider_name, - progress=progress, - config=config, - ) - downloaded_count += 1 - continue + try: + handled, path = provider.handle_url(str(url), output_dir=final_output_dir) + if handled: + if path: + self._emit_local_file( + downloaded_path=Path(str(path)), + source=str(url), + title_hint=Path(str(path)).stem, + tags_hint=None, + media_kind_hint="file", + full_metadata=None, + progress=progress, + config=config, + provider_hint=provider_name + ) + downloaded_count += 1 + continue + except Exception as e: + debug(f"Provider {provider_name} handle_url error: {e}") + + # Try generic download_url if not already handled + if not handled and hasattr(provider, "download_url"): + res = provider.download_url(str(url), final_output_dir) + if res: + # Standardize result: can be Path, tuple(Path, Info), or dict with "path" + p_val = None + extra_meta = None + if isinstance(res, (str, Path)): + p_val = Path(res) + elif isinstance(res, tuple) and len(res) > 0: + p_val = Path(res[0]) + if len(res) > 1 and isinstance(res[1], dict): + extra_meta = res[1] + elif isinstance(res, dict): + path_candidate = res.get("path") or res.get("file_path") + if path_candidate: + p_val = Path(path_candidate) + extra_meta = res + + if p_val: + self._emit_local_file( + downloaded_path=p_val, + source=str(url), + title_hint=p_val.stem, + tags_hint=None, + media_kind_hint=extra_meta.get("media_kind") if extra_meta else "file", + full_metadata=extra_meta, + provider_hint=provider_name, + progress=progress, + config=config, + ) + downloaded_count += 1 + continue except Exception as e: log(f"Provider {provider_name} error handling {url}: {e}", file=sys.stderr) # Fallthrough to direct download? - # If a provider explicitly claimed it but failed, maybe we shouldn't fallback? - # But "barebones" implies robustness might be up to user. - # We'll continue to next URL. - continue + # If a provider explicitly claimed it but failed, we'll try direct download as a last resort. 
+ pass # Direct Download Fallback result_obj = _download_direct_file( @@ -409,7 +430,7 @@ class Download_File(Cmdlet): suggested_filename=suggested_name, pipeline_progress=progress, ) - downloaded_path = self._path_from_download_result(result_obj) + downloaded_path = coerce_to_path(result_obj) if downloaded_path is None: log( @@ -481,17 +502,6 @@ class Download_File(Cmdlet): return downloaded_count, queued_magnet_submissions - @staticmethod - def _path_from_download_result(result_obj: Any) -> Path: - file_path = None - if hasattr(result_obj, "path"): - file_path = getattr(result_obj, "path") - elif isinstance(result_obj, dict): - file_path = result_obj.get("path") - if not file_path: - file_path = str(result_obj) - return Path(str(file_path)) - def _emit_local_file( self, *, @@ -506,7 +516,7 @@ class Download_File(Cmdlet): provider_hint: Optional[str] = None, ) -> None: title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem - hash_value = self._compute_file_hash(downloaded_path) + hash_value = sha256_file(downloaded_path) notes: Optional[Dict[str, str]] = None try: if isinstance(full_metadata, dict): @@ -544,38 +554,6 @@ class Download_File(Cmdlet): pipeline_context.emit(payload) - @staticmethod - def _normalize_urls(parsed: Dict[str, Any]) -> List[str]: - urls: List[str] = [] - url_value: Any = None - if isinstance(parsed, dict): - url_value = parsed.get("url") - - try: - urls = normalize_url_list(url_value) - except Exception: - urls = [] - - if not urls and isinstance(parsed, dict): - query_val = parsed.get("query") - try: - if isinstance(query_val, str) and query_val.strip().lower().startswith("url:"): - urls = normalize_url_list(query_val) - except Exception: - pass - - return urls - - @staticmethod - def _collect_piped_items_if_no_urls(result: Any, raw_url: Sequence[str]) -> List[Any]: - if raw_url: - return [] - if result is None: - return [] - if isinstance(result, list): - return list(result) - return [result] - @staticmethod def _load_provider_registry() -> Dict[str, Any]: """Lightweight accessor for provider helpers without hard dependencies.""" @@ -597,73 +575,8 @@ class Download_File(Cmdlet): "SearchResult": None, } - @staticmethod - def _safe_total_items(raw_url: Sequence[str], piped_items: Sequence[Any]) -> int: - """Return a sane item count for progress display.""" - try: - url_count = len(raw_url or []) - except Exception: - url_count = 0 - try: - piped_count = len(piped_items or []) - except Exception: - piped_count = 0 - total = url_count + piped_count - return total if total > 0 else 1 - - @staticmethod - def _build_preview(raw_url: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[str]: - """Construct a short preview list for the local progress UI.""" - preview: List[str] = [] - - try: - for url in raw_url or []: - if len(preview) >= 5: - break - preview.append(str(url)) - except Exception: - pass - - if len(preview) < 5: - try: - items = piped_items if isinstance(piped_items, list) else list(piped_items or []) - except Exception: - items = [] - for item in items: - if len(preview) >= 5: - break - try: - label = get_field(item, "title") or get_field(item, "path") or get_field(item, "url") - except Exception: - label = None - if label: - preview.append(str(label)) - - # If we still have nothing, supply a generic placeholder to avoid empty previews. 
- if not preview and total_items: - preview.append(f"{total_items} item(s)") - - return preview - # === Streaming helpers (yt-dlp) === - @staticmethod - def _append_urls_from_piped_result(raw_urls: List[str], result: Any) -> List[str]: - if raw_urls: - return raw_urls - if not result: - return raw_urls - - results_to_check = result if isinstance(result, list) else [result] - for item in results_to_check: - try: - url = get_field(item, "url") or get_field(item, "target") - except Exception: - url = None - if url: - raw_urls.append(url) - return raw_urls - @staticmethod def _filter_supported_urls(raw_urls: Sequence[str]) -> tuple[List[str], List[str]]: supported = [url for url in (raw_urls or []) if is_url_supported_by_ytdlp(url)] @@ -1633,7 +1546,7 @@ class Download_File(Cmdlet): if unsupported_list: debug(f"Skipping {len(unsupported_list)} unsupported url (use direct HTTP mode)") - final_output_dir = self._resolve_streaming_output_dir(parsed, config) + final_output_dir = resolve_target_dir(parsed, config) if not final_output_dir: return 1 @@ -1860,45 +1773,6 @@ class Download_File(Cmdlet): log(f"Error in streaming download handler: {e}", file=sys.stderr) return 1 - def _resolve_streaming_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: - path_override = parsed.get("path") - if path_override: - try: - candidate = Path(str(path_override)).expanduser() - if candidate.suffix: - candidate = candidate.parent - candidate.mkdir(parents=True, exist_ok=True) - debug(f"Using output directory override: {candidate}") - return candidate - except Exception as e: - log(f"Invalid -path output directory: {e}", file=sys.stderr) - return None - - try: - temp_value = (config or {}).get("temp") if isinstance(config, dict) else None - except Exception: - temp_value = None - if temp_value: - try: - candidate = Path(str(temp_value)).expanduser() - candidate.mkdir(parents=True, exist_ok=True) - debug(f"Using config temp directory: {candidate}") - return candidate - except Exception as e: - log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr) - return None - - try: - import tempfile - - candidate = Path(tempfile.gettempdir()) / "Medios-Macina" - candidate.mkdir(parents=True, exist_ok=True) - debug(f"Using OS temp directory: {candidate}") - return candidate - except Exception as e: - log(f"Cannot create OS temp directory: {e}", file=sys.stderr) - return None - def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]: def _to_seconds(ts: str) -> Optional[int]: ts = str(ts).strip() @@ -2001,7 +1875,7 @@ class Download_File(Cmdlet): def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]: info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {} media_path = Path(download_result.path) - hash_value = download_result.hash_value or self._compute_file_hash(media_path) + hash_value = download_result.hash_value or sha256_file(media_path) title = info.get("title") or media_path.stem tag = list(download_result.tag or []) @@ -2398,8 +2272,19 @@ class Download_File(Cmdlet): # Parse arguments parsed = parse_cmdlet_args(args, self) - raw_url = self._normalize_urls(parsed) - piped_items = self._collect_piped_items_if_no_urls(result, raw_url) + # Resolve URLs from -url or positional arguments + url_candidates = parsed.get("url") or [a for a in parsed.get("args", []) if isinstance(a, str) and (a.startswith("http") or "://" in a)] + raw_url = normalize_url_list(url_candidates) + + 
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False + + # Fallback to piped items if no explicit URLs provided + piped_items = [] + if not raw_url: + if isinstance(result, list): + piped_items = list(result) + elif result is not None: + piped_items = [result] # Handle TABLE_AUTO_STAGES routing: if a piped PipeObject has _selection_args, # re-invoke download-file with those args instead of processing the PipeObject itself @@ -2470,7 +2355,7 @@ class Download_File(Cmdlet): if picker_result is not None: return int(picker_result) - streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result) + streaming_candidates = list(raw_url) supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates) streaming_exit_code: Optional[int] = None @@ -2504,7 +2389,7 @@ class Download_File(Cmdlet): return int(picker_result) # Get output directory - final_output_dir = self._resolve_output_dir(parsed, config) + final_output_dir = resolve_target_dir(parsed, config) if not final_output_dir: return 1 @@ -2513,8 +2398,8 @@ class Download_File(Cmdlet): # If the caller isn't running the shared pipeline Live progress UI (e.g. direct # cmdlet execution), start a minimal local pipeline progress panel so downloads # show consistent, Rich-formatted progress (like download-media). - total_items = self._safe_total_items(raw_url, piped_items) - preview = self._build_preview(raw_url, piped_items, total_items) + total_items = max(1, len(raw_url or []) + len(piped_items or [])) + preview = build_pipeline_preview(raw_url, piped_items) progress.ensure_local_ui( label="download-file", @@ -2525,91 +2410,16 @@ class Download_File(Cmdlet): downloaded_count = 0 # Special-case: support selection-inserted magnet-id arg to drive provider downloads - magnet_id_raw = parsed.get("magnet-id") - if magnet_id_raw: - try: - magnet_id = int(str(magnet_id_raw).strip()) - except Exception: - log(f"[download-file] invalid magnet-id: {magnet_id_raw}", file=sys.stderr) - return 1 - - get_provider = registry.get("get_provider") - provider_name = str(parsed.get("provider") or "alldebrid").strip().lower() - provider_obj = None - if get_provider is not None: - try: - provider_obj = get_provider(provider_name, config) - except Exception: - provider_obj = None - - if provider_obj is None: - log(f"[download-file] provider '{provider_name}' not available", file=sys.stderr) - return 1 - - SearchResult = registry.get("SearchResult") - try: - if SearchResult is not None: - sr = SearchResult( - table=provider_name, - title=f"magnet-{magnet_id}", - path=f"alldebrid:magnet:{magnet_id}", - full_metadata={ - "magnet_id": magnet_id, - "provider": provider_name, - "provider_view": "files", - }, - ) - else: - sr = None - except Exception: - sr = None - - def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None: - title_hint = metadata.get("name") or relpath or f"magnet-{magnet_id}" - self._emit_local_file( - downloaded_path=path, - source=file_url or f"alldebrid:magnet:{magnet_id}", - title_hint=title_hint, - tags_hint=None, - media_kind_hint="file", - full_metadata=metadata, - progress=progress, - config=config, - provider_hint=provider_name, - ) - - try: - downloaded_extra = provider_obj.download_items( - sr, - final_output_dir, - emit=_on_emit, - progress=progress, - quiet_mode=quiet_mode, - path_from_result=self._path_from_download_result, - config=config, - ) - except TypeError: - downloaded_extra = provider_obj.download_items( - sr, - 
final_output_dir, - emit=_on_emit, - progress=progress, - quiet_mode=quiet_mode, - path_from_result=self._path_from_download_result, - ) - except Exception as exc: - log(f"[download-file] failed to download magnet {magnet_id}: {exc}", file=sys.stderr) - return 1 - - if downloaded_extra: - debug(f"[download-file] AllDebrid magnet {magnet_id} emitted {downloaded_extra} files") - return 0 - - log( - f"[download-file] AllDebrid magnet {magnet_id} produced no downloads", - file=sys.stderr, - ) - return 1 + magnet_ret = self._process_magnet_id( + parsed=parsed, + registry=registry, + config=config, + final_output_dir=final_output_dir, + progress=progress, + quiet_mode=quiet_mode + ) + if magnet_ret is not None: + return magnet_ret urls_downloaded, early_exit = self._process_explicit_urls( raw_urls=raw_url, @@ -2662,6 +2472,104 @@ class Download_File(Cmdlet): pass progress.close_local_ui(force_complete=True) + def _process_magnet_id( + self, + *, + parsed: Dict[str, Any], + registry: Dict[str, Any], + config: Dict[str, Any], + final_output_dir: Path, + progress: PipelineProgress, + quiet_mode: bool + ) -> Optional[int]: + magnet_id_raw = parsed.get("magnet-id") + if not magnet_id_raw: + return None + + try: + magnet_id = int(str(magnet_id_raw).strip()) + except Exception: + log(f"[download-file] invalid magnet-id: {magnet_id_raw}", file=sys.stderr) + return 1 + + get_provider = registry.get("get_provider") + provider_name = str(parsed.get("provider") or "alldebrid").strip().lower() + provider_obj = None + if get_provider is not None: + try: + provider_obj = get_provider(provider_name, config) + except Exception: + provider_obj = None + + if provider_obj is None: + log(f"[download-file] provider '{provider_name}' not available", file=sys.stderr) + return 1 + + SearchResult = registry.get("SearchResult") + try: + if SearchResult is not None: + sr = SearchResult( + table=provider_name, + title=f"magnet-{magnet_id}", + path=f"alldebrid:magnet:{magnet_id}", + full_metadata={ + "magnet_id": magnet_id, + "provider": provider_name, + "provider_view": "files", + }, + ) + else: + sr = None + except Exception: + sr = None + + def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None: + title_hint = metadata.get("name") or relpath or f"magnet-{magnet_id}" + self._emit_local_file( + downloaded_path=path, + source=file_url or f"alldebrid:magnet:{magnet_id}", + title_hint=title_hint, + tags_hint=None, + media_kind_hint="file", + full_metadata=metadata, + progress=progress, + config=config, + provider_hint=provider_name, + ) + + try: + downloaded_extra = provider_obj.download_items( + sr, + final_output_dir, + emit=_on_emit, + progress=progress, + quiet_mode=quiet_mode, + path_from_result=coerce_to_path, + config=config, + ) + except TypeError: + downloaded_extra = provider_obj.download_items( + sr, + final_output_dir, + emit=_on_emit, + progress=progress, + quiet_mode=quiet_mode, + path_from_result=coerce_to_path, + ) + except Exception as exc: + log(f"[download-file] failed to download magnet {magnet_id}: {exc}", file=sys.stderr) + return 1 + + if downloaded_extra: + debug(f"[download-file] AllDebrid magnet {magnet_id} emitted {downloaded_extra} files") + return 0 + + log( + f"[download-file] AllDebrid magnet {magnet_id} produced no downloads", + file=sys.stderr, + ) + return 1 + def _maybe_show_provider_picker( self, *, @@ -2714,67 +2622,6 @@ class Download_File(Cmdlet): return None - def _resolve_output_dir(self, - parsed: Dict[str, - Any], - config: Dict[str, - Any]) -> 
Optional[Path]: - """Resolve the output directory from storage location or config.""" - output_dir_arg = parsed.get("path") or parsed.get("output") - if output_dir_arg: - try: - out_path = Path(str(output_dir_arg)).expanduser() - out_path.mkdir(parents=True, exist_ok=True) - return out_path - except Exception as e: - log( - f"Cannot use output directory {output_dir_arg}: {e}", - file=sys.stderr - ) - return None - - storage_location = parsed.get("storage") - - # Priority 1: --storage flag - if storage_location: - try: - return SharedArgs.resolve_storage(storage_location) - except Exception as e: - log(f"Invalid storage location: {e}", file=sys.stderr) - return None - - # Priority 2: Config default output/temp directory, then OS temp - try: - from SYS.config import resolve_output_dir - final_output_dir = resolve_output_dir(config) - except Exception: - import tempfile - final_output_dir = Path(tempfile.gettempdir()) - - debug(f"Using default directory: {final_output_dir}") - - # Ensure directory exists - try: - final_output_dir.mkdir(parents=True, exist_ok=True) - except Exception as e: - log( - f"Cannot create output directory {final_output_dir}: {e}", - file=sys.stderr - ) - return None - - return final_output_dir - - def _compute_file_hash(self, filepath: Path) -> str: - """Compute SHA256 hash of a file.""" - import hashlib - - sha256_hash = hashlib.sha256() - with open(filepath, "rb") as f: - for byte_block in iter(lambda: f.read(4096), b""): - sha256_hash.update(byte_block) - return sha256_hash.hexdigest() - # Module-level singleton registration CMDLET = Download_File() diff --git a/cmdlet/get_url.py b/cmdlet/get_url.py index 383d038..685ebc0 100644 --- a/cmdlet/get_url.py +++ b/cmdlet/get_url.py @@ -95,39 +95,9 @@ class Get_Url(Cmdlet): return item.strip() return None - @staticmethod - def _extract_url_from_result(result: Any) -> Optional[str]: - # Prefer explicit url field. - u = Get_Url._extract_first_url(get_field(result, "url")) - if u: - return u - - # Fall back to ResultTable-style columns list. - cols = None - if isinstance(result, dict): - cols = result.get("columns") - else: - cols = getattr(result, "columns", None) - if isinstance(cols, list): - for pair in cols: - try: - if isinstance(pair, (list, tuple)) and len(pair) == 2: - k, v = pair - if str(k or "").strip().lower() in {"url", "urls"}: - u2 = Get_Url._extract_first_url(v) - if u2: - return u2 - except Exception: - continue - return None - @staticmethod def _extract_title_from_result(result: Any) -> Optional[str]: # Prefer explicit title field. - t = get_field(result, "title") - if isinstance(t, str) and t.strip(): - return t.strip() - # Fall back to ResultTable-style columns list. cols = None if isinstance(result, dict): @@ -318,6 +288,13 @@ class Get_Url(Cmdlet): for url in (urls or []): if not self._match_url_pattern(str(url), raw_pattern): continue + + # Double-check it looks like a URL to avoid data leakage from dirty DBs + from SYS.metadata import normalize_urls + valid = normalize_urls([str(url)]) + if not valid: + continue + items.append( UrlItem( url=str(url), @@ -328,7 +305,7 @@ class Get_Url(Cmdlet): ext=str(ext or ""), ) ) - found_stores.add(str(store_name)) + found_stores.add(str(store_name)) except Exception as exc: debug( f"Error searching store '{store_name}': {exc}", @@ -358,10 +335,6 @@ class Get_Url(Cmdlet): # Check if user provided a URL pattern to search for search_pattern = parsed.get("url") - # Allow piping a URL row (or any result with a url field/column) into get-url. 
- if not search_pattern: - search_pattern = self._extract_url_from_result(result) - if search_pattern: # URL search mode: find all files with matching URLs across stores items, stores_searched = self._search_urls_across_stores(search_pattern, config) @@ -405,9 +378,13 @@ class Get_Url(Cmdlet): } display_items.append(payload) table.add_result(payload) - ctx.emit(payload) ctx.set_last_result_table(table if display_items else None, display_items, subject=result) + + # Emit after table state is finalized to prevent side effects in TUI rendering + for d in display_items: + ctx.emit(d) + log( f"Found {len(items)} matching url(s) in {len(stores_searched)} store(s)" ) @@ -433,18 +410,16 @@ class Get_Url(Cmdlet): log("Error: No store name provided") return 1 - # Normalize hash - file_hash = normalize_hash(file_hash) - if not file_hash: - log("Error: Invalid hash format") - return 1 - # Get backend and retrieve url try: storage = Store(config) backend = storage[store_name] urls = backend.get_url(file_hash) + + # Filter URLs to avoid data leakage from dirty DBs + from SYS.metadata import normalize_urls + urls = normalize_urls(urls) title = str(get_field(result, "title") or "").strip() table_title = "Title" @@ -468,10 +443,15 @@ class Get_Url(Cmdlet): row.add_column("Url", u) item = UrlItem(url=u, hash=file_hash, store=str(store_name)) items.append(item) - ctx.emit(item) - # Make this a real result table so @.. / @,, can navigate it - ctx.set_last_result_table(table if items else None, items, subject=result) + # Use overlay mode to avoid "merging" with the previous status/table state. + # This is idiomatic for detail views and prevents the search table from being + # contaminated by partial re-renders. + ctx.set_last_result_table_overlay(table if items else None, items, subject=result) + + # Emit items at the end for pipeline continuity + for item in items: + ctx.emit(item) if not items: log("No url found", file=sys.stderr) diff --git a/cmdlet/search_file.py b/cmdlet/search_file.py index ea9a26f..52d482c 100644 --- a/cmdlet/search_file.py +++ b/cmdlet/search_file.py @@ -792,6 +792,7 @@ class search_file(Cmdlet): "ext": self._normalize_extension(ext_val), "size_bytes": size_bytes_int, "tag": tags_list, + "url": meta_obj.get("url") or [], } table.add_result(payload) diff --git a/readme.md b/readme.md index c75cf63..205e973 100644 --- a/readme.md +++ b/readme.md @@ -4,8 +4,16 @@

4 TEXT BASED FILE ONTOLOGY

-Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging, and media storage (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably. +Medios-Macina is a CLI file and media manager and toolkit focused on downloading, tagging, and media storage (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably. +
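A minimal end-to-end example of that pipeable style (illustrative only; the cmdlet names and the -url/-store flags appear in this patch, but exact invocations may differ):

```
download-file -url "https://example.com/video.mp4" | add-file -store hydrus | get-url
```

This downloads the file to a temp directory, ingests it into the `hydrus` store, and then lists the URLs now associated with the file's hash.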

ELEVATOR PITCH

+

CONTENTS

FEATURES
@@ -16,10 +24,6 @@ Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging TUTORIAL

- - - -

Features