refactor(download): remove ProviderCore/download.py, move sanitize_filename to SYS.utils, replace callers to use API.HTTP.HTTPClient

2026-01-06 01:38:59 -08:00
parent 3b363dd536
commit 41c11d39fd
38 changed files with 2640 additions and 526 deletions
--- a/CLI.py
+++ b/CLI.py
@@ -67,6 +67,7 @@ from SYS.cmdlet_catalog import (
 )
 from SYS.config import get_local_storage_path, load_config
 from SYS.result_table import ResultTable
+from ProviderCore.registry import provider_inline_query_choices

 HELP_EXAMPLE_SOURCE_COMMANDS = {
    ".help-example",
@@ -797,10 +798,10 @@ class CmdletIntrospection:
    @staticmethod
    def store_choices(config: Dict[str, Any]) -> List[str]:
        try:
-            from Store import Store
+            # Use the config-only helper so completion never instantiates storage backends
+            from Store.registry import list_configured_backend_names

-            storage = Store(config=config, suppress_debug=True)
-            return list(storage.list_backends() or [])
+            return list(list_configured_backend_names(config) or [])  # "or []" turns a falsy helper result into an empty list
        except Exception:
            return []

@@ -903,6 +904,21 @@ class CmdletCompleter(Completer):

        return used

+    @staticmethod
+    def _flag_value(tokens: Sequence[str], *flags: str) -> Optional[str]:  # Return the value given for the first token matching any of *flags*, or None.
+        want = {str(f).strip().lower() for f in flags if str(f).strip()}  # normalized flag spellings; blank entries dropped
+        if not want:
+            return None  # no usable flag names were given
+        for idx, tok in enumerate(tokens):
+            low = str(tok or "").strip().lower()  # case-insensitive comparison form of the token
+            if "=" in low:  # inline form: flag=value
+                head, val = low.split("=", 1)  # val (lowercased) is unused; the value is re-split from tok below
+                if head in want:
+                    return tok.split("=", 1)[1]  # taken from the raw token so the value keeps its original case
+            if low in want and idx + 1 < len(tokens):  # separated form: flag value
+                return tokens[idx + 1]  # next token is returned as-is, even if it is itself a flag
+        return None  # flag absent, or it was the last token with nothing after it
+
    def get_completions(
        self,
        document: Document,
@@ -971,6 +987,48 @@ class CmdletCompleter(Completer):
            prev_token = stage_tokens[-2].lower() if len(stage_tokens) > 1 else ""

        config = self._config_loader.load()
+
+        provider_name = None
+        if cmd_name == "search-file":
+            provider_name = self._flag_value(stage_tokens, "-provider", "--provider")
+
+        if (
+            cmd_name == "search-file"
+            and provider_name
+            and not ends_with_space
+            and ":" in current_token
+            and not current_token.startswith("-")
+        ):
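+            # Allow quoted tokens like "system:g. NOTE(review): str.endswith("") is always True, so the closing-quote strip below also drops the last character of unquoted tokens; guard it with `quote_prefix and ...`.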
+            quote_prefix = current_token[0] if current_token[:1] in {"'", '"'} else ""
+            inline_token = current_token[1:] if quote_prefix else current_token
+            if inline_token.endswith(quote_prefix) and len(inline_token) > 1:
+                inline_token = inline_token[:-1]
+
+            # Allow comma-separated inline specs; operate on the last segment only.
+            if "," in inline_token:
+                inline_token = inline_token.split(",")[-1].lstrip()
+
+            if ":" not in inline_token:
+                return
+
+            field, partial = inline_token.split(":", 1)
+            field = field.strip().lower()
+            partial_lower = partial.strip().lower()
+            inline_choices = provider_inline_query_choices(provider_name, field, config)
+            if inline_choices:
+                filtered = (
+                    [c for c in inline_choices if partial_lower in str(c).lower()]
+                    if partial_lower
+                    else list(inline_choices)
+                )
+                for choice in (filtered or inline_choices):
+                    # Replace only the partial after the colon; keep the field prefix and quotes as typed.
+                    start_pos = -len(partial)
+                    suggestion = str(choice)
+                    yield Completion(suggestion, start_position=start_pos)
+                return
+
        choices = CmdletIntrospection.arg_choices(
            cmd_name=cmd_name,
            arg_name=prev_token,
@@ -2580,27 +2638,32 @@ class PipelineExecutor:
                    else:
                        cmd_list = []

-                    expanded_stage: List[str] = cmd_list + source_args + selected_row_args
+                        # IMPORTANT: Put selected row args *before* source_args.
+                        # Rationale: The cmdlet argument parser treats the *first* unknown
+                        # token as a positional value (e.g., URL). If `source_args`
+                        # contain unknown flags (like -provider which download-file does
+                        # not declare), they could be misinterpreted as the positional
+                        # URL argument and cause attempts to download strings like
+                        # "-provider" (which is invalid). By placing selection args
+                        # first we ensure the intended URL/selection token is parsed
+                        # as the positional URL and avoid this class of parsing errors.
+                        expanded_stage: List[str] = cmd_list + selected_row_args + source_args

-                    if first_stage_had_extra_args and stages:
-                        expanded_stage += stages[0]
-                        stages[0] = expanded_stage
-                    else:
-                        stages.insert(0, expanded_stage)
+                        if first_stage_had_extra_args and stages:
+                            expanded_stage += stages[0]
+                            stages[0] = expanded_stage
+                        else:
+                            stages.insert(0, expanded_stage)

-                    if pipeline_session and worker_manager:
-                        try:
-                            worker_manager.log_step(
-                                pipeline_session.worker_id,
-                                f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
-                            )
-                        except Exception:
-                            pass
+                        if pipeline_session and worker_manager:
+                            try:
+                                worker_manager.log_step(
+                                    pipeline_session.worker_id,
+                                    f"@N expansion: {source_cmd} + selected_args={selected_row_args} + source_args={source_args}",
+                                )
+                            except Exception:
+                                pass

-                    selection_indices = []
-                    command_expanded = True
-
-        if (not command_expanded) and selection_indices:
            stage_table = None
            try:
                stage_table = ctx.get_current_stage_table()
@@ -2770,6 +2833,41 @@ class PipelineExecutor:
                    except Exception:
                        auto_stage = None

+                def _apply_row_action_to_stage(stage_idx: int) -> bool:  # Replace stages[stage_idx] with the selected row's action; True only if a replacement happened.
+                    if not selection_indices or len(selection_indices) != 1:  # row actions apply to single-row selections only
+                        return False
+                    try:
+                        row_action = ctx.get_current_stage_table_row_selection_action(
+                            selection_indices[0]
+                        )
+                    except Exception:
+                        row_action = None  # a failed lookup is treated the same as "no action"
+                    if not row_action:
+                        # Fallback when the table lookup gave no action: read "_selection_action" from the matching item in ctx.get_last_result_items()
+                        try:
+                            items = ctx.get_last_result_items() or []
+                            if 0 <= selection_indices[0] < len(items):
+                                maybe = items[selection_indices[0]]
+                                if isinstance(maybe, dict):
+                                    candidate = maybe.get("_selection_action")
+                                    if isinstance(candidate, (list, tuple)):
+                                        row_action = [str(x) for x in candidate if x is not None]
+                                        debug(f"@N row {selection_indices[0]} restored action from payload: {row_action}")
+                        except Exception:
+                            row_action = row_action or None  # keep whatever was recovered before the failure
+                    if not row_action:
+                        debug(f"@N row {selection_indices[0]} has no selection_action")
+                        return False
+                    normalized = [str(x) for x in row_action if x is not None]  # stringify tokens, dropping None entries
+                    if not normalized:
+                        return False
+                    debug(f"Applying row action for row {selection_indices[0]} -> {normalized}")
+                    if 0 <= stage_idx < len(stages):
+                        debug(f"Replacing stage {stage_idx} {stages[stage_idx]} with row action {normalized}")
+                        stages[stage_idx] = normalized  # the whole stage (command and args) is overwritten in place
+                        return True
+                    return False  # stage_idx out of range; stages left untouched
+
                if not stages:
                    if isinstance(table_type, str) and table_type.startswith("metadata."):
                        print("Auto-applying metadata selection via get-tag")
@@ -2779,7 +2877,43 @@ class PipelineExecutor:
                            print(f"Auto-running selection via {auto_stage[0]}")
                        except Exception:
                            pass
-                        stages.append(list(auto_stage))
+                        # Append the auto stage now, carrying over any source_args. When a
+                        # selection such as @1 is present, the row action or row selection
+                        # args are attached to this auto-inserted stage just below, so the
+                        # auto stage receives the selected row information immediately.
+                        stages.append(list(auto_stage) + (source_args or []))
+                        debug(f"Inserted auto stage before row action: {stages[-1]}")
+
+                        # If the caller included a selection (e.g., @1) try to attach
+                        # the selection args immediately to the inserted auto stage so
+                        # the expansion is effective in a single pass.
+                        if selection_indices:
+                            try:
+                                if not _apply_row_action_to_stage(len(stages) - 1):
+                                    # Only support single-row selection for auto-attach here
+                                    if len(selection_indices) == 1:
+                                        idx = selection_indices[0]
+                                        row_args = ctx.get_current_stage_table_row_selection_args(idx)
+                                        if not row_args:
+                                            try:
+                                                items = ctx.get_last_result_items() or []
+                                                if 0 <= idx < len(items):
+                                                    maybe = items[idx]
+                                                    if isinstance(maybe, dict):
+                                                        candidate = maybe.get("_selection_args")
+                                                        if isinstance(candidate, (list, tuple)):
+                                                            row_args = [str(x) for x in candidate if x is not None]
+                                            except Exception:
+                                                row_args = row_args or None
+                                        if row_args:
+                                            # Place selection args before any existing source args
+                                            inserted = stages[-1]
+                                            if inserted:
+                                                cmd = inserted[0]
+                                                tail = [str(x) for x in inserted[1:]]
+                                                stages[-1] = [cmd] + [str(x) for x in row_args] + tail
+                            except Exception:
+                                pass
                else:
                    first_cmd = stages[0][0] if stages and stages[0] else None
                    if isinstance(table_type, str) and table_type.startswith("metadata.") and first_cmd not in (
@@ -2795,8 +2929,41 @@ class PipelineExecutor:
                        auto_cmd_norm = _norm_cmd(auto_stage[0])
                        if first_cmd_norm not in (auto_cmd_norm, ".pipe", ".mpv"):
                            debug(f"Auto-inserting {auto_cmd_norm} after selection")
-                            stages.insert(0, list(auto_stage))
+                            # Insert the auto stage before the user-specified stage
+                            stages.insert(0, list(auto_stage) + (source_args or []))
+                            debug(f"Inserted auto stage before existing pipeline: {stages[0]}")

+                            # If a selection is present, attach the row selection args to the
+                            # newly-inserted stage so the download stage runs with the
+                            # selected row information.
+                            if selection_indices:
+                                try:
+                                    if not _apply_row_action_to_stage(0):
+                                        if len(selection_indices) == 1:
+                                            idx = selection_indices[0]
+                                            row_args = ctx.get_current_stage_table_row_selection_args(idx)
+                                            if not row_args:
+                                                try:
+                                                    items = ctx.get_last_result_items() or []
+                                                    if 0 <= idx < len(items):
+                                                        maybe = items[idx]
+                                                        if isinstance(maybe, dict):
+                                                            candidate = maybe.get("_selection_args")
+                                                            if isinstance(candidate, (list, tuple)):
+                                                                row_args = [str(x) for x in candidate if x is not None]
+                                                except Exception:
+                                                    row_args = row_args or None
+                                            if row_args:
+                                                inserted = stages[0]
+                                                if inserted:
+                                                    cmd = inserted[0]
+                                                    tail = [str(x) for x in inserted[1:]]
+                                                    stages[0] = [cmd] + [str(x) for x in row_args] + tail
+                                except Exception:
+                                    pass
+
+                # After inserting/appending an auto-stage, continue processing so later
+                # selection-expansion logic can still run (e.g., for example selectors).
                return True, piped_result
            else:
                print("No previous results to select from\n")