"""search-file cmdlet: Search for files in storage backends (Hydrus).""" from __future__ import annotations from typing import Any, Dict, Sequence, List, Optional from collections import deque import uuid from pathlib import Path import re import json import sys import html import time from urllib.parse import urlparse, parse_qs, unquote, urljoin from SYS.logger import log, debug from SYS.payload_builders import build_file_result_payload, normalize_file_extension from ProviderCore.registry import get_search_provider, list_search_providers from SYS.rich_display import ( show_provider_config_panel, show_store_config_panel, show_available_providers_panel, ) from SYS.database import insert_worker, update_worker, append_worker_stdout from SYS.item_accessors import get_extension_field, get_int_field, get_result_title from SYS.selection_builder import build_default_selection from SYS.result_publication import publish_result_table from ._shared import ( Cmdlet, CmdletArg, SharedArgs, get_field, get_preferred_store_backend, should_show_help, normalize_hash, first_title_tag, parse_hash_query, ) from SYS import pipeline as ctx _WHITESPACE_RE = re.compile(r"\s+") _SITE_TOKEN_RE = re.compile(r"(?:^|\s)site:([^\s,]+)", flags=re.IGNORECASE) _FILETYPE_TOKEN_RE = re.compile( r"(?:^|\s)(?:ext|filetype|type):\.?([a-z0-9]{1,12})\b", flags=re.IGNORECASE, ) _SITE_REMOVE_RE = re.compile(r"(?:^|\s)site:[^\s,]+", flags=re.IGNORECASE) _FILETYPE_REMOVE_RE = re.compile( r"(?:^|\s)(?:ext|filetype|type):\.?[a-z0-9]{1,12}\b", flags=re.IGNORECASE, ) _SCHEME_PREFIX_RE = re.compile(r"^[a-z]+:") _YAHOO_RU_RE = re.compile(r"/RU=([^/]+)/RK=", flags=re.IGNORECASE) _HTML_TAG_RE = re.compile(r"<[^>]+>") _DDG_RESULT_ANCHOR_RE = re.compile( r']+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>(.*?)', flags=re.IGNORECASE | re.DOTALL, ) _GENERIC_ANCHOR_RE = re.compile( r']+href=["\']([^"\']+)["\'][^>]*>(.*?)', flags=re.IGNORECASE | re.DOTALL, ) _BING_RESULT_ANCHOR_RE = re.compile( r'