fdf
This commit is contained in:
@@ -14,6 +14,7 @@ import time
|
||||
from urllib.parse import urlparse, parse_qs, unquote, urljoin
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from SYS.payload_builders import build_file_result_payload, normalize_file_extension
|
||||
from ProviderCore.registry import get_search_provider, list_search_providers
|
||||
from SYS.rich_display import (
|
||||
show_provider_config_panel,
|
||||
@@ -21,12 +22,16 @@ from SYS.rich_display import (
|
||||
show_available_providers_panel,
|
||||
)
|
||||
from SYS.database import insert_worker, update_worker, append_worker_stdout
|
||||
from SYS.item_accessors import get_extension_field, get_int_field, get_result_title
|
||||
from SYS.selection_builder import build_default_selection
|
||||
from SYS.result_publication import publish_result_table
|
||||
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
get_field,
|
||||
get_preferred_store_backend,
|
||||
should_show_help,
|
||||
normalize_hash,
|
||||
first_title_tag,
|
||||
@@ -34,6 +39,35 @@ from ._shared import (
|
||||
)
|
||||
from SYS import pipeline as ctx
|
||||
|
||||
# Patterns are compiled once at import time so the parsing helpers below do
# not have to spell out (and re-look-up) the same expressions on every call.

# Any run of whitespace; used to collapse spacing in titles, snippets, queries.
_WHITESPACE_RE = re.compile(r"\s+")

# Query tokens. The *_TOKEN_* variants capture the token's value, the
# *_REMOVE_* variants match the whole token so it can be blanked out.
_SITE_TOKEN_RE = re.compile(r"(?:^|\s)site:([^\s,]+)", re.IGNORECASE)
_SITE_REMOVE_RE = re.compile(r"(?:^|\s)site:[^\s,]+", re.IGNORECASE)
_FILETYPE_TOKEN_RE = re.compile(
    r"(?:^|\s)(?:ext|filetype|type):\.?([a-z0-9]{1,12})\b",
    re.IGNORECASE,
)
_FILETYPE_REMOVE_RE = re.compile(
    r"(?:^|\s)(?:ext|filetype|type):\.?[a-z0-9]{1,12}\b",
    re.IGNORECASE,
)
_STORE_FILTER_RE = re.compile(r"\bstore:([^\s,]+)", re.IGNORECASE)
_STORE_FILTER_REMOVE_RE = re.compile(r"\s*[,]?\s*store:[^\s,]+", re.IGNORECASE)

# Leading "<letters>:" prefix. Deliberately case-sensitive: it only matches
# lowercase letters, so callers lowercase the candidate before matching.
_SCHEME_PREFIX_RE = re.compile(r"^[a-z]+:")

# Yahoo redirect links embed the url-encoded target between "/RU=" and "/RK=".
_YAHOO_RU_RE = re.compile(r"/RU=([^/]+)/RK=", re.IGNORECASE)

# Any single HTML tag; used to reduce an HTML fragment to its text.
_HTML_TAG_RE = re.compile(r"<[^>]+>")

# Anchor extractors for the regex fallback parsers. Each captures
# (href, inner HTML); DOTALL lets the inner HTML span several lines.
_ANCHOR_FLAGS = re.IGNORECASE | re.DOTALL
_DDG_RESULT_ANCHOR_RE = re.compile(
    r'<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>(.*?)</a>',
    _ANCHOR_FLAGS,
)
_GENERIC_ANCHOR_RE = re.compile(
    r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>(.*?)</a>',
    _ANCHOR_FLAGS,
)
_BING_RESULT_ANCHOR_RE = re.compile(
    r'<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>',
    _ANCHOR_FLAGS,
)
|
||||
|
||||
|
||||
class _WorkerLogger:
|
||||
def __init__(self, worker_id: str) -> None:
|
||||
@@ -230,7 +264,7 @@ class search_file(Cmdlet):
|
||||
|
||||
@staticmethod
def _normalize_space(text: Any) -> str:
    """Collapse whitespace runs in *text* to single spaces and trim the ends.

    Args:
        text: Any value; falsy values (``None``, ``""``, ``0``) are treated
            as an empty string, everything else is passed through ``str()``.

    Returns:
        The normalized string, with no leading or trailing whitespace.
    """
    # Single return using the module-level pre-compiled pattern; the former
    # inline ``re.sub(r"\s+", ...)`` produced the same result and left a
    # second, unreachable return behind it.
    return _WHITESPACE_RE.sub(" ", str(text or "")).strip()
|
||||
|
||||
@classmethod
|
||||
def _build_web_search_plan(
|
||||
@@ -266,7 +300,7 @@ class search_file(Cmdlet):
|
||||
site_token_to_strip = ""
|
||||
seed_url = ""
|
||||
|
||||
site_match = re.search(r"(?:^|\s)site:([^\s,]+)", text, flags=re.IGNORECASE)
|
||||
site_match = _SITE_TOKEN_RE.search(text)
|
||||
if site_match:
|
||||
site_host = cls._extract_site_host(site_match.group(1))
|
||||
seed_url = str(site_match.group(1) or "").strip()
|
||||
@@ -286,7 +320,7 @@ class search_file(Cmdlet):
|
||||
lower_candidate = candidate.lower()
|
||||
if lower_candidate.startswith(("ext:", "filetype:", "type:", "site:")):
|
||||
continue
|
||||
if re.match(r"^[a-z]+:", lower_candidate) and not lower_candidate.startswith(
|
||||
if _SCHEME_PREFIX_RE.match(lower_candidate) and not lower_candidate.startswith(
|
||||
("http://", "https://")
|
||||
):
|
||||
continue
|
||||
@@ -299,11 +333,7 @@ class search_file(Cmdlet):
|
||||
if not site_host:
|
||||
return None
|
||||
|
||||
filetype_match = re.search(
|
||||
r"(?:^|\s)(?:ext|filetype|type):\.?([a-z0-9]{1,12})\b",
|
||||
text,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
filetype_match = _FILETYPE_TOKEN_RE.search(text)
|
||||
filetype = cls._normalize_extension(filetype_match.group(1)) if filetype_match else ""
|
||||
|
||||
# Feature gate: trigger this web-search mode when filetype is present
|
||||
@@ -313,13 +343,8 @@ class search_file(Cmdlet):
|
||||
return None
|
||||
|
||||
residual = text
|
||||
residual = re.sub(r"(?:^|\s)site:[^\s,]+", " ", residual, flags=re.IGNORECASE)
|
||||
residual = re.sub(
|
||||
r"(?:^|\s)(?:ext|filetype|type):\.?[a-z0-9]{1,12}\b",
|
||||
" ",
|
||||
residual,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
residual = _SITE_REMOVE_RE.sub(" ", residual)
|
||||
residual = _FILETYPE_REMOVE_RE.sub(" ", residual)
|
||||
|
||||
if site_from_positional and positional_args:
|
||||
first = str(positional_args[0] or "").strip()
|
||||
@@ -631,7 +656,7 @@ class search_file(Cmdlet):
|
||||
|
||||
# Yahoo result links often look like:
|
||||
# https://r.search.yahoo.com/.../RU=<url-encoded-target>/RK=...
|
||||
ru_match = re.search(r"/RU=([^/]+)/RK=", raw_href, flags=re.IGNORECASE)
|
||||
ru_match = _YAHOO_RU_RE.search(raw_href)
|
||||
if ru_match:
|
||||
try:
|
||||
return str(unquote(ru_match.group(1))).strip()
|
||||
@@ -664,6 +689,75 @@ class search_file(Cmdlet):
|
||||
return False
|
||||
return host == target or host.endswith(f".{target}")
|
||||
|
||||
@staticmethod
|
||||
def _itertext_join(node: Any) -> str:
|
||||
try:
|
||||
return " ".join([str(text).strip() for text in node.itertext() if str(text).strip()])
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@staticmethod
def _html_fragment_to_text(fragment: Any) -> str:
    """Reduce an HTML fragment to plain text.

    Every tag is replaced by a single space, then HTML entities are
    unescaped. Whitespace is not collapsed here. Falsy input yields ``""``.
    """
    markup = str(fragment) if fragment else ""
    without_tags = _HTML_TAG_RE.sub(" ", markup)
    return html.unescape(without_tags)
|
||||
|
||||
@classmethod
def _append_web_result(
    cls,
    items: List[Dict[str, str]],
    seen_urls: set[str],
    *,
    site_host: str,
    url_text: str,
    title_text: str,
    snippet_text: str,
) -> None:
    """Append one normalized search hit to *items* if it passes the filters.

    A hit is kept only when its URL is an absolute http(s) URL, matches
    *site_host* according to ``cls._url_matches_site``, and has not been
    recorded in *seen_urls* yet. Both *items* and *seen_urls* are mutated in
    place; nothing is returned.

    Args:
        items: Result rows collected so far.
        seen_urls: URLs already emitted, used for de-duplication.
        site_host: Host the result must belong to.
        url_text: Candidate result URL.
        title_text: Raw title; falls back to the URL when empty.
        snippet_text: Raw snippet text (may be empty).
    """
    candidate = str(url_text or "").strip()

    # An empty string also fails the prefix test, so one check covers both.
    if not candidate.startswith(("http://", "https://")):
        return
    # Short-circuit keeps the site check ahead of the duplicate check.
    if not cls._url_matches_site(candidate, site_host) or candidate in seen_urls:
        return

    seen_urls.add(candidate)
    title = cls._normalize_space(title_text) or candidate
    snippet = cls._normalize_space(snippet_text)
    items.append({"url": candidate, "title": title, "snippet": snippet})
|
||||
|
||||
@classmethod
|
||||
def _parse_web_results_with_fallback(
|
||||
cls,
|
||||
*,
|
||||
html_text: str,
|
||||
limit: int,
|
||||
lxml_parser: Any,
|
||||
regex_parser: Any,
|
||||
fallback_when_empty: bool = False,
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Run an lxml-based parser with an optional regex fallback."""
|
||||
items: List[Dict[str, str]] = []
|
||||
seen_urls: set[str] = set()
|
||||
should_run_regex = False
|
||||
|
||||
try:
|
||||
from lxml import html as lxml_html
|
||||
|
||||
doc = lxml_html.fromstring(html_text or "")
|
||||
lxml_parser(doc, items, seen_urls)
|
||||
should_run_regex = fallback_when_empty and not items
|
||||
except Exception:
|
||||
should_run_regex = True
|
||||
|
||||
if should_run_regex:
|
||||
regex_parser(html_text or "", items, seen_urls)
|
||||
|
||||
return items[:limit]
|
||||
|
||||
@classmethod
|
||||
def _parse_duckduckgo_results(
|
||||
cls,
|
||||
@@ -673,36 +767,7 @@ class search_file(Cmdlet):
|
||||
limit: int,
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Parse DuckDuckGo HTML results into normalized rows."""
|
||||
items: List[Dict[str, str]] = []
|
||||
seen_urls: set[str] = set()
|
||||
|
||||
def _add_item(url_text: str, title_text: str, snippet_text: str) -> None:
|
||||
url_clean = str(url_text or "").strip()
|
||||
if not url_clean:
|
||||
return
|
||||
if not url_clean.startswith(("http://", "https://")):
|
||||
return
|
||||
if not cls._url_matches_site(url_clean, site_host):
|
||||
return
|
||||
if url_clean in seen_urls:
|
||||
return
|
||||
|
||||
seen_urls.add(url_clean)
|
||||
title_clean = cls._normalize_space(title_text)
|
||||
snippet_clean = cls._normalize_space(snippet_text)
|
||||
items.append(
|
||||
{
|
||||
"url": url_clean,
|
||||
"title": title_clean or url_clean,
|
||||
"snippet": snippet_clean,
|
||||
}
|
||||
)
|
||||
|
||||
# Preferred parser path (lxml is already a project dependency).
|
||||
try:
|
||||
from lxml import html as lxml_html
|
||||
|
||||
doc = lxml_html.fromstring(html_text or "")
|
||||
def _parse_lxml(doc: Any, items: List[Dict[str, str]], seen_urls: set[str]) -> None:
|
||||
result_nodes = doc.xpath("//div[contains(@class, 'result')]")
|
||||
|
||||
for node in result_nodes:
|
||||
@@ -712,40 +777,47 @@ class search_file(Cmdlet):
|
||||
|
||||
link = links[0]
|
||||
href = cls._extract_duckduckgo_target_url(link.get("href"))
|
||||
title = " ".join([str(t).strip() for t in link.itertext() if str(t).strip()])
|
||||
title = cls._itertext_join(link)
|
||||
|
||||
snippet_nodes = node.xpath(".//*[contains(@class, 'result__snippet')]")
|
||||
snippet = ""
|
||||
if snippet_nodes:
|
||||
snippet = " ".join(
|
||||
[str(t).strip() for t in snippet_nodes[0].itertext() if str(t).strip()]
|
||||
)
|
||||
snippet = cls._itertext_join(snippet_nodes[0])
|
||||
|
||||
_add_item(href, title, snippet)
|
||||
cls._append_web_result(
|
||||
items,
|
||||
seen_urls,
|
||||
site_host=site_host,
|
||||
url_text=href,
|
||||
title_text=title,
|
||||
snippet_text=snippet,
|
||||
)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
except Exception:
|
||||
# Fallback to regex parser below.
|
||||
pass
|
||||
|
||||
if items:
|
||||
return items[:limit]
|
||||
def _parse_regex(raw_html: str, items: List[Dict[str, str]], seen_urls: set[str]) -> None:
|
||||
for match in _DDG_RESULT_ANCHOR_RE.finditer(raw_html):
|
||||
href = cls._extract_duckduckgo_target_url(match.group(1))
|
||||
title_html = match.group(2)
|
||||
title = cls._html_fragment_to_text(title_html)
|
||||
cls._append_web_result(
|
||||
items,
|
||||
seen_urls,
|
||||
site_host=site_host,
|
||||
url_text=href,
|
||||
title_text=title,
|
||||
snippet_text="",
|
||||
)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
|
||||
# Regex fallback for environments where HTML parsing fails.
|
||||
anchor_pattern = re.compile(
|
||||
r'<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>(.*?)</a>',
|
||||
flags=re.IGNORECASE | re.DOTALL,
|
||||
return cls._parse_web_results_with_fallback(
|
||||
html_text=html_text,
|
||||
limit=limit,
|
||||
lxml_parser=_parse_lxml,
|
||||
regex_parser=_parse_regex,
|
||||
fallback_when_empty=True,
|
||||
)
|
||||
for match in anchor_pattern.finditer(html_text or ""):
|
||||
href = cls._extract_duckduckgo_target_url(match.group(1))
|
||||
title_html = match.group(2)
|
||||
title = re.sub(r"<[^>]+>", " ", str(title_html or ""))
|
||||
title = html.unescape(title)
|
||||
_add_item(href, title, "")
|
||||
if len(items) >= limit:
|
||||
break
|
||||
|
||||
return items[:limit]
|
||||
|
||||
@classmethod
|
||||
def _parse_yahoo_results(
|
||||
@@ -756,51 +828,43 @@ class search_file(Cmdlet):
|
||||
limit: int,
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Parse Yahoo HTML search results into normalized rows."""
|
||||
items: List[Dict[str, str]] = []
|
||||
seen_urls: set[str] = set()
|
||||
|
||||
def _add_item(url_text: str, title_text: str, snippet_text: str) -> None:
|
||||
url_clean = str(url_text or "").strip()
|
||||
if not url_clean or not url_clean.startswith(("http://", "https://")):
|
||||
return
|
||||
if not cls._url_matches_site(url_clean, site_host):
|
||||
return
|
||||
if url_clean in seen_urls:
|
||||
return
|
||||
seen_urls.add(url_clean)
|
||||
items.append(
|
||||
{
|
||||
"url": url_clean,
|
||||
"title": cls._normalize_space(title_text) or url_clean,
|
||||
"snippet": cls._normalize_space(snippet_text),
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
from lxml import html as lxml_html
|
||||
|
||||
doc = lxml_html.fromstring(html_text or "")
|
||||
def _parse_lxml(doc: Any, items: List[Dict[str, str]], seen_urls: set[str]) -> None:
|
||||
for node in doc.xpath("//a[@href]"):
|
||||
href = cls._extract_yahoo_target_url(node.get("href"))
|
||||
title = " ".join([str(t).strip() for t in node.itertext() if str(t).strip()])
|
||||
_add_item(href, title, "")
|
||||
if len(items) >= limit:
|
||||
break
|
||||
except Exception:
|
||||
anchor_pattern = re.compile(
|
||||
r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>(.*?)</a>',
|
||||
flags=re.IGNORECASE | re.DOTALL,
|
||||
)
|
||||
for match in anchor_pattern.finditer(html_text or ""):
|
||||
href = cls._extract_yahoo_target_url(match.group(1))
|
||||
title_html = match.group(2)
|
||||
title = re.sub(r"<[^>]+>", " ", str(title_html or ""))
|
||||
title = html.unescape(title)
|
||||
_add_item(href, title, "")
|
||||
title = cls._itertext_join(node)
|
||||
cls._append_web_result(
|
||||
items,
|
||||
seen_urls,
|
||||
site_host=site_host,
|
||||
url_text=href,
|
||||
title_text=title,
|
||||
snippet_text="",
|
||||
)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
|
||||
return items[:limit]
|
||||
def _parse_regex(raw_html: str, items: List[Dict[str, str]], seen_urls: set[str]) -> None:
|
||||
for match in _GENERIC_ANCHOR_RE.finditer(raw_html):
|
||||
href = cls._extract_yahoo_target_url(match.group(1))
|
||||
title_html = match.group(2)
|
||||
title = cls._html_fragment_to_text(title_html)
|
||||
cls._append_web_result(
|
||||
items,
|
||||
seen_urls,
|
||||
site_host=site_host,
|
||||
url_text=href,
|
||||
title_text=title,
|
||||
snippet_text="",
|
||||
)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
|
||||
return cls._parse_web_results_with_fallback(
|
||||
html_text=html_text,
|
||||
limit=limit,
|
||||
lxml_parser=_parse_lxml,
|
||||
regex_parser=_parse_regex,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _query_yahoo(
|
||||
@@ -881,30 +945,7 @@ class search_file(Cmdlet):
|
||||
limit: int,
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Parse Bing HTML search results into normalized rows."""
|
||||
items: List[Dict[str, str]] = []
|
||||
seen_urls: set[str] = set()
|
||||
|
||||
def _add_item(url_text: str, title_text: str, snippet_text: str) -> None:
|
||||
url_clean = str(url_text or "").strip()
|
||||
if not url_clean or not url_clean.startswith(("http://", "https://")):
|
||||
return
|
||||
if not cls._url_matches_site(url_clean, site_host):
|
||||
return
|
||||
if url_clean in seen_urls:
|
||||
return
|
||||
seen_urls.add(url_clean)
|
||||
items.append(
|
||||
{
|
||||
"url": url_clean,
|
||||
"title": cls._normalize_space(title_text) or url_clean,
|
||||
"snippet": cls._normalize_space(snippet_text),
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
from lxml import html as lxml_html
|
||||
|
||||
doc = lxml_html.fromstring(html_text or "")
|
||||
def _parse_lxml(doc: Any, items: List[Dict[str, str]], seen_urls: set[str]) -> None:
|
||||
result_nodes = doc.xpath("//li[contains(@class, 'b_algo')]")
|
||||
|
||||
for node in result_nodes:
|
||||
@@ -913,7 +954,7 @@ class search_file(Cmdlet):
|
||||
continue
|
||||
link = links[0]
|
||||
href = str(link.get("href") or "").strip()
|
||||
title = " ".join([str(t).strip() for t in link.itertext() if str(t).strip()])
|
||||
title = cls._itertext_join(link)
|
||||
|
||||
snippet = ""
|
||||
for sel in (
|
||||
@@ -923,28 +964,41 @@ class search_file(Cmdlet):
|
||||
):
|
||||
snip_nodes = node.xpath(sel)
|
||||
if snip_nodes:
|
||||
snippet = " ".join(
|
||||
[str(t).strip() for t in snip_nodes[0].itertext() if str(t).strip()]
|
||||
)
|
||||
snippet = cls._itertext_join(snip_nodes[0])
|
||||
break
|
||||
|
||||
_add_item(href, title, snippet)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
except Exception:
|
||||
anchor_pattern = re.compile(
|
||||
r"<h2[^>]*>\s*<a[^>]+href=\"([^\"]+)\"[^>]*>(.*?)</a>",
|
||||
flags=re.IGNORECASE | re.DOTALL,
|
||||
)
|
||||
for match in anchor_pattern.finditer(html_text or ""):
|
||||
href = match.group(1)
|
||||
title = re.sub(r"<[^>]+>", " ", str(match.group(2) or ""))
|
||||
title = html.unescape(title)
|
||||
_add_item(href, title, "")
|
||||
cls._append_web_result(
|
||||
items,
|
||||
seen_urls,
|
||||
site_host=site_host,
|
||||
url_text=href,
|
||||
title_text=title,
|
||||
snippet_text=snippet,
|
||||
)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
|
||||
return items[:limit]
|
||||
def _parse_regex(raw_html: str, items: List[Dict[str, str]], seen_urls: set[str]) -> None:
|
||||
for match in _BING_RESULT_ANCHOR_RE.finditer(raw_html):
|
||||
href = match.group(1)
|
||||
title = cls._html_fragment_to_text(match.group(2))
|
||||
cls._append_web_result(
|
||||
items,
|
||||
seen_urls,
|
||||
site_host=site_host,
|
||||
url_text=href,
|
||||
title_text=title,
|
||||
snippet_text="",
|
||||
)
|
||||
if len(items) >= limit:
|
||||
break
|
||||
|
||||
return cls._parse_web_results_with_fallback(
|
||||
html_text=html_text,
|
||||
limit=limit,
|
||||
lxml_parser=_parse_lxml,
|
||||
regex_parser=_parse_regex,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _query_web_search(
|
||||
@@ -1218,33 +1272,30 @@ class search_file(Cmdlet):
|
||||
if file_name:
|
||||
title = file_name
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"title": title,
|
||||
"path": target_url,
|
||||
"url": target_url,
|
||||
"source": "web",
|
||||
"store": "web",
|
||||
"table": "web.search",
|
||||
"ext": detected_ext,
|
||||
"detail": snippet,
|
||||
"tag": [f"site:{site_host}"] + ([f"type:{detected_ext}"] if detected_ext else []),
|
||||
"columns": [
|
||||
payload = build_file_result_payload(
|
||||
title=title,
|
||||
path=target_url,
|
||||
url=target_url,
|
||||
source="web",
|
||||
store="web",
|
||||
table="web.search",
|
||||
ext=detected_ext,
|
||||
detail=snippet,
|
||||
tag=[f"site:{site_host}"] + ([f"type:{detected_ext}"] if detected_ext else []),
|
||||
columns=[
|
||||
("Title", title),
|
||||
("Type", detected_ext),
|
||||
("URL", target_url),
|
||||
],
|
||||
"_selection_args": ["-url", target_url],
|
||||
"_selection_action": ["download-file", "-url", target_url],
|
||||
}
|
||||
_selection_args=["-url", target_url],
|
||||
_selection_action=["download-file", "-url", target_url],
|
||||
)
|
||||
|
||||
table.add_result(payload)
|
||||
results_list.append(payload)
|
||||
ctx.emit(payload)
|
||||
|
||||
if refresh_mode:
|
||||
ctx.set_last_result_table_preserve_history(table, results_list)
|
||||
else:
|
||||
ctx.set_last_result_table(table, results_list)
|
||||
publish_result_table(ctx, table, results_list, overlay=refresh_mode)
|
||||
|
||||
ctx.set_current_stage_table(table)
|
||||
|
||||
@@ -1267,15 +1318,7 @@ class search_file(Cmdlet):
|
||||
@staticmethod
def _normalize_extension(ext_value: Any) -> str:
    """Return the normalized form of a file-extension value.

    Delegates to the shared ``normalize_file_extension`` helper imported from
    ``SYS.payload_builders`` so this cmdlet does not carry its own copy of the
    sanitizing logic.

    Args:
        ext_value: Raw extension value (for example ``".PDF"`` or ``None``).

    Returns:
        The normalized extension string produced by the shared helper.
    """
    # NOTE(review): the inline code this replaces stripped a leading ".", cut
    # at the first separator, kept only alphanumerics and capped the result at
    # 5 characters -- confirm the shared helper keeps that contract.
    return normalize_file_extension(ext_value)
|
||||
|
||||
@staticmethod
|
||||
def _normalize_lookup_target(value: Optional[str]) -> str:
|
||||
@@ -1580,10 +1623,7 @@ class search_file(Cmdlet):
|
||||
results_list.append(item_dict)
|
||||
ctx.emit(item_dict)
|
||||
|
||||
if refresh_mode:
|
||||
ctx.set_last_result_table_preserve_history(table, results_list)
|
||||
else:
|
||||
ctx.set_last_result_table(table, results_list)
|
||||
publish_result_table(ctx, table, results_list, overlay=refresh_mode)
|
||||
|
||||
ctx.set_current_stage_table(table)
|
||||
|
||||
@@ -1764,11 +1804,11 @@ class search_file(Cmdlet):
|
||||
|
||||
store_filter: Optional[str] = None
|
||||
if query:
|
||||
match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
|
||||
match = _STORE_FILTER_RE.search(query)
|
||||
if match:
|
||||
store_filter = match.group(1).strip() or None
|
||||
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
|
||||
query = re.sub(r"\s{2,}", " ", query)
|
||||
query = _STORE_FILTER_REMOVE_RE.sub(" ", query)
|
||||
query = _WHITESPACE_RE.sub(" ", query)
|
||||
query = query.strip().strip(",")
|
||||
|
||||
if store_filter and not storage_backend:
|
||||
@@ -1912,19 +1952,15 @@ class search_file(Cmdlet):
|
||||
for h in hash_query:
|
||||
resolved_backend_name: Optional[str] = None
|
||||
resolved_backend = None
|
||||
store_registry = None
|
||||
|
||||
for backend_name in backends_to_try:
|
||||
backend = None
|
||||
try:
|
||||
backend = get_backend_instance(config, backend_name, suppress_debug=True)
|
||||
if backend is None:
|
||||
# Last-resort: instantiate full registry for this backend only
|
||||
from Store import Store as _Store
|
||||
_store = _Store(config=config, suppress_debug=True)
|
||||
if _store.is_available(backend_name):
|
||||
backend = _store[backend_name]
|
||||
except Exception:
|
||||
backend = None
|
||||
backend, store_registry, _exc = get_preferred_store_backend(
|
||||
config,
|
||||
backend_name,
|
||||
store_registry=store_registry,
|
||||
suppress_debug=True,
|
||||
)
|
||||
if backend is None:
|
||||
continue
|
||||
try:
|
||||
@@ -2017,16 +2053,14 @@ class search_file(Cmdlet):
|
||||
except Exception:
|
||||
title_from_tag = None
|
||||
|
||||
title = title_from_tag or meta_obj.get("title") or meta_obj.get(
|
||||
"name"
|
||||
)
|
||||
title = title_from_tag or get_result_title(meta_obj, "title", "name")
|
||||
if not title and path_str:
|
||||
try:
|
||||
title = Path(path_str).stem
|
||||
except Exception:
|
||||
title = path_str
|
||||
|
||||
ext_val = meta_obj.get("ext") or meta_obj.get("extension")
|
||||
ext_val = get_extension_field(meta_obj, "ext", "extension")
|
||||
if not ext_val and path_str:
|
||||
try:
|
||||
ext_val = Path(path_str).suffix
|
||||
@@ -2038,27 +2072,19 @@ class search_file(Cmdlet):
|
||||
except Exception:
|
||||
ext_val = None
|
||||
|
||||
size_bytes = meta_obj.get("size")
|
||||
if size_bytes is None:
|
||||
size_bytes = meta_obj.get("size_bytes")
|
||||
try:
|
||||
size_bytes_int: Optional[int] = (
|
||||
int(size_bytes) if size_bytes is not None else None
|
||||
)
|
||||
except Exception:
|
||||
size_bytes_int = None
|
||||
size_bytes_int = get_int_field(meta_obj, "size", "size_bytes")
|
||||
|
||||
payload: Dict[str,
|
||||
Any] = {
|
||||
"title": str(title or h),
|
||||
"hash": h,
|
||||
"store": resolved_backend_name,
|
||||
"path": path_str,
|
||||
"ext": self._normalize_extension(ext_val),
|
||||
"size_bytes": size_bytes_int,
|
||||
"tag": tags_list,
|
||||
"url": meta_obj.get("url") or [],
|
||||
}
|
||||
payload = build_file_result_payload(
|
||||
title=title,
|
||||
fallback_title=h,
|
||||
hash_value=h,
|
||||
store=resolved_backend_name,
|
||||
path=path_str,
|
||||
ext=ext_val,
|
||||
size_bytes=size_bytes_int,
|
||||
tag=tags_list,
|
||||
url=meta_obj.get("url") or [],
|
||||
)
|
||||
|
||||
self._set_storage_display_columns(payload)
|
||||
|
||||
@@ -2106,16 +2132,20 @@ class search_file(Cmdlet):
|
||||
|
||||
if backend_to_search:
|
||||
searched_backends.append(backend_to_search)
|
||||
target_backend, _store_registry, exc = get_preferred_store_backend(
|
||||
config,
|
||||
backend_to_search,
|
||||
suppress_debug=True,
|
||||
)
|
||||
if target_backend is None:
|
||||
if exc is not None:
|
||||
log(f"Backend '{backend_to_search}' not found: {exc}", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, "error")
|
||||
return 1
|
||||
debug(f"[search-file] Requested backend '{backend_to_search}' not found")
|
||||
return 1
|
||||
try:
|
||||
target_backend = get_backend_instance(config, backend_to_search, suppress_debug=True)
|
||||
if target_backend is None:
|
||||
from Store import Store as _Store
|
||||
_store = _Store(config=config, suppress_debug=True)
|
||||
if _store.is_available(backend_to_search):
|
||||
target_backend = _store[backend_to_search]
|
||||
else:
|
||||
debug(f"[search-file] Requested backend '{backend_to_search}' not found")
|
||||
return 1
|
||||
pass
|
||||
except Exception as exc:
|
||||
log(f"Backend '{backend_to_search}' not found: {exc}", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, "error")
|
||||
@@ -2135,18 +2165,19 @@ class search_file(Cmdlet):
|
||||
)
|
||||
else:
|
||||
all_results = []
|
||||
store_registry = None
|
||||
for backend_name in list_configured_backend_names(config or {}):
|
||||
try:
|
||||
backend = get_backend_instance(config, backend_name, suppress_debug=True)
|
||||
backend, store_registry, _exc = get_preferred_store_backend(
|
||||
config,
|
||||
backend_name,
|
||||
store_registry=store_registry,
|
||||
suppress_debug=True,
|
||||
)
|
||||
if backend is None:
|
||||
from Store import Store as _Store
|
||||
_store = _Store(config=config, suppress_debug=True)
|
||||
if _store.is_available(backend_name):
|
||||
backend = _store[backend_name]
|
||||
else:
|
||||
# Configured backend name exists but has no registered implementation or failed to load.
|
||||
# (e.g. 'all-debrid' being treated as a store but having no store provider).
|
||||
continue
|
||||
# Configured backend name exists but has no registered implementation or failed to load.
|
||||
# (e.g. 'all-debrid' being treated as a store but having no store provider).
|
||||
continue
|
||||
|
||||
searched_backends.append(backend_name)
|
||||
|
||||
@@ -2216,63 +2247,11 @@ class search_file(Cmdlet):
|
||||
|
||||
# Populate default selection args for interactive @N selection/hash/url handling
|
||||
try:
|
||||
sel_args: Optional[List[str]] = None
|
||||
sel_action: Optional[List[str]] = None
|
||||
|
||||
# Prefer explicit path when available
|
||||
p_val = normalized.get("path") or normalized.get("target") or normalized.get("url")
|
||||
if p_val:
|
||||
p_str = str(p_val or "").strip()
|
||||
if p_str:
|
||||
if p_str.startswith(("http://", "https://", "magnet:", "torrent:")):
|
||||
h = normalized.get("hash") or normalized.get("file_hash") or normalized.get("hash_hex")
|
||||
s_val = normalized.get("store")
|
||||
if h and s_val and "/view_file" in p_str:
|
||||
try:
|
||||
h_norm = normalize_hash(h)
|
||||
except Exception:
|
||||
h_norm = str(h)
|
||||
sel_args = ["-query", f"hash:{h_norm}", "-store", str(s_val)]
|
||||
sel_action = ["get-metadata", "-query", f"hash:{h_norm}", "-store", str(s_val)]
|
||||
else:
|
||||
sel_args = ["-url", p_str]
|
||||
sel_action = ["download-file", "-url", p_str]
|
||||
else:
|
||||
try:
|
||||
from SYS.utils import expand_path
|
||||
|
||||
full_path = expand_path(p_str)
|
||||
# Prefer showing metadata details when we have a hash+store context
|
||||
h = normalized.get("hash") or normalized.get("file_hash") or normalized.get("hash_hex")
|
||||
s_val = normalized.get("store")
|
||||
if h and s_val:
|
||||
try:
|
||||
h_norm = normalize_hash(h)
|
||||
except Exception:
|
||||
h_norm = str(h)
|
||||
sel_args = ["-query", f"hash:{h_norm}", "-store", str(s_val)]
|
||||
sel_action = ["get-metadata", "-query", f"hash:{h_norm}", "-store", str(s_val)]
|
||||
else:
|
||||
sel_args = ["-path", str(full_path)]
|
||||
# Default action for local paths: get-file to fetch or operate on the path
|
||||
sel_action = ["get-file", "-path", str(full_path)]
|
||||
except Exception:
|
||||
sel_args = ["-path", p_str]
|
||||
sel_action = ["get-file", "-path", p_str]
|
||||
|
||||
# Fallback: use hash+store when available
|
||||
if sel_args is None:
|
||||
h = normalized.get("hash") or normalized.get("file_hash") or normalized.get("hash_hex")
|
||||
s_val = normalized.get("store")
|
||||
if h and s_val:
|
||||
try:
|
||||
h_norm = normalize_hash(h)
|
||||
except Exception:
|
||||
h_norm = str(h)
|
||||
sel_args = ["-query", f"hash:{h_norm}", "-store", str(s_val)]
|
||||
# Show metadata details by default for store/hash selections
|
||||
sel_action = ["get-metadata", "-query", f"hash:{h_norm}", "-store", str(s_val)]
|
||||
|
||||
sel_args, sel_action = build_default_selection(
|
||||
path_value=normalized.get("path") or normalized.get("target") or normalized.get("url"),
|
||||
hash_value=normalized.get("hash") or normalized.get("file_hash") or normalized.get("hash_hex"),
|
||||
store_value=normalized.get("store"),
|
||||
)
|
||||
if sel_args:
|
||||
normalized["_selection_args"] = [str(x) for x in sel_args]
|
||||
if sel_action:
|
||||
@@ -2305,11 +2284,17 @@ class search_file(Cmdlet):
|
||||
subject_hash = query.split("hash:")[1].split(",")[0].strip()
|
||||
subject_context = {"store": backend_to_search, "hash": subject_hash}
|
||||
|
||||
ctx.set_last_result_table_overlay(table, results_list, subject=subject_context)
|
||||
publish_result_table(
|
||||
ctx,
|
||||
table,
|
||||
results_list,
|
||||
subject=subject_context,
|
||||
overlay=True,
|
||||
)
|
||||
except Exception:
|
||||
ctx.set_last_result_table_preserve_history(table, results_list)
|
||||
publish_result_table(ctx, table, results_list, overlay=True)
|
||||
else:
|
||||
ctx.set_last_result_table(table, results_list)
|
||||
publish_result_table(ctx, table, results_list)
|
||||
db.append_worker_stdout(
|
||||
worker_id,
|
||||
_summarize_worker_results(results_list)
|
||||
|
||||
Reference in New Issue
Block a user