Commit c6fd6b4224 (parent 3a7c443004), authored 2026-01-17 02:36:06 -08:00.
9 changed files with 440 additions and 226 deletions.

View File

@@ -75,6 +75,17 @@ class Get_Url(Cmdlet):
return url.lower()
@staticmethod
def _looks_like_url_pattern(value: str) -> bool:
    """Heuristically decide whether *value* resembles a URL.

    Returns True when the text contains a scheme separator ("://"),
    starts with one of the known non-HTTP scheme prefixes, or looks
    like a bare host/path (contains both "." and "/"). Empty or
    whitespace-only input is never a URL.
    """
    text = str(value or "").strip().lower()
    if not text:
        return False
    if "://" in text:
        return True
    known_schemes = ("magnet:", "torrent:", "ytdl:", "tidal:", "ftp:", "sftp:", "file:")
    if text.startswith(known_schemes):
        return True
    # Bare-domain heuristic: "example.com/page" counts, "example" does not.
    return "." in text and "/" in text
@staticmethod
def _match_url_pattern(url: str, pattern: str) -> bool:
"""Match URL against pattern with wildcard support.
@@ -82,10 +93,14 @@ class Get_Url(Cmdlet):
Strips protocol/www from both URL and pattern before matching.
Supports * and ? wildcards.
"""
raw_pattern = str(pattern or "").strip()
normalized_url = Get_Url._normalize_url_for_search(url)
normalized_pattern = Get_Url._normalize_url_for_search(pattern)
normalized_pattern = Get_Url._normalize_url_for_search(raw_pattern)
has_wildcards = any(ch in normalized_pattern for ch in ("*", "?"))
looks_like_url = Get_Url._looks_like_url_pattern(raw_pattern)
has_wildcards = "*" in normalized_pattern or (
not looks_like_url and "?" in normalized_pattern
)
if has_wildcards:
return fnmatch(normalized_url, normalized_pattern)
@@ -324,25 +339,58 @@ class Get_Url(Cmdlet):
# This avoids the expensive/incorrect "search('*')" scan.
try:
raw_pattern = str(pattern or "").strip()
has_wildcards = any(ch in raw_pattern for ch in ("*", "?"))
looks_like_url = self._looks_like_url_pattern(raw_pattern)
has_wildcards = "*" in raw_pattern or (
not looks_like_url and "?" in raw_pattern
)
# If this is a Hydrus backend and the pattern is a single URL,
# normalize it through the official API. Skip for bare domains.
normalized_url = None
looks_like_url = (
"://" in raw_pattern or raw_pattern.startswith("magnet:")
)
if not has_wildcards and looks_like_url and hasattr(backend, "get_url_info"):
try:
info = backend.get_url_info(raw_pattern) # type: ignore[attr-defined]
if isinstance(info, dict):
norm = info.get("normalised_url") or info.get("normalized_url")
if isinstance(norm, str) and norm.strip():
normalized_url = norm.strip()
except Exception:
normalized_url = None
normalized_search_pattern = None
if not has_wildcards and looks_like_url:
normalized_search_pattern = self._normalize_url_for_search(
raw_pattern
)
if (
normalized_search_pattern
and normalized_search_pattern != raw_pattern
):
debug(
"get-url normalized raw pattern: %s -> %s",
raw_pattern,
normalized_search_pattern,
)
if hasattr(backend, "get_url_info"):
try:
info = backend.get_url_info(raw_pattern) # type: ignore[attr-defined]
if isinstance(info, dict):
norm = (
info.get("normalised_url")
or info.get("normalized_url")
)
if isinstance(norm, str) and norm.strip():
normalized_url = self._normalize_url_for_search(
norm.strip()
)
except Exception:
pass
if (
normalized_url
and normalized_url != normalized_search_pattern
and normalized_url != raw_pattern
):
debug(
"get-url normalized backend result: %s -> %s",
raw_pattern,
normalized_url,
)
target_pattern = normalized_url or raw_pattern
target_pattern = (
normalized_url
or normalized_search_pattern
or raw_pattern
)
if has_wildcards or not target_pattern:
search_query = "url:*"
else: