This commit is contained in:
2026-01-16 01:47:00 -08:00
parent 41e95d0360
commit 12436e5a6a
4 changed files with 492 additions and 130 deletions

View File

@@ -4,7 +4,7 @@ import json
import re
import shutil
import sys
from fnmatch import translate
from fnmatch import fnmatch, translate
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -30,6 +30,28 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
return _normalize_hash(file_path.stem)
def _normalize_url_for_search(url: str) -> str:
value = str(url or "").strip()
value = re.sub(r"^[a-z][a-z0-9+.-]*://", "", value, flags=re.IGNORECASE)
value = re.sub(r"^www\.", "", value, flags=re.IGNORECASE)
return value.lower()
def _match_url_pattern(url: str, pattern: str) -> bool:
    """Return True when *pattern* matches *url*.

    Both sides are normalized first. A pattern containing ``*`` or ``?`` is
    treated as a shell-style glob over the whole URL; otherwise the match is
    exact (ignoring a trailing slash) with a substring fallback.
    """
    needle = _normalize_url_for_search(pattern)
    if not needle:
        # An empty pattern matches nothing.
        return False
    haystack = _normalize_url_for_search(url)
    if "*" in needle or "?" in needle:
        return fnmatch(haystack, needle)
    needle_trimmed = needle.rstrip("/")
    if needle_trimmed and needle_trimmed == haystack.rstrip("/"):
        return True
    return needle in haystack
class Folder(Store):
""""""
@@ -690,6 +712,12 @@ class Folder(Store):
match_all = query == "*" or (not query and bool(ext_filter))
results = []
search_dir = expand_path(self._location)
backend_label = str(
getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder"
)
debug(
f"[folder:{backend_label}] search start: query={query} limit={limit} root={search_dir}"
)
def _url_like_pattern(value: str) -> str:
# Interpret user patterns as substring matches (with optional glob wildcards).
@@ -1002,7 +1030,7 @@ class Folder(Store):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
debug(f"[folder:{backend_label}] namespace search: {namespace}:{pattern}")
if namespace == "hash":
normalized_hash = _normalize_hash(pattern)
@@ -1041,14 +1069,50 @@ class Folder(Store):
return results
if namespace == "url":
pattern_hint = kwargs.get("pattern_hint")
def _parse_url_value(raw: Any) -> list[str]:
if raw is None:
return []
if isinstance(raw, list):
return [str(u).strip() for u in raw if str(u).strip()]
if isinstance(raw, str):
text = raw.strip()
if not text:
return []
try:
parsed = json.loads(text)
if isinstance(parsed, list):
return [
str(u).strip()
for u in parsed
if str(u).strip()
]
except Exception:
pass
return [text]
return []
def _matches_pattern(url_list: list[str]) -> bool:
    """True when no pattern hint is in effect, or any URL matches it."""
    if not pattern_hint:
        # No hint supplied by the caller: every file passes the URL filter.
        return True
    return any(
        _match_url_pattern(candidate, pattern_hint)
        for candidate in url_list
    )
if not pattern or pattern == "*":
debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
rows = api.get_files_with_any_url(limit)
else:
debug(
f"[folder:{backend_label}] url search: like={pattern} (limit={limit})"
)
rows = api.get_files_by_url_like(
_url_like_pattern(pattern),
limit
)
for file_hash, file_path_str, size_bytes, ext in rows:
for file_hash, file_path_str, size_bytes, ext, url_raw in rows:
if not file_path_str:
continue
file_path = search_dir / str(file_path_str)
@@ -1059,6 +1123,9 @@ class Folder(Store):
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
urls = _parse_url_value(url_raw)
if not urls or not _matches_pattern(urls):
continue
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(
file_path,
@@ -1066,6 +1133,7 @@ class Folder(Store):
size_bytes,
file_hash
)
entry["urls"] = urls
results.append(entry)
if limit is not None and len(results) >= limit:
return results