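Fix search query parsing to allow multi-word queries; add whole-word matching to local and Hydrus search, and require all terms to match (AND logic) in Hydrus free-form search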

This commit is contained in:
nose
2025-11-25 23:12:15 -08:00
parent f6ce155985
commit d1f08216a2
2 changed files with 41 additions and 6 deletions

View File

@@ -216,8 +216,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
elif low in {"-type", "--type"} and i + 1 < len(args_list): elif low in {"-type", "--type"} and i + 1 < len(args_list):
type_filter = args_list[i + 1].lower() type_filter = args_list[i + 1].lower()
i += 2 i += 2
elif not query and not arg.startswith("-"): elif not arg.startswith("-"):
query = arg if query:
query += " " + arg
else:
query = arg
i += 1 i += 1
else: else:
i += 1 i += 1

View File

@@ -24,6 +24,7 @@ from typing import Any, Dict, Optional
import sys import sys
import shutil import shutil
import requests import requests
import re
from helper.logger import log, debug from helper.logger import log, debug
@@ -286,20 +287,36 @@ class LocalStorageBackend(StorageBackend):
query_pattern = f"%{query_lower}%" query_pattern = f"%{query_lower}%"
debug(f"Performing filename/tag search: {query_pattern}") debug(f"Performing filename/tag search: {query_pattern}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 100) * 50
cursor.execute(""" cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f FROM files f
WHERE LOWER(f.file_path) LIKE ? WHERE LOWER(f.file_path) LIKE ?
ORDER BY f.file_path ORDER BY f.file_path
LIMIT ? LIMIT ?
""", (query_pattern, limit or 1000)) """, (query_pattern, fetch_limit))
rows = cursor.fetchall() rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB") debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
# Compile regex for whole word matching
try:
word_regex = re.compile(r'\b' + re.escape(query_lower) + r'\b', re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set() seen_files = set()
for file_id, file_path_str, size_bytes in rows: for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files: if not file_path_str or file_path_str in seen_files:
continue continue
# Apply whole word filter on filename
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
continue
seen_files.add(file_path_str) seen_files.add(file_path_str)
file_path = Path(file_path_str) file_path = Path(file_path_str)
@@ -706,8 +723,23 @@ class HydrusStorageBackend(StorageBackend):
}) })
else: else:
# Free-form search: check if search terms match the title or tags # Free-form search: check if search terms match the title or tags
# Match if ANY search term is found in title or tags (OR logic) # Match if ALL search terms are found in title or tags (AND logic)
if query_lower == "*" or any(term in all_tags_str or term in title.lower() for term in search_terms): # AND use whole word matching
# Combine title and tags for searching
searchable_text = (title + " " + all_tags_str).lower()
match = True
if query_lower != "*":
for term in search_terms:
# Regex for whole word: \bterm\b
# Escape term to handle special chars
pattern = r'\b' + re.escape(term) + r'\b'
if not re.search(pattern, searchable_text):
match = False
break
if match:
results.append({ results.append({
"hash": hash_hex, "hash": hash_hex,
"hash_hex": hash_hex, "hash_hex": hash_hex,