This commit is contained in: nose
commit 8ca5783970 · parent b75faa49a2 · 2025-12-20 23:57:44 -08:00
39 changed files with 4294 additions and 1722 deletions


@@ -452,7 +452,44 @@ class Folder(Store):
query = query.lower()
query_lower = query # Ensure query_lower is defined for all code paths
match_all = query == "*"
def _normalize_ext_filter(value: str) -> str:
v = str(value or "").strip().lower().lstrip('.')
v = "".join(ch for ch in v if ch.isalnum())
return v
def _extract_system_filetype_ext(text: str) -> Optional[str]:
# Match: system:filetype = png (allow optional '=' and flexible spaces)
m = re.search(r"\bsystem:filetype\s*(?:=\s*)?([^\s,]+)", text)
if not m:
m = re.search(r"\bsystem:filetype\s*=\s*([^\s,]+)", text)
if not m:
return None
return _normalize_ext_filter(m.group(1)) or None
# Support `ext:<value>` and Hydrus-style `system:filetype = <value>` anywhere
# in the query (space or comma separated).
ext_filter: Optional[str] = None
try:
sys_ext = _extract_system_filetype_ext(query_lower)
if sys_ext:
ext_filter = sys_ext
query_lower = re.sub(r"\s*\bsystem:filetype\s*(?:=\s*)?[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query = query_lower
m = re.search(r"\bext:([^\s,]+)", query_lower)
if not m:
m = re.search(r"\bextension:([^\s,]+)", query_lower)
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query = query_lower
except Exception:
ext_filter = None
match_all = query == "*" or (not query and bool(ext_filter))
results = []
search_dir = Path(self._location).expanduser()
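
For readers skimming the diff: a minimal standalone sketch of what the new extension-filter parsing accepts, assuming the helpers behave as defined above (the example query strings are illustrative, not test cases from this commit):

import re
from typing import Optional

def _normalize_ext_filter(value: str) -> str:
    # Lowercase, drop a leading dot, keep only alphanumerics: ".PNG" -> "png".
    v = str(value or "").strip().lower().lstrip('.')
    return "".join(ch for ch in v if ch.isalnum())

def _extract_system_filetype_ext(text: str) -> Optional[str]:
    # Hydrus-style token: "system:filetype = png" (the '=' and spaces are optional).
    m = re.search(r"\bsystem:filetype\s*(?:=\s*)?([^\s,]+)", text)
    return (_normalize_ext_filter(m.group(1)) or None) if m else None

assert _normalize_ext_filter(".PNG") == "png"
assert _extract_system_filetype_ext("cats system:filetype = png") == "png"
assert _extract_system_filetype_ext("cats system:filetype=jpeg") == "jpeg"
assert _extract_system_filetype_ext("plain query") is None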
@@ -518,6 +555,41 @@ class Folder(Store):
try:
with DatabaseAPI(search_dir) as api:
ext_hashes: set[str] | None = None
if ext_filter:
# Fetch a bounded set of hashes to intersect with other filters.
ext_fetch_limit = (limit or 45) * 50
ext_hashes = api.get_file_hashes_by_ext(ext_filter, limit=ext_fetch_limit)
# ext-only search: query is empty (or coerced to match_all above).
if ext_filter and (not query_lower or query_lower == "*"):
rows = api.get_files_by_ext(ext_filter, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
debug(f"[folder:{backend_label}] {len(results)} result(s)")
return results
if tokens and len(tokens) > 1:
url_fetch_limit = (limit or 45) * 50
@@ -546,6 +618,22 @@ class Folder(Store):
return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)
if namespace == 'system':
# Hydrus-compatible query: system:filetype = png
m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
if m_ft:
normalized_ext = _normalize_ext_filter(m_ft.group(1))
if not normalized_ext:
return set()
return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
return set()
if namespace in {'ext', 'extension'}:
normalized_ext = _normalize_ext_filter(pattern)
if not normalized_ext:
return set()
return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
if namespace == 'store':
if pattern not in {'local', 'file', 'filesystem'}:
return set()
@@ -579,6 +667,11 @@ class Folder(Store):
if not matching_hashes:
return results
if ext_hashes is not None:
matching_hashes = (matching_hashes or set()) & ext_hashes
if not matching_hashes:
return results
if not matching_hashes:
return results
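
The ext filter is applied as a set intersection against whatever the other query tokens matched; a standalone illustration of that step (hash values invented for the example):

# Illustrative hashes only; in the diff these come from get_file_hashes_by_ext()
# and from the per-token namespace lookups.
ext_hashes = {"a1", "b2", "c3"}
matching_hashes = {"b2", "c3", "d4"}

matching_hashes = (matching_hashes or set()) & ext_hashes
assert matching_hashes == {"b2", "c3"}  # files that satisfy both the token and the ext filter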
@@ -596,6 +689,12 @@ class Folder(Store):
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
@@ -631,6 +730,12 @@ class Folder(Store):
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
@@ -658,6 +763,67 @@ class Folder(Store):
if limit is not None and len(results) >= limit:
return results
return results
if namespace == "system":
# Hydrus-compatible query: system:filetype = png
m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
if m_ft:
normalized_ext = _normalize_ext_filter(m_ft.group(1))
if not normalized_ext:
return results
rows = api.get_files_by_ext(normalized_ext, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
if namespace in {"ext", "extension"}:
normalized_ext = _normalize_ext_filter(pattern)
if not normalized_ext:
return results
rows = api.get_files_by_ext(normalized_ext, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
query_pattern = f"{namespace}:%"
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
@@ -674,12 +840,20 @@ class Folder(Store):
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
if fnmatch(value, pattern):
if ext_hashes is not None and file_hash not in ext_hashes:
break
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
all_tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
else:
debug(f"File missing on disk: {file_path}")
@@ -703,6 +877,8 @@ class Folder(Store):
for file_hash, file_path_str, size_bytes, ext in term_rows:
if not file_path_str:
continue
if ext_hashes is not None and file_hash not in ext_hashes:
continue
entry = hits.get(file_hash)
if entry:
entry["count"] += 1
@@ -746,6 +922,8 @@ class Folder(Store):
rows = api.get_all_files(limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if file_path_str:
if ext_hashes is not None and file_hash not in ext_hashes:
continue
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
@@ -753,6 +931,12 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
@@ -896,7 +1080,7 @@ class Folder(Store):
if db_tags:
# Return actual store name instead of generic "local_db"
store_name = self._name if self._name else "local"
return list(db_tags), store_name
return [str(t).strip().lower() for t in db_tags if isinstance(t, str) and t.strip()], store_name
except Exception as exc:
debug(f"Local DB lookup failed: {exc}")
return [], "unknown"
@@ -917,22 +1101,30 @@ class Folder(Store):
try:
with API_folder_store(Path(self._location)) as db:
# Get existing tags
existing_tags = list(db.get_tags(hash) or [])
original_tags_lower = {t.lower() for t in existing_tags}
# Merge new tags, handling namespace overwrites
for new_tag in tag:
if ':' in new_tag:
namespace = new_tag.split(':', 1)[0]
# Remove existing tags in same namespace
existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
# Add new tag if not already present (case-insensitive check)
if new_tag.lower() not in original_tags_lower:
existing_tags.append(new_tag)
# Save merged tags
db.add_tags_to_hash(hash, existing_tags)
existing_tags = [t for t in (db.get_tags(hash) or []) if isinstance(t, str) and t.strip()]
from metadata import compute_namespaced_tag_overwrite
_to_remove, _to_add, merged = compute_namespaced_tag_overwrite(existing_tags, tag or [])
if not _to_remove and not _to_add:
return True
# Folder DB tag table is case-sensitive and add_tags_to_hash() is additive.
# To enforce lowercase-only tags and namespace overwrites, rewrite the full tag set.
cursor = db.connection.cursor()
cursor.execute("DELETE FROM tags WHERE hash = ?", (hash,))
for t in merged:
t = str(t).strip().lower()
if t:
cursor.execute(
"INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)",
(hash, t),
)
db.connection.commit()
try:
db._update_metadata_modified_time(hash)
except Exception:
pass
return True
except Exception as exc:
debug(f"Local DB add_tags failed: {exc}")
@@ -949,7 +1141,10 @@ class Folder(Store):
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
db.remove_tags_from_hash(file_hash, list(tags))
tag_list = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
if not tag_list:
return True
db.remove_tags_from_hash(file_hash, tag_list)
return True
except Exception as exc:
debug(f"Local DB remove_tags failed: {exc}")
@@ -1006,6 +1201,130 @@ class Folder(Store):
debug(f"add_url failed for local file: {exc}")
return False
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Add known urls to many local files in one DB session.
This is a performance optimization used by cmdlets that receive many PipeObjects.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
# Normalize + coalesce duplicates per hash.
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore
merged_by_hash: Dict[str, List[str]] = {}
for file_identifier, url_list in (items or []):
file_hash = str(file_identifier or "").strip().lower()
if not file_hash:
continue
incoming: List[str]
if normalize_urls is not None:
try:
incoming = normalize_urls(url_list)
except Exception:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
else:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
if not incoming:
continue
existing = merged_by_hash.get(file_hash) or []
for u in incoming:
if u and u not in existing:
existing.append(u)
merged_by_hash[file_hash] = existing
if not merged_by_hash:
return True
import json
with API_folder_store(Path(self._location)) as db:
conn = getattr(db, "connection", None)
if conn is None:
return False
cursor = conn.cursor()
# Ensure metadata rows exist (may be needed for older entries).
for file_hash in merged_by_hash.keys():
try:
cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
except Exception:
continue
# Load existing urls for all hashes in chunks.
existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in merged_by_hash.keys()}
hashes = list(merged_by_hash.keys())
chunk_size = 400
for i in range(0, len(hashes), chunk_size):
chunk = hashes[i : i + chunk_size]
if not chunk:
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
rows = cursor.fetchall() or []
except Exception:
rows = []
for row in rows:
try:
row_hash = str(row[0]).strip().lower()
except Exception:
continue
raw_urls = None
try:
raw_urls = row[1]
except Exception:
raw_urls = None
parsed_urls: List[str] = []
if raw_urls:
try:
parsed = json.loads(raw_urls)
if normalize_urls is not None:
parsed_urls = normalize_urls(parsed)
else:
if isinstance(parsed, list):
parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
except Exception:
parsed_urls = []
existing_urls_by_hash[row_hash] = parsed_urls
# Compute updates and write in one commit.
updates: List[tuple[str, str]] = []
for file_hash, incoming_urls in merged_by_hash.items():
existing_urls = existing_urls_by_hash.get(file_hash) or []
final = list(existing_urls)
for u in incoming_urls:
if u and u not in final:
final.append(u)
if final != existing_urls:
try:
updates.append((json.dumps(final), file_hash))
except Exception:
continue
if updates:
cursor.executemany(
"UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
updates,
)
conn.commit()
return True
except Exception as exc:
debug(f"add_url_bulk failed for local file: {exc}")
return False
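
A hypothetical caller batching piped items into a single call might look like the following; PipeObject, its hash/urls fields, and folder_store are illustrative names, not identifiers from this commit:

from dataclasses import dataclass
from typing import List

@dataclass
class PipeObject:  # illustrative stand-in for the pipeline objects mentioned in the docstring
    hash: str
    urls: List[str]

pipe_objects = [
    PipeObject("aa11" * 16, ["https://example.com/a"]),
    PipeObject("bb22" * 16, []),
]

# One DB session for the whole batch instead of one add_url() call per object.
items = [(po.hash, po.urls) for po in pipe_objects if po.urls]
# folder_store.add_url_bulk(items)  # folder_store: a configured Folder instance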
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete known url from a local file by hash."""
from API.folder import API_folder_store
@@ -1031,6 +1350,119 @@ class Folder(Store):
debug(f"delete_url failed for local file: {exc}")
return False
def delete_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Delete known urls from many local files in one DB session."""
from API.folder import API_folder_store
try:
if not self._location:
return False
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore
remove_by_hash: Dict[str, set[str]] = {}
for file_identifier, url_list in (items or []):
file_hash = str(file_identifier or "").strip().lower()
if not file_hash:
continue
incoming: List[str]
if normalize_urls is not None:
try:
incoming = normalize_urls(url_list)
except Exception:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
else:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
remove = {u for u in incoming if u}
if not remove:
continue
remove_by_hash.setdefault(file_hash, set()).update(remove)
if not remove_by_hash:
return True
import json
with API_folder_store(Path(self._location)) as db:
conn = getattr(db, "connection", None)
if conn is None:
return False
cursor = conn.cursor()
# Ensure metadata rows exist.
for file_hash in remove_by_hash.keys():
try:
cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
except Exception:
continue
# Load existing urls for hashes in chunks.
existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in remove_by_hash.keys()}
hashes = list(remove_by_hash.keys())
chunk_size = 400
for i in range(0, len(hashes), chunk_size):
chunk = hashes[i : i + chunk_size]
if not chunk:
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
rows = cursor.fetchall() or []
except Exception:
rows = []
for row in rows:
try:
row_hash = str(row[0]).strip().lower()
except Exception:
continue
raw_urls = None
try:
raw_urls = row[1]
except Exception:
raw_urls = None
parsed_urls: List[str] = []
if raw_urls:
try:
parsed = json.loads(raw_urls)
if normalize_urls is not None:
parsed_urls = normalize_urls(parsed)
else:
if isinstance(parsed, list):
parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
except Exception:
parsed_urls = []
existing_urls_by_hash[row_hash] = parsed_urls
# Apply removals + write updates.
updates: List[tuple[str, str]] = []
for file_hash, remove_set in remove_by_hash.items():
existing_urls = existing_urls_by_hash.get(file_hash) or []
new_urls = [u for u in existing_urls if u not in remove_set]
if new_urls != existing_urls:
try:
updates.append((json.dumps(new_urls), file_hash))
except Exception:
continue
if updates:
cursor.executemany(
"UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
updates,
)
conn.commit()
return True
except Exception as exc:
debug(f"delete_url_bulk failed for local file: {exc}")
return False
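
Both bulk helpers treat metadata.url as a JSON-encoded list and round-trip it through json; a minimal standalone illustration of that read-modify-write step (the URL values are made up):

import json

raw = '["https://example.com/a", "https://example.com/b"]'  # as stored in metadata.url
urls = json.loads(raw) if raw else []

# add_url_bulk-style merge: append anything not already present
for u in ["https://example.com/c", "https://example.com/a"]:
    if u and u not in urls:
        urls.append(u)

# delete_url_bulk-style removal
urls = [u for u in urls if u != "https://example.com/b"]

new_raw = json.dumps(urls)  # written back with time_modified/updated_at bumped
assert json.loads(new_raw) == ["https://example.com/a", "https://example.com/c"]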
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a local file by hash."""
from API.folder import API_folder_store
@@ -1077,6 +1509,94 @@ class Folder(Store):
debug(f"set_note failed for local file: {exc}")
return False
def set_note_bulk(self, items: List[tuple[str, str, str]], **kwargs: Any) -> bool:
"""Set notes for many local files in one DB session.
Preserves existing semantics by only setting notes for hashes that still
map to a file path that exists on disk.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
# Normalize input.
normalized: List[tuple[str, str, str]] = []
for file_identifier, name, text in (items or []):
file_hash = str(file_identifier or "").strip().lower()
note_name = str(name or "").strip()
note_text = str(text or "")
if not file_hash or not _normalize_hash(file_hash) or not note_name:
continue
normalized.append((file_hash, note_name, note_text))
if not normalized:
return True
with API_folder_store(Path(self._location)) as db:
conn = getattr(db, "connection", None)
if conn is None:
return False
cursor = conn.cursor()
# Look up file paths for hashes in chunks (to verify existence).
wanted_hashes = sorted({h for (h, _n, _t) in normalized})
hash_to_path: Dict[str, str] = {}
chunk_size = 400
for i in range(0, len(wanted_hashes), chunk_size):
chunk = wanted_hashes[i : i + chunk_size]
if not chunk:
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})", chunk)
rows = cursor.fetchall() or []
except Exception:
rows = []
for row in rows:
try:
h = str(row[0]).strip().lower()
p = str(row[1]).strip()
except Exception:
continue
if h and p:
hash_to_path[h] = p
# Ensure notes rows exist and only write for existing files.
inserts: List[tuple[str, str, str]] = []
for h, note_name, note_text in normalized:
p = hash_to_path.get(h)
if not p:
continue
try:
if not Path(p).exists():
continue
except Exception:
continue
inserts.append((h, note_name, note_text))
if not inserts:
return False
# Prefer upsert when supported, else fall back to INSERT OR REPLACE.
try:
cursor.executemany(
"INSERT INTO notes (hash, name, note) VALUES (?, ?, ?) "
"ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP",
inserts,
)
except Exception:
cursor.executemany(
"INSERT OR REPLACE INTO notes (hash, name, note) VALUES (?, ?, ?)",
inserts,
)
conn.commit()
return True
except Exception as exc:
debug(f"set_note_bulk failed for local file: {exc}")
return False
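
The upsert branch in set_note_bulk relies on ON CONFLICT(hash, name), which only resolves against an existing UNIQUE constraint or index on that pair; the INSERT OR REPLACE branch is the fallback when the table lacks one. A self-contained check of that assumption (the schema here is hypothetical, not taken from the commit):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE notes ("
    " hash TEXT NOT NULL,"
    " name TEXT NOT NULL,"
    " note TEXT,"
    " updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,"
    " UNIQUE (hash, name))"
)
upsert = (
    "INSERT INTO notes (hash, name, note) VALUES (?, ?, ?) "
    "ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP"
)
conn.execute(upsert, ("ab" * 32, "comment", "first"))
conn.execute(upsert, ("ab" * 32, "comment", "second"))  # updates in place, no duplicate row
assert conn.execute("SELECT note FROM notes").fetchall() == [("second",)]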
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a local file by hash."""
from API.folder import API_folder_store