nose
2025-12-14 00:53:52 -08:00
parent 52a79b0086
commit a03eb0d1be
24 changed files with 2785 additions and 1868 deletions


@@ -30,6 +30,8 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
return _normalize_hash(file_path.stem)
class Folder(Store):
""""""
# Track which locations have already been migrated to avoid repeated migrations
@@ -359,6 +361,17 @@ class Folder(Store):
else:
shutil.copy2(str(file_path), str(save_file))
debug(f"Local copy: {save_file}", file=sys.stderr)
# Best-effort: capture duration for media
duration_value: float | None = None
try:
from SYS.utils import ffprobe
probe = ffprobe(str(save_file))
duration = probe.get("duration")
if isinstance(duration, (int, float)) and duration > 0:
duration_value = float(duration)
except Exception:
duration_value = None
# Save to database
with API_folder_store(Path(self._location)) as db:
@@ -368,7 +381,8 @@ class Folder(Store):
db.save_metadata(save_file, {
'hash': file_hash,
'ext': ext_clean,
'size': file_path.stat().st_size,
'duration': duration_value,
})
# Add tags if provided
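The duration capture in this hunk is deliberately best-effort: any probe failure leaves duration_value as None rather than aborting the save. Below is a minimal standalone sketch of the same idea, assuming only that the ffprobe CLI is on PATH; SYS.utils.ffprobe is the repo's own wrapper, and its dict-with-"duration" return shape is inferred from the call site above.

import subprocess

def probe_duration(path: str) -> float | None:
    # Best-effort duration lookup: returns seconds, or None on any failure.
    try:
        out = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "default=noprint_wrappers=1:nokey=1", path],
            capture_output=True, text=True, check=True,
        ).stdout.strip()
        value = float(out)
        return value if value > 0 else None
    except (OSError, subprocess.CalledProcessError, ValueError):
        return None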
@@ -405,6 +419,21 @@ class Folder(Store):
results = []
search_dir = Path(self._location).expanduser()
def _url_like_pattern(value: str) -> str:
# Interpret user patterns as substring matches (with optional glob wildcards).
v = (value or "").strip().lower()
if not v or v == "*":
return "%"
v = v.replace("%", "\\%").replace("_", "\\_")
v = v.replace("*", "%").replace("?", "_")
if "%" not in v and "_" not in v:
return f"%{v}%"
if not v.startswith("%"):
v = "%" + v
if not v.endswith("%"):
v = v + "%"
return v
tokens = [t.strip() for t in query.split(',') if t.strip()]
if not match_all and len(tokens) == 1 and _normalize_hash(query):
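A few worked examples of _url_like_pattern as written above: glob wildcards map onto SQL LIKE wildcards, bare terms become substring matches, and literal % / _ are backslash-escaped (which requires the LIKE query to honour backslash as an escape character).

assert _url_like_pattern("") == "%"                                   # empty or "*" matches any URL
assert _url_like_pattern("Example.COM") == "%example.com%"            # lowercased substring match
assert _url_like_pattern("youtube.com/watch?v=x") == "%youtube.com/watch_v=x%"  # "?" -> "_"
assert _url_like_pattern("https://*") == "%https://%"                 # "*" -> "%", wrapped as needed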
@@ -453,6 +482,8 @@ class Folder(Store):
try:
with DatabaseAPI(search_dir) as api:
if tokens and len(tokens) > 1:
url_fetch_limit = (limit or 45) * 50
def _like_pattern(term: str) -> str:
return term.replace('*', '%').replace('?', '_')
@@ -473,6 +504,11 @@ class Folder(Store):
h = api.get_file_hash_by_hash(normalized_hash)
return {h} if h else set()
if namespace == 'url':
if not pattern or pattern == '*':
return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)
if namespace == 'store':
if pattern not in {'local', 'file', 'filesystem'}:
return set()
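Each namespace token above resolves to a set of matching file hashes. The step that combines those per-token sets is outside this hunk, so the combiner below is only a hypothetical sketch of the obvious AND-style merge (intersection across tokens); the function name and signature are assumptions, not part of the diff.

def combine_token_hashes(per_token: list[set[str]]) -> set[str]:
    # Hypothetical combiner: a file qualifies only if every token matched it.
    if not per_token:
        return set()
    combined = set(per_token[0])
    for hashes in per_token[1:]:
        combined &= hashes
    return combined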
@@ -562,6 +598,29 @@ class Folder(Store):
if limit is not None and len(results) >= limit:
return results
return results
if namespace == "url":
if not pattern or pattern == "*":
rows = api.get_files_with_any_url(limit)
else:
rows = api.get_files_by_url_like(_url_like_pattern(pattern), limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
query_pattern = f"{namespace}:%"
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
@@ -592,126 +651,59 @@ class Folder(Store):
        if limit is not None and len(results) >= limit:
            return results
elif not match_all:
    # Strict tag-based search only (no filename/path searching).
    terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
    if not terms:
        terms = [query_lower]
    fetch_limit = (limit or 45) * 50
    # AND semantics across terms: each term must match at least one tag.
    hits: dict[str, dict[str, Any]] = {}
    for term in terms:
        tag_pattern = f"%{term}%"
        term_rows = api.get_files_by_namespace_pattern(tag_pattern, fetch_limit)
        for file_hash, file_path_str, size_bytes, ext in term_rows:
            if not file_path_str:
                continue
            entry = hits.get(file_hash)
            if entry:
                entry["count"] += 1
                if size_bytes is not None:
                    entry["size"] = size_bytes
            else:
                hits[file_hash] = {
                    "path": file_path_str,
                    "size": size_bytes,
                    "hash": file_hash,
                    "count": 1,
                }
    required = len(terms)
    seen_files: set[str] = set()
    for file_hash, info in hits.items():
        if info.get("count") != required:
            continue
        file_path_str = info.get("path")
        if not file_path_str or file_path_str in seen_files:
            continue
        file_path = Path(file_path_str)
        if not file_path.exists():
            continue
        seen_files.add(file_path_str)
        size_bytes = info.get("size")
        if size_bytes is None:
            try:
                size_bytes = file_path.stat().st_size
            except OSError:
                size_bytes = None
        tags = api.get_tags_for_file(file_hash)
        entry_obj = _create_entry(file_path, tags, size_bytes, info.get("hash"))
        results.append(entry_obj)
        if limit is not None and len(results) >= limit:
            break
else:
rows = api.get_all_files(limit)
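The rewritten branch above implements AND semantics by counting, per file hash, how many query terms matched at least one of that file's tags; only hashes whose count reaches len(terms) are turned into results. A tiny worked illustration of that invariant (the terms and hashes below are made up):

terms = ["music", "flac"]
matches_per_term = {            # hypothetical per-term hash sets returned by the tag search
    "music": {"aaa", "bbb"},
    "flac": {"bbb", "ccc"},
}
counts: dict[str, int] = {}
for term in terms:
    for file_hash in matches_per_term[term]:
        counts[file_hash] = counts.get(file_hash, 0) + 1
required = len(terms)
assert [h for h, c in counts.items() if c == required] == ["bbb"]    # only "bbb" matched every term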
@@ -726,10 +718,8 @@ class Folder(Store):
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
debug(f"[folder:{backend_label}] {len(results)} result(s)")
return results
except Exception as e:
@@ -938,9 +928,11 @@ class Folder(Store):
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
urls = normalize_urls(meta.get("url"))
return urls
except Exception as exc:
debug(f"Local DB get_metadata failed: {exc}")
return []
@@ -955,11 +947,13 @@ class Folder(Store):
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = normalize_urls(meta.get("url"))
incoming_urls = normalize_urls(url)
changed = False
for u in list(incoming_urls or []):
if not u:
continue
if u not in existing_urls:
@@ -982,10 +976,11 @@ class Folder(Store):
file_hash = file_identifier
if self._location:
try:
from metadata import normalize_urls
with API_folder_store(Path(self._location)) as db:
meta = db.get_metadata(file_hash) or {}
existing_urls = normalize_urls(meta.get("url"))
remove_set = {u for u in normalize_urls(url) if u}
if not remove_set:
return False
new_urls = [u for u in existing_urls if u not in remove_set]
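Both URL hunks lean on metadata.normalize_urls, whose implementation is not part of this diff. The stand-in below is a hypothetical sketch (accept None, a single URL, or an iterable; strip, drop empties, deduplicate in order) used only to make the add/remove flow concrete.

from typing import Iterable, Union

def normalize_urls_sketch(value: Union[str, Iterable[str], None]) -> list[str]:
    # Hypothetical stand-in for metadata.normalize_urls.
    if value is None:
        return []
    items = [value] if isinstance(value, str) else list(value)
    seen: set[str] = set()
    out: list[str] = []
    for item in items:
        u = (item or "").strip()
        if u and u not in seen:
            seen.add(u)
            out.append(u)
    return out

# Adding URLs: append any incoming URL not already present.
existing = normalize_urls_sketch(["https://a.example/1"])
incoming = normalize_urls_sketch("https://a.example/2")
merged = existing + [u for u in incoming if u not in existing]
assert merged == ["https://a.example/1", "https://a.example/2"]

# Removing URLs: drop anything in the remove set, as in the hunk above.
remove_set = {u for u in normalize_urls_sketch("https://a.example/1") if u}
assert [u for u in merged if u not in remove_set] == ["https://a.example/2"]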