f
This commit is contained in:
104
API/folder.py
104
API/folder.py
@@ -217,6 +217,7 @@ class API_folder_store:
|
|||||||
self.connection: Optional[sqlite3.Connection] = None
|
self.connection: Optional[sqlite3.Connection] = None
|
||||||
# Use the shared lock
|
# Use the shared lock
|
||||||
self._db_lock = self._shared_db_lock
|
self._db_lock = self._shared_db_lock
|
||||||
|
mm_debug(f"[folder-db] init: root={self.library_root} db={self.db_path}")
|
||||||
self._init_db()
|
self._init_db()
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
@@ -284,6 +285,7 @@ class API_folder_store:
|
|||||||
"""Initialize database connection and create tables if needed."""
|
"""Initialize database connection and create tables if needed."""
|
||||||
with self._with_db_lock():
|
with self._with_db_lock():
|
||||||
try:
|
try:
|
||||||
|
mm_debug(f"[folder-db] opening sqlite db: {self.db_path}")
|
||||||
# Ensure the library root exists; sqlite cannot create parent dirs.
|
# Ensure the library root exists; sqlite cannot create parent dirs.
|
||||||
try:
|
try:
|
||||||
# User safety: Folder store must be created in a blank folder/no files in it.
|
# User safety: Folder store must be created in a blank folder/no files in it.
|
||||||
@@ -326,6 +328,7 @@ class API_folder_store:
|
|||||||
timeout=20.0
|
timeout=20.0
|
||||||
)
|
)
|
||||||
self.connection.row_factory = sqlite3.Row
|
self.connection.row_factory = sqlite3.Row
|
||||||
|
mm_debug(f"[folder-db] sqlite connection opened: {self.db_path}")
|
||||||
|
|
||||||
# Ensure busy_timeout is set immediately for all subsequent ops (including pragmas)
|
# Ensure busy_timeout is set immediately for all subsequent ops (including pragmas)
|
||||||
try:
|
try:
|
||||||
@@ -337,7 +340,14 @@ class API_folder_store:
|
|||||||
# 1. WAL mode for better concurrency and fewer locks
|
# 1. WAL mode for better concurrency and fewer locks
|
||||||
self.connection.execute("PRAGMA journal_mode=WAL")
|
self.connection.execute("PRAGMA journal_mode=WAL")
|
||||||
# 2. auto_vacuum=FULL to automatically reclaim space from deleted rows/logs
|
# 2. auto_vacuum=FULL to automatically reclaim space from deleted rows/logs
|
||||||
self.connection.execute("PRAGMA auto_vacuum = FULL")
|
try:
|
||||||
|
self.connection.execute("PRAGMA auto_vacuum = FULL")
|
||||||
|
except sqlite3.OperationalError as exc:
|
||||||
|
if "locked" not in str(exc).lower():
|
||||||
|
raise
|
||||||
|
logger.warning(
|
||||||
|
"Database locked; skipping PRAGMA auto_vacuum setup for this session."
|
||||||
|
)
|
||||||
# 3. Increase page size for modern file systems
|
# 3. Increase page size for modern file systems
|
||||||
self.connection.execute("PRAGMA page_size = 4096")
|
self.connection.execute("PRAGMA page_size = 4096")
|
||||||
# 4. Memory and Sync optimizations
|
# 4. Memory and Sync optimizations
|
||||||
@@ -2657,12 +2667,30 @@ class DatabaseAPI:
|
|||||||
def __init__(self, search_dir: Path):
|
def __init__(self, search_dir: Path):
|
||||||
self.search_dir = expand_path(search_dir).resolve()
|
self.search_dir = expand_path(search_dir).resolve()
|
||||||
self.db = API_folder_store(self.search_dir)
|
self.db = API_folder_store(self.search_dir)
|
||||||
|
try:
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] DatabaseAPI init: root={self.search_dir} db={self.db.db_path}"
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
|
try:
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] DatabaseAPI enter: root={self.search_dir} db={self.db.db_path}"
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self.db.__enter__()
|
self.db.__enter__()
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
|
try:
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] DatabaseAPI exit: root={self.search_dir} db={self.db.db_path}"
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return self.db.__exit__(*args)
|
return self.db.__exit__(*args)
|
||||||
|
|
||||||
def get_cursor(self):
|
def get_cursor(self):
|
||||||
@@ -2730,6 +2758,9 @@ class DatabaseAPI:
|
|||||||
|
|
||||||
def get_file_hashes_with_any_url(self, limit: Optional[int] = None) -> Set[str]:
|
def get_file_hashes_with_any_url(self, limit: Optional[int] = None) -> Set[str]:
|
||||||
"""Get hashes of files that have any non-empty URL metadata."""
|
"""Get hashes of files that have any non-empty URL metadata."""
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_file_hashes_with_any_url start: limit={limit or 10000}"
|
||||||
|
)
|
||||||
cursor = self.get_cursor()
|
cursor = self.get_cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
@@ -2744,8 +2775,11 @@ class DatabaseAPI:
|
|||||||
(limit or 10000,
|
(limit or 10000,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
return {row[0]
|
rows = cursor.fetchall()
|
||||||
for row in cursor.fetchall()}
|
mm_debug(
|
||||||
|
f"[folder-db] get_file_hashes_with_any_url done: {len(rows)} row(s)"
|
||||||
|
)
|
||||||
|
return {row[0] for row in rows}
|
||||||
|
|
||||||
def get_file_hashes_by_url_like(
|
def get_file_hashes_by_url_like(
|
||||||
self,
|
self,
|
||||||
@@ -2753,6 +2787,9 @@ class DatabaseAPI:
|
|||||||
limit: Optional[int] = None
|
limit: Optional[int] = None
|
||||||
) -> Set[str]:
|
) -> Set[str]:
|
||||||
"""Get hashes of files whose URL metadata contains a substring (case-insensitive)."""
|
"""Get hashes of files whose URL metadata contains a substring (case-insensitive)."""
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_file_hashes_by_url_like start: pattern={like_pattern} limit={limit or 10000}"
|
||||||
|
)
|
||||||
cursor = self.get_cursor()
|
cursor = self.get_cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
@@ -2766,8 +2803,11 @@ class DatabaseAPI:
|
|||||||
(like_pattern.lower(),
|
(like_pattern.lower(),
|
||||||
limit or 10000),
|
limit or 10000),
|
||||||
)
|
)
|
||||||
return {row[0]
|
rows = cursor.fetchall()
|
||||||
for row in cursor.fetchall()}
|
mm_debug(
|
||||||
|
f"[folder-db] get_file_hashes_by_url_like done: {len(rows)} row(s)"
|
||||||
|
)
|
||||||
|
return {row[0] for row in rows}
|
||||||
|
|
||||||
def get_file_hashes_by_ext(self,
|
def get_file_hashes_by_ext(self,
|
||||||
ext_value: str,
|
ext_value: str,
|
||||||
@@ -2847,14 +2887,18 @@ class DatabaseAPI:
|
|||||||
def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
|
def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
|
||||||
"""Get files that have any non-empty URL metadata.
|
"""Get files that have any non-empty URL metadata.
|
||||||
|
|
||||||
Returns (hash, file_path, size, ext) tuples.
|
Returns (hash, file_path, size, ext, url) tuples.
|
||||||
"""
|
"""
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_files_with_any_url start: limit={limit or 10000}"
|
||||||
|
)
|
||||||
cursor = self.get_cursor()
|
cursor = self.get_cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
SELECT f.hash, f.file_path,
|
SELECT f.hash, f.file_path,
|
||||||
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
|
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
|
||||||
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
|
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext,
|
||||||
|
COALESCE(m.url, '') as url
|
||||||
FROM file f
|
FROM file f
|
||||||
JOIN metadata m ON f.hash = m.hash
|
JOIN metadata m ON f.hash = m.hash
|
||||||
WHERE m.url IS NOT NULL
|
WHERE m.url IS NOT NULL
|
||||||
@@ -2866,21 +2910,29 @@ class DatabaseAPI:
|
|||||||
(limit or 10000,
|
(limit or 10000,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
return cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_files_with_any_url done: {len(rows)} row(s)"
|
||||||
|
)
|
||||||
|
return rows
|
||||||
|
|
||||||
def get_files_by_url_like(self,
|
def get_files_by_url_like(self,
|
||||||
like_pattern: str,
|
like_pattern: str,
|
||||||
limit: Optional[int] = None) -> List[tuple]:
|
limit: Optional[int] = None) -> List[tuple]:
|
||||||
"""Get files whose URL metadata contains a substring (case-insensitive).
|
"""Get files whose URL metadata contains a substring (case-insensitive).
|
||||||
|
|
||||||
Returns (hash, file_path, size, ext) tuples.
|
Returns (hash, file_path, size, ext, url) tuples.
|
||||||
"""
|
"""
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_files_by_url_like start: pattern={like_pattern} limit={limit or 10000}"
|
||||||
|
)
|
||||||
cursor = self.get_cursor()
|
cursor = self.get_cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
SELECT f.hash, f.file_path,
|
SELECT f.hash, f.file_path,
|
||||||
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
|
COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size,
|
||||||
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext
|
COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext,
|
||||||
|
COALESCE(m.url, '') as url
|
||||||
FROM file f
|
FROM file f
|
||||||
JOIN metadata m ON f.hash = m.hash
|
JOIN metadata m ON f.hash = m.hash
|
||||||
WHERE m.url IS NOT NULL
|
WHERE m.url IS NOT NULL
|
||||||
@@ -2891,7 +2943,11 @@ class DatabaseAPI:
|
|||||||
(like_pattern.lower(),
|
(like_pattern.lower(),
|
||||||
limit or 10000),
|
limit or 10000),
|
||||||
)
|
)
|
||||||
return cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_files_by_url_like done: {len(rows)} row(s)"
|
||||||
|
)
|
||||||
|
return rows
|
||||||
|
|
||||||
def get_file_metadata(self,
|
def get_file_metadata(self,
|
||||||
file_hashes: Set[str],
|
file_hashes: Set[str],
|
||||||
@@ -2899,6 +2955,9 @@ class DatabaseAPI:
|
|||||||
"""Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
|
"""Get metadata for files given their hashes. Returns (hash, file_path, size, extension) tuples."""
|
||||||
if not file_hashes:
|
if not file_hashes:
|
||||||
return []
|
return []
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_file_metadata start: hashes={len(file_hashes)} limit={limit or len(file_hashes)}"
|
||||||
|
)
|
||||||
cursor = self.get_cursor()
|
cursor = self.get_cursor()
|
||||||
placeholders = ",".join(["?"] * len(file_hashes))
|
placeholders = ",".join(["?"] * len(file_hashes))
|
||||||
fetch_sql = f"""
|
fetch_sql = f"""
|
||||||
@@ -2911,7 +2970,11 @@ class DatabaseAPI:
|
|||||||
LIMIT ?
|
LIMIT ?
|
||||||
"""
|
"""
|
||||||
cursor.execute(fetch_sql, (*file_hashes, limit or len(file_hashes)))
|
cursor.execute(fetch_sql, (*file_hashes, limit or len(file_hashes)))
|
||||||
return cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_file_metadata done: {len(rows)} row(s)"
|
||||||
|
)
|
||||||
|
return rows
|
||||||
|
|
||||||
def get_all_files(self, limit: Optional[int] = None) -> List[tuple]:
|
def get_all_files(self, limit: Optional[int] = None) -> List[tuple]:
|
||||||
"""Get all files in database. Returns (hash, file_path, size, ext) tuples."""
|
"""Get all files in database. Returns (hash, file_path, size, ext) tuples."""
|
||||||
@@ -2932,11 +2995,18 @@ class DatabaseAPI:
|
|||||||
|
|
||||||
def get_tags_for_file(self, file_hash: str) -> List[str]:
|
def get_tags_for_file(self, file_hash: str) -> List[str]:
|
||||||
"""Get all tags for a file given its hash."""
|
"""Get all tags for a file given its hash."""
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_tags_for_file start: hash={file_hash}"
|
||||||
|
)
|
||||||
cursor = self.get_cursor()
|
cursor = self.get_cursor()
|
||||||
cursor.execute("SELECT tag FROM tag WHERE hash = ?",
|
cursor.execute("SELECT tag FROM tag WHERE hash = ?",
|
||||||
(file_hash,
|
(file_hash,
|
||||||
))
|
))
|
||||||
return [row[0] for row in cursor.fetchall()]
|
rows = cursor.fetchall()
|
||||||
|
mm_debug(
|
||||||
|
f"[folder-db] get_tags_for_file done: {len(rows)} row(s)"
|
||||||
|
)
|
||||||
|
return [row[0] for row in rows]
|
||||||
|
|
||||||
def get_tags_by_namespace_and_file(self,
|
def get_tags_by_namespace_and_file(self,
|
||||||
file_hash: str,
|
file_hash: str,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
from fnmatch import translate
|
from fnmatch import fnmatch, translate
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
@@ -30,6 +30,28 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]
|
|||||||
return _normalize_hash(file_path.stem)
|
return _normalize_hash(file_path.stem)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_url_for_search(url: str) -> str:
|
||||||
|
value = str(url or "").strip()
|
||||||
|
value = re.sub(r"^[a-z][a-z0-9+.-]*://", "", value, flags=re.IGNORECASE)
|
||||||
|
value = re.sub(r"^www\.", "", value, flags=re.IGNORECASE)
|
||||||
|
return value.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _match_url_pattern(url: str, pattern: str) -> bool:
    """Report whether *url* satisfies the search *pattern*.

    Both arguments are passed through ``_normalize_url_for_search`` first.
    An empty normalized pattern never matches.  A pattern containing ``*`` or
    ``?`` is handed to ``fnmatch``; any other pattern matches when it equals
    the URL once trailing slashes are ignored, or when it occurs anywhere
    inside the URL.
    """
    candidate = _normalize_url_for_search(url)
    wanted = _normalize_url_for_search(pattern)
    if not wanted:
        return False

    if "*" in wanted or "?" in wanted:
        return fnmatch(candidate, wanted)

    # Equality check ignores trailing slashes so "host/" still matches "host",
    # which the plain substring test below would miss.
    wanted_trimmed = wanted.rstrip("/")
    if wanted_trimmed and wanted_trimmed == candidate.rstrip("/"):
        return True
    return wanted in candidate
|
||||||
|
|
||||||
|
|
||||||
class Folder(Store):
|
class Folder(Store):
|
||||||
""""""
|
""""""
|
||||||
|
|
||||||
@@ -690,6 +712,12 @@ class Folder(Store):
|
|||||||
match_all = query == "*" or (not query and bool(ext_filter))
|
match_all = query == "*" or (not query and bool(ext_filter))
|
||||||
results = []
|
results = []
|
||||||
search_dir = expand_path(self._location)
|
search_dir = expand_path(self._location)
|
||||||
|
backend_label = str(
|
||||||
|
getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder"
|
||||||
|
)
|
||||||
|
debug(
|
||||||
|
f"[folder:{backend_label}] search start: query={query} limit={limit} root={search_dir}"
|
||||||
|
)
|
||||||
|
|
||||||
def _url_like_pattern(value: str) -> str:
|
def _url_like_pattern(value: str) -> str:
|
||||||
# Interpret user patterns as substring matches (with optional glob wildcards).
|
# Interpret user patterns as substring matches (with optional glob wildcards).
|
||||||
@@ -1002,7 +1030,7 @@ class Folder(Store):
|
|||||||
namespace, pattern = query.split(":", 1)
|
namespace, pattern = query.split(":", 1)
|
||||||
namespace = namespace.strip().lower()
|
namespace = namespace.strip().lower()
|
||||||
pattern = pattern.strip().lower()
|
pattern = pattern.strip().lower()
|
||||||
debug(f"Performing namespace search: {namespace}:{pattern}")
|
debug(f"[folder:{backend_label}] namespace search: {namespace}:{pattern}")
|
||||||
|
|
||||||
if namespace == "hash":
|
if namespace == "hash":
|
||||||
normalized_hash = _normalize_hash(pattern)
|
normalized_hash = _normalize_hash(pattern)
|
||||||
@@ -1041,14 +1069,50 @@ class Folder(Store):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
if namespace == "url":
|
if namespace == "url":
|
||||||
|
pattern_hint = kwargs.get("pattern_hint")
|
||||||
|
|
||||||
|
def _parse_url_value(raw: Any) -> list[str]:
|
||||||
|
if raw is None:
|
||||||
|
return []
|
||||||
|
if isinstance(raw, list):
|
||||||
|
return [str(u).strip() for u in raw if str(u).strip()]
|
||||||
|
if isinstance(raw, str):
|
||||||
|
text = raw.strip()
|
||||||
|
if not text:
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
parsed = json.loads(text)
|
||||||
|
if isinstance(parsed, list):
|
||||||
|
return [
|
||||||
|
str(u).strip()
|
||||||
|
for u in parsed
|
||||||
|
if str(u).strip()
|
||||||
|
]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return [text]
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _matches_pattern(url_list: list[str]) -> bool:
    """Check *url_list* against the enclosing scope's ``pattern_hint``.

    When no hint is set every list is accepted; otherwise at least one URL in
    the list must satisfy ``_match_url_pattern``.
    """
    if pattern_hint:
        return any(
            _match_url_pattern(candidate_url, pattern_hint)
            for candidate_url in url_list
        )
    return True
|
||||||
|
|
||||||
if not pattern or pattern == "*":
|
if not pattern or pattern == "*":
|
||||||
|
debug(f"[folder:{backend_label}] url search: any-url (limit={limit})")
|
||||||
rows = api.get_files_with_any_url(limit)
|
rows = api.get_files_with_any_url(limit)
|
||||||
else:
|
else:
|
||||||
|
debug(
|
||||||
|
f"[folder:{backend_label}] url search: like={pattern} (limit={limit})"
|
||||||
|
)
|
||||||
rows = api.get_files_by_url_like(
|
rows = api.get_files_by_url_like(
|
||||||
_url_like_pattern(pattern),
|
_url_like_pattern(pattern),
|
||||||
limit
|
limit
|
||||||
)
|
)
|
||||||
for file_hash, file_path_str, size_bytes, ext in rows:
|
for file_hash, file_path_str, size_bytes, ext, url_raw in rows:
|
||||||
if not file_path_str:
|
if not file_path_str:
|
||||||
continue
|
continue
|
||||||
file_path = search_dir / str(file_path_str)
|
file_path = search_dir / str(file_path_str)
|
||||||
@@ -1059,6 +1123,9 @@ class Folder(Store):
|
|||||||
size_bytes = file_path.stat().st_size
|
size_bytes = file_path.stat().st_size
|
||||||
except OSError:
|
except OSError:
|
||||||
size_bytes = None
|
size_bytes = None
|
||||||
|
urls = _parse_url_value(url_raw)
|
||||||
|
if not urls or not _matches_pattern(urls):
|
||||||
|
continue
|
||||||
tags = api.get_tags_for_file(file_hash)
|
tags = api.get_tags_for_file(file_hash)
|
||||||
entry = _create_entry(
|
entry = _create_entry(
|
||||||
file_path,
|
file_path,
|
||||||
@@ -1066,6 +1133,7 @@ class Folder(Store):
|
|||||||
size_bytes,
|
size_bytes,
|
||||||
file_hash
|
file_hash
|
||||||
)
|
)
|
||||||
|
entry["urls"] = urls
|
||||||
results.append(entry)
|
results.append(entry)
|
||||||
if limit is not None and len(results) >= limit:
|
if limit is not None and len(results) >= limit:
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -466,7 +466,9 @@ class HydrusNetwork(Store):
|
|||||||
def _extract_urls(meta_obj: Any) -> list[str]:
|
def _extract_urls(meta_obj: Any) -> list[str]:
|
||||||
if not isinstance(meta_obj, dict):
|
if not isinstance(meta_obj, dict):
|
||||||
return []
|
return []
|
||||||
raw = meta_obj.get("url")
|
raw = meta_obj.get("known_urls")
|
||||||
|
if raw is None:
|
||||||
|
raw = meta_obj.get("url")
|
||||||
if raw is None:
|
if raw is None:
|
||||||
raw = meta_obj.get("urls")
|
raw = meta_obj.get("urls")
|
||||||
if isinstance(raw, str):
|
if isinstance(raw, str):
|
||||||
@@ -483,100 +485,178 @@ class HydrusNetwork(Store):
|
|||||||
return out
|
return out
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
|
||||||
|
if not isinstance(payload, dict):
|
||||||
|
return [], []
|
||||||
|
raw_ids = payload.get("file_ids", [])
|
||||||
|
raw_hashes = payload.get("hashes", [])
|
||||||
|
ids_out: list[int] = []
|
||||||
|
hashes_out: list[str] = []
|
||||||
|
if isinstance(raw_ids, list):
|
||||||
|
for item in raw_ids:
|
||||||
|
try:
|
||||||
|
if isinstance(item, (int, float)):
|
||||||
|
ids_out.append(int(item))
|
||||||
|
continue
|
||||||
|
if isinstance(item, str) and item.strip().isdigit():
|
||||||
|
ids_out.append(int(item.strip()))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if isinstance(raw_hashes, list):
|
||||||
|
for item in raw_hashes:
|
||||||
|
try:
|
||||||
|
candidate = str(item or "").strip().lower()
|
||||||
|
if candidate:
|
||||||
|
hashes_out.append(candidate)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return ids_out, hashes_out
|
||||||
|
|
||||||
def _iter_url_filtered_metadata(
|
def _iter_url_filtered_metadata(
|
||||||
url_value: str | None,
|
url_value: str | None,
|
||||||
want_any: bool,
|
want_any: bool,
|
||||||
fetch_limit: int
|
fetch_limit: int,
|
||||||
) -> list[dict[str,
|
scan_limit: int | None = None
|
||||||
Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
||||||
|
|
||||||
# First try a fast system predicate if Hydrus supports it.
|
|
||||||
candidate_file_ids: list[int] = []
|
candidate_file_ids: list[int] = []
|
||||||
try:
|
candidate_hashes: list[str] = []
|
||||||
if want_any:
|
seen_file_ids: set[int] = set()
|
||||||
|
seen_hashes: set[str] = set()
|
||||||
|
|
||||||
|
def _add_candidates(ids: list[int], hashes: list[str]) -> None:
|
||||||
|
for fid in ids:
|
||||||
|
if fid in seen_file_ids:
|
||||||
|
continue
|
||||||
|
seen_file_ids.add(fid)
|
||||||
|
candidate_file_ids.append(fid)
|
||||||
|
for hh in hashes:
|
||||||
|
if hh in seen_hashes:
|
||||||
|
continue
|
||||||
|
seen_hashes.add(hh)
|
||||||
|
candidate_hashes.append(hh)
|
||||||
|
|
||||||
|
predicate_supported = getattr(self, "_has_url_predicate", None)
|
||||||
|
if predicate_supported is not False:
|
||||||
|
try:
|
||||||
predicate = "system:has url"
|
predicate = "system:has url"
|
||||||
url_search = client.search_files(
|
url_search = client.search_files(
|
||||||
tags=[predicate],
|
tags=[predicate],
|
||||||
return_hashes=False,
|
return_hashes=True,
|
||||||
return_file_ids=True,
|
return_file_ids=False,
|
||||||
return_file_count=False,
|
return_file_count=False,
|
||||||
)
|
)
|
||||||
ids = url_search.get("file_ids",
|
ids, hashes = _extract_search_ids(url_search)
|
||||||
[]) if isinstance(url_search,
|
_add_candidates(ids, hashes)
|
||||||
dict) else []
|
self._has_url_predicate = True
|
||||||
if isinstance(ids, list):
|
except Exception as exc:
|
||||||
candidate_file_ids = [
|
try:
|
||||||
int(x) for x in ids
|
from API.HydrusNetwork import HydrusRequestError
|
||||||
if isinstance(x, (int, float,
|
|
||||||
str)) and str(x).strip().isdigit()
|
|
||||||
]
|
|
||||||
except Exception:
|
|
||||||
candidate_file_ids = []
|
|
||||||
|
|
||||||
if not candidate_file_ids:
|
if isinstance(exc, HydrusRequestError) and getattr(exc, "status", None) == 400:
|
||||||
# Fallback: scan from system:everything and filter by URL substring.
|
self._has_url_predicate = False
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not candidate_file_ids and not candidate_hashes:
|
||||||
everything = client.search_files(
|
everything = client.search_files(
|
||||||
tags=["system:everything"],
|
tags=["system:everything"],
|
||||||
return_hashes=False,
|
return_hashes=True,
|
||||||
return_file_ids=True,
|
return_file_ids=False,
|
||||||
return_file_count=False,
|
return_file_count=False,
|
||||||
)
|
)
|
||||||
ids = everything.get("file_ids",
|
ids, hashes = _extract_search_ids(everything)
|
||||||
[]) if isinstance(everything,
|
_add_candidates(ids, hashes)
|
||||||
dict) else []
|
|
||||||
if isinstance(ids, list):
|
|
||||||
candidate_file_ids = [
|
|
||||||
int(x) for x in ids if isinstance(x, (int, float))
|
|
||||||
]
|
|
||||||
|
|
||||||
if not candidate_file_ids:
|
if not candidate_file_ids and not candidate_hashes:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
needle = (url_value or "").strip().lower()
|
needle = (url_value or "").strip().lower()
|
||||||
chunk_size = 200
|
chunk_size = 200
|
||||||
out: list[dict[str, Any]] = []
|
out: list[dict[str, Any]] = []
|
||||||
|
if scan_limit is None:
|
||||||
|
try:
|
||||||
|
if not want_any and url_value:
|
||||||
|
scan_limit = max(200, min(int(fetch_limit), 400))
|
||||||
|
else:
|
||||||
|
scan_limit = max(int(fetch_limit) * 5, 1000)
|
||||||
|
except Exception:
|
||||||
|
scan_limit = 400 if (not want_any and url_value) else 1000
|
||||||
|
if scan_limit is not None:
|
||||||
|
scan_limit = min(int(scan_limit), 10000)
|
||||||
|
scanned = 0
|
||||||
|
|
||||||
for start in range(0, len(candidate_file_ids), chunk_size):
|
def _process_source(items: list[Any], kind: str) -> None:
|
||||||
|
nonlocal scanned
|
||||||
|
for start in range(0, len(items), chunk_size):
|
||||||
|
if len(out) >= fetch_limit:
|
||||||
|
return
|
||||||
|
if scan_limit is not None and scanned >= scan_limit:
|
||||||
|
return
|
||||||
|
chunk = items[start:start + chunk_size]
|
||||||
|
if scan_limit is not None:
|
||||||
|
remaining = scan_limit - scanned
|
||||||
|
if remaining <= 0:
|
||||||
|
return
|
||||||
|
if len(chunk) > remaining:
|
||||||
|
chunk = chunk[:remaining]
|
||||||
|
scanned += len(chunk)
|
||||||
|
try:
|
||||||
|
if kind == "hashes":
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
hashes=chunk,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
payload = client.fetch_file_metadata(
|
||||||
|
file_ids=chunk,
|
||||||
|
include_file_url=True,
|
||||||
|
include_service_keys_to_tags=True,
|
||||||
|
include_duration=True,
|
||||||
|
include_size=True,
|
||||||
|
include_mime=True,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
metas = payload.get("metadata",
|
||||||
|
[]) if isinstance(payload,
|
||||||
|
dict) else []
|
||||||
|
if not isinstance(metas, list):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for meta in metas:
|
||||||
|
if len(out) >= fetch_limit:
|
||||||
|
break
|
||||||
|
if not isinstance(meta, dict):
|
||||||
|
continue
|
||||||
|
urls = _extract_urls(meta)
|
||||||
|
if not urls:
|
||||||
|
continue
|
||||||
|
if want_any:
|
||||||
|
out.append(meta)
|
||||||
|
continue
|
||||||
|
if not needle:
|
||||||
|
continue
|
||||||
|
if any(needle in u.lower() for u in urls):
|
||||||
|
out.append(meta)
|
||||||
|
continue
|
||||||
|
|
||||||
|
sources: list[tuple[str, list[Any]]] = []
|
||||||
|
if candidate_hashes:
|
||||||
|
sources.append(("hashes", candidate_hashes))
|
||||||
|
elif candidate_file_ids:
|
||||||
|
sources.append(("file_ids", candidate_file_ids))
|
||||||
|
|
||||||
|
for kind, items in sources:
|
||||||
if len(out) >= fetch_limit:
|
if len(out) >= fetch_limit:
|
||||||
break
|
break
|
||||||
chunk = candidate_file_ids[start:start + chunk_size]
|
_process_source(items, kind)
|
||||||
try:
|
|
||||||
payload = client.fetch_file_metadata(
|
|
||||||
file_ids=chunk,
|
|
||||||
include_file_url=True,
|
|
||||||
include_service_keys_to_tags=True,
|
|
||||||
include_duration=True,
|
|
||||||
include_size=True,
|
|
||||||
include_mime=True,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
|
|
||||||
metas = payload.get("metadata",
|
|
||||||
[]) if isinstance(payload,
|
|
||||||
dict) else []
|
|
||||||
if not isinstance(metas, list):
|
|
||||||
continue
|
|
||||||
|
|
||||||
for meta in metas:
|
|
||||||
if not isinstance(meta, dict):
|
|
||||||
continue
|
|
||||||
urls = _extract_urls(meta)
|
|
||||||
if not urls:
|
|
||||||
continue
|
|
||||||
if want_any:
|
|
||||||
out.append(meta)
|
|
||||||
if len(out) >= fetch_limit:
|
|
||||||
break
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not needle:
|
|
||||||
continue
|
|
||||||
if any(needle in u.lower() for u in urls):
|
|
||||||
out.append(meta)
|
|
||||||
if len(out) >= fetch_limit:
|
|
||||||
break
|
|
||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
@@ -618,6 +698,7 @@ class HydrusNetwork(Store):
|
|||||||
|
|
||||||
# Special case: url:* and url:<value>
|
# Special case: url:* and url:<value>
|
||||||
metadata_list: list[dict[str, Any]] | None = None
|
metadata_list: list[dict[str, Any]] | None = None
|
||||||
|
pattern_hint = str(kwargs.get("pattern_hint") or "").strip().lower()
|
||||||
if ":" in query_lower and not query_lower.startswith(":"):
|
if ":" in query_lower and not query_lower.startswith(":"):
|
||||||
namespace, pattern = query_lower.split(":", 1)
|
namespace, pattern = query_lower.split(":", 1)
|
||||||
namespace = namespace.strip().lower()
|
namespace = namespace.strip().lower()
|
||||||
@@ -630,6 +711,12 @@ class HydrusNetwork(Store):
|
|||||||
fetch_limit=int(limit) if limit else 100
|
fetch_limit=int(limit) if limit else 100
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
def _clean_url_search_token(value: str | None) -> str:
|
||||||
|
token = str(value or "").strip().lower()
|
||||||
|
if not token:
|
||||||
|
return ""
|
||||||
|
return token.replace("*", "").replace("?", "")
|
||||||
|
|
||||||
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
||||||
try:
|
try:
|
||||||
if pattern.startswith("http://") or pattern.startswith(
|
if pattern.startswith("http://") or pattern.startswith(
|
||||||
@@ -706,10 +793,20 @@ class HydrusNetwork(Store):
|
|||||||
|
|
||||||
# Fallback: substring scan
|
# Fallback: substring scan
|
||||||
if metadata_list is None:
|
if metadata_list is None:
|
||||||
|
search_token = _clean_url_search_token(pattern_hint or pattern)
|
||||||
|
scan_limit_override: int | None = None
|
||||||
|
if search_token:
|
||||||
|
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
||||||
|
if is_domain_only:
|
||||||
|
try:
|
||||||
|
scan_limit_override = max(int(limit or 100) * 20, 2000)
|
||||||
|
except Exception:
|
||||||
|
scan_limit_override = 2000
|
||||||
metadata_list = _iter_url_filtered_metadata(
|
metadata_list = _iter_url_filtered_metadata(
|
||||||
pattern,
|
search_token,
|
||||||
want_any=False,
|
want_any=False,
|
||||||
fetch_limit=int(limit) if limit else 100
|
fetch_limit=int(limit) if limit else 100,
|
||||||
|
scan_limit=scan_limit_override,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse the query into tags
|
# Parse the query into tags
|
||||||
@@ -742,26 +839,6 @@ class HydrusNetwork(Store):
|
|||||||
# Search files with the tags (unless url: search already produced metadata)
|
# Search files with the tags (unless url: search already produced metadata)
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
|
|
||||||
if not isinstance(payload, dict):
|
|
||||||
return [], []
|
|
||||||
raw_ids = payload.get("file_ids", [])
|
|
||||||
raw_hashes = payload.get("hashes", [])
|
|
||||||
ids_out: list[int] = []
|
|
||||||
hashes_out: list[str] = []
|
|
||||||
if isinstance(raw_ids, list):
|
|
||||||
for item in raw_ids:
|
|
||||||
try:
|
|
||||||
ids_out.append(int(item))
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
continue
|
|
||||||
if isinstance(raw_hashes, list):
|
|
||||||
hashes_out = [
|
|
||||||
str(h).strip() for h in raw_hashes
|
|
||||||
if isinstance(h, str) and str(h).strip()
|
|
||||||
]
|
|
||||||
return ids_out, hashes_out
|
|
||||||
|
|
||||||
if metadata_list is None:
|
if metadata_list is None:
|
||||||
file_ids: list[int] = []
|
file_ids: list[int] = []
|
||||||
hashes: list[str] = []
|
hashes: list[str] = []
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from queue import SimpleQueue
|
||||||
|
from threading import Thread
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, Dict, List, Sequence, Optional, Set, Tuple
|
from typing import Any, Dict, List, Sequence, Optional, Set, Tuple
|
||||||
import sys
|
import sys
|
||||||
@@ -34,6 +36,8 @@ class UrlItem:
|
|||||||
class Get_Url(Cmdlet):
|
class Get_Url(Cmdlet):
|
||||||
"""Get url associated with files via hash+store, or search urls by pattern."""
|
"""Get url associated with files via hash+store, or search urls by pattern."""
|
||||||
|
|
||||||
|
STORE_SEARCH_TIMEOUT_SECONDS = 6.0
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
super().__init__(
|
super().__init__(
|
||||||
name="get-url",
|
name="get-url",
|
||||||
@@ -81,8 +85,56 @@ class Get_Url(Cmdlet):
|
|||||||
normalized_url = Get_Url._normalize_url_for_search(url)
|
normalized_url = Get_Url._normalize_url_for_search(url)
|
||||||
normalized_pattern = Get_Url._normalize_url_for_search(pattern)
|
normalized_pattern = Get_Url._normalize_url_for_search(pattern)
|
||||||
|
|
||||||
# Use fnmatch for wildcard matching (* and ?)
|
has_wildcards = any(ch in normalized_pattern for ch in ("*", "?"))
|
||||||
return fnmatch(normalized_url, normalized_pattern)
|
if has_wildcards:
|
||||||
|
return fnmatch(normalized_url, normalized_pattern)
|
||||||
|
|
||||||
|
normalized_url_no_slash = normalized_url.rstrip("/")
|
||||||
|
normalized_pattern_no_slash = normalized_pattern.rstrip("/")
|
||||||
|
if normalized_pattern_no_slash and normalized_pattern_no_slash == normalized_url_no_slash:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return normalized_pattern in normalized_url
|
||||||
|
|
||||||
|
def _execute_search_with_timeout(
|
||||||
|
self,
|
||||||
|
backend: Any,
|
||||||
|
query: str,
|
||||||
|
limit: int,
|
||||||
|
store_name: str,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> Optional[List[Any]]:
|
||||||
|
queue: SimpleQueue[tuple[str, Any]] = SimpleQueue()
|
||||||
|
|
||||||
|
def _worker() -> None:
|
||||||
|
try:
|
||||||
|
queue.put(("ok", backend.search(query, limit=limit, **kwargs)))
|
||||||
|
except Exception as exc:
|
||||||
|
queue.put(("err", exc))
|
||||||
|
|
||||||
|
worker = Thread(target=_worker, daemon=True)
|
||||||
|
worker.start()
|
||||||
|
worker.join(timeout=self.STORE_SEARCH_TIMEOUT_SECONDS)
|
||||||
|
|
||||||
|
if worker.is_alive():
|
||||||
|
debug(
|
||||||
|
f"Store '{store_name}' search timed out after {self.STORE_SEARCH_TIMEOUT_SECONDS}s",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if queue.empty():
|
||||||
|
return []
|
||||||
|
|
||||||
|
status, payload = queue.get()
|
||||||
|
if status == "err":
|
||||||
|
debug(
|
||||||
|
f"Store '{store_name}' search failed: {payload}",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
return payload or []
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_first_url(value: Any) -> Optional[str]:
|
def _extract_first_url(value: Any) -> Optional[str]:
|
||||||
@@ -95,6 +147,35 @@ class Get_Url(Cmdlet):
|
|||||||
return item.strip()
|
return item.strip()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls_from_hit(hit: Any) -> List[str]:
|
||||||
|
"""Extract candidate URLs directly from a search hit, if present."""
|
||||||
|
raw = None
|
||||||
|
try:
|
||||||
|
raw = get_field(hit, "known_urls")
|
||||||
|
if not raw:
|
||||||
|
raw = get_field(hit, "urls")
|
||||||
|
if not raw:
|
||||||
|
raw = get_field(hit, "url")
|
||||||
|
if not raw:
|
||||||
|
raw = get_field(hit, "source_url") or get_field(hit, "source_urls")
|
||||||
|
except Exception:
|
||||||
|
raw = None
|
||||||
|
|
||||||
|
if isinstance(raw, str):
|
||||||
|
val = raw.strip()
|
||||||
|
return [val] if val else []
|
||||||
|
if isinstance(raw, (list, tuple)):
|
||||||
|
out: list[str] = []
|
||||||
|
for item in raw:
|
||||||
|
if not isinstance(item, str):
|
||||||
|
continue
|
||||||
|
v = item.strip()
|
||||||
|
if v:
|
||||||
|
out.append(v)
|
||||||
|
return out
|
||||||
|
return []
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_title_from_result(result: Any) -> Optional[str]:
|
def _extract_title_from_result(result: Any) -> Optional[str]:
|
||||||
# Prefer explicit title field.
|
# Prefer explicit title field.
|
||||||
@@ -219,6 +300,7 @@ class Get_Url(Cmdlet):
|
|||||||
"""
|
"""
|
||||||
items: List[UrlItem] = []
|
items: List[UrlItem] = []
|
||||||
found_stores: Set[str] = set()
|
found_stores: Set[str] = set()
|
||||||
|
MAX_RESULTS = 256
|
||||||
|
|
||||||
try:
|
try:
|
||||||
storage = Store(config)
|
storage = Store(config)
|
||||||
@@ -230,6 +312,8 @@ class Get_Url(Cmdlet):
|
|||||||
return items, list(found_stores)
|
return items, list(found_stores)
|
||||||
|
|
||||||
for store_name in store_names:
|
for store_name in store_names:
|
||||||
|
if len(items) >= MAX_RESULTS:
|
||||||
|
break
|
||||||
try:
|
try:
|
||||||
backend = storage[store_name]
|
backend = storage[store_name]
|
||||||
|
|
||||||
@@ -243,9 +327,12 @@ class Get_Url(Cmdlet):
|
|||||||
has_wildcards = any(ch in raw_pattern for ch in ("*", "?"))
|
has_wildcards = any(ch in raw_pattern for ch in ("*", "?"))
|
||||||
|
|
||||||
# If this is a Hydrus backend and the pattern is a single URL,
|
# If this is a Hydrus backend and the pattern is a single URL,
|
||||||
# normalize it through the official API.
|
# normalize it through the official API. Skip for bare domains.
|
||||||
normalized_url = None
|
normalized_url = None
|
||||||
if not has_wildcards and hasattr(backend, "get_url_info"):
|
looks_like_url = (
|
||||||
|
"://" in raw_pattern or raw_pattern.startswith("magnet:")
|
||||||
|
)
|
||||||
|
if not has_wildcards and looks_like_url and hasattr(backend, "get_url_info"):
|
||||||
try:
|
try:
|
||||||
info = backend.get_url_info(raw_pattern) # type: ignore[attr-defined]
|
info = backend.get_url_info(raw_pattern) # type: ignore[attr-defined]
|
||||||
if isinstance(info, dict):
|
if isinstance(info, dict):
|
||||||
@@ -255,13 +342,39 @@ class Get_Url(Cmdlet):
|
|||||||
except Exception:
|
except Exception:
|
||||||
normalized_url = None
|
normalized_url = None
|
||||||
|
|
||||||
search_query = "url:*" if has_wildcards else f"url:{normalized_url or raw_pattern}"
|
target_pattern = normalized_url or raw_pattern
|
||||||
try:
|
if has_wildcards or not target_pattern:
|
||||||
search_results = backend.search(search_query, limit=1000)
|
search_query = "url:*"
|
||||||
except Exception:
|
else:
|
||||||
search_results = []
|
wrapped_pattern = f"*{target_pattern}*"
|
||||||
|
search_query = f"url:{wrapped_pattern}"
|
||||||
|
search_limit = max(1, min(MAX_RESULTS, 1000))
|
||||||
|
search_results = self._execute_search_with_timeout(
|
||||||
|
backend,
|
||||||
|
search_query,
|
||||||
|
search_limit,
|
||||||
|
store_name,
|
||||||
|
pattern_hint=target_pattern,
|
||||||
|
)
|
||||||
|
if search_results is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
search_results = search_results or []
|
||||||
|
if not search_results and target_pattern and not has_wildcards:
|
||||||
|
fallback_results = self._execute_search_with_timeout(
|
||||||
|
backend,
|
||||||
|
"url:*",
|
||||||
|
search_limit,
|
||||||
|
store_name,
|
||||||
|
pattern_hint=target_pattern,
|
||||||
|
)
|
||||||
|
if fallback_results is None:
|
||||||
|
continue
|
||||||
|
search_results = fallback_results or []
|
||||||
|
|
||||||
for hit in (search_results or []):
|
for hit in (search_results or []):
|
||||||
|
if len(items) >= MAX_RESULTS:
|
||||||
|
break
|
||||||
file_hash = None
|
file_hash = None
|
||||||
if isinstance(hit, dict):
|
if isinstance(hit, dict):
|
||||||
file_hash = hit.get("hash") or hit.get("file_hash")
|
file_hash = hit.get("hash") or hit.get("file_hash")
|
||||||
@@ -271,25 +384,57 @@ class Get_Url(Cmdlet):
|
|||||||
file_hash = str(file_hash)
|
file_hash = str(file_hash)
|
||||||
|
|
||||||
title = title_cache.get(file_hash, "")
|
title = title_cache.get(file_hash, "")
|
||||||
|
if not title:
|
||||||
|
try:
|
||||||
|
title = (
|
||||||
|
get_field(hit, "title")
|
||||||
|
or get_field(hit, "name")
|
||||||
|
or get_field(hit, "file_title")
|
||||||
|
or ""
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
title = ""
|
||||||
if not title:
|
if not title:
|
||||||
title = self._resolve_title_for_hash(backend, file_hash, hit)
|
title = self._resolve_title_for_hash(backend, file_hash, hit)
|
||||||
title_cache[file_hash] = title
|
title_cache[file_hash] = title
|
||||||
|
|
||||||
size, ext = meta_cache.get(file_hash, (None, ""))
|
size, ext = meta_cache.get(file_hash, (None, ""))
|
||||||
if size is None and not ext:
|
if size is None and not ext:
|
||||||
size, ext = self._resolve_size_ext_for_hash(backend, file_hash, hit)
|
try:
|
||||||
meta_cache[file_hash] = (size, ext)
|
size = get_field(hit, "size")
|
||||||
|
if size is None:
|
||||||
|
size = get_field(hit, "size_bytes")
|
||||||
|
if size is None:
|
||||||
|
size = get_field(hit, "file_size")
|
||||||
|
if size is None:
|
||||||
|
size = get_field(hit, "filesize")
|
||||||
|
size = int(size) if isinstance(size, (int, float)) else None
|
||||||
|
except Exception:
|
||||||
|
size = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
urls = backend.get_url(file_hash)
|
ext = get_field(hit, "ext") or get_field(hit, "extension")
|
||||||
except Exception:
|
ext = str(ext).strip().lstrip(".") if isinstance(ext, str) else ""
|
||||||
urls = []
|
except Exception:
|
||||||
|
ext = ""
|
||||||
|
|
||||||
|
if size is None and not ext:
|
||||||
|
size, ext = self._resolve_size_ext_for_hash(backend, file_hash, hit)
|
||||||
|
meta_cache[file_hash] = (size, ext)
|
||||||
|
|
||||||
|
urls = self._extract_urls_from_hit(hit)
|
||||||
|
if not urls:
|
||||||
|
try:
|
||||||
|
urls = backend.get_url(file_hash)
|
||||||
|
except Exception:
|
||||||
|
urls = []
|
||||||
|
|
||||||
for url in (urls or []):
|
for url in (urls or []):
|
||||||
|
if len(items) >= MAX_RESULTS:
|
||||||
|
break
|
||||||
if not self._match_url_pattern(str(url), raw_pattern):
|
if not self._match_url_pattern(str(url), raw_pattern):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Double-check it looks like a URL to avoid data leakage from dirty DBs
|
|
||||||
from SYS.metadata import normalize_urls
|
from SYS.metadata import normalize_urls
|
||||||
valid = normalize_urls([str(url)])
|
valid = normalize_urls([str(url)])
|
||||||
if not valid:
|
if not valid:
|
||||||
@@ -306,6 +451,8 @@ class Get_Url(Cmdlet):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
found_stores.add(str(store_name))
|
found_stores.add(str(store_name))
|
||||||
|
if len(items) >= MAX_RESULTS:
|
||||||
|
break
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
debug(
|
debug(
|
||||||
f"Error searching store '{store_name}': {exc}",
|
f"Error searching store '{store_name}': {exc}",
|
||||||
|
|||||||
Reference in New Issue
Block a user