This commit is contained in:
nose
2025-11-27 10:59:01 -08:00
parent e9b505e609
commit 9eff65d1af
30 changed files with 2099 additions and 1095 deletions


@@ -50,6 +50,10 @@ class StorageBackend(ABC):
Exception: If upload fails
"""
+@abstractmethod
+def get_name(self) -> str:
+"""Get the unique name of this backend."""
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search for files in backends that support it.
@@ -125,7 +129,7 @@ class LocalStorageBackend(StorageBackend):
try:
# Compute file hash
file_hash = sha256_file(file_path)
log(f"File hash: {file_hash}", file=sys.stderr)
debug(f"File hash: {file_hash}", file=sys.stderr)
dest_dir = Path(location).expanduser()
dest_dir.mkdir(parents=True, exist_ok=True)
@@ -148,13 +152,13 @@ class LocalStorageBackend(StorageBackend):
if move_file:
shutil.move(str(file_path), dest_file)
log(f"Local move: {dest_file}", file=sys.stderr)
debug(f"Local move: {dest_file}", file=sys.stderr)
else:
shutil.copy2(file_path, dest_file)
log(f"Local copy: {dest_file}", file=sys.stderr)
debug(f"Local copy: {dest_file}", file=sys.stderr)
return str(dest_file)
except Exception as exc:
log(f"Local copy failed: {exc}", file=sys.stderr)
debug(f"Local copy failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
@@ -200,7 +204,6 @@ class LocalStorageBackend(StorageBackend):
# Try database search first (much faster than filesystem scan)
try:
debug(f"Connecting to local library DB at {search_dir}")
db = LocalLibraryDB(search_dir)
cursor = db.connection.cursor()
@@ -261,8 +264,9 @@ class LocalStorageBackend(StorageBackend):
all_tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -284,35 +288,60 @@ class LocalStorageBackend(StorageBackend):
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
-query_pattern = f"%{query_lower}%"
-debug(f"Performing filename/tag search: {query_pattern}")
+# Split query into terms for AND logic
+terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+if not terms:
+terms = [query_lower]
+debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
cursor.execute("""
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
-WHERE LOWER(f.file_path) LIKE ?
+WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, fetch_limit))
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
-# Compile regex for whole word matching
-try:
-word_regex = re.compile(r'\b' + re.escape(query_lower) + r'\b', re.IGNORECASE)
-except Exception:
-word_regex = None
+# Compile regex for whole word matching (only if single term, otherwise skip)
+word_regex = None
+if len(terms) == 1:
+term = terms[0]
+# Check if term contains wildcard characters
+has_wildcard = '*' in term or '?' in term
+if has_wildcard:
+# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
+try:
+from fnmatch import translate
+word_regex = re.compile(translate(term), re.IGNORECASE)
+except Exception:
+word_regex = None
+else:
+# Use word boundary for exact terms (backwards compatibility)
+try:
+word_regex = re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)
+except Exception:
+word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files:
continue
-# Apply whole word filter on filename
+# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
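Reviewer note: a self-contained sketch of the single-term matching rules introduced above — terms containing * or ? go through fnmatch.translate, plain terms get a whole-word regex. The helper name and sample filenames are invented:

import re
from fnmatch import translate

def compile_word_filter(term: str) -> re.Pattern:
    # Mirrors the logic above: fnmatch semantics for wildcards, else whole-word.
    if '*' in term or '?' in term:
        return re.compile(translate(term), re.IGNORECASE)
    return re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)

bool(compile_word_filter('sie*').search('SiebeliebenWohl.mp3'))  # True  (wildcard prefix)
bool(compile_word_filter('sie').search('SiebeliebenWohl.mp3'))   # False (no word boundary)
bool(compile_word_filter('wohl').search('Sieben Wohl.mp3'))      # True  (whole word)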
@@ -332,8 +361,9 @@ class LocalStorageBackend(StorageBackend):
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -343,6 +373,12 @@ class LocalStorageBackend(StorageBackend):
})
# Also search for simple tags (without namespace) containing the query
+# Only perform tag search if single term, or if we want to support multi-term tag search
+# For now, fallback to single pattern search for tags if multiple terms
+# (searching for a tag that contains "term1 term2" or "term1,term2")
+# This is less useful for AND logic across multiple tags, but consistent with previous behavior
+query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
@@ -371,8 +407,9 @@ class LocalStorageBackend(StorageBackend):
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -409,8 +446,9 @@ class LocalStorageBackend(StorageBackend):
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -434,6 +472,11 @@ class LocalStorageBackend(StorageBackend):
recursive = kwargs.get("recursive", True)
pattern = "**/*" if recursive else "*"
+# Split query into terms for AND logic
+terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+if not terms:
+terms = [query_lower]
count = 0
for file_path in search_dir.glob(pattern):
if not file_path.is_file():
@@ -442,14 +485,26 @@ class LocalStorageBackend(StorageBackend):
if lower_name.endswith('.tags') or lower_name.endswith('.metadata') \
or lower_name.endswith('.notes') or lower_name.endswith('.tags.txt'):
continue
-if not (match_all or query_lower in lower_name):
-continue
+if not match_all:
+# Check if ALL terms are present in the filename
+# For single terms with wildcards, use fnmatch; otherwise use substring matching
+if len(terms) == 1 and ('*' in terms[0] or '?' in terms[0]):
+# Wildcard pattern matching for single term
+from fnmatch import fnmatch
+if not fnmatch(lower_name, terms[0]):
+continue
+else:
+# Substring matching for all terms (AND logic)
+if not all(term in lower_name for term in terms):
+continue
size_bytes = file_path.stat().st_size
path_str = str(file_path)
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -562,7 +617,7 @@ class HydrusStorageBackend(StorageBackend):
raise Exception(f"Hydrus response missing file hash: {response}")
file_hash = hydrus_hash
log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
log(f"Hydrus: {file_hash}", file=sys.stderr)
# Add tags if provided
if tags:
@@ -654,7 +709,8 @@ class HydrusStorageBackend(StorageBackend):
# Fetch metadata for the found files
results = []
query_lower = query.lower().strip()
-search_terms = set(query_lower.split())  # For substring matching
+# Split by comma or space for AND logic
+search_terms = set(query_lower.replace(',', ' ').split())  # For substring matching
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
@@ -852,6 +908,11 @@ class DebridStorageBackend(StorageBackend):
# "*" means "match all" - include all magnets
match_all = query_lower == "*"
+# Split query into terms for AND logic
+terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+if not terms:
+terms = [query_lower]
for magnet in magnets:
filename = magnet.get('filename', '').lower()
status_code = magnet.get('statusCode', 0)
@@ -862,8 +923,9 @@ class DebridStorageBackend(StorageBackend):
continue
# Match query against filename (or match all if query is "*")
-if not match_all and query_lower not in filename:
-continue
+if not match_all:
+if not all(term in filename for term in terms):
+continue
matching_magnet_ids.append(magnet_id)
magnet_info_map[magnet_id] = magnet
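Reviewer note: the debrid-side query splitting works the same way; a tiny worked example with fabricated magnet records:

magnets = [{"filename": "Big Buck Bunny 1080p"}, {"filename": "big buck teaser"}]
query = "big,bunny"   # commas and spaces both act as term separators
terms = [t.strip() for t in query.lower().replace(',', ' ').split() if t.strip()]
[m["filename"] for m in magnets
 if all(t in m["filename"].lower() for t in terms)]   # ['Big Buck Bunny 1080p']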
@@ -952,6 +1014,102 @@ class DebridStorageBackend(StorageBackend):
return result
+class MatrixStorageBackend(StorageBackend):
+"""File storage backend for Matrix (Element) chat rooms."""
+def get_name(self) -> str:
+return "matrix"
+def upload(self, file_path: Path, **kwargs: Any) -> str:
+"""Upload file to Matrix room.
+Requires 'config' in kwargs with 'storage.matrix' settings:
+- homeserver: URL of homeserver (e.g. https://matrix.org)
+- user_id: User ID (e.g. @user:matrix.org)
+- access_token: Access token (preferred) OR password
+- room_id: Room ID to upload to (e.g. !roomid:matrix.org)
+"""
+config = kwargs.get('config', {})
+if not config:
+raise ValueError("Config required for Matrix upload")
+matrix_conf = config.get('storage', {}).get('matrix', {})
+if not matrix_conf:
+raise ValueError("Matrix storage not configured in config.json")
+homeserver = matrix_conf.get('homeserver')
+# user_id = matrix_conf.get('user_id')  # Not strictly needed if we have token
+access_token = matrix_conf.get('access_token')
+room_id = matrix_conf.get('room_id')
+if not homeserver or not room_id:
+raise ValueError("Matrix homeserver and room_id required")
+# Ensure homeserver has protocol
+if not homeserver.startswith('http'):
+homeserver = f"https://{homeserver}"
+# Login if no access token (optional implementation, for now assume token)
+if not access_token:
+raise ValueError("Matrix access_token required (login not yet implemented)")
+# 1. Upload Media
upload_url = f"{homeserver}/_matrix/media/r3/upload"
+headers = {
+"Authorization": f"Bearer {access_token}",
+"Content-Type": "application/octet-stream"  # Or guess mime type
+}
+import mimetypes
+mime_type, _ = mimetypes.guess_type(file_path)
+if mime_type:
+headers["Content-Type"] = mime_type
+filename = file_path.name
+try:
+with open(file_path, 'rb') as f:
+resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})
+if resp.status_code != 200:
+raise Exception(f"Matrix upload failed: {resp.text}")
+content_uri = resp.json().get('content_uri')
+if not content_uri:
+raise Exception("No content_uri returned from Matrix upload")
+# 2. Send Message
+send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message"
+# Determine msgtype
+msgtype = "m.file"
+if mime_type:
+if mime_type.startswith("image/"): msgtype = "m.image"
+elif mime_type.startswith("video/"): msgtype = "m.video"
+elif mime_type.startswith("audio/"): msgtype = "m.audio"
+payload = {
+"msgtype": msgtype,
+"body": filename,
+"url": content_uri,
+"info": {
+"mimetype": mime_type,
+"size": file_path.stat().st_size
+}
+}
+# Reuse only the auth header here; requests sets the JSON content type itself
+resp = requests.post(send_url, headers={"Authorization": f"Bearer {access_token}"}, json=payload)
+if resp.status_code != 200:
+raise Exception(f"Matrix send message failed: {resp.text}")
+event_id = resp.json().get('event_id')
+return f"matrix://{room_id}/{event_id}"
+except Exception as e:
+log(f"❌ Matrix upload error: {e}", file=sys.stderr)
+raise
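Reviewer note: a hedged usage sketch of the new backend. The config key layout matches what upload() reads above; the homeserver, token, and room values are placeholders, and the locator format comes from upload()'s return statement:

from pathlib import Path

config = {"storage": {"matrix": {
    "homeserver": "https://matrix.example.org",
    "access_token": "<access token>",
    "room_id": "!abc123:example.org",
}}}
backend = MatrixStorageBackend()
ref = backend.upload(Path("clip.mp4"), config=config)
# ref -> "matrix://!abc123:example.org/<event_id>"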
class FileStorage:
"""Unified file storage interface supporting multiple backend services.
@@ -997,6 +1155,9 @@ class FileStorage:
# Include Debrid backend (API key optional - will raise on use if not provided)
if debrid_api_key:
self._backends["debrid"] = DebridStorageBackend(api_key=debrid_api_key)
+# Include Matrix backend
+self._backends["matrix"] = MatrixStorageBackend()
def __getitem__(self, backend_name: str) -> StorageBackend:
"""Get a storage backend by name.