This commit is contained in:
nose
2025-11-27 10:59:01 -08:00
parent e9b505e609
commit 9eff65d1af
30 changed files with 2099 additions and 1095 deletions


@@ -50,6 +50,10 @@ class StorageBackend(ABC):
Exception: If upload fails
"""
+@abstractmethod
+def get_name(self) -> str:
+"""Get the unique name of this backend."""
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search for files in backends that support it.
@@ -125,7 +129,7 @@ class LocalStorageBackend(StorageBackend):
try:
# Compute file hash
file_hash = sha256_file(file_path)
log(f"File hash: {file_hash}", file=sys.stderr)
debug(f"File hash: {file_hash}", file=sys.stderr)
dest_dir = Path(location).expanduser()
dest_dir.mkdir(parents=True, exist_ok=True)
@@ -148,13 +152,13 @@ class LocalStorageBackend(StorageBackend):
if move_file:
shutil.move(str(file_path), dest_file)
log(f"Local move: {dest_file}", file=sys.stderr)
debug(f"Local move: {dest_file}", file=sys.stderr)
else:
shutil.copy2(file_path, dest_file)
log(f"Local copy: {dest_file}", file=sys.stderr)
debug(f"Local copy: {dest_file}", file=sys.stderr)
return str(dest_file)
except Exception as exc:
log(f"Local copy failed: {exc}", file=sys.stderr)
debug(f"Local copy failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
@@ -200,7 +204,6 @@ class LocalStorageBackend(StorageBackend):
# Try database search first (much faster than filesystem scan)
try:
debug(f"Connecting to local library DB at {search_dir}")
db = LocalLibraryDB(search_dir)
cursor = db.connection.cursor()
@@ -261,8 +264,9 @@ class LocalStorageBackend(StorageBackend):
all_tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -284,35 +288,60 @@ class LocalStorageBackend(StorageBackend):
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
-query_pattern = f"%{query_lower}%"
-debug(f"Performing filename/tag search: {query_pattern}")
+# Split query into terms for AND logic
+terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+if not terms:
+terms = [query_lower]
+debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
cursor.execute("""
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
-WHERE LOWER(f.file_path) LIKE ?
+WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, fetch_limit))
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
-# Compile regex for whole word matching
-try:
-word_regex = re.compile(r'\b' + re.escape(query_lower) + r'\b', re.IGNORECASE)
-except Exception:
-word_regex = None
+# Compile regex for whole word matching (only if single term, otherwise skip)
+word_regex = None
+if len(terms) == 1:
+term = terms[0]
+# Check if term contains wildcard characters
+has_wildcard = '*' in term or '?' in term
+if has_wildcard:
+# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
+try:
+from fnmatch import translate
+word_regex = re.compile(translate(term), re.IGNORECASE)
+except Exception:
+word_regex = None
+else:
+# Use word boundary for exact terms (backwards compatibility)
+try:
+word_regex = re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)
+except Exception:
+word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes in rows:
if not file_path_str or file_path_str in seen_files:
continue
-# Apply whole word filter on filename
+# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
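Reviewer note: a self-contained sketch of the single-term matching rules introduced above — terms containing * or ? go through fnmatch.translate, plain terms get a whole-word regex. The helper name and sample filenames are invented:

import re
from fnmatch import translate

def compile_word_filter(term: str) -> re.Pattern:
    # Mirrors the logic above: fnmatch semantics for wildcards, else whole-word.
    if '*' in term or '?' in term:
        return re.compile(translate(term), re.IGNORECASE)
    return re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE)

bool(compile_word_filter('sie*').search('SiebeliebenWohl.mp3'))  # True  (wildcard prefix)
bool(compile_word_filter('sie').search('SiebeliebenWohl.mp3'))   # False (no word boundary)
bool(compile_word_filter('wohl').search('Sieben Wohl.mp3'))      # True  (whole word)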
@@ -332,8 +361,9 @@ class LocalStorageBackend(StorageBackend):
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -343,6 +373,12 @@ class LocalStorageBackend(StorageBackend):
})
# Also search for simple tags (without namespace) containing the query
+# Only perform tag search if single term, or if we want to support multi-term tag search
+# For now, fallback to single pattern search for tags if multiple terms
+# (searching for a tag that contains "term1 term2" or "term1,term2")
+# This is less useful for AND logic across multiple tags, but consistent with previous behavior
+query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size
FROM files f
@@ -371,8 +407,9 @@ class LocalStorageBackend(StorageBackend):
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -409,8 +446,9 @@ class LocalStorageBackend(StorageBackend):
tags = [row[0] for row in cursor.fetchall()]
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -434,6 +472,11 @@ class LocalStorageBackend(StorageBackend):
recursive = kwargs.get("recursive", True)
pattern = "**/*" if recursive else "*"
+# Split query into terms for AND logic
+terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+if not terms:
+terms = [query_lower]
count = 0
for file_path in search_dir.glob(pattern):
if not file_path.is_file():
@@ -442,14 +485,26 @@ class LocalStorageBackend(StorageBackend):
if lower_name.endswith('.tags') or lower_name.endswith('.metadata') \
or lower_name.endswith('.notes') or lower_name.endswith('.tags.txt'):
continue
-if not (match_all or query_lower in lower_name):
-continue
+if not match_all:
+# Check if ALL terms are present in the filename
+# For single terms with wildcards, use fnmatch; otherwise use substring matching
+if len(terms) == 1 and ('*' in terms[0] or '?' in terms[0]):
+# Wildcard pattern matching for single term
+from fnmatch import fnmatch
+if not fnmatch(lower_name, terms[0]):
+continue
+else:
+# Substring matching for all terms (AND logic)
+if not all(term in lower_name for term in terms):
+continue
size_bytes = file_path.stat().st_size
path_str = str(file_path)
results.append({
"name": file_path.name,
"title": file_path.name,
"name": file_path.stem,
"title": file_path.stem,
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
@@ -562,7 +617,7 @@ class HydrusStorageBackend(StorageBackend):
raise Exception(f"Hydrus response missing file hash: {response}")
file_hash = hydrus_hash
log(f"✅ File uploaded to Hydrus: {file_hash}", file=sys.stderr)
log(f"Hydrus: {file_hash}", file=sys.stderr)
# Add tags if provided
if tags:
@@ -654,7 +709,8 @@ class HydrusStorageBackend(StorageBackend):
# Fetch metadata for the found files
results = []
query_lower = query.lower().strip()
-search_terms = set(query_lower.split())  # For substring matching
+# Split by comma or space for AND logic
+search_terms = set(query_lower.replace(',', ' ').split())  # For substring matching
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
@@ -852,6 +908,11 @@ class DebridStorageBackend(StorageBackend):
# "*" means "match all" - include all magnets
match_all = query_lower == "*"
+# Split query into terms for AND logic
+terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
+if not terms:
+terms = [query_lower]
for magnet in magnets:
filename = magnet.get('filename', '').lower()
status_code = magnet.get('statusCode', 0)
@@ -862,8 +923,9 @@ class DebridStorageBackend(StorageBackend):
continue
# Match query against filename (or match all if query is "*")
-if not match_all and query_lower not in filename:
-continue
+if not match_all:
+if not all(term in filename for term in terms):
+continue
matching_magnet_ids.append(magnet_id)
magnet_info_map[magnet_id] = magnet
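Reviewer note: the debrid-side query splitting works the same way; a tiny worked example with fabricated magnet records:

magnets = [{"filename": "Big Buck Bunny 1080p"}, {"filename": "big buck teaser"}]
query = "big,bunny"   # commas and spaces both act as term separators
terms = [t.strip() for t in query.lower().replace(',', ' ').split() if t.strip()]
[m["filename"] for m in magnets
 if all(t in m["filename"].lower() for t in terms)]   # ['Big Buck Bunny 1080p']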
@@ -952,6 +1014,102 @@ class DebridStorageBackend(StorageBackend):
return result
+class MatrixStorageBackend(StorageBackend):
+"""File storage backend for Matrix (Element) chat rooms."""
+def get_name(self) -> str:
+return "matrix"
+def upload(self, file_path: Path, **kwargs: Any) -> str:
+"""Upload file to Matrix room.
+Requires 'config' in kwargs with 'storage.matrix' settings:
+- homeserver: URL of homeserver (e.g. https://matrix.org)
+- user_id: User ID (e.g. @user:matrix.org)
+- access_token: Access token (preferred) OR password
+- room_id: Room ID to upload to (e.g. !roomid:matrix.org)
+"""
+config = kwargs.get('config', {})
+if not config:
+raise ValueError("Config required for Matrix upload")
+matrix_conf = config.get('storage', {}).get('matrix', {})
+if not matrix_conf:
+raise ValueError("Matrix storage not configured in config.json")
+homeserver = matrix_conf.get('homeserver')
+# user_id = matrix_conf.get('user_id')  # Not strictly needed if we have token
+access_token = matrix_conf.get('access_token')
+room_id = matrix_conf.get('room_id')
+if not homeserver or not room_id:
+raise ValueError("Matrix homeserver and room_id required")
+# Ensure homeserver has protocol
+if not homeserver.startswith('http'):
+homeserver = f"https://{homeserver}"
+# Login if no access token (optional implementation, for now assume token)
+if not access_token:
+raise ValueError("Matrix access_token required (login not yet implemented)")
+# 1. Upload Media
upload_url = f"{homeserver}/_matrix/media/r3/upload"
+headers = {
+"Authorization": f"Bearer {access_token}",
+"Content-Type": "application/octet-stream"  # Or guess mime type
+}
+import mimetypes
+mime_type, _ = mimetypes.guess_type(file_path)
+if mime_type:
+headers["Content-Type"] = mime_type
+filename = file_path.name
+try:
+with open(file_path, 'rb') as f:
+resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})
+if resp.status_code != 200:
+raise Exception(f"Matrix upload failed: {resp.text}")
+content_uri = resp.json().get('content_uri')
+if not content_uri:
+raise Exception("No content_uri returned from Matrix upload")
+# 2. Send Message
+send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message"
+# Determine msgtype
+msgtype = "m.file"
+if mime_type:
+if mime_type.startswith("image/"): msgtype = "m.image"
+elif mime_type.startswith("video/"): msgtype = "m.video"
+elif mime_type.startswith("audio/"): msgtype = "m.audio"
+payload = {
+"msgtype": msgtype,
+"body": filename,
+"url": content_uri,
+"info": {
+"mimetype": mime_type,
+"size": file_path.stat().st_size
+}
+}
+# Reuse only the auth header here; requests sets the JSON content type itself
+resp = requests.post(send_url, headers={"Authorization": f"Bearer {access_token}"}, json=payload)
+if resp.status_code != 200:
+raise Exception(f"Matrix send message failed: {resp.text}")
+event_id = resp.json().get('event_id')
+return f"matrix://{room_id}/{event_id}"
+except Exception as e:
+log(f"❌ Matrix upload error: {e}", file=sys.stderr)
+raise
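Reviewer note: a hedged usage sketch of the new backend. The config key layout matches what upload() reads above; the homeserver, token, and room values are placeholders, and the locator format comes from upload()'s return statement:

from pathlib import Path

config = {"storage": {"matrix": {
    "homeserver": "https://matrix.example.org",
    "access_token": "<access token>",
    "room_id": "!abc123:example.org",
}}}
backend = MatrixStorageBackend()
ref = backend.upload(Path("clip.mp4"), config=config)
# ref -> "matrix://!abc123:example.org/<event_id>"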
class FileStorage:
"""Unified file storage interface supporting multiple backend services.
@@ -997,6 +1155,9 @@ class FileStorage:
# Include Debrid backend (API key optional - will raise on use if not provided)
if debrid_api_key:
self._backends["debrid"] = DebridStorageBackend(api_key=debrid_api_key)
+# Include Matrix backend
+self._backends["matrix"] = MatrixStorageBackend()
def __getitem__(self, backend_name: str) -> StorageBackend:
"""Get a storage backend by name.