nose
2025-12-01 01:10:16 -08:00
parent 2b93edac10
commit 6b9ed7d4ab
17 changed files with 1644 additions and 470 deletions


@@ -264,9 +264,12 @@ class LocalStorageBackend(StorageBackend):
             """, (file_id,))
             all_tags = [row[0] for row in cursor.fetchall()]
+            # Use title tag if present
+            title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
+
             results.append({
                 "name": file_path.stem,
-                "title": file_path.stem,
+                "title": title_tag or file_path.stem,
                 "ext": file_path.suffix.lstrip('.'),
                 "path": path_str,
                 "target": path_str,
@@ -364,9 +367,12 @@ class LocalStorageBackend(StorageBackend):
             """, (file_id,))
             tags = [row[0] for row in cursor.fetchall()]
+            # Use title tag if present
+            title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
+
             results.append({
                 "name": file_path.stem,
-                "title": file_path.stem,
+                "title": title_tag or file_path.stem,
                 "ext": file_path.suffix.lstrip('.'),
                 "path": path_str,
                 "target": path_str,
@@ -410,9 +416,12 @@ class LocalStorageBackend(StorageBackend):
             """, (file_id,))
             tags = [row[0] for row in cursor.fetchall()]
+            # Use title tag if present
+            title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
+
             results.append({
                 "name": file_path.stem,
-                "title": file_path.stem,
+                "title": title_tag or file_path.stem,
                 "ext": file_path.suffix.lstrip('.'),
                 "path": path_str,
                 "target": path_str,
@@ -449,9 +458,12 @@ class LocalStorageBackend(StorageBackend):
             """, (file_id,))
             tags = [row[0] for row in cursor.fetchall()]
+            # Use title tag if present
+            title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
+
             results.append({
                 "name": file_path.stem,
-                "title": file_path.stem,
+                "title": title_tag or file_path.stem,
                 "ext": file_path.suffix.lstrip('.'),
                 "path": path_str,
                 "target": path_str,


@@ -497,6 +497,10 @@ class LocalLibraryDB:
             cursor = self.connection.cursor()
+
+            # Update file hash in files table if present
+            if metadata.get('hash'):
+                cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id))
             known_urls = metadata.get('known_urls', [])
             if not isinstance(known_urls, str):
                 known_urls = json.dumps(known_urls)
@@ -534,6 +538,72 @@ class LocalLibraryDB:
         except Exception as e:
             logger.error(f"[save_metadata] ❌ Error saving metadata for {file_path}: {e}", exc_info=True)
             raise
+
+    def save_file_info(self, file_path: Path, metadata: Dict[str, Any], tags: List[str]) -> None:
+        """Save metadata and tags for a file in a single transaction."""
+        try:
+            str_path = str(file_path.resolve())
+            logger.debug(f"[save_file_info] Starting save for: {str_path}")
+            file_id = self.get_or_create_file_entry(file_path)
+            cursor = self.connection.cursor()
+
+            # Update file hash in files table if present
+            if metadata.get('hash'):
+                cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id))
+
+            # 1. Save Metadata
+            known_urls = metadata.get('known_urls', [])
+            if not isinstance(known_urls, str):
+                known_urls = json.dumps(known_urls)
+            relationships = metadata.get('relationships', [])
+            if not isinstance(relationships, str):
+                relationships = json.dumps(relationships)
+
+            cursor.execute("""
+                INSERT INTO metadata (
+                    file_id, hash, known_urls, relationships,
+                    duration, size, ext, media_type, media_kind,
+                    time_imported, time_modified
+                )
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
+                ON CONFLICT(file_id) DO UPDATE SET
+                    hash = excluded.hash,
+                    known_urls = excluded.known_urls,
+                    relationships = excluded.relationships,
+                    duration = excluded.duration,
+                    size = excluded.size,
+                    ext = excluded.ext,
+                    media_type = excluded.media_type,
+                    media_kind = excluded.media_kind,
+                    time_modified = CURRENT_TIMESTAMP,
+                    updated_at = CURRENT_TIMESTAMP
+            """, (
+                file_id, metadata.get('hash'), known_urls, relationships,
+                metadata.get('duration'), metadata.get('size'), metadata.get('ext'),
+                metadata.get('media_type'), metadata.get('media_kind')
+            ))
+
+            # 2. Save Tags
+            # We assume tags list is complete and includes title if needed
+            cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,))
+            for tag in tags:
+                tag = tag.strip()
+                if tag:
+                    cursor.execute("""
+                        INSERT OR IGNORE INTO tags (file_id, tag, tag_type)
+                        VALUES (?, ?, 'user')
+                    """, (file_id, tag))
+
+            self.connection.commit()
+            logger.debug(f"[save_file_info] ✅ Committed metadata and tags for file_id {file_id}")
+        except Exception as e:
+            logger.error(f"[save_file_info] ❌ Error saving file info for {file_path}: {e}", exc_info=True)
+            raise

     def get_tags(self, file_path: Path) -> List[str]:
         """Get all tags for a file."""
@@ -572,12 +642,15 @@ class LocalLibraryDB:
             cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,))
             logger.debug(f"[save_tags] Deleted existing tags for file_id {file_id}")
-            if existing_title:
+            # Check if new tags provide a title
+            new_title_provided = any(str(t).strip().lower().startswith("title:") for t in tags)
+
+            if existing_title and not new_title_provided:
                 cursor.execute("""
                     INSERT INTO tags (file_id, tag, tag_type) VALUES (?, ?, 'user')
                 """, (file_id, existing_title[0]))
                 logger.debug(f"[save_tags] Preserved existing title tag")
-            else:
+            elif not existing_title and not new_title_provided:
                 filename_without_ext = file_path.stem
                 if filename_without_ext:
                     # Normalize underscores to spaces for consistency
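
Review note: the save_tags change above now skips both title branches whenever the incoming tags already include a "title:" tag, so a caller-supplied title wins. Separately, save_file_info relies on INSERT ... ON CONFLICT(file_id) DO UPDATE, which only fires when metadata.file_id carries a UNIQUE constraint or is the primary key. A minimal standalone demonstration of the upsert semantics (toy schema assumed for illustration; the real metadata table has more columns):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE metadata (file_id INTEGER UNIQUE, hash TEXT)")
    for h in ("aaa", "bbb"):  # the second pass updates in place instead of raising
        conn.execute("""
            INSERT INTO metadata (file_id, hash) VALUES (?, ?)
            ON CONFLICT(file_id) DO UPDATE SET hash = excluded.hash
        """, (1, h))
    print(conn.execute("SELECT file_id, hash FROM metadata").fetchall())  # [(1, 'bbb')]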


@@ -28,9 +28,16 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 from dataclasses import dataclass
 from pathlib import Path
 import sys
+
+try:
+    from playwright.sync_api import sync_playwright
+    PLAYWRIGHT_AVAILABLE = True
+except ImportError:
+    PLAYWRIGHT_AVAILABLE = False
+
 import subprocess
 import json
 import shutil
 from helper.logger import log, debug
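
Review note: the try/except guard keeps this module importable when Playwright is missing, and search() below degrades to an error message. Worth noting that "pip install playwright" alone does not fetch a browser; the headless Chromium launched below also requires running "playwright install chromium" once.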
@@ -1580,8 +1587,293 @@ class YoutubeSearchProvider(SearchProvider):
         return shutil.which("yt-dlp") is not None


+class BandcampProvider(SearchProvider):
+    """
+    Search provider for Bandcamp using Playwright scraper.
+    """
+
+    RESULT_FIELDS = [
+        ("name", "Name", None),
+        ("artist", "Artist/Loc", None),
+        ("type", "Type", None)
+    ]
+
+    def search(
+        self,
+        query: str,
+        limit: int = 50,
+        filters: Optional[Dict[str, Any]] = None,
+        **kwargs
+    ) -> List[SearchResult]:
+        if not PLAYWRIGHT_AVAILABLE:
+            print("Playwright library not available. Please install it (pip install playwright).")
+            return []
+
+        results = []
+        try:
+            with sync_playwright() as p:
+                # Launch browser (headless)
+                browser = p.chromium.launch(headless=True)
+                page = browser.new_page()
+
+                # Check if query is a URL (Artist/Album Scraping Mode)
+                if query.startswith("http://") or query.startswith("https://"):
+                    return self._scrape_url(page, query, limit)
+
+                # Search Mode
+                # Parse query for prefixes
+                search_type = "t"  # Default to track
+                clean_query = query
+
+                if "artist:" in query.lower():
+                    search_type = "b"
+                    clean_query = query.lower().replace("artist:", "").strip()
+                elif "album:" in query.lower():
+                    search_type = "a"
+                    clean_query = query.lower().replace("album:", "").strip()
+                elif "track:" in query.lower():
+                    search_type = "t"
+                    clean_query = query.lower().replace("track:", "").strip()
+                elif "label:" in query.lower():
+                    search_type = "b"
+                    clean_query = query.lower().replace("label:", "").strip()
+
+                # Filters override prefix
+                if filters:
+                    ftype = filters.get("type", "").lower()
+                    if ftype in ["album", "albums"]:
+                        search_type = "a"
+                    elif ftype in ["artist", "artists", "label", "labels"]:
+                        search_type = "b"
+                    elif ftype in ["track", "tracks"]:
+                        search_type = "t"
+
+                # Construct URL with item_type
+                url = f"https://bandcamp.com/search?q={clean_query}&item_type={search_type}"
+                debug(f"[Bandcamp] Navigating to search URL: {url}")
+                page.goto(url)
+                page.wait_for_load_state("domcontentloaded")
+
+                # Wait for results
+                try:
+                    # Wait for the search results to appear in the DOM
+                    page.wait_for_selector(".searchresult", timeout=10000)
+                except Exception as e:
+                    # No results found or timeout
+                    log(f"Bandcamp search timeout or no results: {e}")
+                    browser.close()
+                    return []
+
+                # Extract items
+                items = page.query_selector_all(".searchresult")
+                debug(f"[Bandcamp] Found {len(items)} results")
+
+                for item in items:
+                    if len(results) >= limit:
+                        break
+                    try:
+                        # Extract data
+                        heading_el = item.query_selector(".heading a")
+                        if not heading_el:
+                            debug("[Bandcamp] Skipping item: No heading found")
+                            continue
+
+                        name = heading_el.inner_text().strip()
+                        item_url = heading_el.get_attribute("href")
+
+                        # Clean URL (remove query params)
+                        if item_url and "?" in item_url:
+                            item_url = item_url.split("?")[0]
+
+                        item_type_el = item.query_selector(".itemtype")
+                        item_type = item_type_el.inner_text().strip() if item_type_el else "Unknown"
+
+                        subhead_el = item.query_selector(".subhead")
+                        subhead = subhead_el.inner_text().strip() if subhead_el else ""
+
+                        art_el = item.query_selector(".art img")
+                        img = art_el.get_attribute("src") if art_el else None
+
+                        # Map to metadata
+                        metadata = {
+                            "name": name,
+                            "type": item_type,
+                            "url": item_url,
+                            "img": img,
+                            "subhead": subhead
+                        }
+
+                        # Refine metadata based on type
+                        artist_or_loc = subhead
+                        if "ALBUM" in item_type.upper():
+                            artist_or_loc = subhead.replace("by ", "").strip()
+                            metadata["artist"] = artist_or_loc
+                        elif "ARTIST" in item_type.upper() or "LABEL" in item_type.upper():
+                            metadata["location"] = subhead
+                        elif "TRACK" in item_type.upper():
+                            artist_or_loc = subhead.replace("by ", "").strip()
+                            metadata["artist"] = artist_or_loc
+
+                        columns = [
+                            ("Name", name),
+                            ("Artist/Loc", artist_or_loc),
+                            ("Type", item_type)
+                        ]
+
+                        results.append(SearchResult(
+                            origin="bandcamp",
+                            title=name,
+                            target=item_url,
+                            full_metadata=metadata,
+                            columns=columns
+                        ))
+                    except Exception as e:
+                        # Skip malformed items
+                        debug(f"[Bandcamp] Error parsing item: {e}")
+                        continue
+
+                browser.close()
+        except Exception as e:
+            log(f"Bandcamp search error: {e}")
+            return []
+
+        return results
+    def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]:
+        """Scrape a Bandcamp artist or album page."""
+        debug(f"[Bandcamp] Scraping URL: {url}")
+
+        # If it's an artist page, try to go to /music to see all
+        if ".bandcamp.com" in url and "/music" not in url and "/album/" not in url and "/track/" not in url:
+            # Check if it's likely an artist root
+            url = url.rstrip("/") + "/music"
+            debug(f"[Bandcamp] Adjusted to music page: {url}")
+
+        page.goto(url)
+        page.wait_for_load_state("domcontentloaded")
+
+        results = []
+
+        # Check for grid items (Artist page /music)
+        grid_items = page.query_selector_all(".music-grid-item")
+        if grid_items:
+            debug(f"[Bandcamp] Found {len(grid_items)} grid items")
+
+            # Try to get global artist name from page metadata/header as fallback
+            page_artist = ""
+            try:
+                og_site_name = page.query_selector('meta[property="og:site_name"]')
+                if og_site_name:
+                    page_artist = og_site_name.get_attribute("content") or ""
+                if not page_artist:
+                    band_name = page.query_selector('#band-name-location .title')
+                    if band_name:
+                        page_artist = band_name.inner_text().strip()
+            except Exception:
+                pass
+
+            for item in grid_items:
+                if len(results) >= limit:
+                    break
+                try:
+                    title_el = item.query_selector(".title")
+                    # Sanitize title to remove newlines which break the table
+                    title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown"
+                    # Remove extra spaces
+                    title = " ".join(title.split())
+
+                    link_el = item.query_selector("a")
+                    href = link_el.get_attribute("href") if link_el else ""
+                    if href and not href.startswith("http"):
+                        # Relative link, construct full URL
+                        base = url.split("/music")[0]
+                        href = base + href
+
+                    artist_el = item.query_selector(".artist")
+                    artist = artist_el.inner_text().replace("by ", "").strip() if artist_el else ""
+                    # Use page artist if item artist is missing
+                    if not artist and page_artist:
+                        artist = page_artist
+                    # Sanitize artist
+                    artist = artist.replace("\n", " ").replace("\r", "")
+                    artist = " ".join(artist.split())
+
+                    columns = [
+                        ("Name", title),
+                        ("Artist", artist),
+                        ("Type", "Album/Track")
+                    ]
+
+                    results.append(SearchResult(
+                        origin="bandcamp",
+                        title=title,
+                        target=href,
+                        full_metadata={"artist": artist},
+                        columns=columns
+                    ))
+                except Exception as e:
+                    debug(f"[Bandcamp] Error parsing grid item: {e}")
+                    continue
+
+            return results
+
+        # Check for track list (Album page)
+        track_rows = page.query_selector_all(".track_row_view")
+        if track_rows:
+            debug(f"[Bandcamp] Found {len(track_rows)} track rows")
+
+            # Get Album Artist
+            artist_el = page.query_selector("#name-section h3 span a")
+            album_artist = artist_el.inner_text().strip() if artist_el else "Unknown"
+
+            for row in track_rows:
+                if len(results) >= limit:
+                    break
+                try:
+                    title_el = row.query_selector(".track-title")
+                    # Sanitize title
+                    title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown"
+                    title = " ".join(title.split())
+
+                    # Track link
+                    link_el = row.query_selector(".title a")
+                    href = link_el.get_attribute("href") if link_el else ""
+                    if href and not href.startswith("http"):
+                        base = url.split(".com")[0] + ".com"
+                        href = base + href
+
+                    duration_el = row.query_selector(".time")
+                    duration = duration_el.inner_text().strip() if duration_el else ""
+
+                    columns = [
+                        ("Name", title),
+                        ("Artist", album_artist),
+                        ("Duration", duration)
+                    ]
+
+                    results.append(SearchResult(
+                        origin="bandcamp",
+                        title=title,
+                        target=href,
+                        full_metadata={"artist": album_artist, "duration": duration},
+                        columns=columns
+                    ))
+                except Exception as e:
+                    debug(f"[Bandcamp] Error parsing track row: {e}")
+                    continue
+
+            return results
+
+        debug("[Bandcamp] No recognizable items found on page")
+        return []
+
+    def get_result_args(self) -> List[str]:
+        return ["-url"]
+
+
 # Provider registry
 _PROVIDERS = {
+    "bandcamp": BandcampProvider,
     "local": LocalStorageProvider,
     "libgen": LibGenProvider,
     "soulseek": SoulSeekProvider,