sssssss
This commit is contained in:
@@ -264,9 +264,12 @@ class LocalStorageBackend(StorageBackend):
|
||||
""", (file_id,))
|
||||
all_tags = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
# Use title tag if present
|
||||
title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None)
|
||||
|
||||
results.append({
|
||||
"name": file_path.stem,
|
||||
"title": file_path.stem,
|
||||
"title": title_tag or file_path.stem,
|
||||
"ext": file_path.suffix.lstrip('.'),
|
||||
"path": path_str,
|
||||
"target": path_str,
|
||||
@@ -364,9 +367,12 @@ class LocalStorageBackend(StorageBackend):
|
||||
""", (file_id,))
|
||||
tags = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
# Use title tag if present
|
||||
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
|
||||
|
||||
results.append({
|
||||
"name": file_path.stem,
|
||||
"title": file_path.stem,
|
||||
"title": title_tag or file_path.stem,
|
||||
"ext": file_path.suffix.lstrip('.'),
|
||||
"path": path_str,
|
||||
"target": path_str,
|
||||
@@ -410,9 +416,12 @@ class LocalStorageBackend(StorageBackend):
|
||||
""", (file_id,))
|
||||
tags = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
# Use title tag if present
|
||||
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
|
||||
|
||||
results.append({
|
||||
"name": file_path.stem,
|
||||
"title": file_path.stem,
|
||||
"title": title_tag or file_path.stem,
|
||||
"ext": file_path.suffix.lstrip('.'),
|
||||
"path": path_str,
|
||||
"target": path_str,
|
||||
@@ -449,9 +458,12 @@ class LocalStorageBackend(StorageBackend):
|
||||
""", (file_id,))
|
||||
tags = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
# Use title tag if present
|
||||
title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None)
|
||||
|
||||
results.append({
|
||||
"name": file_path.stem,
|
||||
"title": file_path.stem,
|
||||
"title": title_tag or file_path.stem,
|
||||
"ext": file_path.suffix.lstrip('.'),
|
||||
"path": path_str,
|
||||
"target": path_str,
|
||||
|
||||
@@ -497,6 +497,10 @@ class LocalLibraryDB:
|
||||
|
||||
cursor = self.connection.cursor()
|
||||
|
||||
# Update file hash in files table if present
|
||||
if metadata.get('hash'):
|
||||
cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id))
|
||||
|
||||
known_urls = metadata.get('known_urls', [])
|
||||
if not isinstance(known_urls, str):
|
||||
known_urls = json.dumps(known_urls)
|
||||
@@ -534,6 +538,72 @@ class LocalLibraryDB:
|
||||
except Exception as e:
|
||||
logger.error(f"[save_metadata] ❌ Error saving metadata for {file_path}: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def save_file_info(self, file_path: Path, metadata: Dict[str, Any], tags: List[str]) -> None:
    """Save metadata and tags for a file in a single transaction.

    Upserts the ``metadata`` row keyed on ``file_id`` and replaces the
    file's entire tag set with *tags* (delete-then-insert, so removals
    take effect).

    Args:
        file_path: File whose database entry is created or updated.
        metadata: Keys read: 'hash', 'known_urls', 'relationships',
            'duration', 'size', 'ext', 'media_type', 'media_kind'.
            Missing keys are stored as NULL. List-valued 'known_urls' /
            'relationships' are JSON-encoded unless already strings.
        tags: Complete tag list for the file; callers are expected to
            include a ``title:`` tag if one is wanted. Blank entries are
            skipped.

    Raises:
        Exception: Re-raises any error after logging it and rolling back
            the pending transaction.
    """
    try:
        str_path = str(file_path.resolve())
        logger.debug(f"[save_file_info] Starting save for: {str_path}")

        file_id = self.get_or_create_file_entry(file_path)

        cursor = self.connection.cursor()

        # Keep the files table's hash column in sync when a hash is supplied.
        if metadata.get('hash'):
            cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id))

        # 1. Save metadata. JSON-encode list-valued fields unless the caller
        # already passed serialized strings.
        known_urls = metadata.get('known_urls', [])
        if not isinstance(known_urls, str):
            known_urls = json.dumps(known_urls)

        relationships = metadata.get('relationships', [])
        if not isinstance(relationships, str):
            relationships = json.dumps(relationships)

        cursor.execute("""
            INSERT INTO metadata (
                file_id, hash, known_urls, relationships,
                duration, size, ext, media_type, media_kind,
                time_imported, time_modified
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
            ON CONFLICT(file_id) DO UPDATE SET
                hash = excluded.hash,
                known_urls = excluded.known_urls,
                relationships = excluded.relationships,
                duration = excluded.duration,
                size = excluded.size,
                ext = excluded.ext,
                media_type = excluded.media_type,
                media_kind = excluded.media_kind,
                time_modified = CURRENT_TIMESTAMP,
                updated_at = CURRENT_TIMESTAMP
        """, (
            file_id, metadata.get('hash'), known_urls, relationships,
            metadata.get('duration'), metadata.get('size'), metadata.get('ext'),
            metadata.get('media_type'), metadata.get('media_kind')
        ))

        # 2. Save tags.
        # We assume tags list is complete and includes title if needed.
        cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,))

        for tag in tags:
            tag = tag.strip()
            if tag:
                cursor.execute("""
                    INSERT OR IGNORE INTO tags (file_id, tag, tag_type)
                    VALUES (?, ?, 'user')
                """, (file_id, tag))

        self.connection.commit()
        logger.debug(f"[save_file_info] ✅ Committed metadata and tags for file_id {file_id}")

    except Exception as e:
        # FIX: roll back the partial write. Without this, the half-applied
        # delete/insert stays pending on the shared connection and would be
        # silently persisted by the next commit — breaking the "single
        # transaction" contract stated above.
        try:
            self.connection.rollback()
        except Exception:
            pass  # rollback itself failing must not mask the original error
        logger.error(f"[save_file_info] ❌ Error saving file info for {file_path}: {e}", exc_info=True)
        raise
|
||||
|
||||
def get_tags(self, file_path: Path) -> List[str]:
|
||||
"""Get all tags for a file."""
|
||||
@@ -572,12 +642,15 @@ class LocalLibraryDB:
|
||||
cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,))
|
||||
logger.debug(f"[save_tags] Deleted existing tags for file_id {file_id}")
|
||||
|
||||
if existing_title:
|
||||
# Check if new tags provide a title
|
||||
new_title_provided = any(str(t).strip().lower().startswith("title:") for t in tags)
|
||||
|
||||
if existing_title and not new_title_provided:
|
||||
cursor.execute("""
|
||||
INSERT INTO tags (file_id, tag, tag_type) VALUES (?, ?, 'user')
|
||||
""", (file_id, existing_title[0]))
|
||||
logger.debug(f"[save_tags] Preserved existing title tag")
|
||||
else:
|
||||
elif not existing_title and not new_title_provided:
|
||||
filename_without_ext = file_path.stem
|
||||
if filename_without_ext:
|
||||
# Normalize underscores to spaces for consistency
|
||||
|
||||
@@ -28,9 +28,16 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
PLAYWRIGHT_AVAILABLE = True
|
||||
except ImportError:
|
||||
PLAYWRIGHT_AVAILABLE = False
|
||||
import subprocess
|
||||
import json
|
||||
import shutil
|
||||
from helper.logger import log, debug
|
||||
|
||||
|
||||
from helper.logger import log, debug
|
||||
@@ -1580,8 +1587,293 @@ class YoutubeSearchProvider(SearchProvider):
|
||||
return shutil.which("yt-dlp") is not None
|
||||
|
||||
|
||||
class BandcampProvider(SearchProvider):
    """
    Search provider for Bandcamp using Playwright scraper.

    Two modes:
      * free-text search, optionally prefixed with ``artist:`` / ``album:`` /
        ``track:`` / ``label:`` to pick Bandcamp's item_type;
      * direct scraping when the query is a Bandcamp URL (artist /music grid
        or album track list).
    """
    RESULT_FIELDS = [
        ("name", "Name", None),
        ("artist", "Artist/Loc", None),
        ("type", "Type", None)
    ]

    # Query prefix -> Bandcamp item_type: b = band (artist/label),
    # a = album, t = track. Order matters: first match wins, mirroring the
    # artist/album/track/label precedence of the original if/elif chain.
    _PREFIX_TYPES = {"artist:": "b", "album:": "a", "track:": "t", "label:": "b"}

    @classmethod
    def _resolve_search_type(cls, query: str, filters: Optional[Dict[str, Any]]) -> Tuple[str, str]:
        """Return ``(item_type, cleaned_query)`` for the search URL.

        Prefix matching is case-insensitive, but — unlike the previous
        implementation, which lowercased the whole query — the remaining
        search terms keep their original casing.
        """
        search_type = "t"  # default to track
        clean_query = query
        lowered = query.lower()

        for prefix, stype in cls._PREFIX_TYPES.items():
            idx = lowered.find(prefix)
            if idx != -1:
                search_type = stype
                # Strip the prefix occurrence, preserving the query's case.
                clean_query = (query[:idx] + query[idx + len(prefix):]).strip()
                break

        # Explicit filters override any prefix.
        if filters:
            ftype = str(filters.get("type", "")).lower()
            if ftype in ("album", "albums"):
                search_type = "a"
            elif ftype in ("artist", "artists", "label", "labels"):
                search_type = "b"
            elif ftype in ("track", "tracks"):
                search_type = "t"

        return search_type, clean_query

    def _parse_search_item(self, item) -> Optional["SearchResult"]:
        """Convert one ``.searchresult`` DOM node into a SearchResult.

        Returns None (and logs) for malformed items so the caller can skip
        them without aborting the whole page.
        """
        try:
            heading_el = item.query_selector(".heading a")
            if not heading_el:
                debug("[Bandcamp] Skipping item: No heading found")
                return None

            name = heading_el.inner_text().strip()
            item_url = heading_el.get_attribute("href")
            # Clean URL (remove query params / tracking junk)
            if item_url and "?" in item_url:
                item_url = item_url.split("?")[0]

            item_type_el = item.query_selector(".itemtype")
            item_type = item_type_el.inner_text().strip() if item_type_el else "Unknown"

            subhead_el = item.query_selector(".subhead")
            subhead = subhead_el.inner_text().strip() if subhead_el else ""

            art_el = item.query_selector(".art img")
            img = art_el.get_attribute("src") if art_el else None

            metadata = {
                "name": name,
                "type": item_type,
                "url": item_url,
                "img": img,
                "subhead": subhead
            }

            # Refine metadata based on type: albums/tracks carry "by <artist>"
            # in the subhead; artists/labels carry their location.
            upper = item_type.upper()
            artist_or_loc = subhead
            if "ALBUM" in upper or "TRACK" in upper:
                artist_or_loc = subhead.replace("by ", "").strip()
                metadata["artist"] = artist_or_loc
            elif "ARTIST" in upper or "LABEL" in upper:
                metadata["location"] = subhead

            columns = [
                ("Name", name),
                ("Artist/Loc", artist_or_loc),
                ("Type", item_type)
            ]

            return SearchResult(
                origin="bandcamp",
                title=name,
                target=item_url,
                full_metadata=metadata,
                columns=columns
            )
        except Exception as e:
            debug(f"[Bandcamp] Error parsing item: {e}")
            return None

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> List[SearchResult]:
        """Search Bandcamp, or scrape directly when *query* is a URL.

        Returns at most *limit* SearchResults; empty list on any failure.
        """
        if not PLAYWRIGHT_AVAILABLE:
            print("Playwright library not available. Please install it (pip install playwright).")
            return []

        # stdlib; imported locally to leave the module import block untouched
        from urllib.parse import quote_plus

        results: List[SearchResult] = []
        try:
            with sync_playwright() as p:
                # Launch browser (headless)
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()

                    # Check if query is a URL (Artist/Album Scraping Mode)
                    if query.startswith(("http://", "https://")):
                        return self._scrape_url(page, query, limit)

                    # Search Mode: parse query for prefixes / filters
                    search_type, clean_query = self._resolve_search_type(query, filters)

                    # FIX: URL-encode the query — raw '&', '#', '+' or spaces
                    # previously corrupted the request URL.
                    url = f"https://bandcamp.com/search?q={quote_plus(clean_query)}&item_type={search_type}"
                    debug(f"[Bandcamp] Navigating to search URL: {url}")
                    page.goto(url)
                    page.wait_for_load_state("domcontentloaded")

                    # Wait for the search results to appear in the DOM
                    try:
                        page.wait_for_selector(".searchresult", timeout=10000)
                    except Exception as e:
                        # No results found or timeout
                        log(f"Bandcamp search timeout or no results: {e}")
                        return []

                    items = page.query_selector_all(".searchresult")
                    debug(f"[Bandcamp] Found {len(items)} results")

                    for item in items:
                        if len(results) >= limit:
                            break
                        parsed = self._parse_search_item(item)
                        if parsed is not None:
                            results.append(parsed)
                finally:
                    # FIX: always release the browser — the original leaked it
                    # on the URL-scrape early return and the timeout path only
                    # closed it explicitly in one branch.
                    browser.close()

        except Exception as e:
            log(f"Bandcamp search error: {e}")
            return []

        return results

    def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]:
        """Scrape a Bandcamp artist or album page.

        Artist roots are redirected to their ``/music`` listing (full
        discography grid); album pages are read track-row by track-row.
        """
        debug(f"[Bandcamp] Scraping URL: {url}")

        # If it's an artist page, try to go to /music to see all releases
        if ".bandcamp.com" in url and "/music" not in url and "/album/" not in url and "/track/" not in url:
            url = url.rstrip("/") + "/music"
            debug(f"[Bandcamp] Adjusted to music page: {url}")

        page.goto(url)
        page.wait_for_load_state("domcontentloaded")

        results: List[SearchResult] = []

        # Case 1: artist /music page — releases rendered as a grid.
        grid_items = page.query_selector_all(".music-grid-item")
        if grid_items:
            debug(f"[Bandcamp] Found {len(grid_items)} grid items")

            # Try to get global artist name from page metadata/header as a
            # fallback for grid items that omit their own ".artist" element.
            page_artist = ""
            try:
                og_site_name = page.query_selector('meta[property="og:site_name"]')
                if og_site_name:
                    page_artist = og_site_name.get_attribute("content") or ""

                if not page_artist:
                    band_name = page.query_selector('#band-name-location .title')
                    if band_name:
                        page_artist = band_name.inner_text().strip()
            except Exception:
                pass  # best-effort only; items fall back to empty artist

            for item in grid_items:
                if len(results) >= limit:
                    break
                try:
                    title_el = item.query_selector(".title")
                    # Sanitize title: embedded newlines break the result table
                    title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown"
                    title = " ".join(title.split())

                    link_el = item.query_selector("a")
                    href = link_el.get_attribute("href") if link_el else ""
                    if href and not href.startswith("http"):
                        # Relative link: rebuild against the artist root
                        base = url.split("/music")[0]
                        href = base + href

                    artist_el = item.query_selector(".artist")
                    artist = artist_el.inner_text().replace("by ", "").strip() if artist_el else ""

                    # Use page-level artist if the item didn't carry one
                    if not artist and page_artist:
                        artist = page_artist

                    # Sanitize artist the same way as the title
                    artist = artist.replace("\n", " ").replace("\r", "")
                    artist = " ".join(artist.split())

                    columns = [
                        ("Name", title),
                        ("Artist", artist),
                        ("Type", "Album/Track")
                    ]

                    results.append(SearchResult(
                        origin="bandcamp",
                        title=title,
                        target=href,
                        full_metadata={"artist": artist},
                        columns=columns
                    ))
                except Exception as e:
                    debug(f"[Bandcamp] Error parsing grid item: {e}")
                    continue
            return results

        # Case 2: album page — individual track rows.
        track_rows = page.query_selector_all(".track_row_view")
        if track_rows:
            debug(f"[Bandcamp] Found {len(track_rows)} track rows")
            # Album-level artist applies to every track row
            artist_el = page.query_selector("#name-section h3 span a")
            album_artist = artist_el.inner_text().strip() if artist_el else "Unknown"

            for row in track_rows:
                if len(results) >= limit:
                    break
                try:
                    title_el = row.query_selector(".track-title")
                    # Sanitize title
                    title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown"
                    title = " ".join(title.split())

                    # Track link (relative on album pages)
                    link_el = row.query_selector(".title a")
                    href = link_el.get_attribute("href") if link_el else ""
                    if href and not href.startswith("http"):
                        base = url.split(".com")[0] + ".com"
                        href = base + href

                    duration_el = row.query_selector(".time")
                    duration = duration_el.inner_text().strip() if duration_el else ""

                    columns = [
                        ("Name", title),
                        ("Artist", album_artist),
                        ("Duration", duration)
                    ]

                    results.append(SearchResult(
                        origin="bandcamp",
                        title=title,
                        target=href,
                        full_metadata={"artist": album_artist, "duration": duration},
                        columns=columns
                    ))
                except Exception as e:
                    debug(f"[Bandcamp] Error parsing track row: {e}")
                    continue
            return results

        debug("[Bandcamp] No recognizable items found on page")
        return []

    def get_result_args(self) -> List[str]:
        """CLI argument(s) used to act on a selected result."""
        return ["-url"]
|
||||
|
||||
|
||||
# Provider registry
|
||||
_PROVIDERS = {
|
||||
"bandcamp": BandcampProvider,
|
||||
"local": LocalStorageProvider,
|
||||
"libgen": LibGenProvider,
|
||||
"soulseek": SoulSeekProvider,
|
||||
|
||||
Reference in New Issue
Block a user