This commit is contained in:
2025-12-31 05:17:37 -08:00
parent 3bbaa28fb4
commit e8842ceded
10 changed files with 1255 additions and 29 deletions

View File

@@ -8,8 +8,13 @@ import requests
import sys
import json
import subprocess
try: # Optional dependency for IMDb scraping
from imdbinfo.services import search_title # type: ignore
except ImportError: # pragma: no cover - optional
search_title = None # type: ignore[assignment]
from SYS.logger import log, debug
from SYS.metadata import imdb_tag
try: # Optional dependency
import musicbrainzngs # type: ignore
@@ -607,6 +612,139 @@ class MusicBrainzMetadataProvider(MetadataProvider):
return tags
class ImdbMetadataProvider(MetadataProvider):
    """Metadata provider for IMDb titles (movies/series/episodes).

    Queries that contain an IMDb id are resolved directly through
    ``imdb_tag``; any other query is sent to the optional ``imdbinfo``
    title search (``search_title``), when that package is installed.
    """

    @property
    def name(self) -> str:  # type: ignore[override]
        """Return the provider key stored in every result (``"imdb"``)."""
        return "imdb"

    @staticmethod
    def _extract_imdb_id(text: str) -> str:
        """Return a ``tt``-prefixed IMDb id found in *text*, or ``""``.

        Recognised forms, in priority order:

        1. an explicit ``tt<digits>`` token anywhere in the text
           (case-insensitive, returned lowercased);
        2. a value made up only of digits, at least six characters long;
        3. the first run of six or more digits inside the text.

        Falsy input (``None``, ``""``, ``0``) yields ``""``; other non-string
        values are stringified before matching.
        """
        raw = str(text or "").strip()
        if not raw:
            return ""

        # Explicit "tt123..." token. Lowercasing the match already guarantees
        # the "tt" prefix, so no further normalisation is required.
        explicit = re.search(r"(tt\d+)", raw, re.IGNORECASE)
        if explicit:
            return explicit.group(1).lower()

        # Bare numeric IDs (e.g., "0118883")
        if raw.isdigit() and len(raw) >= 6:
            return f"tt{raw}"

        # Last resort: first run of at least six digits.
        digit_run = re.search(r"(\d{6,})", raw)
        if digit_run:
            return f"tt{digit_run.group(1)}"
        return ""

    @staticmethod
    def _string_tags(data: Any) -> List[str]:
        """Return the string entries of ``data["tag"]``, or ``[]``.

        Tolerates a payload that is not a dict, has no ``"tag"`` key, or whose
        ``"tag"`` value is not a list. Without this check a missing key would
        raise ``TypeError`` and a bare string would be split into characters.
        """
        raw_tags = data.get("tag") if isinstance(data, dict) else None
        if not isinstance(raw_tags, list):
            return []
        return [tag for tag in raw_tags if isinstance(tag, str)]

    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Search IMDb by id or by title.

        Args:
            query: An IMDb id (any form accepted by ``_extract_imdb_id``) or
                free-text title.
            limit: Maximum number of title-search results to return. Not
                applied to id lookups, which return at most one item.

        Returns:
            A list of result dicts with the keys ``title``, ``artist``,
            ``album``, ``year``, ``provider``, ``imdb_id`` and ``raw``
            (title-search results additionally carry ``kind`` and
            ``rating``). Returns ``[]`` for an empty query, when the lookup
            fails, or when ``imdbinfo`` is unavailable for a title search.
        """
        q = (query or "").strip()
        if not q:
            return []

        imdb_id = self._extract_imdb_id(q)
        if imdb_id:
            # Direct lookup: the query already identifies a single title.
            try:
                data = imdb_tag(imdb_id)
            except Exception as exc:
                log(f"IMDb lookup failed: {exc}", file=sys.stderr)
                return []

            title = None
            year = None
            # NOTE(review): assumes imdb_tag() yields {"tag": ["title:...",
            # "year:...", ...]} — confirm against SYS.metadata.
            for tag in self._string_tags(data):
                if tag.startswith("title:"):
                    title = tag.split(":", 1)[1]
                elif tag.startswith("year:"):
                    year = tag.split(":", 1)[1]
            return [
                {
                    "title": title or imdb_id,
                    "artist": "",
                    "album": "",
                    "year": str(year or ""),
                    "provider": self.name,
                    "imdb_id": imdb_id,
                    "raw": data,
                }
            ]

        if search_title is None:
            log("imdbinfo is not installed; skipping IMDb scrape", file=sys.stderr)
            return []

        try:
            search_result = search_title(q)
            titles = getattr(search_result, "titles", None) or []
        except Exception as exc:
            log(f"IMDb search failed: {exc}", file=sys.stderr)
            return []

        items: List[Dict[str, Any]] = []
        for entry in titles[:limit]:
            # The id attribute name is probed under several spellings.
            entry_id = self._extract_imdb_id(
                getattr(entry, "imdb_id", None)
                or getattr(entry, "imdbId", None)
                or getattr(entry, "id", None)
            )
            kind = getattr(entry, "kind", "") or ""
            items.append(
                {
                    "title": getattr(entry, "title", "")
                    or getattr(entry, "title_localized", ""),
                    "artist": "",
                    # The generic "album" column is reused to show the kind.
                    "album": kind,
                    "year": str(getattr(entry, "year", "") or "")[:4],
                    "provider": self.name,
                    "imdb_id": entry_id,
                    "kind": kind,
                    "rating": getattr(entry, "rating", None),
                    "raw": entry,
                }
            )
        return items

    def to_tags(self, item: Dict[str, Any]) -> List[str]:
        """Convert a search result into a list of tag strings.

        When the item carries an IMDb id and ``imdb_tag`` returns at least one
        string tag for it, those tags are returned as-is. Otherwise the base
        class tags are used, an ``imdb:<id>`` tag is appended (if an id is
        known), and the result is stripped and de-duplicated
        case-insensitively, keeping the first spelling of each tag.
        """
        imdb_id = self._extract_imdb_id(
            item.get("imdb_id") or item.get("id") or item.get("imdb") or ""
        )

        if imdb_id:
            try:
                fetched = self._string_tags(imdb_tag(imdb_id))
            except Exception as exc:
                log(f"IMDb tag extraction failed: {exc}", file=sys.stderr)
            else:
                if fetched:
                    return fetched

        # Fallback: generic tags from the base provider. Copy so that the
        # append below never mutates a list owned by the base class.
        tags = list(super().to_tags(item))
        if imdb_id:
            tags.append(f"imdb:{imdb_id}")

        # Case-insensitive, order-preserving de-duplication.
        unique: Dict[str, str] = {}
        for tag in tags:
            cleaned = str(tag or "").strip()
            if cleaned:
                unique.setdefault(cleaned.lower(), cleaned)
        return list(unique.values())
class YtdlpMetadataProvider(MetadataProvider):
"""Metadata provider that extracts tags from a supported URL using yt-dlp.
@@ -764,6 +902,7 @@ _METADATA_PROVIDERS: Dict[str,
"google": GoogleBooksMetadataProvider,
"isbnsearch": ISBNsearchMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider,
"imdb": ImdbMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
}