df
This commit is contained in:
@@ -8,8 +8,13 @@ import requests
|
||||
import sys
|
||||
import json
|
||||
import subprocess
|
||||
try: # Optional dependency for IMDb scraping
|
||||
from imdbinfo.services import search_title # type: ignore
|
||||
except ImportError: # pragma: no cover - optional
|
||||
search_title = None # type: ignore[assignment]
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from SYS.metadata import imdb_tag
|
||||
|
||||
try: # Optional dependency
|
||||
import musicbrainzngs # type: ignore
|
||||
@@ -607,6 +612,139 @@ class MusicBrainzMetadataProvider(MetadataProvider):
|
||||
return tags
|
||||
|
||||
|
||||
class ImdbMetadataProvider(MetadataProvider):
    """Metadata provider for IMDb titles (movies/series/episodes).

    A query that contains an IMDb identifier is resolved directly through
    ``imdb_tag``. Any other query is handed to ``search_title`` from the
    optional ``imdbinfo`` package; when that package is not installed the
    free-text search is skipped and an empty result is returned.
    """

    @property
    def name(self) -> str:  # type: ignore[override]
        """Return the provider key, ``"imdb"``."""
        return "imdb"

    @staticmethod
    def _extract_imdb_id(text: str) -> str:
        """Return a ``tt<digits>`` identifier found in *text*, or ``""``.

        The following are tried in order:

        1. an explicit ``tt<digits>`` token anywhere in the text
           (matched case-insensitively, returned lower-cased);
        2. a value made up only of digits, at least six characters long;
        3. the first run of six or more digits anywhere in the text.

        Non-string input is coerced with ``str()``; falsy input yields ``""``.
        """
        raw = str(text or "").strip()
        if not raw:
            return ""

        explicit = re.search(r"(tt\d+)", raw, re.IGNORECASE)
        if explicit:
            # The pattern itself guarantees the "tt" prefix, so lower-casing
            # is the only normalisation required.
            return explicit.group(1).lower()

        # Bare numeric IDs (e.g., "0118883")
        if raw.isdigit() and len(raw) >= 6:
            return f"tt{raw}"

        # Last resort: first sufficiently long digit run inside other text.
        digit_run = re.search(r"(\d{6,})", raw)
        if digit_run:
            return f"tt{digit_run.group(1)}"

        return ""

    @staticmethod
    def _string_tags(data: Any) -> List[str]:
        """Return the string entries of ``data["tag"]``.

        Anything other than a dict holding a list/tuple under ``"tag"``
        yields an empty list, so callers never iterate ``None`` or split a
        bare string into single characters.
        """
        raw_tags = data.get("tag") if isinstance(data, dict) else None
        if not isinstance(raw_tags, (list, tuple)):
            return []
        return [tag for tag in raw_tags if isinstance(tag, str)]

    def _lookup_by_id(self, imdb_id: str) -> List[Dict[str, Any]]:
        """Build a single-item result list for *imdb_id* via ``imdb_tag``.

        Returns ``[]`` (after logging to stderr) when the lookup raises.
        """
        try:
            data = imdb_tag(imdb_id)
            title = None
            year = None
            # When a prefix occurs more than once, the last occurrence wins.
            for tag in self._string_tags(data):
                if tag.startswith("title:"):
                    title = tag.split(":", 1)[1]
                elif tag.startswith("year:"):
                    year = tag.split(":", 1)[1]
        except Exception as exc:  # best-effort lookup: log and report no hits
            log(f"IMDb lookup failed: {exc}", file=sys.stderr)
            return []

        return [
            {
                "title": title or imdb_id,
                "artist": "",
                "album": "",
                "year": str(year or ""),
                "provider": self.name,
                "imdb_id": imdb_id,
                "raw": data,
            }
        ]

    def _item_from_entry(self, entry: Any) -> Dict[str, Any]:
        """Convert one ``search_title`` result entry into a result dict.

        Attributes are read defensively with ``getattr`` because the entry
        type comes from the optional third-party package.
        """
        imdb_id = self._extract_imdb_id(
            getattr(entry, "imdb_id", None)
            or getattr(entry, "imdbId", None)
            or getattr(entry, "id", None)
        )
        title = getattr(entry, "title", "") or getattr(entry, "title_localized", "")
        # Keep only the first four characters of whatever the entry reports.
        year = str(getattr(entry, "year", "") or "")[:4]
        kind = getattr(entry, "kind", "") or ""
        return {
            "title": title,
            "artist": "",
            # The title kind is also exposed under the generic "album" key.
            "album": kind,
            "year": year,
            "provider": self.name,
            "imdb_id": imdb_id,
            "kind": kind,
            "rating": getattr(entry, "rating", None),
            "raw": entry,
        }

    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Search IMDb for *query* and return result dicts.

        Args:
            query: Free text, or anything containing an IMDb identifier
                (see ``_extract_imdb_id``).
            limit: Maximum number of entries taken from a free-text search.
                It does not apply to the identifier path, which yields at
                most one item.

        Returns:
            A list of dicts with at least ``title``, ``artist``, ``album``,
            ``year``, ``provider``, ``imdb_id`` and ``raw`` keys. Free-text
            results additionally carry ``kind`` and ``rating``. The list is
            empty for a blank query, when ``imdbinfo`` is unavailable, or
            when the underlying lookup raises (the error is logged).
        """
        q = (query or "").strip()
        if not q:
            return []

        imdb_id = self._extract_imdb_id(q)
        if imdb_id:
            return self._lookup_by_id(imdb_id)

        if search_title is None:
            log("imdbinfo is not installed; skipping IMDb scrape", file=sys.stderr)
            return []

        try:
            search_result = search_title(q)
            titles = getattr(search_result, "titles", None) or []
        except Exception as exc:  # best-effort scrape: log and report no hits
            log(f"IMDb search failed: {exc}", file=sys.stderr)
            return []

        return [self._item_from_entry(entry) for entry in titles[:limit]]

    def to_tags(self, item: Dict[str, Any]) -> List[str]:
        """Return tags for *item*.

        When an IMDb identifier can be derived from the item's ``imdb_id``,
        ``id`` or ``imdb`` value and ``imdb_tag`` yields string tags, those
        are returned as-is. Otherwise the base-class tags are used, an
        ``imdb:<id>`` tag is appended when an identifier is known, and the
        result is stripped, emptied of blanks and de-duplicated
        case-insensitively with the first spelling kept.
        """
        imdb_id = self._extract_imdb_id(
            item.get("imdb_id") or item.get("id") or item.get("imdb") or ""
        )

        if imdb_id:
            try:
                fetched = self._string_tags(imdb_tag(imdb_id))
            except Exception as exc:  # fall back to the generic tags below
                log(f"IMDb tag extraction failed: {exc}", file=sys.stderr)
            else:
                if fetched:
                    return fetched

        tags = super().to_tags(item)
        if imdb_id:
            tags.append(f"imdb:{imdb_id}")

        seen: set[str] = set()
        deduped: List[str] = []
        for tag in tags:
            cleaned = str(tag or "").strip()
            if not cleaned:
                continue
            key = cleaned.lower()
            if key in seen:
                continue
            seen.add(key)
            deduped.append(cleaned)
        return deduped
|
||||
|
||||
|
||||
class YtdlpMetadataProvider(MetadataProvider):
|
||||
"""Metadata provider that extracts tags from a supported URL using yt-dlp.
|
||||
|
||||
@@ -764,6 +902,7 @@ _METADATA_PROVIDERS: Dict[str,
|
||||
"google": GoogleBooksMetadataProvider,
|
||||
"isbnsearch": ISBNsearchMetadataProvider,
|
||||
"musicbrainz": MusicBrainzMetadataProvider,
|
||||
"imdb": ImdbMetadataProvider,
|
||||
"ytdlp": YtdlpMetadataProvider,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user