pre-migration commit
This commit is contained in:
-2146
File diff suppressed because it is too large
Load Diff
-2457
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,373 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from urllib.parse import urlparse
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import log, debug
|
||||
|
||||
from tool.playwright import PlaywrightTool
|
||||
|
||||
|
||||
class Bandcamp(Provider):
|
||||
"""Search provider for Bandcamp."""
|
||||
|
||||
TABLE_AUTO_STAGES = {
|
||||
"bandcamp": ["download-file"],
|
||||
}
|
||||
AUTO_STAGE_USE_SELECTION_ARGS = True
|
||||
|
||||
@staticmethod
|
||||
def _download_selection_args(target_url: str, media_type: str) -> Optional[List[str]]:
|
||||
target = str(target_url or "").strip()
|
||||
kind = str(media_type or "").strip().lower()
|
||||
if not target or kind == "artist":
|
||||
return None
|
||||
return ["-url", target]
|
||||
|
||||
@staticmethod
|
||||
def _base_url(raw_url: str) -> str:
|
||||
"""Normalize a Bandcamp URL down to scheme://netloc."""
|
||||
text = str(raw_url or "").strip()
|
||||
if not text:
|
||||
return ""
|
||||
try:
|
||||
parsed = urlparse(text)
|
||||
if not parsed.scheme or not parsed.netloc:
|
||||
return text
|
||||
return f"{parsed.scheme}://{parsed.netloc}"
|
||||
except Exception:
|
||||
return text
|
||||
|
||||
@classmethod
|
||||
def _discography_url(cls, raw_url: str) -> str:
|
||||
base = cls._base_url(raw_url)
|
||||
if not base:
|
||||
return ""
|
||||
# Bandcamp discography lives under /music.
|
||||
return base.rstrip("/") + "/music"
|
||||
|
||||
def _scrape_artist_page(self,
|
||||
page: Any,
|
||||
artist_url: str,
|
||||
limit: int = 50) -> List[SearchResult]:
|
||||
"""Scrape an artist page for albums/tracks (discography)."""
|
||||
base = self._base_url(artist_url)
|
||||
discography_url = self._discography_url(artist_url)
|
||||
if not base or not discography_url:
|
||||
return []
|
||||
|
||||
debug(f"[bandcamp] Scraping artist page: {discography_url}")
|
||||
page.goto(discography_url)
|
||||
page.wait_for_load_state("domcontentloaded")
|
||||
|
||||
results: List[SearchResult] = []
|
||||
cards = page.query_selector_all("li.music-grid-item") or []
|
||||
if not cards:
|
||||
# Fallback selector
|
||||
cards = page.query_selector_all(".music-grid-item") or []
|
||||
|
||||
for item in cards[:limit]:
|
||||
try:
|
||||
link = item.query_selector("a")
|
||||
if not link:
|
||||
continue
|
||||
|
||||
href = link.get_attribute("href") or ""
|
||||
href = str(href).strip()
|
||||
if not href:
|
||||
continue
|
||||
|
||||
if href.startswith("/"):
|
||||
target = base.rstrip("/") + href
|
||||
elif href.startswith("http://") or href.startswith("https://"):
|
||||
target = href
|
||||
else:
|
||||
target = base.rstrip("/") + "/" + href
|
||||
|
||||
title_node = item.query_selector("p.title"
|
||||
) or item.query_selector(".title")
|
||||
title = title_node.inner_text().strip() if title_node else ""
|
||||
if title:
|
||||
title = " ".join(title.split())
|
||||
if not title:
|
||||
title = target.rsplit("/", 1)[-1]
|
||||
|
||||
kind = (
|
||||
"album" if "/album/" in target else
|
||||
("track" if "/track/" in target else "item")
|
||||
)
|
||||
selection_args = self._download_selection_args(target, kind)
|
||||
selection_action = (["download-file"] + selection_args) if selection_args else None
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="bandcamp",
|
||||
title=title,
|
||||
path=target,
|
||||
detail="",
|
||||
annotations=[kind],
|
||||
media_kind="audio",
|
||||
columns=[
|
||||
("Title",
|
||||
title),
|
||||
("Type",
|
||||
kind),
|
||||
("Url",
|
||||
target),
|
||||
],
|
||||
full_metadata={
|
||||
"type": kind,
|
||||
"url": target,
|
||||
"artist_url": base,
|
||||
},
|
||||
selection_args=selection_args,
|
||||
selection_action=selection_action,
|
||||
)
|
||||
)
|
||||
except Exception as exc:
|
||||
debug(f"[bandcamp] Error parsing artist item: {exc}")
|
||||
|
||||
return results
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
*,
|
||||
ctx: Any,
|
||||
stage_is_last: bool = True,
|
||||
**_kwargs: Any
|
||||
) -> bool:
|
||||
"""Handle Bandcamp `@N` selection.
|
||||
|
||||
If the selected item is an ARTIST result, selecting it auto-expands into
|
||||
a discography table by scraping the artist URL.
|
||||
"""
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
# Playwright is required; proceed to handle artist selection
|
||||
|
||||
# Only handle artist selections.
|
||||
chosen: List[Dict[str, Any]] = []
|
||||
for item in selected_items or []:
|
||||
payload: Dict[str,
|
||||
Any] = {}
|
||||
if isinstance(item, dict):
|
||||
payload = item
|
||||
else:
|
||||
try:
|
||||
if hasattr(item, "to_dict"):
|
||||
payload = item.to_dict() # type: ignore[assignment]
|
||||
except Exception:
|
||||
payload = {}
|
||||
if not payload:
|
||||
try:
|
||||
payload = {
|
||||
"title": getattr(item,
|
||||
"title",
|
||||
None),
|
||||
"url": getattr(item,
|
||||
"url",
|
||||
None),
|
||||
"path": getattr(item,
|
||||
"path",
|
||||
None),
|
||||
"metadata": getattr(item,
|
||||
"metadata",
|
||||
None),
|
||||
"extra": getattr(item,
|
||||
"extra",
|
||||
None),
|
||||
}
|
||||
except Exception:
|
||||
payload = {}
|
||||
|
||||
meta = payload.get("metadata") or payload.get("full_metadata") or {}
|
||||
if not isinstance(meta, dict):
|
||||
meta = {}
|
||||
extra = payload.get("extra")
|
||||
if isinstance(extra, dict):
|
||||
meta = {
|
||||
**meta,
|
||||
**extra
|
||||
}
|
||||
|
||||
type_val = str(meta.get("type") or "").strip().lower()
|
||||
if type_val != "artist":
|
||||
continue
|
||||
|
||||
title = str(payload.get("title") or "").strip()
|
||||
url_val = str(
|
||||
payload.get("url") or payload.get("path") or meta.get("url") or ""
|
||||
).strip()
|
||||
base = self._base_url(url_val)
|
||||
if not base:
|
||||
continue
|
||||
|
||||
chosen.append(
|
||||
{
|
||||
"title": title,
|
||||
"url": base,
|
||||
"location": str(meta.get("artist") or "").strip()
|
||||
}
|
||||
)
|
||||
|
||||
if not chosen:
|
||||
return False
|
||||
|
||||
# Build a new table from artist discography.
|
||||
try:
|
||||
from SYS.result_table import Table
|
||||
from SYS.rich_display import stdout_console
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
artist_title = chosen[0].get("title") or "artist"
|
||||
artist_url = chosen[0].get("url") or ""
|
||||
|
||||
try:
|
||||
tool = PlaywrightTool({})
|
||||
tool.require()
|
||||
with tool.open_page(headless=True) as page:
|
||||
discography = self._scrape_artist_page(page, artist_url, limit=50)
|
||||
except Exception as exc:
|
||||
print(f"bandcamp artist lookup failed: {exc}\n")
|
||||
return True
|
||||
|
||||
table = Table(f"Bandcamp: artist:{artist_title}")._perseverance(True)
|
||||
table.set_table("bandcamp")
|
||||
try:
|
||||
table.set_value_case("preserve")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
results_payload: List[Dict[str, Any]] = []
|
||||
for r in discography:
|
||||
table.add_result(r)
|
||||
try:
|
||||
results_payload.append(r.to_dict())
|
||||
except Exception:
|
||||
results_payload.append(
|
||||
{
|
||||
"table": "bandcamp",
|
||||
"title": getattr(r,
|
||||
"title",
|
||||
""),
|
||||
"path": getattr(r,
|
||||
"path",
|
||||
""),
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
ctx.set_last_result_table(table, results_payload)
|
||||
ctx.set_current_stage_table(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
stdout_console().print()
|
||||
stdout_console().print(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str,
|
||||
Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
try:
|
||||
tool = PlaywrightTool({})
|
||||
tool.require()
|
||||
with tool.open_page(headless=True) as page:
|
||||
if query.strip().lower().startswith("artist:"):
|
||||
artist_name = query[7:].strip().strip('"')
|
||||
search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b"
|
||||
else:
|
||||
search_url = f"https://bandcamp.com/search?q={query}&item_type=a"
|
||||
|
||||
results = self._scrape_url(page, search_url, limit)
|
||||
return results
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[bandcamp] Search error: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def _scrape_url(self, page: Any, url: str, limit: int) -> List[SearchResult]:
|
||||
debug(f"[bandcamp] Scraping: {url}")
|
||||
|
||||
page.goto(url)
|
||||
page.wait_for_load_state("domcontentloaded")
|
||||
|
||||
results: List[SearchResult] = []
|
||||
|
||||
search_results = page.query_selector_all(".searchresult")
|
||||
if not search_results:
|
||||
return results
|
||||
|
||||
for item in search_results[:limit]:
|
||||
try:
|
||||
heading = item.query_selector(".heading")
|
||||
if not heading:
|
||||
continue
|
||||
|
||||
link = heading.query_selector("a")
|
||||
if not link:
|
||||
continue
|
||||
|
||||
title = link.inner_text().strip()
|
||||
target_url = link.get_attribute("href")
|
||||
base_url = self._base_url(str(target_url or ""))
|
||||
|
||||
subhead = item.query_selector(".subhead")
|
||||
artist = subhead.inner_text().strip() if subhead else "Unknown"
|
||||
|
||||
itemtype = item.query_selector(".itemtype")
|
||||
media_type = itemtype.inner_text().strip() if itemtype else "album"
|
||||
selection_args = self._download_selection_args(str(target_url or ""), media_type)
|
||||
selection_action = (["download-file"] + selection_args) if selection_args else None
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="bandcamp",
|
||||
title=title,
|
||||
path=target_url,
|
||||
detail=f"By: {artist}",
|
||||
annotations=[media_type],
|
||||
media_kind="audio",
|
||||
columns=[
|
||||
("Title",
|
||||
title),
|
||||
("Location",
|
||||
artist),
|
||||
("Type",
|
||||
media_type),
|
||||
("Url",
|
||||
str(target_url or "")),
|
||||
],
|
||||
full_metadata={
|
||||
"artist": artist,
|
||||
"type": media_type,
|
||||
"url": str(target_url or ""),
|
||||
"artist_url": base_url,
|
||||
},
|
||||
selection_args=selection_args,
|
||||
selection_action=selection_action,
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
debug(f"[bandcamp] Error parsing result: {exc}")
|
||||
|
||||
return results
|
||||
|
||||
def validate(self) -> bool:
|
||||
# Playwright is required for the provider to function
|
||||
return True
|
||||
@@ -1,231 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
def _pick_provider_config(config: Any) -> Dict[str, Any]:
|
||||
if not isinstance(config, dict):
|
||||
return {}
|
||||
provider = config.get("provider")
|
||||
if not isinstance(provider, dict):
|
||||
return {}
|
||||
entry = provider.get("file.io")
|
||||
if isinstance(entry, dict):
|
||||
return entry
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_link(payload: Any) -> Optional[str]:
|
||||
if isinstance(payload, dict):
|
||||
for key in ("link", "url", "downloadLink", "download_url"):
|
||||
val = payload.get(key)
|
||||
if isinstance(val, str) and val.strip().startswith(("http://", "https://")):
|
||||
return val.strip()
|
||||
for nested_key in ("data", "file", "result"):
|
||||
nested = payload.get(nested_key)
|
||||
found = _extract_link(nested)
|
||||
if found:
|
||||
return found
|
||||
return None
|
||||
|
||||
|
||||
def _extract_key(payload: Any) -> Optional[str]:
|
||||
if isinstance(payload, dict):
|
||||
for key in ("key", "id", "uuid"):
|
||||
val = payload.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
return val.strip()
|
||||
for nested_key in ("data", "file", "result"):
|
||||
nested = payload.get(nested_key)
|
||||
found = _extract_key(nested)
|
||||
if found:
|
||||
return found
|
||||
return None
|
||||
|
||||
|
||||
class FileIO(Provider):
|
||||
"""File provider for file.io."""
|
||||
PLUGIN_NAME = "file.io"
|
||||
|
||||
@classmethod
|
||||
def config_schema(cls) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"key": "api_key",
|
||||
"label": "API Key",
|
||||
"default": "",
|
||||
"secret": True
|
||||
},
|
||||
{
|
||||
"key": "expires",
|
||||
"label": "Default Expiration (e.g. 1w)",
|
||||
"default": "1w"
|
||||
},
|
||||
{
|
||||
"key": "maxDownloads",
|
||||
"label": "Max Downloads",
|
||||
"default": 1
|
||||
},
|
||||
{
|
||||
"key": "autoDelete",
|
||||
"label": "Auto Delete",
|
||||
"default": True
|
||||
}
|
||||
]
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
conf = _pick_provider_config(self.config)
|
||||
self._base_url = str(conf.get("base_url")
|
||||
or "https://file.io").strip().rstrip("/")
|
||||
self._api_key = conf.get("api_key")
|
||||
self._default_expires = conf.get("expires")
|
||||
self._default_max_downloads = conf.get("maxDownloads")
|
||||
if self._default_max_downloads is None:
|
||||
self._default_max_downloads = conf.get("max_downloads")
|
||||
self._default_auto_delete = conf.get("autoDelete")
|
||||
if self._default_auto_delete is None:
|
||||
self._default_auto_delete = conf.get("auto_delete")
|
||||
|
||||
def validate(self) -> bool:
|
||||
return True
|
||||
|
||||
def upload(self, file_path: str, **kwargs: Any) -> str:
|
||||
from API.HTTP import HTTPClient
|
||||
from SYS.models import ProgressFileReader
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
data: Dict[str,
|
||||
Any] = {}
|
||||
expires = kwargs.get("expires", self._default_expires)
|
||||
max_downloads = kwargs.get(
|
||||
"maxDownloads",
|
||||
kwargs.get("max_downloads",
|
||||
self._default_max_downloads)
|
||||
)
|
||||
auto_delete = kwargs.get(
|
||||
"autoDelete",
|
||||
kwargs.get("auto_delete",
|
||||
self._default_auto_delete)
|
||||
)
|
||||
|
||||
if expires not in (None, ""):
|
||||
data["expires"] = expires
|
||||
if max_downloads not in (None, ""):
|
||||
data["maxDownloads"] = max_downloads
|
||||
if auto_delete not in (None, ""):
|
||||
data["autoDelete"] = auto_delete
|
||||
|
||||
headers: Dict[str,
|
||||
str] = {
|
||||
"User-Agent": "Medeia-Macina/1.0",
|
||||
"Accept": "application/json"
|
||||
}
|
||||
if isinstance(self._api_key, str) and self._api_key.strip():
|
||||
# Some file.io plans use bearer tokens; keep optional.
|
||||
headers["Authorization"] = f"Bearer {self._api_key.strip()}"
|
||||
|
||||
try:
|
||||
with HTTPClient(headers=headers) as client:
|
||||
with open(file_path, "rb") as handle:
|
||||
filename = os.path.basename(file_path)
|
||||
try:
|
||||
total = os.path.getsize(file_path)
|
||||
except Exception:
|
||||
total = None
|
||||
wrapped = ProgressFileReader(
|
||||
handle,
|
||||
total_bytes=total,
|
||||
label="upload"
|
||||
)
|
||||
response = client.request(
|
||||
"POST",
|
||||
f"{self._base_url}/upload",
|
||||
data=data or None,
|
||||
files={
|
||||
"file": (filename,
|
||||
wrapped)
|
||||
},
|
||||
follow_redirects=True,
|
||||
raise_for_status=False,
|
||||
)
|
||||
|
||||
if response.status_code >= 400:
|
||||
location = response.headers.get("location"
|
||||
) or response.headers.get("Location")
|
||||
ct = response.headers.get("content-type"
|
||||
) or response.headers.get("Content-Type")
|
||||
raise Exception(
|
||||
f"Upload failed: {response.status_code} (content-type={ct}, location={location}) - {response.text}"
|
||||
)
|
||||
|
||||
payload: Any
|
||||
try:
|
||||
payload = response.json()
|
||||
except Exception:
|
||||
payload = None
|
||||
|
||||
# If the server ignored our Accept header and returned HTML, this is almost
|
||||
# certainly the wrong endpoint or an upstream block.
|
||||
ct = (
|
||||
response.headers.get("content-type")
|
||||
or response.headers.get("Content-Type") or ""
|
||||
).lower()
|
||||
if (payload is None) and ("text/html" in ct):
|
||||
raise Exception(
|
||||
"file.io returned HTML instead of JSON; expected API response from /upload"
|
||||
)
|
||||
|
||||
if isinstance(payload, dict) and payload.get("success") is False:
|
||||
reason = payload.get("message"
|
||||
) or payload.get("error") or payload.get("status")
|
||||
raise Exception(str(reason or "Upload failed"))
|
||||
|
||||
uploaded_url = _extract_link(payload)
|
||||
if not uploaded_url:
|
||||
# Some APIs may return the link as plain text.
|
||||
text = str(response.text or "").strip()
|
||||
if text.startswith(("http://", "https://")):
|
||||
uploaded_url = text
|
||||
|
||||
if not uploaded_url:
|
||||
key = _extract_key(payload)
|
||||
if key:
|
||||
uploaded_url = f"{self._base_url}/{key.lstrip('/')}"
|
||||
|
||||
if not uploaded_url:
|
||||
try:
|
||||
snippet = (response.text or "").strip()
|
||||
if len(snippet) > 300:
|
||||
snippet = snippet[:300] + "..."
|
||||
except Exception:
|
||||
snippet = "<unreadable response>"
|
||||
raise Exception(
|
||||
f"Upload succeeded but response did not include a link (response: {snippet})"
|
||||
)
|
||||
|
||||
try:
|
||||
pipe_obj = kwargs.get("pipe_obj")
|
||||
if pipe_obj is not None:
|
||||
from Store import Store
|
||||
|
||||
Store(
|
||||
self.config,
|
||||
suppress_debug=True
|
||||
).try_add_url_for_pipe_object(pipe_obj,
|
||||
uploaded_url)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return uploaded_url
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[file.io] Upload error: {exc}", file=sys.stderr)
|
||||
raise
|
||||
@@ -1,193 +0,0 @@
|
||||
"""Example plugin template for use as a starter kit.
|
||||
|
||||
This minimal plugin demonstrates the typical hooks a plugin may implement:
|
||||
- `validate()` to assert it's usable
|
||||
- `search()` to return `SearchResult` items
|
||||
- `download()` to persist a sample file (useful for local tests)
|
||||
|
||||
See `docs/provider_guide.md` for authoring guidance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
|
||||
|
||||
class HelloProvider(Provider):
|
||||
"""Very small example plugin suitable as a template.
|
||||
|
||||
- Table name: `hello`
|
||||
- Usage: `search-file -plugin hello "query"`
|
||||
- Selecting a row and piping into `download-file` will call `download()`.
|
||||
"""
|
||||
|
||||
PLUGIN_NAME = "hello"
|
||||
URL = ("hello:",)
|
||||
URL_DOMAINS = ()
|
||||
|
||||
def validate(self) -> bool:
|
||||
# No configuration required; always available for testing/demo purposes.
|
||||
return True
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
q = (query or "").strip()
|
||||
results: List[SearchResult] = []
|
||||
if not q or q in {"*", "all", "list"}:
|
||||
q = "example"
|
||||
|
||||
# Emit up to `limit` tiny example results.
|
||||
n = min(max(1, int(limit)), 3)
|
||||
for i in range(1, n + 1):
|
||||
title = f"{q} sample {i}"
|
||||
path = f"https://example.org/{q}/{i}"
|
||||
sr = SearchResult(
|
||||
table="hello",
|
||||
title=title,
|
||||
path=path,
|
||||
detail="Example provider result",
|
||||
media_kind="file",
|
||||
columns=[("Example", "yes")],
|
||||
full_metadata={"example_index": i},
|
||||
)
|
||||
results.append(sr)
|
||||
|
||||
return results[: max(0, int(limit))]
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
"""Create a small text file to simulate a download.
|
||||
|
||||
This keeps the example self-contained (no network access required) and
|
||||
makes it straightforward to test provider behavior with `pytest`.
|
||||
"""
|
||||
try:
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
title = str(getattr(result, "title", "hello") or "hello").strip()
|
||||
safe = "".join(c if c.isalnum() or c in ("-", "_", ".") else "_" for c in title)
|
||||
fname = f"{safe}.txt" if safe else "hello.txt"
|
||||
dest = Path(output_dir) / fname
|
||||
try:
|
||||
dest.write_text(f"Hello from HelloProvider\nsource: {result.path}\n", encoding="utf-8")
|
||||
return dest
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
*,
|
||||
ctx: Any,
|
||||
stage_is_last: bool = True,
|
||||
**_kwargs: Any,
|
||||
) -> bool:
|
||||
"""Present a simple details table when a HelloProvider row is selected.
|
||||
|
||||
This demonstrates how providers can implement custom `@N` selection
|
||||
behavior by constructing a `ResultTable`, populating it with
|
||||
provider-specific rows, and instructing the CLI to show the table.
|
||||
"""
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
def _as_payload(item: Any) -> Dict[str, Any]:
|
||||
if isinstance(item, dict):
|
||||
return dict(item)
|
||||
try:
|
||||
if hasattr(item, "to_dict"):
|
||||
maybe = item.to_dict()
|
||||
if isinstance(maybe, dict):
|
||||
return maybe
|
||||
except Exception:
|
||||
pass
|
||||
payload: Dict[str, Any] = {}
|
||||
try:
|
||||
payload = {
|
||||
"title": getattr(item, "title", None),
|
||||
"path": getattr(item, "path", None),
|
||||
"table": getattr(item, "table", None),
|
||||
"annotations": getattr(item, "annotations", None),
|
||||
"media_kind": getattr(item, "media_kind", None),
|
||||
"full_metadata": getattr(item, "full_metadata", None),
|
||||
}
|
||||
except Exception:
|
||||
payload = {}
|
||||
return payload
|
||||
|
||||
chosen: List[Dict[str, Any]] = []
|
||||
for item in selected_items or []:
|
||||
payload = _as_payload(item)
|
||||
meta = payload.get("full_metadata") or {}
|
||||
if not isinstance(meta, dict):
|
||||
meta = {}
|
||||
idx = meta.get("example_index")
|
||||
if idx is None:
|
||||
continue
|
||||
title = str(payload.get("title") or payload.get("path") or "").strip() or f"hello-{idx}"
|
||||
chosen.append({"index": idx, "title": title, "path": payload.get("path")})
|
||||
|
||||
if not chosen:
|
||||
return False
|
||||
|
||||
target = chosen[0]
|
||||
idx = target.get("index")
|
||||
title = target.get("title") or f"hello-{idx}"
|
||||
|
||||
try:
|
||||
from SYS.result_table import Table
|
||||
from SYS.rich_display import stdout_console
|
||||
except Exception:
|
||||
# If ResultTable isn't available, consider selection handled
|
||||
return True
|
||||
|
||||
table = Table(f"Hello Details: {title}")._perseverance(True)
|
||||
table.set_table("hello")
|
||||
try:
|
||||
table.set_table_metadata({"provider": "hello", "view": "details", "example_index": idx})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
table.set_source_command("download-file", [])
|
||||
|
||||
results_payload: List[Dict[str, Any]] = []
|
||||
for part in ("a", "b"):
|
||||
file_title = f"{title} - part {part}"
|
||||
file_path = f"{target.get('path')}/{part}"
|
||||
sr = SearchResult(
|
||||
table="hello",
|
||||
title=file_title,
|
||||
path=file_path,
|
||||
detail=f"Part {part}",
|
||||
media_kind="file",
|
||||
columns=[("Part", part)],
|
||||
full_metadata={"part": part, "example_index": idx},
|
||||
)
|
||||
table.add_result(sr)
|
||||
try:
|
||||
results_payload.append(sr.to_dict())
|
||||
except Exception:
|
||||
results_payload.append({"table": sr.table, "title": sr.title, "path": sr.path})
|
||||
|
||||
try:
|
||||
ctx.set_last_result_table(table, results_payload)
|
||||
ctx.set_current_stage_table(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
stdout_console().print()
|
||||
stdout_console().print(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
File diff suppressed because it is too large
Load Diff
-1972
File diff suppressed because it is too large
Load Diff
-147
@@ -1,147 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from API.loc import LOCClient
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.cli_syntax import get_free_text, parse_query
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
class LOC(Provider):
|
||||
"""LoC search provider.
|
||||
|
||||
Currently implements Chronicling America collection search via the LoC JSON API.
|
||||
"""
|
||||
|
||||
@property
|
||||
def preserve_order(self) -> bool:
|
||||
return True
|
||||
|
||||
URL_DOMAINS = ["www.loc.gov"]
|
||||
URL = URL_DOMAINS
|
||||
|
||||
def validate(self) -> bool:
|
||||
return True
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str,
|
||||
Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
_ = kwargs
|
||||
parsed = parse_query(query or "")
|
||||
text = get_free_text(parsed).strip()
|
||||
fields = parsed.get("fields",
|
||||
{}) if isinstance(parsed,
|
||||
dict) else {}
|
||||
|
||||
# Allow explicit q: override.
|
||||
q = str(fields.get("q") or text or "").strip()
|
||||
if not q:
|
||||
return []
|
||||
|
||||
# Pass through any extra filters supported by the LoC API.
|
||||
extra: Dict[str,
|
||||
Any] = {}
|
||||
if isinstance(filters, dict):
|
||||
extra.update(filters)
|
||||
if isinstance(fields, dict):
|
||||
for k, v in fields.items():
|
||||
if k == "q":
|
||||
continue
|
||||
extra[str(k)] = v
|
||||
|
||||
client = LOCClient()
|
||||
|
||||
results: List[SearchResult] = []
|
||||
start = 1
|
||||
page_size = 25
|
||||
try:
|
||||
if limit and limit > 0:
|
||||
page_size = max(1, min(int(limit), 50))
|
||||
|
||||
while len(results) < max(0, int(limit)):
|
||||
payload = client.search_chronicling_america(
|
||||
q,
|
||||
start=start,
|
||||
count=page_size,
|
||||
extra_params=extra
|
||||
)
|
||||
items = payload.get("results")
|
||||
if not isinstance(items, list) or not items:
|
||||
break
|
||||
|
||||
for it in items:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
|
||||
title = str(it.get("title") or "").strip() or "(untitled)"
|
||||
date = str(it.get("date") or "").strip()
|
||||
url = str(it.get("url") or "").strip()
|
||||
aka = it.get("aka")
|
||||
if (not url) and isinstance(aka, list) and aka:
|
||||
url = str(aka[0] or "").strip()
|
||||
|
||||
formats = it.get("online_format")
|
||||
if isinstance(formats, list):
|
||||
fmt_text = ", ".join([str(x) for x in formats if x])
|
||||
else:
|
||||
fmt_text = str(formats or "").strip()
|
||||
|
||||
partof = it.get("partof")
|
||||
if isinstance(partof, list) and partof:
|
||||
source = str(partof[-1] or "").strip()
|
||||
else:
|
||||
source = "Chronicling America"
|
||||
|
||||
detail_parts = []
|
||||
if date:
|
||||
detail_parts.append(date)
|
||||
if source:
|
||||
detail_parts.append(source)
|
||||
detail = " — ".join(detail_parts)
|
||||
|
||||
annotations: List[str] = []
|
||||
if date:
|
||||
annotations.append(date)
|
||||
if fmt_text:
|
||||
annotations.append(fmt_text)
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="loc",
|
||||
title=title,
|
||||
path=url or title,
|
||||
detail=detail,
|
||||
annotations=annotations,
|
||||
media_kind="document",
|
||||
columns=[
|
||||
("Title",
|
||||
title),
|
||||
("Date",
|
||||
date),
|
||||
("Format",
|
||||
fmt_text),
|
||||
("URL",
|
||||
url),
|
||||
],
|
||||
full_metadata=it,
|
||||
)
|
||||
)
|
||||
if len(results) >= int(limit):
|
||||
break
|
||||
|
||||
# LoC API pagination uses sp (1-based start index).
|
||||
if len(items) < page_size:
|
||||
break
|
||||
start += len(items)
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[loc] search failed: {exc}")
|
||||
return []
|
||||
|
||||
return results
|
||||
@@ -1,877 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import mimetypes
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
from API.requests_client import get_requests_session
|
||||
from SYS.utils import ffprobe as probe_media_metadata
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.provider_helpers import TableProviderMixin
|
||||
from SYS.logger import log
|
||||
|
||||
_MATRIX_INIT_CHECK_CACHE: Dict[str,
|
||||
Tuple[bool,
|
||||
Optional[str]]] = {}
|
||||
|
||||
|
||||
def _sniff_mime_from_header(path: Path) -> Optional[str]:
|
||||
"""Best-effort MIME sniffing from file headers.
|
||||
|
||||
Used when the file has no/unknown extension (common for exported/temp files).
|
||||
Keeps dependencies to stdlib only.
|
||||
"""
|
||||
try:
|
||||
if not path.exists() or not path.is_file():
|
||||
return None
|
||||
with open(path, "rb") as handle:
|
||||
header = handle.read(512)
|
||||
if not header:
|
||||
return None
|
||||
|
||||
# Images
|
||||
if header.startswith(b"\xff\xd8\xff"):
|
||||
return "image/jpeg"
|
||||
if header.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
if header.startswith(b"GIF87a") or header.startswith(b"GIF89a"):
|
||||
return "image/gif"
|
||||
if header.startswith(b"BM"):
|
||||
return "image/bmp"
|
||||
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WEBP":
|
||||
return "image/webp"
|
||||
|
||||
# Audio
|
||||
if header.startswith(b"fLaC"):
|
||||
return "audio/flac"
|
||||
if header.startswith(b"OggS"):
|
||||
# Could be audio or video; treat as audio unless extension suggests video.
|
||||
return "audio/ogg"
|
||||
if header.startswith(b"ID3"):
|
||||
return "audio/mpeg"
|
||||
if len(header) >= 2 and header[0] == 0xFF and (header[1] & 0xE0) == 0xE0:
|
||||
return "audio/mpeg"
|
||||
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"WAVE":
|
||||
return "audio/wav"
|
||||
|
||||
# Video
|
||||
if header.startswith(b"RIFF") and len(header) >= 12 and header[8:12] == b"AVI ":
|
||||
return "video/x-msvideo"
|
||||
if header.startswith(b"\x1a\x45\xdf\xa3"):
|
||||
# EBML container: Matroska/WebM.
|
||||
return "video/x-matroska"
|
||||
if len(header) >= 12 and header[4:8] == b"ftyp":
|
||||
# ISO BMFF: mp4/mov/m4a. Default to mp4; extension can refine.
|
||||
return "video/mp4"
|
||||
# MPEG-TS / M2TS (sync byte every 188 bytes)
|
||||
try:
|
||||
if path.stat().st_size >= 188 * 2 and header[0] == 0x47:
|
||||
with open(path, "rb") as handle:
|
||||
handle.seek(188)
|
||||
b = handle.read(1)
|
||||
if b == b"\x47":
|
||||
return "video/mp2t"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _classify_matrix_upload(path: Path,
|
||||
*,
|
||||
explicit_mime_type: Optional[str] = None) -> Tuple[str,
|
||||
str]:
|
||||
"""Return (mime_type, msgtype) for Matrix uploads."""
|
||||
mime_type = str(explicit_mime_type or "").strip() or None
|
||||
|
||||
if not mime_type:
|
||||
# `mimetypes.guess_type` expects a string/URL; Path can return None on some platforms.
|
||||
mime_type, _ = mimetypes.guess_type(str(path))
|
||||
|
||||
if not mime_type:
|
||||
mime_type = _sniff_mime_from_header(path)
|
||||
|
||||
# Refinements based on extension for ambiguous containers.
|
||||
ext = path.suffix.lower()
|
||||
if ext in {".m4a",
|
||||
".aac"}:
|
||||
mime_type = mime_type or "audio/mp4"
|
||||
if ext in {".mkv",
|
||||
".webm"}:
|
||||
mime_type = mime_type or "video/x-matroska"
|
||||
if ext in {".ogv"}:
|
||||
mime_type = mime_type or "video/ogg"
|
||||
|
||||
msgtype = "m.file"
|
||||
if mime_type:
|
||||
mt = mime_type.casefold()
|
||||
if mt.startswith("image/"):
|
||||
msgtype = "m.image"
|
||||
elif mt.startswith("audio/"):
|
||||
msgtype = "m.audio"
|
||||
elif mt.startswith("video/"):
|
||||
msgtype = "m.video"
|
||||
|
||||
# Final fallback for unknown MIME types.
|
||||
if msgtype == "m.file":
|
||||
audio_exts = {
|
||||
".mp3",
|
||||
".flac",
|
||||
".wav",
|
||||
".m4a",
|
||||
".aac",
|
||||
".ogg",
|
||||
".opus",
|
||||
".wma",
|
||||
".mka",
|
||||
".alac",
|
||||
}
|
||||
video_exts = {
|
||||
".mp4",
|
||||
".mkv",
|
||||
".webm",
|
||||
".mov",
|
||||
".avi",
|
||||
".flv",
|
||||
".mpg",
|
||||
".mpeg",
|
||||
".ts",
|
||||
".m4v",
|
||||
".wmv",
|
||||
".m2ts",
|
||||
".mts",
|
||||
".3gp",
|
||||
".ogv",
|
||||
}
|
||||
image_exts = {".jpg",
|
||||
".jpeg",
|
||||
".png",
|
||||
".gif",
|
||||
".webp",
|
||||
".bmp",
|
||||
".tiff"}
|
||||
if ext in audio_exts:
|
||||
msgtype = "m.audio"
|
||||
elif ext in video_exts:
|
||||
msgtype = "m.video"
|
||||
elif ext in image_exts:
|
||||
msgtype = "m.image"
|
||||
|
||||
return (mime_type or "application/octet-stream"), msgtype
|
||||
|
||||
|
||||
def _build_matrix_media_info(path: Path, *, mime_type: str, msgtype: str) -> Dict[str, Any]:
|
||||
"""Build Matrix `info` payload with dimensions/duration when available.
|
||||
|
||||
Matrix clients use `info.w`/`info.h` to size image/video placeholders. Supplying
|
||||
these fields keeps the preview aspect ratio aligned with the actual media.
|
||||
"""
|
||||
info: Dict[str, Any] = {
|
||||
"mimetype": str(mime_type or "application/octet-stream"),
|
||||
"size": int(path.stat().st_size),
|
||||
}
|
||||
|
||||
metadata: Dict[str, Any] = {}
|
||||
try:
|
||||
metadata = probe_media_metadata(str(path)) or {}
|
||||
except Exception:
|
||||
metadata = {}
|
||||
|
||||
width: Optional[int] = None
|
||||
height: Optional[int] = None
|
||||
|
||||
try:
|
||||
raw_w = metadata.get("width") if isinstance(metadata, dict) else None
|
||||
if isinstance(raw_w, (int, float)) and raw_w > 0:
|
||||
width = int(raw_w)
|
||||
except Exception:
|
||||
width = None
|
||||
|
||||
try:
|
||||
raw_h = metadata.get("height") if isinstance(metadata, dict) else None
|
||||
if isinstance(raw_h, (int, float)) and raw_h > 0:
|
||||
height = int(raw_h)
|
||||
except Exception:
|
||||
height = None
|
||||
|
||||
# Fallback for images when ffprobe metadata is unavailable.
|
||||
if msgtype == "m.image" and (width is None or height is None):
|
||||
try:
|
||||
from PIL import Image # type: ignore
|
||||
|
||||
with Image.open(path) as img:
|
||||
iw, ih = img.size
|
||||
if isinstance(iw, int) and iw > 0:
|
||||
width = iw
|
||||
if isinstance(ih, int) and ih > 0:
|
||||
height = ih
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if msgtype in {"m.image", "m.video"}:
|
||||
if width is not None:
|
||||
info["w"] = width
|
||||
if height is not None:
|
||||
info["h"] = height
|
||||
|
||||
if msgtype in {"m.video", "m.audio"}:
|
||||
try:
|
||||
raw_duration = metadata.get("duration") if isinstance(metadata, dict) else None
|
||||
if isinstance(raw_duration, (int, float)) and raw_duration > 0:
|
||||
info["duration"] = int(round(float(raw_duration) * 1000.0))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return info
|
||||
|
||||
|
||||
def _normalize_homeserver(value: str) -> str:
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
return ""
|
||||
if not text.startswith("http"):
|
||||
text = f"https://{text}"
|
||||
return text.rstrip("/")
|
||||
|
||||
|
||||
def _matrix_health_check(*,
|
||||
homeserver: str,
|
||||
access_token: Optional[str]) -> Tuple[bool,
|
||||
Optional[str]]:
|
||||
"""Lightweight Matrix reachability/auth validation.
|
||||
|
||||
- Always checks `/versions` (no auth).
|
||||
- If `access_token` is present, also checks `/whoami`.
|
||||
"""
|
||||
try:
|
||||
base = _normalize_homeserver(homeserver)
|
||||
if not base:
|
||||
return False, "Matrix homeserver missing"
|
||||
|
||||
resp = get_requests_session().get(f"{base}/_matrix/client/versions", timeout=5)
|
||||
if resp.status_code != 200:
|
||||
return False, f"Homeserver returned {resp.status_code}"
|
||||
|
||||
if access_token:
|
||||
headers = {
|
||||
"Authorization": f"Bearer {access_token}"
|
||||
}
|
||||
resp = get_requests_session().get(
|
||||
f"{base}/_matrix/client/v3/account/whoami",
|
||||
headers=headers,
|
||||
timeout=5
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
return False, f"Authentication failed: {resp.status_code}"
|
||||
|
||||
return True, None
|
||||
except Exception as exc:
|
||||
return False, str(exc)
|
||||
|
||||
|
||||
class Matrix(TableProviderMixin, Provider):
|
||||
"""Matrix (Element) room provider with file uploads and selection.
|
||||
|
||||
This provider uses the new table system (strict ResultTable adapter pattern) for
|
||||
consistent room listing and selection. It exposes Matrix joined rooms as selectable
|
||||
rows with metadata enrichment for:
|
||||
- room_id: Unique Matrix room identifier
|
||||
- room_name: Human-readable room display name
|
||||
- _selection_args: For @N expansion control and upload routing
|
||||
|
||||
KEY FEATURES:
|
||||
- Table system: Using ResultTable adapter for strict column/metadata handling
|
||||
- Room discovery: search() or list_rooms() to enumerate joined rooms
|
||||
- Selection integration: @N selection triggers upload_to_room() via selector()
|
||||
- Deferred uploads: Files can be queued for upload to multiple rooms
|
||||
- MIME detection: Automatic content type classification for Matrix msgtype
|
||||
|
||||
SELECTION FLOW:
|
||||
1. User runs: search-file -plugin matrix "room" (or .matrix -list-rooms)
|
||||
2. Results show available joined rooms
|
||||
3. User selects rooms: @1 @2 (or @1,2)
|
||||
4. Selection triggers upload of pending files to selected rooms
|
||||
"""
|
||||
|
||||
EXPOSE_AS_FILE_PROVIDER = False
|
||||
|
||||
@classmethod
|
||||
def config_schema(cls) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"key": "homeserver",
|
||||
"label": "Homeserver URL",
|
||||
"default": "https://matrix.org",
|
||||
"required": True
|
||||
},
|
||||
{
|
||||
"key": "access_token",
|
||||
"label": "Access Token",
|
||||
"default": "",
|
||||
"secret": True
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
self._init_ok: Optional[bool] = None
|
||||
self._init_reason: Optional[str] = None
|
||||
|
||||
matrix_conf = (
|
||||
self.config.get("provider",
|
||||
{}).get("matrix",
|
||||
{}) if isinstance(self.config,
|
||||
dict) else {}
|
||||
)
|
||||
homeserver = matrix_conf.get("homeserver")
|
||||
access_token = matrix_conf.get("access_token")
|
||||
# Not configured: keep instance but mark invalid via validate().
|
||||
# Note: `room_id` is intentionally NOT required, since the CLI can prompt
|
||||
# the user to select a room dynamically.
|
||||
if not (homeserver and access_token):
|
||||
self._init_ok = None
|
||||
self._init_reason = None
|
||||
return
|
||||
|
||||
cache_key = f"{_normalize_homeserver(str(homeserver))}|has_token:{bool(access_token)}"
|
||||
cached = _MATRIX_INIT_CHECK_CACHE.get(cache_key)
|
||||
if cached is None:
|
||||
ok, reason = _matrix_health_check(
|
||||
homeserver=str(homeserver), access_token=str(access_token) if access_token else None
|
||||
)
|
||||
_MATRIX_INIT_CHECK_CACHE[cache_key] = (ok, reason)
|
||||
else:
|
||||
ok, reason = cached
|
||||
|
||||
self._init_ok = ok
|
||||
self._init_reason = reason
|
||||
if not ok:
|
||||
raise Exception(reason or "Matrix unavailable")
|
||||
|
||||
def validate(self) -> bool:
|
||||
if not self.config:
|
||||
return False
|
||||
if self._init_ok is False:
|
||||
return False
|
||||
matrix_conf = self.config.get("provider",
|
||||
{}).get("matrix",
|
||||
{})
|
||||
return bool(
|
||||
matrix_conf.get("homeserver")
|
||||
and matrix_conf.get("access_token")
|
||||
)
|
||||
|
||||
def status_summary(self) -> Dict[str, Any]:
|
||||
matrix_conf = self.config.get("provider", {}).get("matrix", {}) if isinstance(self.config, dict) else {}
|
||||
homeserver = str(matrix_conf.get("homeserver") or "").strip()
|
||||
room_id = str(matrix_conf.get("room_id") or "").strip()
|
||||
detail = homeserver
|
||||
if room_id:
|
||||
detail = (detail + (" " if detail else "")) + f"room:{room_id}"
|
||||
enabled = bool(self.validate())
|
||||
return {
|
||||
"status": "ENABLED" if enabled else "DISABLED",
|
||||
"name": self.label,
|
||||
"plugin": self.name,
|
||||
"detail": detail or ("Connected" if enabled else "Not configured"),
|
||||
}
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
"""Search/list joined Matrix rooms.
|
||||
|
||||
If query is empty or "*", returns all joined rooms.
|
||||
Otherwise, filters rooms by name/ID matching the query.
|
||||
"""
|
||||
try:
|
||||
rooms = self.list_rooms()
|
||||
except Exception as exc:
|
||||
log(f"[matrix] Failed to list rooms: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
q = (query or "").strip().lower()
|
||||
needle = "" if q in {"*", "all", "list"} else q
|
||||
|
||||
results: List[SearchResult] = []
|
||||
for room in rooms:
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
room_id = room.get("room_id") or ""
|
||||
room_name = room.get("name") or ""
|
||||
|
||||
# Filter by query if provided
|
||||
if needle:
|
||||
match_text = f"{room_name} {room_id}".lower()
|
||||
if needle not in match_text:
|
||||
continue
|
||||
|
||||
if not room_id:
|
||||
continue
|
||||
|
||||
display_name = room_name or room_id
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="matrix",
|
||||
title=display_name,
|
||||
path=f"matrix:room:{room_id}",
|
||||
detail=room_id if room_name else "",
|
||||
annotations=["room"],
|
||||
media_kind="folder",
|
||||
columns=[
|
||||
("Room", display_name),
|
||||
("ID", room_id),
|
||||
],
|
||||
full_metadata={
|
||||
"room_id": room_id,
|
||||
"room_name": room_name,
|
||||
"provider": "matrix",
|
||||
# Selection metadata for table system and @N expansion
|
||||
"_selection_args": ["-room-id", room_id],
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def _get_homeserver_and_token(self) -> Tuple[str, str]:
|
||||
matrix_conf = self.config.get("provider",
|
||||
{}).get("matrix",
|
||||
{})
|
||||
homeserver = matrix_conf.get("homeserver")
|
||||
access_token = matrix_conf.get("access_token")
|
||||
if not homeserver:
|
||||
raise Exception("Matrix homeserver missing")
|
||||
if not access_token:
|
||||
raise Exception("Matrix access_token missing")
|
||||
base = _normalize_homeserver(str(homeserver))
|
||||
if not base:
|
||||
raise Exception("Matrix homeserver missing")
|
||||
return base, str(access_token)
|
||||
|
||||
def list_joined_room_ids(self) -> List[str]:
|
||||
"""Return joined room IDs for the current user.
|
||||
|
||||
Uses `GET /_matrix/client/v3/joined_rooms`.
|
||||
"""
|
||||
base, token = self._get_homeserver_and_token()
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}"
|
||||
}
|
||||
resp = get_requests_session().get(
|
||||
f"{base}/_matrix/client/v3/joined_rooms",
|
||||
headers=headers,
|
||||
timeout=10
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
raise Exception(f"Matrix joined_rooms failed: {resp.text}")
|
||||
data = resp.json() or {}
|
||||
rooms = data.get("joined_rooms") or []
|
||||
out: List[str] = []
|
||||
for rid in rooms:
|
||||
if not isinstance(rid, str) or not rid.strip():
|
||||
continue
|
||||
out.append(rid.strip())
|
||||
return out
|
||||
|
||||
def list_rooms(self,
|
||||
*,
|
||||
room_ids: Optional[List[str]] = None) -> List[Dict[str,
|
||||
Any]]:
|
||||
"""Return joined rooms, optionally limited to a subset.
|
||||
|
||||
Performance note: room names require additional per-room HTTP requests.
|
||||
If `room_ids` is provided, only those rooms will have name lookups.
|
||||
"""
|
||||
base, token = self._get_homeserver_and_token()
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}"
|
||||
}
|
||||
|
||||
joined = self.list_joined_room_ids()
|
||||
if room_ids:
|
||||
allowed = {str(v).strip().casefold()
|
||||
for v in room_ids if str(v).strip()}
|
||||
if allowed:
|
||||
# Accept either full IDs (!id:hs) or short IDs (!id).
|
||||
def _is_allowed(rid: str) -> bool:
|
||||
r = str(rid or "").strip()
|
||||
if not r:
|
||||
return False
|
||||
rc = r.casefold()
|
||||
if rc in allowed:
|
||||
return True
|
||||
short = r.split(":", 1)[0].strip().casefold()
|
||||
return bool(short) and short in allowed
|
||||
|
||||
joined = [rid for rid in joined if _is_allowed(rid)]
|
||||
|
||||
out: List[Dict[str, Any]] = []
|
||||
for room_id in joined:
|
||||
name = ""
|
||||
# Best-effort room name lookup (safe to fail).
|
||||
try:
|
||||
encoded = quote(room_id, safe="")
|
||||
name_resp = get_requests_session().get(
|
||||
f"{base}/_matrix/client/v3/rooms/{encoded}/state/m.room.name",
|
||||
headers=headers,
|
||||
timeout=5,
|
||||
)
|
||||
if name_resp.status_code == 200:
|
||||
payload = name_resp.json() or {}
|
||||
maybe = payload.get("name")
|
||||
if isinstance(maybe, str):
|
||||
name = maybe
|
||||
except Exception:
|
||||
pass
|
||||
out.append({
|
||||
"room_id": room_id,
|
||||
"name": name
|
||||
})
|
||||
return out
|
||||
|
||||
def upload_to_room(self, file_path: str, room_id: str, **kwargs: Any) -> str:
|
||||
"""Upload a file and send it to a specific room."""
|
||||
from SYS.models import ProgressFileReader
|
||||
|
||||
path = Path(file_path)
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
if not room_id:
|
||||
raise Exception("Matrix room_id missing")
|
||||
|
||||
base, token = self._get_homeserver_and_token()
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/octet-stream",
|
||||
}
|
||||
|
||||
mime_type, msgtype = _classify_matrix_upload(
|
||||
path, explicit_mime_type=kwargs.get("mime_type")
|
||||
)
|
||||
headers["Content-Type"] = mime_type
|
||||
|
||||
filename = path.name
|
||||
|
||||
# Upload media
|
||||
upload_url = f"{base}/_matrix/media/v3/upload"
|
||||
with open(path, "rb") as handle:
|
||||
wrapped = ProgressFileReader(
|
||||
handle,
|
||||
total_bytes=int(path.stat().st_size),
|
||||
label="upload"
|
||||
)
|
||||
resp = get_requests_session().post(
|
||||
upload_url,
|
||||
headers=headers,
|
||||
data=wrapped,
|
||||
params={
|
||||
"filename": filename
|
||||
}
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
raise Exception(f"Matrix upload failed: {resp.text}")
|
||||
content_uri = (resp.json() or {}).get("content_uri")
|
||||
if not content_uri:
|
||||
raise Exception("No content_uri returned")
|
||||
|
||||
# Build a fragment-free URL suitable for storage backends.
|
||||
# `matrix.to` links use fragments (`#/...`) which some backends normalize away.
|
||||
download_url_for_store = ""
|
||||
try:
|
||||
curi = str(content_uri or "").strip()
|
||||
if curi.startswith("mxc://"):
|
||||
rest = curi[len("mxc://"):]
|
||||
if "/" in rest:
|
||||
server_name, media_id = rest.split("/", 1)
|
||||
server_name = str(server_name).strip()
|
||||
media_id = str(media_id).strip()
|
||||
if server_name and media_id:
|
||||
download_url_for_store = f"{base}/_matrix/media/v3/download/{quote(server_name, safe='')}/{quote(media_id, safe='')}"
|
||||
except Exception:
|
||||
download_url_for_store = ""
|
||||
|
||||
info = _build_matrix_media_info(path, mime_type=mime_type, msgtype=msgtype)
|
||||
payload = {
|
||||
"msgtype": msgtype,
|
||||
"body": filename,
|
||||
"url": content_uri,
|
||||
"info": info
|
||||
}
|
||||
|
||||
# Correct Matrix client API send endpoint requires a transaction ID.
|
||||
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
|
||||
encoded_room = quote(str(room_id), safe="")
|
||||
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
|
||||
send_headers = {
|
||||
"Authorization": f"Bearer {token}"
|
||||
}
|
||||
send_resp = get_requests_session().put(send_url, headers=send_headers, json=payload)
|
||||
if send_resp.status_code != 200:
|
||||
raise Exception(f"Matrix send message failed: {send_resp.text}")
|
||||
|
||||
event_id = (send_resp.json() or {}).get("event_id")
|
||||
link = (
|
||||
f"https://matrix.to/#/{room_id}/{event_id}"
|
||||
if event_id else f"https://matrix.to/#/{room_id}"
|
||||
)
|
||||
|
||||
# Optional: if a PipeObject is provided and it already has store+hash,
|
||||
# attach the uploaded URL back to the stored file.
|
||||
try:
|
||||
pipe_obj = kwargs.get("pipe_obj")
|
||||
if pipe_obj is not None:
|
||||
from Store import Store
|
||||
|
||||
# Prefer the direct media download URL for storage backends.
|
||||
Store(
|
||||
self.config,
|
||||
suppress_debug=True
|
||||
).try_add_url_for_pipe_object(
|
||||
pipe_obj,
|
||||
download_url_for_store or link,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return link
|
||||
|
||||
def send_text_to_room(self, text: str, room_id: str) -> str:
|
||||
"""Send a plain text message to a specific room."""
|
||||
message = str(text or "").strip()
|
||||
if not message:
|
||||
return ""
|
||||
if not room_id:
|
||||
raise Exception("Matrix room_id missing")
|
||||
|
||||
base, token = self._get_homeserver_and_token()
|
||||
encoded_room = quote(str(room_id), safe="")
|
||||
txn_id = f"mm_{int(time.time())}_{uuid.uuid4().hex[:8]}"
|
||||
send_url = f"{base}/_matrix/client/v3/rooms/{encoded_room}/send/m.room.message/{txn_id}"
|
||||
send_headers = {
|
||||
"Authorization": f"Bearer {token}"
|
||||
}
|
||||
payload = {
|
||||
"msgtype": "m.text",
|
||||
"body": message
|
||||
}
|
||||
send_resp = get_requests_session().put(send_url, headers=send_headers, json=payload)
|
||||
if send_resp.status_code != 200:
|
||||
raise Exception(f"Matrix send text failed: {send_resp.text}")
|
||||
|
||||
event_id = (send_resp.json() or {}).get("event_id")
|
||||
return (
|
||||
f"https://matrix.to/#/{room_id}/{event_id}"
|
||||
if event_id else f"https://matrix.to/#/{room_id}"
|
||||
)
|
||||
|
||||
def upload(self, file_path: str, **kwargs: Any) -> str:
|
||||
matrix_conf = self.config.get("provider",
|
||||
{}).get("matrix",
|
||||
{})
|
||||
room_id = matrix_conf.get("room_id")
|
||||
if not room_id:
|
||||
raise Exception("Matrix room_id missing")
|
||||
return self.upload_to_room(file_path, str(room_id))
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
*,
|
||||
ctx: Any,
|
||||
stage_is_last: bool = True,
|
||||
**_kwargs: Any
|
||||
) -> bool:
|
||||
"""Handle Matrix room selection via `@N`.
|
||||
|
||||
If the CLI has a pending upload stash, selecting a room triggers an upload.
|
||||
"""
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
pending = None
|
||||
try:
|
||||
pending = ctx.load_value("matrix_pending_uploads", default=None)
|
||||
except Exception:
|
||||
pending = None
|
||||
|
||||
pending_list = list(pending) if isinstance(pending, list) else []
|
||||
if not pending_list:
|
||||
return False
|
||||
|
||||
room_ids: List[str] = []
|
||||
for item in selected_items or []:
|
||||
rid = None
|
||||
if isinstance(item, dict):
|
||||
rid = item.get("room_id") or item.get("id")
|
||||
else:
|
||||
rid = getattr(item, "room_id", None) or getattr(item, "id", None)
|
||||
if rid and str(rid).strip():
|
||||
room_ids.append(str(rid).strip())
|
||||
|
||||
if not room_ids:
|
||||
print("No Matrix room selected\n")
|
||||
return True
|
||||
|
||||
any_failed = False
|
||||
for room_id in room_ids:
|
||||
for payload in pending_list:
|
||||
try:
|
||||
file_path = ""
|
||||
delete_after = False
|
||||
pipe_obj = None
|
||||
if isinstance(payload, dict):
|
||||
file_path = str(payload.get("path") or "")
|
||||
delete_after = bool(payload.get("delete_after", False))
|
||||
pipe_obj = payload.get("pipe_obj")
|
||||
else:
|
||||
file_path = str(getattr(payload, "path", "") or "")
|
||||
if not file_path:
|
||||
any_failed = True
|
||||
continue
|
||||
|
||||
media_path = Path(file_path)
|
||||
if not media_path.exists():
|
||||
any_failed = True
|
||||
print(f"Matrix upload file missing: {file_path}")
|
||||
continue
|
||||
|
||||
link = self.upload_to_room(
|
||||
str(media_path),
|
||||
str(room_id),
|
||||
pipe_obj=pipe_obj
|
||||
)
|
||||
if link:
|
||||
print(link)
|
||||
|
||||
if delete_after:
|
||||
try:
|
||||
media_path.unlink(missing_ok=True) # type: ignore[arg-type]
|
||||
except TypeError:
|
||||
try:
|
||||
if media_path.exists():
|
||||
media_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
any_failed = True
|
||||
print(f"Matrix upload failed: {exc}")
|
||||
|
||||
try:
|
||||
ctx.store_value("matrix_pending_uploads", [])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if any_failed:
|
||||
print("\nOne or more Matrix uploads failed\n")
|
||||
return True
|
||||
|
||||
|
||||
# Minimal provider registration for the new table system
|
||||
try:
|
||||
from SYS.result_table_adapters import register_plugin
|
||||
from SYS.result_table_api import ResultModel, ColumnSpec, metadata_column, title_column
|
||||
|
||||
def _convert_search_result_to_model(sr: Any) -> ResultModel:
|
||||
"""Convert Matrix SearchResult to ResultModel for strict table display."""
|
||||
d = sr.to_dict() if hasattr(sr, "to_dict") else (sr if isinstance(sr, dict) else {"title": getattr(sr, "title", str(sr))})
|
||||
title = d.get("title") or ""
|
||||
path = d.get("path") or None
|
||||
columns = d.get("columns") or getattr(sr, "columns", None) or []
|
||||
|
||||
# Extract metadata from columns and full_metadata
|
||||
metadata: Dict[str, Any] = {}
|
||||
for name, value in columns:
|
||||
key = str(name or "").strip().lower()
|
||||
if key in ("room_id", "room_name", "id", "name"):
|
||||
metadata[key] = value
|
||||
|
||||
try:
|
||||
fm = d.get("full_metadata") or {}
|
||||
if isinstance(fm, dict):
|
||||
for k, v in fm.items():
|
||||
metadata[str(k).strip().lower()] = v
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return ResultModel(
|
||||
title=str(title),
|
||||
path=str(path) if path else None,
|
||||
ext=None,
|
||||
size_bytes=None,
|
||||
metadata=metadata,
|
||||
source="matrix"
|
||||
)
|
||||
|
||||
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
|
||||
"""Adapter to convert SearchResults to ResultModels."""
|
||||
for it in items:
|
||||
try:
|
||||
yield _convert_search_result_to_model(it)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
def _has_metadata(rows: List[ResultModel], key: str) -> bool:
|
||||
"""Check if any row has a given metadata key with a non-empty value."""
|
||||
for row in rows:
|
||||
md = row.metadata or {}
|
||||
if key in md:
|
||||
val = md[key]
|
||||
if val is None:
|
||||
continue
|
||||
if isinstance(val, str) and not val.strip():
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
|
||||
"""Build column specifications from available metadata in rows."""
|
||||
cols = [title_column()]
|
||||
if _has_metadata(rows, "room_id"):
|
||||
cols.append(metadata_column("room_id", "Room ID"))
|
||||
if _has_metadata(rows, "room_name"):
|
||||
cols.append(metadata_column("room_name", "Name"))
|
||||
return cols
|
||||
|
||||
def _selection_fn(row: ResultModel) -> List[str]:
|
||||
"""Return selection args for @N expansion and room selection.
|
||||
|
||||
Uses explicit -room-id flag to identify the selected room for file uploads.
|
||||
"""
|
||||
metadata = row.metadata or {}
|
||||
|
||||
# Check for explicit selection args first
|
||||
args = metadata.get("_selection_args") or metadata.get("selection_args")
|
||||
if isinstance(args, (list, tuple)) and args:
|
||||
return [str(x) for x in args if x is not None]
|
||||
|
||||
# Fallback to room_id
|
||||
room_id = metadata.get("room_id")
|
||||
if room_id:
|
||||
return ["-room-id", str(room_id)]
|
||||
|
||||
return ["-title", row.title or ""]
|
||||
|
||||
register_plugin(
|
||||
"matrix",
|
||||
_adapter,
|
||||
columns=_columns_factory,
|
||||
selection_fn=_selection_fn,
|
||||
metadata={"description": "Matrix room provider with file uploads"},
|
||||
)
|
||||
except Exception:
|
||||
# best-effort registration
|
||||
pass
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,459 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import log
|
||||
from SYS.utils import format_bytes
|
||||
|
||||
|
||||
def _get_podcastindex_credentials(config: Dict[str, Any]) -> Tuple[str, str]:
|
||||
provider = config.get("provider")
|
||||
if not isinstance(provider, dict):
|
||||
return "", ""
|
||||
|
||||
entry = provider.get("podcastindex")
|
||||
if not isinstance(entry, dict):
|
||||
return "", ""
|
||||
|
||||
key = entry.get("key") or entry.get("Key") or entry.get("api_key")
|
||||
secret = entry.get("secret") or entry.get("Secret") or entry.get("api_secret")
|
||||
|
||||
key_str = str(key or "").strip()
|
||||
secret_str = str(secret or "").strip()
|
||||
return key_str, secret_str
|
||||
|
||||
|
||||
class PodcastIndex(Provider):
|
||||
"""Search provider for PodcastIndex.org."""
|
||||
|
||||
TABLE_AUTO_STAGES = {
|
||||
"podcastindex": ["download-file"],
|
||||
"podcastindex.episodes": ["download-file"],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _format_duration(value: Any) -> str:
|
||||
def _to_seconds(v: Any) -> Optional[int]:
|
||||
if v is None:
|
||||
return None
|
||||
if isinstance(v, (int, float)):
|
||||
try:
|
||||
return max(0, int(v))
|
||||
except Exception:
|
||||
return None
|
||||
if isinstance(v, str):
|
||||
text = v.strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.isdigit():
|
||||
try:
|
||||
return max(0, int(text))
|
||||
except Exception:
|
||||
return None
|
||||
# Accept common clock formats too.
|
||||
if ":" in text:
|
||||
parts = [p.strip() for p in text.split(":") if p.strip()]
|
||||
if len(parts) == 2 and all(p.isdigit() for p in parts):
|
||||
m, s = parts
|
||||
return max(0, int(m) * 60 + int(s))
|
||||
if len(parts) == 3 and all(p.isdigit() for p in parts):
|
||||
h, m, s = parts
|
||||
return max(0, int(h) * 3600 + int(m) * 60 + int(s))
|
||||
return None
|
||||
|
||||
total = _to_seconds(value)
|
||||
if total is None:
|
||||
return "" if value is None else str(value).strip()
|
||||
|
||||
h = total // 3600
|
||||
m = (total % 3600) // 60
|
||||
s = total % 60
|
||||
if h > 0:
|
||||
return f"{h:d}h{m:d}m{s:d}s"
|
||||
if m > 0:
|
||||
return f"{m:d}m{s:d}s"
|
||||
return f"{s:d}s"
|
||||
|
||||
@staticmethod
|
||||
def _format_bytes(value: Any) -> str:
|
||||
"""Format bytes using centralized utility."""
|
||||
return format_bytes(value)
|
||||
|
||||
@staticmethod
|
||||
def _format_date_from_epoch(value: Any) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
try:
|
||||
import datetime
|
||||
|
||||
ts = int(value)
|
||||
if ts <= 0:
|
||||
return ""
|
||||
return datetime.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _extract_episode_categories(ep: Dict[str, Any]) -> List[str]:
|
||||
cats = ep.get("categories") or ep.get("category")
|
||||
out: List[str] = []
|
||||
|
||||
if isinstance(cats, dict):
|
||||
for v in cats.values():
|
||||
if isinstance(v, str):
|
||||
t = v.strip()
|
||||
if t:
|
||||
out.append(t)
|
||||
elif isinstance(cats, list):
|
||||
for v in cats:
|
||||
if isinstance(v, str):
|
||||
t = v.strip()
|
||||
if t:
|
||||
out.append(t)
|
||||
elif isinstance(cats, str):
|
||||
t = cats.strip()
|
||||
if t:
|
||||
out.append(t)
|
||||
|
||||
# Keep the table readable.
|
||||
dedup: List[str] = []
|
||||
seen: set[str] = set()
|
||||
for t in out:
|
||||
low = t.lower()
|
||||
if low in seen:
|
||||
continue
|
||||
seen.add(low)
|
||||
dedup.append(t)
|
||||
return dedup
|
||||
|
||||
@staticmethod
|
||||
def _looks_like_episode(item: Any) -> bool:
|
||||
if not isinstance(item, dict):
|
||||
return False
|
||||
md = item.get("full_metadata")
|
||||
if not isinstance(md, dict):
|
||||
return False
|
||||
enc = md.get("enclosureUrl") or md.get("enclosure_url")
|
||||
if isinstance(enc, str) and enc.strip().startswith("http"):
|
||||
return True
|
||||
# Some pipelines may flatten episode fields.
|
||||
enc2 = item.get("enclosureUrl") or item.get("url")
|
||||
return isinstance(enc2, str) and enc2.strip().startswith("http")
|
||||
|
||||
@staticmethod
|
||||
def _compute_sha256(filepath: Path) -> str:
|
||||
h = hashlib.sha256()
|
||||
with open(filepath, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(1024 * 1024), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
*,
|
||||
ctx: Any,
|
||||
stage_is_last: bool = True,
|
||||
**_kwargs: Any,
|
||||
) -> bool:
|
||||
if not stage_is_last:
|
||||
return False
|
||||
if not selected_items:
|
||||
return False
|
||||
|
||||
# Episode selection (terminal): download episodes to temp/output dir.
|
||||
if all(self._looks_like_episode(x) for x in selected_items):
|
||||
return self._handle_episode_download_selection(selected_items, ctx)
|
||||
|
||||
# Podcast selection (terminal): expand into episode list.
|
||||
return self._handle_podcast_expand_selection(selected_items, ctx)
|
||||
|
||||
def _handle_podcast_expand_selection(self, selected_items: List[Any], ctx: Any) -> bool:
|
||||
chosen: List[Dict[str, Any]] = [x for x in (selected_items or []) if isinstance(x, dict)]
|
||||
if not chosen:
|
||||
return False
|
||||
|
||||
key, secret = _get_podcastindex_credentials(self.config or {})
|
||||
if not key or not secret:
|
||||
return False
|
||||
|
||||
# Resolve feed id/url from the selected podcast row.
|
||||
item0 = chosen[0]
|
||||
feed_md = item0.get("full_metadata") if isinstance(item0.get("full_metadata"), dict) else {}
|
||||
feed_title = str(item0.get("title") or feed_md.get("title") or "Podcast").strip() or "Podcast"
|
||||
feed_id = None
|
||||
try:
|
||||
feed_id = int(feed_md.get("id")) if feed_md.get("id") is not None else None
|
||||
except Exception:
|
||||
feed_id = None
|
||||
feed_url = str(feed_md.get("url") or item0.get("path") or "").strip()
|
||||
|
||||
try:
|
||||
from API.podcastindex import PodcastIndexClient
|
||||
|
||||
client = PodcastIndexClient(key, secret)
|
||||
if feed_id:
|
||||
episodes = client.episodes_byfeedid(feed_id, max_results=200)
|
||||
else:
|
||||
episodes = client.episodes_byfeedurl(feed_url, max_results=200)
|
||||
except Exception as exc:
|
||||
log(f"[podcastindex] episode lookup failed: {exc}", file=sys.stderr)
|
||||
return True
|
||||
|
||||
try:
|
||||
from SYS.result_table import Table
|
||||
from SYS.rich_display import stdout_console
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
table = Table(f"PodcastIndex Episodes: {feed_title}")._perseverance(True)
|
||||
table.set_table("podcastindex.episodes")
|
||||
try:
|
||||
table.set_value_case("preserve")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
results_payload: List[Dict[str, Any]] = []
|
||||
for ep in episodes or []:
|
||||
if not isinstance(ep, dict):
|
||||
continue
|
||||
|
||||
ep_title = str(ep.get("title") or "").strip() or "Unknown"
|
||||
enc_url = str(ep.get("enclosureUrl") or "").strip()
|
||||
page_url = str(ep.get("link") or "").strip()
|
||||
audio_url = enc_url or page_url
|
||||
if not audio_url:
|
||||
continue
|
||||
|
||||
duration = ep.get("duration")
|
||||
size_bytes = ep.get("enclosureLength") or ep.get("enclosure_length")
|
||||
published = ep.get("datePublished") or ep.get("datePublishedPretty")
|
||||
published_text = self._format_date_from_epoch(published) or str(published or "").strip()
|
||||
|
||||
sr = SearchResult(
|
||||
table="podcastindex",
|
||||
title=ep_title,
|
||||
path=audio_url,
|
||||
detail=feed_title,
|
||||
media_kind="audio",
|
||||
size_bytes=int(size_bytes) if str(size_bytes or "").isdigit() else None,
|
||||
columns=[
|
||||
("Title", ep_title),
|
||||
("Date", published_text),
|
||||
("Duration", self._format_duration(duration)),
|
||||
("Size", self._format_bytes(size_bytes)),
|
||||
("Url", audio_url),
|
||||
],
|
||||
full_metadata={
|
||||
**dict(ep),
|
||||
"_feed": dict(feed_md) if isinstance(feed_md, dict) else {},
|
||||
},
|
||||
)
|
||||
|
||||
table.add_result(sr)
|
||||
results_payload.append(sr.to_dict())
|
||||
|
||||
try:
|
||||
ctx.set_last_result_table(table, results_payload)
|
||||
ctx.set_current_stage_table(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
stdout_console().print()
|
||||
stdout_console().print(table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
def _handle_episode_download_selection(self, selected_items: List[Any], ctx: Any) -> bool:
|
||||
key, secret = _get_podcastindex_credentials(self.config or {})
|
||||
if not key or not secret:
|
||||
return False
|
||||
|
||||
try:
|
||||
from SYS.config import resolve_output_dir
|
||||
output_dir = resolve_output_dir(self.config or {})
|
||||
except Exception:
|
||||
import tempfile
|
||||
output_dir = Path(tempfile.gettempdir())
|
||||
|
||||
try:
|
||||
output_dir = Path(output_dir).expanduser()
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from API.HTTP import _download_direct_file
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
payloads: List[Dict[str, Any]] = []
|
||||
downloaded = 0
|
||||
|
||||
for item in selected_items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
md = item.get("full_metadata") if isinstance(item.get("full_metadata"), dict) else {}
|
||||
enc_url = str(md.get("enclosureUrl") or item.get("url") or item.get("path") or "").strip()
|
||||
if not enc_url or not enc_url.startswith("http"):
|
||||
continue
|
||||
|
||||
title_hint = str(item.get("title") or md.get("title") or "episode").strip() or "episode"
|
||||
|
||||
try:
|
||||
result_obj = _download_direct_file(
|
||||
enc_url,
|
||||
Path(output_dir),
|
||||
quiet=False,
|
||||
suggested_filename=title_hint,
|
||||
)
|
||||
except Exception as exc:
|
||||
log(f"[podcastindex] download failed: {exc}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
downloaded_path = None
|
||||
try:
|
||||
downloaded_path = getattr(result_obj, "filepath", None)
|
||||
except Exception:
|
||||
downloaded_path = None
|
||||
if downloaded_path is None:
|
||||
try:
|
||||
downloaded_path = getattr(result_obj, "file_path", None)
|
||||
except Exception:
|
||||
downloaded_path = None
|
||||
if downloaded_path is None:
|
||||
try:
|
||||
downloaded_path = getattr(result_obj, "path", None)
|
||||
except Exception:
|
||||
downloaded_path = None
|
||||
|
||||
try:
|
||||
local_path = Path(str(downloaded_path))
|
||||
except Exception:
|
||||
local_path = None
|
||||
if local_path is None or not local_path.exists():
|
||||
continue
|
||||
|
||||
sha256 = ""
|
||||
try:
|
||||
sha256 = self._compute_sha256(local_path)
|
||||
except Exception:
|
||||
sha256 = ""
|
||||
|
||||
tags: List[str] = []
|
||||
tags.append(f"title:{title_hint}")
|
||||
cats = self._extract_episode_categories(md) if isinstance(md, dict) else []
|
||||
for c in cats[:10]:
|
||||
tags.append(f"tag:{c}")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"path": str(local_path),
|
||||
"hash": sha256,
|
||||
"title": title_hint,
|
||||
"action": "provider:podcastindex.selector",
|
||||
"download_mode": "file",
|
||||
"store": "local",
|
||||
"media_kind": "audio",
|
||||
"tag": tags,
|
||||
"provider": "podcastindex",
|
||||
"url": enc_url,
|
||||
}
|
||||
if isinstance(md, dict) and md:
|
||||
payload["full_metadata"] = dict(md)
|
||||
|
||||
payloads.append(payload)
|
||||
downloaded += 1
|
||||
|
||||
try:
|
||||
if payloads and hasattr(ctx, "set_last_result_items_only"):
|
||||
ctx.set_last_result_items_only(payloads)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if downloaded <= 0:
|
||||
return True
|
||||
|
||||
try:
|
||||
from SYS.rich_display import stdout_console
|
||||
|
||||
stdout_console().print(f"Downloaded {downloaded} episode(s) -> {output_dir}")
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
def validate(self) -> bool:
|
||||
key, secret = _get_podcastindex_credentials(self.config or {})
|
||||
return bool(key and secret)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 10,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
_ = filters
|
||||
_ = kwargs
|
||||
|
||||
key, secret = _get_podcastindex_credentials(self.config or {})
|
||||
if not key or not secret:
|
||||
return []
|
||||
|
||||
try:
|
||||
from API.podcastindex import PodcastIndexClient
|
||||
|
||||
client = PodcastIndexClient(key, secret)
|
||||
feeds = client.search_byterm(query, max_results=limit)
|
||||
except Exception as exc:
|
||||
log(f"[podcastindex] search failed: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
results: List[SearchResult] = []
|
||||
for feed in feeds[: max(0, int(limit))]:
|
||||
if not isinstance(feed, dict):
|
||||
continue
|
||||
|
||||
title = str(feed.get("title") or "").strip() or "Unknown"
|
||||
author = str(feed.get("author") or feed.get("ownerName") or "").strip()
|
||||
feed_url = str(feed.get("url") or "").strip()
|
||||
site_url = str(feed.get("link") or "").strip()
|
||||
language = str(feed.get("language") or "").strip()
|
||||
|
||||
episode_count_val = feed.get("episodeCount")
|
||||
episode_count = ""
|
||||
if episode_count_val is not None:
|
||||
try:
|
||||
episode_count = str(int(episode_count_val))
|
||||
except Exception:
|
||||
episode_count = str(episode_count_val).strip()
|
||||
|
||||
path = feed_url or site_url or str(feed.get("id") or "").strip()
|
||||
|
||||
columns = [
|
||||
("Title", title),
|
||||
("Author", author),
|
||||
("Episodes", episode_count),
|
||||
("Lang", language),
|
||||
("Feed", feed_url),
|
||||
]
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="podcastindex",
|
||||
title=title,
|
||||
path=path,
|
||||
detail=author,
|
||||
media_kind="audio",
|
||||
columns=columns,
|
||||
full_metadata=dict(feed),
|
||||
)
|
||||
)
|
||||
|
||||
return results
|
||||
@@ -1,888 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import log, debug
|
||||
from SYS.models import ProgressBar
|
||||
|
||||
_SOULSEEK_NOISE_SUBSTRINGS = (
|
||||
"search reply ticket does not match any search request",
|
||||
"failed to receive transfer ticket on file connection",
|
||||
"aioslsk.exceptions.ConnectionReadError",
|
||||
)
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
|
||||
async def _suppress_aioslsk_asyncio_task_noise() -> Any:
|
||||
"""Suppress non-fatal aioslsk task exceptions emitted via asyncio's loop handler.
|
||||
|
||||
aioslsk may spawn background tasks (e.g. direct peer connection attempts) that
|
||||
can fail with ConnectionFailedError. These are often expected and should not
|
||||
end a successful download with a scary "Task exception was never retrieved"
|
||||
traceback.
|
||||
|
||||
We only swallow those specific cases and delegate everything else to the
|
||||
previous/default handler.
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
# Not in an event loop.
|
||||
yield
|
||||
return
|
||||
|
||||
previous_handler = loop.get_exception_handler()
|
||||
|
||||
def _handler(loop: asyncio.AbstractEventLoop, context: Dict[str, Any]) -> None:
|
||||
try:
|
||||
exc = context.get("exception")
|
||||
msg = str(context.get("message") or "")
|
||||
|
||||
if exc is not None:
|
||||
# Suppress internal asyncio AssertionError on Windows teardown (Proactor loop)
|
||||
if isinstance(exc, AssertionError):
|
||||
m_lower = msg.lower()
|
||||
if "proactor" in m_lower or "_start_serving" in m_lower or "self._sockets is not None" in str(exc):
|
||||
return
|
||||
|
||||
# Only suppress un-retrieved task exceptions from aioslsk connection failures.
|
||||
if msg == "Task exception was never retrieved":
|
||||
cls = getattr(exc, "__class__", None)
|
||||
name = getattr(cls, "__name__", "")
|
||||
mod = getattr(cls, "__module__", "")
|
||||
|
||||
# Suppress ConnectionFailedError from aioslsk
|
||||
if name == "ConnectionFailedError" and str(mod).startswith("aioslsk"):
|
||||
return
|
||||
except Exception:
|
||||
# If our filter logic fails, fall through to default handling.
|
||||
pass
|
||||
|
||||
if previous_handler is not None:
|
||||
previous_handler(loop, context)
|
||||
else:
|
||||
loop.default_exception_handler(context)
|
||||
|
||||
loop.set_exception_handler(_handler)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
try:
|
||||
loop.set_exception_handler(previous_handler)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _configure_aioslsk_logging() -> None:
|
||||
"""Reduce aioslsk internal log noise.
|
||||
|
||||
Some aioslsk components emit non-fatal warnings/errors during high churn
|
||||
(search + download + disconnect). We keep our own debug output, but push
|
||||
aioslsk to ERROR and stop propagation so it doesn't spam the CLI.
|
||||
"""
|
||||
for name in (
|
||||
"aioslsk",
|
||||
"aioslsk.network",
|
||||
"aioslsk.search",
|
||||
"aioslsk.transfer",
|
||||
"aioslsk.transfer.manager",
|
||||
):
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(logging.ERROR)
|
||||
logger.propagate = False
|
||||
|
||||
|
||||
class _LineFilterStream(io.TextIOBase):
|
||||
"""A minimal stream wrapper that filters known noisy lines.
|
||||
|
||||
It also suppresses entire traceback blocks when they contain known non-fatal
|
||||
aioslsk noise (e.g. ConnectionReadError during peer init).
|
||||
"""
|
||||
|
||||
def __init__(self, underlying: Any, suppress_substrings: tuple[str, ...]):
|
||||
super().__init__()
|
||||
self._underlying = underlying
|
||||
self._suppress = suppress_substrings
|
||||
self._buf = ""
|
||||
self._in_tb = False
|
||||
self._tb_lines: list[str] = []
|
||||
self._tb_suppress = False
|
||||
|
||||
def writable(self) -> bool: # pragma: no cover
|
||||
return True
|
||||
|
||||
def _should_suppress_line(self, line: str) -> bool:
|
||||
return any(sub in line for sub in self._suppress)
|
||||
|
||||
def _flush_tb(self) -> None:
|
||||
if not self._tb_lines:
|
||||
return
|
||||
if not self._tb_suppress:
|
||||
for l in self._tb_lines:
|
||||
try:
|
||||
self._underlying.write(l + "\n")
|
||||
except Exception:
|
||||
pass
|
||||
self._tb_lines = []
|
||||
self._tb_suppress = False
|
||||
self._in_tb = False
|
||||
|
||||
def write(self, s: str) -> int:
|
||||
self._buf += str(s)
|
||||
while "\n" in self._buf:
|
||||
line, self._buf = self._buf.split("\n", 1)
|
||||
self._handle_line(line)
|
||||
return len(s)
|
||||
|
||||
def _handle_line(self, line: str) -> None:
|
||||
# Start capturing tracebacks so we can suppress the whole block if it matches.
|
||||
if not self._in_tb and line.startswith("Traceback (most recent call last):"):
|
||||
self._in_tb = True
|
||||
self._tb_lines = [line]
|
||||
self._tb_suppress = False
|
||||
return
|
||||
|
||||
if self._in_tb:
|
||||
self._tb_lines.append(line)
|
||||
if self._should_suppress_line(line):
|
||||
self._tb_suppress = True
|
||||
# End traceback block on blank line.
|
||||
if line.strip() == "":
|
||||
self._flush_tb()
|
||||
return
|
||||
|
||||
# Non-traceback line
|
||||
if self._should_suppress_line(line):
|
||||
return
|
||||
try:
|
||||
self._underlying.write(line + "\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def flush(self) -> None:
|
||||
# Flush any pending traceback block.
|
||||
if self._in_tb:
|
||||
# If the traceback ends without a trailing blank line, decide here.
|
||||
self._flush_tb()
|
||||
if self._buf:
|
||||
line = self._buf
|
||||
self._buf = ""
|
||||
if not self._should_suppress_line(line):
|
||||
try:
|
||||
self._underlying.write(line)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._underlying.flush()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _suppress_aioslsk_noise() -> Any:
|
||||
"""Temporarily suppress known aioslsk noise printed to stdout/stderr.
|
||||
|
||||
Opt out by setting DOWNLOW_SOULSEEK_VERBOSE=1.
|
||||
"""
|
||||
if os.environ.get("DOWNLOW_SOULSEEK_VERBOSE"):
|
||||
yield
|
||||
return
|
||||
|
||||
_configure_aioslsk_logging()
|
||||
old_out, old_err = sys.stdout, sys.stderr
|
||||
sys.stdout = _LineFilterStream(old_out, _SOULSEEK_NOISE_SUBSTRINGS)
|
||||
sys.stderr = _LineFilterStream(old_err, _SOULSEEK_NOISE_SUBSTRINGS)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
try:
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
except Exception:
|
||||
pass
|
||||
sys.stdout, sys.stderr = old_out, old_err
|
||||
|
||||
|
||||
class Soulseek(Provider):
|
||||
|
||||
TABLE_AUTO_STAGES = {
|
||||
"soulseek": ["download-file", "-plugin", "soulseek"],
|
||||
}
|
||||
"""Search provider for Soulseek P2P network."""
|
||||
|
||||
def selector(
|
||||
self,
|
||||
selected_items: List[Any],
|
||||
*,
|
||||
ctx: Any,
|
||||
stage_is_last: bool = True,
|
||||
**_kwargs: Any,
|
||||
) -> bool:
|
||||
"""Handle Soulseek selection.
|
||||
|
||||
Currently defaults to download-file via TABLE_AUTO_STAGES, but this
|
||||
hook allows for future 'Browse User' or 'Browse Folder' drill-down.
|
||||
"""
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
# If we wanted to handle drill-down (like Tidal.py) we would:
|
||||
# 1. Fetch more data (e.g. user shares)
|
||||
# 2. Create a new ResultTable
|
||||
# 3. ctx.set_current_stage_table(new_table)
|
||||
# 4. return True
|
||||
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def config_schema(cls) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"key": "username",
|
||||
"label": "Soulseek Username",
|
||||
"default": "",
|
||||
"required": True
|
||||
},
|
||||
{
|
||||
"key": "password",
|
||||
"label": "Soulseek Password",
|
||||
"default": "",
|
||||
"required": True,
|
||||
"secret": True
|
||||
}
|
||||
]
|
||||
|
||||
MUSIC_EXTENSIONS = {
|
||||
".flac",
|
||||
".mp3",
|
||||
".m4a",
|
||||
".aac",
|
||||
".ogg",
|
||||
".opus",
|
||||
".wav",
|
||||
".alac",
|
||||
".wma",
|
||||
".ape",
|
||||
".aiff",
|
||||
".dsf",
|
||||
".dff",
|
||||
".wv",
|
||||
".tta",
|
||||
".tak",
|
||||
".ac3",
|
||||
".dts",
|
||||
}
|
||||
|
||||
# NOTE: These defaults preserve existing behavior.
|
||||
USERNAME = "asjhkjljhkjfdsd334"
|
||||
PASSWORD = "khhhg"
|
||||
DOWNLOAD_DIR = None
|
||||
MAX_WAIT_TRANSFER = 1200
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
try:
|
||||
from SYS.config import get_soulseek_username, get_soulseek_password
|
||||
|
||||
user = get_soulseek_username(self.config)
|
||||
pwd = get_soulseek_password(self.config)
|
||||
if user:
|
||||
Soulseek.USERNAME = user
|
||||
if pwd:
|
||||
Soulseek.PASSWORD = pwd
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
"""Download file from Soulseek."""
|
||||
|
||||
try:
|
||||
full_metadata = result.full_metadata or {}
|
||||
username = full_metadata.get("username")
|
||||
filename = full_metadata.get("filename") or result.path
|
||||
|
||||
if not username or not filename:
|
||||
# If we were invoked via generic download-file on a SearchResult
|
||||
# that has minimal data (e.g. from table selection), try to rescue it.
|
||||
if isinstance(result, SearchResult) and result.full_metadata:
|
||||
username = result.full_metadata.get("username")
|
||||
filename = result.full_metadata.get("filename")
|
||||
|
||||
if not username or not filename:
|
||||
log(
|
||||
f"[soulseek] Missing metadata for download: {result.title}",
|
||||
file=sys.stderr
|
||||
)
|
||||
return None
|
||||
|
||||
# Cast to str for Mypy
|
||||
username = str(username)
|
||||
filename = str(filename)
|
||||
|
||||
# Use tempfile directory as default if generic path elements were passed or None.
|
||||
if output_dir is None:
|
||||
import tempfile
|
||||
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
|
||||
else:
|
||||
target_dir = Path(output_dir)
|
||||
if str(target_dir) in (".", "downloads", "Downloads"):
|
||||
import tempfile
|
||||
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
|
||||
|
||||
target_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# This cmdlet stack is synchronous; use asyncio.run for clarity.
|
||||
return asyncio.run(
|
||||
download_soulseek_file(
|
||||
username=username,
|
||||
filename=filename,
|
||||
output_dir=target_dir,
|
||||
timeout=self.MAX_WAIT_TRANSFER,
|
||||
)
|
||||
)
|
||||
|
||||
except RuntimeError:
|
||||
# If we're already inside an event loop (e.g., TUI), fall back to a
|
||||
# dedicated loop in this thread.
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
# Re-resolve target_dir inside rescue block just in case
|
||||
if output_dir is None:
|
||||
import tempfile
|
||||
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
|
||||
else:
|
||||
target_dir = Path(output_dir)
|
||||
if str(target_dir) in (".", "downloads", "Downloads"):
|
||||
import tempfile
|
||||
target_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
|
||||
|
||||
asyncio.set_event_loop(loop)
|
||||
# Cast to str for Mypy
|
||||
username_str = str(username)
|
||||
filename_str = str(filename)
|
||||
return loop.run_until_complete(
|
||||
download_soulseek_file(
|
||||
username=username_str,
|
||||
filename=filename_str,
|
||||
output_dir=target_dir,
|
||||
timeout=self.MAX_WAIT_TRANSFER,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
loop.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[soulseek] Download error: {exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
async def perform_search(self,
|
||||
query: str,
|
||||
timeout: float = 9.0,
|
||||
limit: int = 50) -> List[Dict[str,
|
||||
Any]]:
|
||||
"""Perform async Soulseek search."""
|
||||
|
||||
from aioslsk.client import SoulSeekClient
|
||||
from aioslsk.settings import CredentialsSettings, Settings
|
||||
|
||||
# Removed legacy os.makedirs(self.DOWNLOAD_DIR) - specific commands handle output dirs.
|
||||
|
||||
settings = Settings(
|
||||
credentials=CredentialsSettings(
|
||||
username=self.USERNAME,
|
||||
password=self.PASSWORD
|
||||
)
|
||||
)
|
||||
client = SoulSeekClient(settings)
|
||||
|
||||
with _suppress_aioslsk_noise():
|
||||
async with _suppress_aioslsk_asyncio_task_noise():
|
||||
try:
|
||||
await client.start()
|
||||
await client.login()
|
||||
except Exception as exc:
|
||||
log(
|
||||
f"[soulseek] Login failed: {type(exc).__name__}: {exc}",
|
||||
file=sys.stderr
|
||||
)
|
||||
return []
|
||||
|
||||
try:
|
||||
search_request = await client.searches.search(query)
|
||||
await self._collect_results(search_request, timeout=timeout)
|
||||
return self._flatten_results(search_request)[:limit]
|
||||
except Exception as exc:
|
||||
log(
|
||||
f"[soulseek] Search error: {type(exc).__name__}: {exc}",
|
||||
file=sys.stderr
|
||||
)
|
||||
return []
|
||||
finally:
|
||||
# Best-effort: try to cancel/close the search request before stopping
|
||||
# the client to reduce stray reply spam.
|
||||
try:
|
||||
if "search_request" in locals() and search_request is not None:
|
||||
cancel = getattr(search_request, "cancel", None)
|
||||
if callable(cancel):
|
||||
maybe = cancel()
|
||||
if asyncio.iscoroutine(maybe):
|
||||
await maybe
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await client.stop()
|
||||
except Exception:
|
||||
pass
|
||||
# Give Proactor/Windows loop a moment to drain internal buffers after stop.
|
||||
try:
|
||||
await asyncio.sleep(0.2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _flatten_results(self, search_request: Any) -> List[dict]:
|
||||
flat: List[dict] = []
|
||||
for result in getattr(search_request, "results", []):
|
||||
username = getattr(result, "username", "?")
|
||||
|
||||
def _add(file_data: Any) -> None:
|
||||
flat.append({
|
||||
"file": file_data,
|
||||
"username": username,
|
||||
"filename": getattr(file_data, "filename", "?"),
|
||||
"size": getattr(file_data, "filesize", 0)
|
||||
})
|
||||
|
||||
for file_data in getattr(result, "shared_items", []):
|
||||
_add(file_data)
|
||||
|
||||
for file_data in getattr(result, "locked_results", []):
|
||||
_add(file_data)
|
||||
|
||||
return flat
|
||||
|
||||
async def _collect_results(
|
||||
self,
|
||||
search_request: Any,
|
||||
timeout: float = 75.0
|
||||
) -> None:
|
||||
end = time.time() + timeout
|
||||
last_count = 0
|
||||
while time.time() < end:
|
||||
current_count = len(getattr(search_request, "results", []))
|
||||
if current_count > last_count:
|
||||
debug(f"[soulseek] Got {current_count} result(s)...")
|
||||
last_count = current_count
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str,
|
||||
Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
filters = filters or {}
|
||||
|
||||
# Ensure temp download dir structure exists, but don't create legacy ./downloads here.
|
||||
import tempfile
|
||||
base_tmp = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
|
||||
base_tmp.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
flat_results = asyncio.run(
|
||||
self.perform_search(query,
|
||||
timeout=9.0,
|
||||
limit=limit)
|
||||
)
|
||||
if not flat_results:
|
||||
return []
|
||||
|
||||
music_results: List[dict] = []
|
||||
for item in flat_results:
|
||||
filename = item["filename"]
|
||||
ext = (
|
||||
"." + filename.rsplit(".",
|
||||
1)[-1].lower()
|
||||
) if "." in filename else ""
|
||||
if ext in self.MUSIC_EXTENSIONS:
|
||||
music_results.append(item)
|
||||
|
||||
if not music_results:
|
||||
return []
|
||||
|
||||
enriched_results: List[dict] = []
|
||||
for item in music_results:
|
||||
filename = item["filename"]
|
||||
ext = (
|
||||
"." + filename.rsplit(".",
|
||||
1)[-1].lower()
|
||||
) if "." in filename else ""
|
||||
|
||||
display_name = filename.replace("\\", "/").split("/")[-1]
|
||||
path_parts = filename.replace("\\", "/").split("/")
|
||||
artist = path_parts[-3] if len(path_parts) >= 3 else ""
|
||||
album = (
|
||||
path_parts[-2] if len(path_parts) >= 3 else
|
||||
(path_parts[-2] if len(path_parts) == 2 else "")
|
||||
)
|
||||
|
||||
base_name = display_name.rsplit(
|
||||
".",
|
||||
1
|
||||
)[0] if "." in display_name else display_name
|
||||
track_num = ""
|
||||
title = base_name
|
||||
filename_artist = ""
|
||||
|
||||
match = re.match(r"^(\d{1,3})\s*[\.\-]?\s+(.+)$", base_name)
|
||||
if match:
|
||||
track_num = match.group(1)
|
||||
rest = match.group(2)
|
||||
if " - " in rest:
|
||||
filename_artist, title = rest.split(" - ", 1)
|
||||
else:
|
||||
title = rest
|
||||
|
||||
if filename_artist:
|
||||
artist = filename_artist
|
||||
|
||||
enriched_results.append(
|
||||
{
|
||||
**item,
|
||||
"artist": artist,
|
||||
"album": album,
|
||||
"title": title,
|
||||
"track_num": track_num,
|
||||
"ext": ext,
|
||||
}
|
||||
)
|
||||
|
||||
if filters:
|
||||
artist_filter = (filters.get("artist", "") or "").lower()
|
||||
album_filter = (filters.get("album", "") or "").lower()
|
||||
track_filter = (filters.get("track", "") or "").lower()
|
||||
|
||||
if artist_filter or album_filter or track_filter:
|
||||
filtered: List[dict] = []
|
||||
for item in enriched_results:
|
||||
if artist_filter and artist_filter not in item["artist"].lower(
|
||||
):
|
||||
continue
|
||||
if album_filter and album_filter not in item["album"].lower():
|
||||
continue
|
||||
if track_filter and track_filter not in item["title"].lower():
|
||||
continue
|
||||
filtered.append(item)
|
||||
enriched_results = filtered
|
||||
|
||||
enriched_results.sort(
|
||||
key=lambda item: (item["ext"].lower() != ".flac", -item["size"])
|
||||
)
|
||||
|
||||
results: List[SearchResult] = []
|
||||
for item in enriched_results:
|
||||
artist_display = item["artist"] if item["artist"] else "(no artist)"
|
||||
album_display = item["album"] if item["album"] else "(no album)"
|
||||
size_mb = int(item["size"] / 1024 / 1024)
|
||||
|
||||
columns = [
|
||||
("Track",
|
||||
item["track_num"] or "?"),
|
||||
("Title",
|
||||
item["title"][:40]),
|
||||
("Artist",
|
||||
artist_display[:32]),
|
||||
("Album",
|
||||
album_display[:32]),
|
||||
("Size",
|
||||
f"{size_mb} MB"),
|
||||
]
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="soulseek",
|
||||
title=item["title"],
|
||||
path=item["filename"],
|
||||
detail=f"{artist_display} - {album_display}",
|
||||
annotations=[f"{size_mb} MB",
|
||||
item["ext"].lstrip(".").upper()],
|
||||
media_kind="audio",
|
||||
size_bytes=item["size"],
|
||||
columns=columns,
|
||||
selection_action=["download-file", "-plugin", "soulseek"],
|
||||
full_metadata={
|
||||
"username": item["username"],
|
||||
"filename": item["filename"],
|
||||
"artist": item["artist"],
|
||||
"album": item["album"],
|
||||
"track_num": item["track_num"],
|
||||
"ext": item["ext"],
|
||||
"provider": "soulseek"
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[soulseek] Search error: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def validate(self) -> bool:
|
||||
try:
|
||||
from aioslsk.client import SoulSeekClient # noqa: F401
|
||||
|
||||
# Require configured credentials.
|
||||
try:
|
||||
from SYS.config import get_soulseek_username, get_soulseek_password
|
||||
|
||||
user = get_soulseek_username(self.config)
|
||||
pwd = get_soulseek_password(self.config)
|
||||
return bool(user and pwd)
|
||||
except Exception:
|
||||
# Fall back to legacy class defaults if config helpers aren't available.
|
||||
return bool(Soulseek.USERNAME and Soulseek.PASSWORD)
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
|
||||
async def download_soulseek_file(
|
||||
username: str,
|
||||
filename: str,
|
||||
output_dir: Optional[Path] = None,
|
||||
timeout: int = 1200,
|
||||
*,
|
||||
client_username: Optional[str] = None,
|
||||
client_password: Optional[str] = None,
|
||||
) -> Optional[Path]:
|
||||
"""Download a file from a Soulseek peer."""
|
||||
|
||||
try:
|
||||
from aioslsk.client import SoulSeekClient
|
||||
from aioslsk.settings import CredentialsSettings, Settings
|
||||
from aioslsk.transfer.model import Transfer, TransferDirection
|
||||
from aioslsk.transfer.state import TransferState
|
||||
|
||||
if output_dir is None:
|
||||
import tempfile
|
||||
output_dir = Path(tempfile.gettempdir()) / "Medios" / "Soulseek"
|
||||
|
||||
output_dir = Path(output_dir)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
local_filename = filename.replace("\\", "/").split("/")[-1]
|
||||
output_user_dir = output_dir / username
|
||||
output_user_dir.mkdir(parents=True, exist_ok=True)
|
||||
output_path = output_user_dir / local_filename
|
||||
|
||||
if output_path.exists():
|
||||
base = output_path.stem
|
||||
ext = output_path.suffix
|
||||
counter = 1
|
||||
while output_path.exists():
|
||||
output_path = output_user_dir / f"{base}_{counter}{ext}"
|
||||
counter += 1
|
||||
|
||||
output_path = output_path.resolve()
|
||||
|
||||
login_user = (client_username or Soulseek.USERNAME or "").strip()
|
||||
login_pass = (client_password or Soulseek.PASSWORD or "").strip()
|
||||
if not login_user or not login_pass:
|
||||
raise RuntimeError(
|
||||
"Soulseek credentials not configured (set provider=soulseek username/password)"
|
||||
)
|
||||
|
||||
settings = Settings(
|
||||
credentials=CredentialsSettings(username=login_user,
|
||||
password=login_pass)
|
||||
)
|
||||
|
||||
async def _attempt_once(
|
||||
attempt_num: int
|
||||
) -> tuple[Optional[Path],
|
||||
Any,
|
||||
int,
|
||||
float]:
|
||||
client = SoulSeekClient(settings)
|
||||
with _suppress_aioslsk_noise():
|
||||
async with _suppress_aioslsk_asyncio_task_noise():
|
||||
try:
|
||||
await client.start()
|
||||
await client.login()
|
||||
debug(f"[soulseek] Logged in as {login_user}")
|
||||
|
||||
log(
|
||||
f"[soulseek] Download attempt {attempt_num}: {username} :: {local_filename}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
debug(
|
||||
f"[soulseek] Requesting download from {username}: {filename}"
|
||||
)
|
||||
|
||||
transfer = await client.transfers.add(
|
||||
Transfer(username,
|
||||
filename,
|
||||
TransferDirection.DOWNLOAD)
|
||||
)
|
||||
transfer.local_path = str(output_path)
|
||||
await client.transfers.queue(transfer)
|
||||
|
||||
start_time = time.time()
|
||||
last_progress_time = start_time
|
||||
progress_bar = ProgressBar()
|
||||
|
||||
while not transfer.is_finalized():
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed > timeout:
|
||||
log(
|
||||
f"[soulseek] Download timeout after {timeout}s",
|
||||
file=sys.stderr
|
||||
)
|
||||
bytes_done = int(
|
||||
getattr(transfer,
|
||||
"bytes_transfered",
|
||||
0) or 0
|
||||
)
|
||||
state_val = getattr(
|
||||
getattr(transfer,
|
||||
"state",
|
||||
None),
|
||||
"VALUE",
|
||||
None
|
||||
)
|
||||
progress_bar.finish()
|
||||
return None, state_val, bytes_done, elapsed
|
||||
|
||||
bytes_done = int(
|
||||
getattr(transfer,
|
||||
"bytes_transfered",
|
||||
0) or 0
|
||||
)
|
||||
total_bytes = int(getattr(transfer, "filesize", 0) or 0)
|
||||
now = time.time()
|
||||
if now - last_progress_time >= 0.5:
|
||||
progress_bar.update(
|
||||
downloaded=bytes_done,
|
||||
total=total_bytes if total_bytes > 0 else None,
|
||||
label="download",
|
||||
file=sys.stderr,
|
||||
)
|
||||
last_progress_time = now
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
final_state = getattr(
|
||||
getattr(transfer,
|
||||
"state",
|
||||
None),
|
||||
"VALUE",
|
||||
None
|
||||
)
|
||||
downloaded_path = (
|
||||
Path(transfer.local_path)
|
||||
if getattr(transfer,
|
||||
"local_path",
|
||||
None) else output_path
|
||||
)
|
||||
final_elapsed = time.time() - start_time
|
||||
|
||||
# Clear in-place progress bar.
|
||||
progress_bar.finish()
|
||||
|
||||
# If a file was written, treat it as success even if state is odd.
|
||||
try:
|
||||
if downloaded_path.exists() and downloaded_path.stat(
|
||||
).st_size > 0:
|
||||
if final_state != TransferState.COMPLETE:
|
||||
log(
|
||||
f"[soulseek] Transfer finalized as {final_state}, but file exists ({downloaded_path.stat().st_size} bytes). Keeping file.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return (
|
||||
downloaded_path,
|
||||
final_state,
|
||||
int(downloaded_path.stat().st_size),
|
||||
final_elapsed,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if final_state == TransferState.COMPLETE and downloaded_path.exists(
|
||||
):
|
||||
debug(f"[soulseek] Download complete: {downloaded_path}")
|
||||
return (
|
||||
downloaded_path,
|
||||
final_state,
|
||||
int(downloaded_path.stat().st_size),
|
||||
final_elapsed,
|
||||
)
|
||||
|
||||
fail_bytes = int(getattr(transfer, "bytes_transfered", 0) or 0)
|
||||
fail_total = int(getattr(transfer, "filesize", 0) or 0)
|
||||
reason = getattr(transfer, "reason", None)
|
||||
log(
|
||||
f"[soulseek] Download failed: state={final_state} bytes={fail_bytes}/{fail_total} reason={reason}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Clean up 0-byte placeholder.
|
||||
try:
|
||||
if downloaded_path.exists() and downloaded_path.stat(
|
||||
).st_size == 0:
|
||||
downloaded_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
return None, final_state, fail_bytes, final_elapsed
|
||||
finally:
|
||||
try:
|
||||
await client.stop()
|
||||
except Exception:
|
||||
pass
|
||||
# Let cancellation/cleanup callbacks run while our exception handler is still installed.
|
||||
# Increased to 0.2s for Windows Proactor loop stability.
|
||||
try:
|
||||
await asyncio.sleep(0.2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Retry a couple times only for fast 0-byte failures (common transient case).
|
||||
max_attempts = 3
|
||||
for attempt in range(1, max_attempts + 1):
|
||||
result_path, final_state, bytes_done, elapsed = await _attempt_once(attempt)
|
||||
if result_path:
|
||||
return result_path
|
||||
|
||||
should_retry = (bytes_done == 0) and (elapsed < 15.0)
|
||||
if attempt < max_attempts and should_retry:
|
||||
log(
|
||||
f"[soulseek] Retrying after fast failure (state={final_state})",
|
||||
file=sys.stderr
|
||||
)
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
break
|
||||
return None
|
||||
|
||||
except ImportError:
|
||||
log(
|
||||
"[soulseek] aioslsk not installed. Install with: pip install aioslsk",
|
||||
file=sys.stderr
|
||||
)
|
||||
return None
|
||||
except Exception as exc:
|
||||
log(f"[soulseek] Download failed: {type(exc).__name__}: {exc}", file=sys.stderr)
|
||||
return None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,448 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from API.requests_client import get_requests_session
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.logger import debug, log
|
||||
try: # Preferred HTML parser
|
||||
from lxml import html as lxml_html
|
||||
except Exception: # pragma: no cover - optional
|
||||
lxml_html = None # type: ignore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TorrentInfo:
|
||||
name: str
|
||||
url: str
|
||||
seeders: int
|
||||
leechers: int
|
||||
size: str
|
||||
source: str
|
||||
category: Optional[str] = None
|
||||
uploader: Optional[str] = None
|
||||
magnet: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchParams:
|
||||
name: str
|
||||
category: Optional[str] = None
|
||||
order_column: Optional[str] = None
|
||||
order_ascending: bool = False
|
||||
|
||||
|
||||
_MAGNET_RE = re.compile(r"^magnet", re.IGNORECASE)
|
||||
|
||||
|
||||
class Scraper:
|
||||
def __init__(self, name: str, base_url: str, timeout: float = 10.0) -> None:
|
||||
self.name = name
|
||||
self.base = base_url.rstrip("/")
|
||||
self.timeout = timeout
|
||||
self.headers = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
|
||||
)
|
||||
}
|
||||
self.params: Optional[SearchParams] = None
|
||||
|
||||
def find(self, params: SearchParams, pages: int = 1) -> List[TorrentInfo]:
|
||||
self.params = params
|
||||
results: List[TorrentInfo] = []
|
||||
for page in range(1, max(1, pages) + 1):
|
||||
try:
|
||||
results.extend(self._get_page(page))
|
||||
except Exception as exc:
|
||||
debug(f"[{self.name}] page fetch failed: {exc}")
|
||||
return results
|
||||
|
||||
def _get_page(self, page: int) -> List[TorrentInfo]:
|
||||
url, payload = self._request_data(page)
|
||||
try:
|
||||
resp = get_requests_session().get(
|
||||
url,
|
||||
params=payload,
|
||||
headers=self.headers,
|
||||
timeout=self.timeout,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return self._parse_search(resp)
|
||||
except Exception as exc:
|
||||
debug(f"[{self.name}] request failed: {exc}")
|
||||
return []
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
return self.base, {}
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]: # pragma: no cover - interface
|
||||
raise NotImplementedError
|
||||
|
||||
def _parse_detail(self, url: str) -> Optional[str]: # optional override
|
||||
try:
|
||||
resp = get_requests_session().get(url, headers=self.headers, timeout=self.timeout)
|
||||
resp.raise_for_status()
|
||||
return self._parse_detail_response(resp)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _parse_detail_response(self, response: requests.Response) -> Optional[str]: # pragma: no cover - interface
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _int_from_text(value: Any) -> int:
|
||||
try:
|
||||
return int(str(value).strip().replace(",", ""))
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
class NyaaScraper(Scraper):
|
||||
def __init__(self) -> None:
|
||||
super().__init__("nyaa.si", "https://nyaa.si")
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
params = self.params or SearchParams(name="")
|
||||
payload = {
|
||||
"p": page,
|
||||
"q": params.name,
|
||||
"c": params.category or "0_0",
|
||||
"f": "0",
|
||||
}
|
||||
if params.order_column:
|
||||
payload["s"] = params.order_column
|
||||
payload["o"] = "asc" if params.order_ascending else "desc"
|
||||
return f"{self.base}/", payload
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
|
||||
if lxml_html is None:
|
||||
return []
|
||||
doc = lxml_html.fromstring(response.text)
|
||||
rows = doc.xpath("//table//tbody/tr")
|
||||
results: List[TorrentInfo] = []
|
||||
for row in rows:
|
||||
cells = row.xpath("./td")
|
||||
if len(cells) < 7:
|
||||
continue
|
||||
category_cell, name_cell, links_cell, size_cell, _, seed_cell, leech_cell, *_ = cells
|
||||
|
||||
name_links = name_cell.xpath("./a")
|
||||
name_tag = name_links[1] if len(name_links) > 1 else (name_links[0] if name_links else None)
|
||||
if name_tag is None:
|
||||
continue
|
||||
|
||||
name = name_tag.get("title") or (name_tag.text_content() or "").strip()
|
||||
url = name_tag.get("href") or ""
|
||||
|
||||
magnet_link = None
|
||||
magnet_candidates = links_cell.xpath('.//a[starts-with(@href,"magnet:")]/@href')
|
||||
if magnet_candidates:
|
||||
magnet_link = magnet_candidates[0]
|
||||
|
||||
category_title = None
|
||||
cat_titles = category_cell.xpath(".//a/@title")
|
||||
if cat_titles:
|
||||
category_title = cat_titles[0]
|
||||
|
||||
results.append(
|
||||
TorrentInfo(
|
||||
name=name,
|
||||
url=f"{self.base}{url}",
|
||||
seeders=self._int_from_text(seed_cell.text_content()),
|
||||
leechers=self._int_from_text(leech_cell.text_content()),
|
||||
size=(size_cell.text_content() or "").strip(),
|
||||
source=self.name,
|
||||
category=category_title,
|
||||
magnet=magnet_link,
|
||||
)
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
class X1337Scraper(Scraper):
|
||||
def __init__(self) -> None:
|
||||
super().__init__("1337x.to", "https://1337x.to")
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
params = self.params or SearchParams(name="")
|
||||
order = None
|
||||
if params.order_column:
|
||||
direction = "asc" if params.order_ascending else "desc"
|
||||
order = f"{params.order_column}/{direction}"
|
||||
|
||||
category = params.category
|
||||
name = requests.utils.quote(params.name)
|
||||
|
||||
if order and category:
|
||||
path = f"/sort-category-search/{name}/{category}/{order}"
|
||||
elif category:
|
||||
path = f"/category-search/{name}/{category}"
|
||||
elif order:
|
||||
path = f"/sort-search/{name}/{order}"
|
||||
else:
|
||||
path = f"/search/{name}"
|
||||
|
||||
url = f"{self.base}{path}/{page}/"
|
||||
return url, {}
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
|
||||
if lxml_html is None:
|
||||
return []
|
||||
doc = lxml_html.fromstring(response.text)
|
||||
rows = doc.xpath("//table//tbody/tr")
|
||||
results: List[TorrentInfo] = []
|
||||
for row in rows:
|
||||
cells = row.xpath("./td")
|
||||
if len(cells) < 6:
|
||||
continue
|
||||
name_cell, seeds_cell, leech_cell, _, size_cell, uploader_cell = cells
|
||||
|
||||
links = name_cell.xpath(".//a")
|
||||
if len(links) < 2:
|
||||
continue
|
||||
|
||||
torrent_path = links[1].get("href")
|
||||
torrent_url = f"{self.base}{torrent_path}" if torrent_path else ""
|
||||
|
||||
info = TorrentInfo(
|
||||
name=(links[1].text_content() or "").strip(),
|
||||
url=torrent_url,
|
||||
seeders=self._int_from_text(seeds_cell.text_content()),
|
||||
leechers=self._int_from_text(leech_cell.text_content()),
|
||||
size=(size_cell.text_content() or "").strip().replace(",", ""),
|
||||
source=self.name,
|
||||
uploader=(uploader_cell.text_content() or "").strip() if uploader_cell is not None else None,
|
||||
)
|
||||
|
||||
if not info.magnet:
|
||||
info.magnet = self._parse_detail(info.url)
|
||||
results.append(info)
|
||||
return results
|
||||
|
||||
def _parse_detail_response(self, response: requests.Response) -> Optional[str]:
|
||||
if lxml_html is None:
|
||||
return None
|
||||
doc = lxml_html.fromstring(response.text)
|
||||
links = doc.xpath("//main//a[starts-with(@href,'magnet:')]/@href")
|
||||
return links[0] if links else None
|
||||
|
||||
|
||||
class YTSScraper(Scraper):
|
||||
TRACKERS = "&tr=".join(
|
||||
[
|
||||
"udp://open.demonii.com:1337/announce",
|
||||
"udp://tracker.opentrackr.org:1337/announce",
|
||||
"udp://tracker.leechers-paradise.org:6969",
|
||||
]
|
||||
)
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__("yts.mx", "https://yts.mx/api/v2")
|
||||
self.headers = {}
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
params = self.params or SearchParams(name="")
|
||||
payload = {
|
||||
"limit": 50,
|
||||
"page": page,
|
||||
"query_term": params.name,
|
||||
"sort_by": "seeds",
|
||||
"order_by": "desc" if not params.order_ascending else "asc",
|
||||
}
|
||||
return f"{self.base}/list_movies.json", payload
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
|
||||
results: List[TorrentInfo] = []
|
||||
data = response.json()
|
||||
if data.get("status") != "ok":
|
||||
return results
|
||||
movies = (data.get("data") or {}).get("movies") or []
|
||||
for movie in movies:
|
||||
torrents = movie.get("torrents") or []
|
||||
if not torrents:
|
||||
continue
|
||||
tor = max(torrents, key=lambda t: t.get("seeds", 0))
|
||||
name = movie.get("title") or "unknown"
|
||||
info = TorrentInfo(
|
||||
name=name,
|
||||
url=str(movie.get("id") or ""),
|
||||
seeders=int(tor.get("seeds", 0) or 0),
|
||||
leechers=int(tor.get("peers", 0) or 0),
|
||||
size=str(tor.get("size") or ""),
|
||||
source=self.name,
|
||||
category=(movie.get("genres") or [None])[0],
|
||||
magnet=self._build_magnet(tor, name),
|
||||
)
|
||||
results.append(info)
|
||||
return results
|
||||
|
||||
def _build_magnet(self, torrent: Dict[str, Any], name: str) -> str:
|
||||
return (
|
||||
f"magnet:?xt=urn:btih:{torrent.get('hash')}"
|
||||
f"&dn={requests.utils.quote(name)}&tr={self.TRACKERS}"
|
||||
)
|
||||
|
||||
|
||||
class ApiBayScraper(Scraper):
|
||||
"""Scraper for apibay.org (The Pirate Bay API clone)."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__("apibay.org", "https://apibay.org")
|
||||
|
||||
def _request_data(self, page: int) -> tuple[str, Dict[str, Any]]:
|
||||
_ = page # single-page API
|
||||
params = self.params or SearchParams(name="")
|
||||
return f"{self.base}/q.php", {"q": params.name}
|
||||
|
||||
def _parse_search(self, response: requests.Response) -> List[TorrentInfo]:
|
||||
results: List[TorrentInfo] = []
|
||||
try:
|
||||
data = response.json()
|
||||
except Exception:
|
||||
return results
|
||||
if not isinstance(data, list):
|
||||
return results
|
||||
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
name = str(item.get("name") or "").strip()
|
||||
info_hash = str(item.get("info_hash") or "").strip()
|
||||
if not name or not info_hash:
|
||||
continue
|
||||
|
||||
magnet = self._build_magnet(info_hash, name)
|
||||
seeders = self._int_from_text(item.get("seeders"))
|
||||
leechers = self._int_from_text(item.get("leechers"))
|
||||
size_raw = str(item.get("size") or "").strip()
|
||||
size_fmt = self._format_size(size_raw)
|
||||
|
||||
results.append(
|
||||
TorrentInfo(
|
||||
name=name,
|
||||
url=f"{self.base}/description.php?id={item.get('id')}",
|
||||
seeders=seeders,
|
||||
leechers=leechers,
|
||||
size=size_fmt,
|
||||
source=self.name,
|
||||
category=str(item.get("category") or ""),
|
||||
uploader=str(item.get("username") or ""),
|
||||
magnet=magnet,
|
||||
)
|
||||
)
|
||||
return results
|
||||
|
||||
@staticmethod
|
||||
def _build_magnet(info_hash: str, name: str) -> str:
|
||||
return f"magnet:?xt=urn:btih:{info_hash}&dn={requests.utils.quote(name)}"
|
||||
|
||||
@staticmethod
|
||||
def _format_size(size_raw: str) -> str:
|
||||
try:
|
||||
size_int = int(size_raw)
|
||||
if size_int <= 0:
|
||||
return size_raw
|
||||
gb = size_int / (1024 ** 3)
|
||||
if gb >= 1:
|
||||
return f"{gb:.1f} GB"
|
||||
mb = size_int / (1024 ** 2)
|
||||
return f"{mb:.1f} MB"
|
||||
except Exception:
|
||||
return size_raw
|
||||
|
||||
|
||||
class Torrent(Provider):
|
||||
TABLE_AUTO_STAGES = {"torrent": ["download-file"]}
|
||||
|
||||
@property
|
||||
def preserve_order(self) -> bool:
|
||||
return True
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
|
||||
super().__init__(config)
|
||||
self.scrapers: List[Scraper] = []
|
||||
# JSON APIs (no lxml dependency)
|
||||
self.scrapers.append(ApiBayScraper())
|
||||
self.scrapers.append(YTSScraper())
|
||||
# HTML scrapers require lxml
|
||||
if lxml_html is not None:
|
||||
self.scrapers.append(NyaaScraper())
|
||||
self.scrapers.append(X1337Scraper())
|
||||
else:
|
||||
log("[torrent] lxml not installed; skipping Nyaa/1337x scrapers", file=None)
|
||||
|
||||
def validate(self) -> bool:
|
||||
return bool(self.scrapers)
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**_kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
q = str(query or "").strip()
|
||||
if not q:
|
||||
return []
|
||||
|
||||
params = SearchParams(name=q, order_column="seeders", order_ascending=False)
|
||||
results: List[TorrentInfo] = []
|
||||
|
||||
for scraper in self.scrapers:
|
||||
try:
|
||||
scraped = scraper.find(params, pages=1)
|
||||
results.extend(scraped)
|
||||
except Exception as exc:
|
||||
debug(f"[torrent] scraper {scraper.name} failed: {exc}")
|
||||
continue
|
||||
|
||||
results = sorted(results, key=lambda r: r.seeders, reverse=True)
|
||||
if limit and limit > 0:
|
||||
results = results[:limit]
|
||||
|
||||
out: List[SearchResult] = []
|
||||
for item in results:
|
||||
path = item.magnet or item.url
|
||||
columns = [
|
||||
("TITLE", item.name),
|
||||
("Seeds", str(item.seeders)),
|
||||
("Leechers", str(item.leechers)),
|
||||
("Size", item.size or ""),
|
||||
("Source", item.source),
|
||||
]
|
||||
if item.uploader:
|
||||
columns.append(("Uploader", item.uploader))
|
||||
|
||||
md = {
|
||||
"magnet": item.magnet,
|
||||
"url": item.url,
|
||||
"source": item.source,
|
||||
"seeders": item.seeders,
|
||||
"leechers": item.leechers,
|
||||
"size": item.size,
|
||||
}
|
||||
if item.uploader:
|
||||
md["uploader"] = item.uploader
|
||||
|
||||
out.append(
|
||||
SearchResult(
|
||||
table="torrent",
|
||||
title=item.name,
|
||||
path=path,
|
||||
detail=f"Seeds:{item.seeders} | Size:{item.size}",
|
||||
annotations=[item.source],
|
||||
media_kind="other",
|
||||
columns=columns,
|
||||
full_metadata=md,
|
||||
tag={"torrent"},
|
||||
)
|
||||
)
|
||||
return out
|
||||
@@ -1,870 +0,0 @@
|
||||
"""Minimal Vimm provider: table-row parsing for display.
|
||||
|
||||
This minimal implementation focuses on fetching a Vimm search result page,
|
||||
turning the vault table rows into SearchResults, and letting the CLI
|
||||
auto-insert the download-file stage directly from the first table so that
|
||||
Playwright-driven downloads happen without showing a nested detail table.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import parse_qsl, parse_qs, urljoin, urlparse, urlunparse, urlencode
|
||||
from lxml import html as lxml_html
|
||||
import base64
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from API.HTTP import HTTPClient
|
||||
from ProviderCore.base import Provider, SearchResult, parse_inline_query_arguments
|
||||
from ProviderCore.inline_utils import resolve_filter
|
||||
from SYS.logger import debug
|
||||
from SYS.provider_helpers import TableProviderMixin
|
||||
from tool.playwright import PlaywrightTool
|
||||
|
||||
|
||||
class Vimm(TableProviderMixin, Provider):
|
||||
"""Minimal provider for vimm.net vault listings using TableProvider mixin.
|
||||
|
||||
NOTES / HOW-TO (selection & auto-download):
|
||||
- This provider exposes file rows on a detail page. Each file row includes
|
||||
a `path` which is an absolute download URL (or a form action + mediaId).
|
||||
|
||||
- To make `@N` expansion robust (so users can do `@1 | add-file -store <x>`)
|
||||
we ensure three things:
|
||||
1) The ResultTable produced by the `selector()` sets `source_command` to
|
||||
"download-file" (the canonical cmdlet for downloading files).
|
||||
2) Each row carries explicit selection args: `['-url', '<full-url>']`.
|
||||
Using an explicit `-url` flag avoids ambiguity during argument
|
||||
parsing (some cmdlets accept positional URLs, others accept flags).
|
||||
3) The CLI's expansion logic places selection args *before* plugin
|
||||
source args (e.g., `-plugin vimm`) so the first positional token is
|
||||
the intended URL (not an unknown flag like `-plugin`).
|
||||
|
||||
- Why this approach? Argument parsing treats the *first* unrecognized token
|
||||
as a positional value (commonly interpreted as a URL). If a plugin
|
||||
injects hints like `-plugin vimm` *before* a bare URL, the parser can
|
||||
misinterpret `-plugin` as the URL, causing confusing attempts to
|
||||
download `-plugin`. By using `-url` and ensuring the URL appears first
|
||||
we avoid that class of bugs and make `@N` -> `download-file`/`add-file`
|
||||
flows reliable.
|
||||
|
||||
The code below implements these choices (and contains inline comments
|
||||
explaining specific decisions)."""
|
||||
|
||||
URL = ("https://vimm.net/vault/",)
|
||||
URL_DOMAINS = ("vimm.net",)
|
||||
|
||||
def get_source_command(self, args_list: List[str]) -> Tuple[str, List[str]]:
|
||||
return "search-file", ["-plugin", self.name]
|
||||
|
||||
REGION_CHOICES = [
|
||||
{"value": "1", "text": "Argentina"},
|
||||
{"value": "2", "text": "Asia"},
|
||||
{"value": "3", "text": "Australia"},
|
||||
{"value": "35", "text": "Austria"},
|
||||
{"value": "31", "text": "Belgium"},
|
||||
{"value": "4", "text": "Brazil"},
|
||||
{"value": "5", "text": "Canada"},
|
||||
{"value": "6", "text": "China"},
|
||||
{"value": "38", "text": "Croatia"},
|
||||
{"value": "7", "text": "Denmark"},
|
||||
{"value": "8", "text": "Europe"},
|
||||
{"value": "9", "text": "Finland"},
|
||||
{"value": "10", "text": "France"},
|
||||
{"value": "11", "text": "Germany"},
|
||||
{"value": "12", "text": "Greece"},
|
||||
{"value": "13", "text": "Hong Kong"},
|
||||
{"value": "27", "text": "India"},
|
||||
{"value": "33", "text": "Ireland"},
|
||||
{"value": "34", "text": "Israel"},
|
||||
{"value": "14", "text": "Italy"},
|
||||
{"value": "15", "text": "Japan"},
|
||||
{"value": "16", "text": "Korea"},
|
||||
{"value": "30", "text": "Latin America"},
|
||||
{"value": "17", "text": "Mexico"},
|
||||
{"value": "18", "text": "Netherlands"},
|
||||
{"value": "40", "text": "New Zealand"},
|
||||
{"value": "19", "text": "Norway"},
|
||||
{"value": "28", "text": "Poland"},
|
||||
{"value": "29", "text": "Portugal"},
|
||||
{"value": "20", "text": "Russia"},
|
||||
{"value": "32", "text": "Scandinavia"},
|
||||
{"value": "37", "text": "South Africa"},
|
||||
{"value": "21", "text": "Spain"},
|
||||
{"value": "22", "text": "Sweden"},
|
||||
{"value": "36", "text": "Switzerland"},
|
||||
{"value": "23", "text": "Taiwan"},
|
||||
{"value": "39", "text": "Turkey"},
|
||||
{"value": "41", "text": "United Arab Emirates"},
|
||||
{"value": "24", "text": "United Kingdom"},
|
||||
{"value": "25", "text": "USA"},
|
||||
{"value": "26", "text": "World"},
|
||||
]
|
||||
|
||||
QUERY_ARG_CHOICES = {
|
||||
"system": [
|
||||
"Atari2600",
|
||||
"Atari5200",
|
||||
"Atari7800",
|
||||
"CDi",
|
||||
"Dreamcast",
|
||||
"GB",
|
||||
"GBA",
|
||||
"GBC",
|
||||
"GG",
|
||||
"GameCube",
|
||||
"Genesis",
|
||||
"Jaguar",
|
||||
"JaguarCD",
|
||||
"Lynx",
|
||||
"SMS",
|
||||
"NES",
|
||||
"3DS",
|
||||
"N64",
|
||||
"DS",
|
||||
"PS1",
|
||||
"PS2",
|
||||
"PS3",
|
||||
"PSP",
|
||||
"Saturn",
|
||||
"32X",
|
||||
"SegaCD",
|
||||
"SNES",
|
||||
"TG16",
|
||||
"TGCD",
|
||||
"VB",
|
||||
"Wii",
|
||||
"WiiWare",
|
||||
"Xbox",
|
||||
"Xbox360",
|
||||
"X360-D",
|
||||
],
|
||||
"region": REGION_CHOICES,
|
||||
}
|
||||
# ProviderCore still looks for INLINE_QUERY_FIELD_CHOICES, so expose this
|
||||
# mapping once and keep QUERY_ARG_CHOICES as the readable name we prefer.
|
||||
INLINE_QUERY_FIELD_CHOICES = QUERY_ARG_CHOICES
|
||||
|
||||
# Table metadata/constants grouped near the table helpers below.
|
||||
TABLE_AUTO_STAGES = {"vimm": ["download-file"]}
|
||||
AUTO_STAGE_USE_SELECTION_ARGS = True
|
||||
TABLE_SYSTEM_COLUMN = {"label": "Platform", "metadata_key": "system"}
|
||||
|
||||
def validate(self) -> bool:
|
||||
return True
|
||||
|
||||
def search(self, query: str, limit: int = 50, filters: Optional[Dict[str, Any]] = None, **kwargs: Any) -> List[SearchResult]:
|
||||
q = (query or "").strip()
|
||||
if not q:
|
||||
return []
|
||||
|
||||
base = "https://vimm.net/vault/"
|
||||
normalized_filters: Dict[str, Any] = {}
|
||||
for key, value in (filters or {}).items():
|
||||
if key is None:
|
||||
continue
|
||||
normalized_filters[str(key).lower()] = value
|
||||
|
||||
system_value = normalized_filters.get("system") or normalized_filters.get("platform")
|
||||
system_param = str(system_value or "").strip()
|
||||
|
||||
region_value = normalized_filters.get("region")
|
||||
region_param = str(region_value or "").strip()
|
||||
|
||||
params = [("p", "list"), ("q", q)]
|
||||
if system_param:
|
||||
params.append(("system", system_param))
|
||||
if region_param:
|
||||
params.append(("region", region_param))
|
||||
url = f"{base}?{urlencode(params)}"
|
||||
debug(f"[vimm] search: query={q} url={url} filters={normalized_filters}")
|
||||
|
||||
try:
|
||||
with HTTPClient(timeout=9.0) as client:
|
||||
resp = client.get(url)
|
||||
content = resp.content
|
||||
except Exception as exc:
|
||||
debug(f"[vimm] HTTP fetch failed: {exc}")
|
||||
return []
|
||||
|
||||
try:
|
||||
doc = lxml_html.fromstring(content)
|
||||
except Exception as exc:
|
||||
debug(f"[vimm] HTML parse failed: {exc}")
|
||||
return []
|
||||
|
||||
xpaths = [
|
||||
"//table//tbody/tr",
|
||||
"//table//tr[td]",
|
||||
"//div[contains(@class,'list-item')]",
|
||||
"//div[contains(@class,'result')]",
|
||||
"//li[contains(@class,'item')]",
|
||||
]
|
||||
|
||||
rows = doc.xpath("//table//tr[td]")
|
||||
results = self._build_results_from_rows(rows, url, system_param, limit)
|
||||
if not results:
|
||||
results = self.search_table_from_url(url, limit=limit, xpaths=xpaths)
|
||||
self._ensure_system_column(results, system_param)
|
||||
|
||||
results = [self._apply_selection_defaults(r, referer=url, detail_url=getattr(r, "path", "")) for r in (results or [])]
|
||||
|
||||
debug(f"[vimm] results={len(results)}")
|
||||
return results[: int(limit)]
|
||||
|
||||
def extract_query_arguments(self, query: str) -> Tuple[str, Dict[str, Any]]:
|
||||
normalized, inline_args = parse_inline_query_arguments(query)
|
||||
inline_args_norm: Dict[str, Any] = {}
|
||||
for k, v in (inline_args or {}).items():
|
||||
if k is None:
|
||||
continue
|
||||
key_norm = str(k).strip().lower()
|
||||
if key_norm == "platform":
|
||||
key_norm = "system"
|
||||
inline_args_norm[key_norm] = v
|
||||
|
||||
filters = resolve_filter(self, inline_args_norm)
|
||||
return normalized, filters
|
||||
|
||||
def _build_results_from_rows(
|
||||
self,
|
||||
rows: List[Any],
|
||||
base_url: str,
|
||||
system_value: Optional[str],
|
||||
limit: int,
|
||||
) -> List[SearchResult]:
|
||||
out: List[SearchResult] = []
|
||||
seen: set[str] = set()
|
||||
system_column = getattr(self, "TABLE_SYSTEM_COLUMN", {}) or {}
|
||||
key = str(system_column.get("metadata_key") or "system").strip()
|
||||
if not key:
|
||||
key = "system"
|
||||
|
||||
for tr in rows:
|
||||
if len(out) >= limit:
|
||||
break
|
||||
rec = self._parse_table_row(tr, base_url, system_value)
|
||||
if not rec:
|
||||
continue
|
||||
path = rec.get("path")
|
||||
if not path or path in seen:
|
||||
continue
|
||||
seen.add(path)
|
||||
columns = self._build_columns_from_record(rec)
|
||||
if not columns:
|
||||
continue
|
||||
metadata: Dict[str, Any] = {"raw_record": rec, "detail_url": path, "referer": base_url}
|
||||
if path:
|
||||
metadata["_selection_args"] = ["-url", path]
|
||||
platform_value = rec.get("platform")
|
||||
if platform_value:
|
||||
metadata[key] = platform_value
|
||||
sr = SearchResult(
|
||||
table="vimm",
|
||||
title=rec.get("title") or "",
|
||||
path=path,
|
||||
detail="",
|
||||
annotations=[],
|
||||
media_kind="file",
|
||||
size_bytes=None,
|
||||
tag={"vimm"},
|
||||
columns=columns,
|
||||
full_metadata=metadata,
|
||||
)
|
||||
out.append(self._apply_selection_defaults(sr, referer=base_url, detail_url=path))
|
||||
return out
|
||||
|
||||
def _parse_table_row(self, tr: Any, base_url: str, system_value: Optional[str]) -> Dict[str, str]:
|
||||
tds = tr.xpath("./td")
|
||||
if not tds:
|
||||
return {}
|
||||
|
||||
rec: Dict[str, str] = {}
|
||||
title_anchor = tds[0].xpath('.//a[contains(@href,"/vault/")]') or []
|
||||
if title_anchor:
|
||||
el = title_anchor[0]
|
||||
rec["title"] = (el.text_content() or "").strip()
|
||||
href = el.get("href") or ""
|
||||
rec["path"] = urljoin(base_url, href) if href else ""
|
||||
if system_value:
|
||||
rec["platform"] = str(system_value).strip().upper()
|
||||
rec["region"] = self._flag_text_at(tds, 1)
|
||||
rec["version"] = self._text_at(tds, 2)
|
||||
rec["languages"] = self._text_at(tds, 3)
|
||||
else:
|
||||
raw_platform = (tds[0].text_content() or "").strip()
|
||||
if raw_platform:
|
||||
rec["platform"] = raw_platform.upper()
|
||||
anchors = tds[1].xpath('.//a[contains(@href,"/vault/")]') or tds[1].xpath('.//a')
|
||||
if not anchors:
|
||||
return {}
|
||||
el = anchors[0]
|
||||
rec["title"] = (el.text_content() or "").strip()
|
||||
href = el.get("href") or ""
|
||||
rec["path"] = urljoin(base_url, href) if href else ""
|
||||
rec["region"] = self._flag_text_at(tds, 2)
|
||||
rec["version"] = self._text_at(tds, 3)
|
||||
rec["languages"] = self._text_at(tds, 4)
|
||||
|
||||
return {k: v for k, v in rec.items() if v}
|
||||
|
||||
def _text_at(self, tds: List[Any], idx: int) -> str:
|
||||
if idx < 0 or idx >= len(tds):
|
||||
return ""
|
||||
return (tds[idx].text_content() or "").strip()
|
||||
|
||||
def _flag_text_at(self, tds: List[Any], idx: int) -> str:
|
||||
if idx < 0 or idx >= len(tds):
|
||||
return ""
|
||||
td = tds[idx]
|
||||
imgs = td.xpath('.//img[contains(@class,"flag")]/@title')
|
||||
if imgs:
|
||||
return str(imgs[0]).strip()
|
||||
return (td.text_content() or "").strip()
|
||||
|
||||
def _build_columns_from_record(self, rec: Dict[str, str]) -> List[Tuple[str, str]]:
|
||||
title = rec.get("title") or ""
|
||||
if not title:
|
||||
return []
|
||||
columns: List[Tuple[str, str]] = [("Title", title)]
|
||||
system_column = getattr(self, "TABLE_SYSTEM_COLUMN", {}) or {}
|
||||
label = str(system_column.get("label") or "Platform")
|
||||
platform_value = rec.get("platform")
|
||||
if platform_value:
|
||||
columns.append((label, platform_value))
|
||||
for key, friendly in (("region", "Region"), ("version", "Version"), ("languages", "Languages")):
|
||||
value = rec.get(key)
|
||||
if value:
|
||||
columns.append((friendly, value))
|
||||
return columns
|
||||
|
||||
def _apply_selection_defaults(self, sr: SearchResult, *, referer: Optional[str], detail_url: Optional[str]) -> SearchResult:
|
||||
"""Attach selection metadata so @N expansion passes a usable URL first."""
|
||||
|
||||
try:
|
||||
md = dict(getattr(sr, "full_metadata", {}) or {})
|
||||
except Exception:
|
||||
md = {}
|
||||
|
||||
path_val = str(getattr(sr, "path", "") or "")
|
||||
if not path_val:
|
||||
path_val = str(detail_url or "")
|
||||
|
||||
if path_val:
|
||||
md.setdefault("_selection_args", ["-url", path_val])
|
||||
md.setdefault("detail_url", detail_url or path_val)
|
||||
if referer:
|
||||
md.setdefault("referer", referer)
|
||||
|
||||
sr.full_metadata = md
|
||||
return sr
|
||||
|
||||
def _ensure_system_column(self, results: List[SearchResult], system_value: Optional[str]) -> None:
|
||||
if not results or not system_value:
|
||||
return
|
||||
label_value = str(system_value).strip()
|
||||
if not label_value:
|
||||
return
|
||||
label_value = label_value.upper()
|
||||
system_column = getattr(self, "TABLE_SYSTEM_COLUMN", {}) or {}
|
||||
label_name = str(system_column.get("label") or "Platform").strip()
|
||||
if not label_name:
|
||||
label_name = "Platform"
|
||||
normalized_label = label_name.strip().lower()
|
||||
metadata_key = str(system_column.get("metadata_key") or "system").strip()
|
||||
if not metadata_key:
|
||||
metadata_key = "system"
|
||||
for result in results:
|
||||
try:
|
||||
cols = getattr(result, "columns", None)
|
||||
if isinstance(cols, list):
|
||||
lowered = {str(name or "").strip().lower() for name, _ in cols}
|
||||
if normalized_label not in lowered:
|
||||
insert_pos = 1 if cols else 0
|
||||
cols.insert(insert_pos, (label_name, label_value))
|
||||
metadata = getattr(result, "full_metadata", None)
|
||||
if isinstance(metadata, dict):
|
||||
metadata.setdefault(metadata_key, label_value)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
def _parse_detail_doc(self, doc, base_url: str) -> List[Any]:
|
||||
"""Parse a Vimm detail page (non-standard table layout) and return a list
|
||||
of SearchResult or dict payloads suitable for `ResultTable.add_result()`.
|
||||
|
||||
The function extracts simple key/value rows and file download entries (anchors
|
||||
or download forms) and returns property dicts first followed by file SearchResults.
|
||||
"""
|
||||
def _build_download_url(action_url: str, params: Dict[str, str]) -> str:
|
||||
if not action_url:
|
||||
return ""
|
||||
if not params:
|
||||
return action_url
|
||||
cleaned = {k: str(v) for k, v in params.items() if v is not None and str(v) != ""}
|
||||
if not cleaned:
|
||||
return action_url
|
||||
parsed = urlparse(action_url)
|
||||
existing = dict(parse_qsl(parsed.query, keep_blank_values=True))
|
||||
existing.update(cleaned)
|
||||
query = urlencode(existing, doseq=True)
|
||||
return urlunparse(parsed._replace(query=query))
|
||||
|
||||
try:
|
||||
# Prefer the compact 'rounded' detail table when present
|
||||
tables = doc.xpath('//table[contains(@class,"rounded") and contains(@class,"cellpadding1")]') or doc.xpath('//table[contains(@class,"rounded")]')
|
||||
if not tables:
|
||||
return []
|
||||
|
||||
tbl = tables[0]
|
||||
trs = tbl.xpath('.//tr') or []
|
||||
|
||||
# Aggregate page properties into a mapping and create file rows with Title, Region, CRC, Version
|
||||
props: Dict[str, Any] = {}
|
||||
anchors_by_label: Dict[str, List[Dict[str, str]]] = {}
|
||||
|
||||
for tr in trs:
|
||||
try:
|
||||
if tr.xpath('.//hr'):
|
||||
continue
|
||||
tds = tr.xpath('./td')
|
||||
if not tds:
|
||||
continue
|
||||
|
||||
# Canvas-based title row (base64 encoded in data-v)
|
||||
canvas = tr.xpath('.//canvas[@data-v]')
|
||||
if canvas:
|
||||
data_v = canvas[0].get('data-v') or ''
|
||||
try:
|
||||
raw = base64.b64decode(data_v)
|
||||
txt = raw.decode('utf-8', errors='ignore').strip()
|
||||
except Exception:
|
||||
txt = (canvas[0].text_content() or '').strip()
|
||||
if txt:
|
||||
props['Title'] = txt
|
||||
continue
|
||||
|
||||
label = (tds[0].text_content() or '').strip()
|
||||
if not label:
|
||||
continue
|
||||
val_td = tds[-1]
|
||||
|
||||
# collect anchors under this label for later detection
|
||||
anchors = val_td.xpath('.//a')
|
||||
if anchors:
|
||||
entries = []
|
||||
for a in anchors:
|
||||
entries.append({'text': (a.text_content() or '').strip(), 'href': a.get('href') or ''})
|
||||
# try to capture any explicit span value (e.g., CRC) even if an anchor exists
|
||||
span_data = val_td.xpath('.//span[@id]/text()')
|
||||
if span_data:
|
||||
props[label] = str(span_data[0]).strip()
|
||||
else:
|
||||
# fallback to direct text nodes excluding anchor text
|
||||
txts = [t.strip() for t in val_td.xpath('.//text()') if t.strip()]
|
||||
anchor_texts = [a.text_content().strip() for a in anchors if a.text_content()]
|
||||
filtered = [t for t in txts if t not in anchor_texts]
|
||||
if filtered:
|
||||
props[label] = filtered[0]
|
||||
anchors_by_label[label] = entries
|
||||
continue
|
||||
|
||||
img_title = val_td.xpath('.//img/@title')
|
||||
if img_title:
|
||||
val = str(img_title[0]).strip()
|
||||
else:
|
||||
span_data = val_td.xpath('.//span[@id]/text()')
|
||||
if span_data:
|
||||
val = str(span_data[0]).strip()
|
||||
else:
|
||||
opt = val_td.xpath('.//select/option[@selected]/text()')
|
||||
if opt:
|
||||
val = str(opt[0]).strip()
|
||||
else:
|
||||
vt = val_td.xpath('.//div[@id="version_text"]/text()')
|
||||
if vt:
|
||||
val = vt[0].strip()
|
||||
else:
|
||||
val = (val_td.text_content() or '').strip()
|
||||
|
||||
props[label] = val
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Download form handling: find action, mediaId, and dl_size
|
||||
form = doc.xpath('//form[@id="dl_form"]')
|
||||
action = ''
|
||||
media_id = None
|
||||
dl_size = None
|
||||
form_inputs: Dict[str, str] = {}
|
||||
download_url = ''
|
||||
if form:
|
||||
f = form[0]
|
||||
action = f.get('action') or ''
|
||||
if action.startswith('//'):
|
||||
action = 'https:' + action
|
||||
elif action.startswith('/'):
|
||||
action = urljoin(base_url, action)
|
||||
media_ids = f.xpath('.//input[@name="mediaId"]/@value')
|
||||
media_id = media_ids[0] if media_ids else None
|
||||
size_vals = doc.xpath('//td[@id="dl_size"]/text()')
|
||||
dl_size = size_vals[0].strip() if size_vals else None
|
||||
inputs = f.xpath('.//input[@name]')
|
||||
for inp in inputs:
|
||||
name = (inp.get('name') or '').strip()
|
||||
if not name:
|
||||
continue
|
||||
form_inputs[name] = inp.get('value') or ''
|
||||
download_url = _build_download_url(action, form_inputs)
|
||||
|
||||
file_results: List[SearchResult] = []
|
||||
|
||||
# Create file rows from anchors that look like downloads
|
||||
for lbl, alist in anchors_by_label.items():
|
||||
for a in alist:
|
||||
href = a.get('href') or ''
|
||||
txt = a.get('text') or ''
|
||||
is_download_link = False
|
||||
if href:
|
||||
low = href.lower()
|
||||
if 'p=download' in low or '/download' in low or '/dl' in low:
|
||||
is_download_link = True
|
||||
for ext in ('.zip', '.nes', '.gba', '.bin', '.7z', '.iso'):
|
||||
if low.endswith(ext):
|
||||
is_download_link = True
|
||||
break
|
||||
if txt and re.search(r"\.[a-z0-9]{1,5}$", txt, re.I):
|
||||
is_download_link = True
|
||||
if not is_download_link:
|
||||
continue
|
||||
|
||||
title = txt or props.get('Title') or ''
|
||||
path = urljoin(base_url, href) if href else ''
|
||||
cols = [("Title", title), ("Region", props.get('Region', '')), ("CRC", props.get('CRC', '')), ("Version", props.get('Version', ''))]
|
||||
if dl_size:
|
||||
cols.append(("Size", dl_size))
|
||||
metadata: Dict[str, Any] = {"raw_record": {"label": lbl}}
|
||||
if base_url:
|
||||
metadata["referer"] = base_url
|
||||
metadata.setdefault("detail_url", base_url)
|
||||
sr = SearchResult(table="vimm", title=title, path=path, detail="", annotations=[], media_kind="file", size_bytes=None, tag={"vimm"}, columns=cols, full_metadata=metadata)
|
||||
file_results.append(self._apply_selection_defaults(sr, referer=base_url, detail_url=base_url))
|
||||
|
||||
# If no explicit file anchors, but we have a form, create a single file entry using page properties
|
||||
if not file_results and (media_id or action):
|
||||
# Ensure CRC is captured even if earlier parsing missed it
|
||||
if not props.get('CRC'):
|
||||
try:
|
||||
crc_vals = doc.xpath('//span[@id="data-crc"]/text()')
|
||||
if crc_vals:
|
||||
props['CRC'] = str(crc_vals[0]).strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
title = props.get('Title') or ''
|
||||
cols = [("Title", title), ("Region", props.get('Region', '')), ("CRC", props.get('CRC', '')), ("Version", props.get('Version', ''))]
|
||||
if dl_size:
|
||||
cols.append(("Size", dl_size))
|
||||
target_path = download_url or action or base_url
|
||||
sr = SearchResult(
|
||||
table="vimm",
|
||||
title=title,
|
||||
path=target_path,
|
||||
detail="",
|
||||
annotations=[],
|
||||
media_kind="file",
|
||||
size_bytes=None,
|
||||
tag={"vimm"},
|
||||
columns=cols,
|
||||
full_metadata={
|
||||
"mediaId": media_id,
|
||||
"dl_action": action,
|
||||
"download_url": download_url,
|
||||
"form_params": dict(form_inputs),
|
||||
"referer": base_url,
|
||||
"raw_props": props,
|
||||
},
|
||||
)
|
||||
file_results.append(self._apply_selection_defaults(sr, referer=base_url, detail_url=base_url))
|
||||
|
||||
# Attach mediaId/dl_action to file rows
|
||||
if file_results and (media_id or action):
|
||||
for fi in file_results:
|
||||
try:
|
||||
fi.full_metadata = dict(getattr(fi, 'full_metadata', {}) or {})
|
||||
if media_id:
|
||||
fi.full_metadata['mediaId'] = media_id
|
||||
if action:
|
||||
fi.full_metadata['dl_action'] = action
|
||||
if form_inputs:
|
||||
fi.full_metadata.setdefault('form_params', dict(form_inputs))
|
||||
if download_url:
|
||||
fi.full_metadata['download_url'] = download_url
|
||||
if dl_size and not any((k.lower() == 'size') for k, _ in getattr(fi, 'columns', [])):
|
||||
fi.columns.append(("Size", dl_size))
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Return only file rows (properties are attached as columns)
|
||||
return file_results
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
def _fetch_detail_rows(self, detail_url: str) -> List[SearchResult]:
|
||||
"""Fetch the detail page for a selected row and return the parsed file rows."""
|
||||
|
||||
detail_url = str(detail_url or "").strip()
|
||||
if not detail_url:
|
||||
return []
|
||||
try:
|
||||
with HTTPClient(timeout=9.0) as client:
|
||||
resp = client.get(detail_url)
|
||||
doc = lxml_html.fromstring(resp.content)
|
||||
except Exception as exc:
|
||||
debug(f"[vimm] detail fetch failed: {exc}")
|
||||
return []
|
||||
return self._parse_detail_doc(doc, base_url=detail_url)
|
||||
|
||||
def _download_from_payload(self, payload: Dict[str, Any], output_dir: Path) -> Optional[Path]:
|
||||
"""Download using the metadata/form data stored in a SearchResult payload."""
|
||||
|
||||
try:
|
||||
d = payload or {}
|
||||
fm = d.get("full_metadata") or {}
|
||||
media_id = fm.get("mediaId") or fm.get("media_id")
|
||||
base_action = fm.get("dl_action") or d.get("path") or ""
|
||||
download_url = fm.get("download_url")
|
||||
params = dict(fm.get("form_params") or {})
|
||||
if media_id:
|
||||
params.setdefault("mediaId", media_id)
|
||||
target = download_url or base_action
|
||||
if not target:
|
||||
return None
|
||||
if target.startswith("//"):
|
||||
target = "https:" + target
|
||||
|
||||
# Avoid downloading HTML detail pages directly; let detail parsing handle them.
|
||||
low_target = target.lower()
|
||||
if ("vimm.net/vault" in low_target or "?p=list" in low_target) and not download_url and not media_id and not params:
|
||||
return None
|
||||
|
||||
referer = fm.get("referer") or d.get("referer") or d.get("detail_url")
|
||||
headers: Dict[str, str] = {}
|
||||
|
||||
if not referer:
|
||||
try:
|
||||
from SYS.pipeline import get_last_result_items
|
||||
|
||||
items = get_last_result_items() or []
|
||||
try:
|
||||
parsed_target = urlparse(target)
|
||||
target_qs = parse_qs(parsed_target.query)
|
||||
target_media = None
|
||||
if isinstance(target_qs, dict):
|
||||
target_media = (target_qs.get("mediaId") or target_qs.get("mediaid") or [None])[0]
|
||||
if target_media is not None:
|
||||
target_media = str(target_media)
|
||||
except Exception:
|
||||
target_media = None
|
||||
|
||||
found = None
|
||||
for it in items:
|
||||
try:
|
||||
it_d = it if isinstance(it, dict) else (it.to_dict() if hasattr(it, "to_dict") else {})
|
||||
fm2 = (it_d.get("full_metadata") or {}) if isinstance(it_d, dict) else {}
|
||||
dl_cand = (fm2.get("download_url") or fm2.get("dl_action") or it_d.get("path"))
|
||||
if target_media:
|
||||
m2 = None
|
||||
if isinstance(fm2, dict):
|
||||
m2 = str(fm2.get("mediaId") or fm2.get("media_id") or "")
|
||||
if m2 and m2 == target_media:
|
||||
found = it_d
|
||||
break
|
||||
if dl_cand and str(dl_cand).strip() and (str(dl_cand).strip() == str(target).strip() or str(dl_cand) in str(target) or str(target) in str(dl_cand)):
|
||||
found = it_d
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if found:
|
||||
referer = (found.get("full_metadata") or {}).get("referer") or found.get("detail_url") or found.get("path")
|
||||
except Exception:
|
||||
referer = referer
|
||||
|
||||
if referer:
|
||||
headers["Referer"] = str(referer)
|
||||
headers_arg = headers or None
|
||||
|
||||
out_dir = Path(output_dir or Path("."))
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
filename_hint = str(d.get("title") or f"vimm_{media_id or 'download'}")
|
||||
|
||||
with HTTPClient(timeout=60.0) as client:
|
||||
try:
|
||||
if download_url:
|
||||
resp = client.get(target, headers=headers_arg)
|
||||
elif params:
|
||||
resp = client.get(target, params=params, headers=headers_arg)
|
||||
else:
|
||||
resp = client.get(target, headers=headers_arg)
|
||||
except Exception as exc_get:
|
||||
try:
|
||||
detail_url = referer or target
|
||||
p = self._playwright_fetch(detail_url, out_dir, selector="form#dl_form button[type=submit]", timeout_sec=60)
|
||||
if p:
|
||||
debug(f"[vimm] downloaded via Playwright after get() error: {p}")
|
||||
return p
|
||||
except Exception as e:
|
||||
debug(f"[vimm] Playwright download failed after get() error: {e}")
|
||||
|
||||
debug(f"[vimm] HTTP GET failed (network): {exc_get}")
|
||||
return None
|
||||
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except Exception as exc:
|
||||
try:
|
||||
detail_url = referer or target
|
||||
p = self._playwright_fetch(detail_url, out_dir, selector="form#dl_form button[type=submit]", timeout_sec=60)
|
||||
if p:
|
||||
debug(f"[vimm] downloaded via Playwright after HTTP error: {p}")
|
||||
return p
|
||||
except Exception as e:
|
||||
debug(f"[vimm] Playwright download failed after HTTP error: {e}")
|
||||
|
||||
debug(f"[vimm] HTTP GET failed: {exc}")
|
||||
return None
|
||||
|
||||
content = getattr(resp, "content", b"") or b""
|
||||
cd = getattr(resp, "headers", {}).get("content-disposition", "") if hasattr(resp, "headers") else ""
|
||||
m = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', cd)
|
||||
if m:
|
||||
fname = m.group(1) or m.group(2)
|
||||
else:
|
||||
fname = filename_hint
|
||||
|
||||
out_path = out_dir / str(fname)
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
out_path.write_bytes(content)
|
||||
return out_path
|
||||
except Exception as exc:
|
||||
debug(f"[vimm] download failed: {exc}")
|
||||
return None
|
||||
|
||||
def _playwright_fetch(self, detail_url: str, out_dir: Path, selector: str = "form#dl_form button[type=submit]", timeout_sec: int = 90) -> Optional[Path]:
|
||||
"""Attempt a browser-driven download using the shared Playwright tool.
|
||||
|
||||
Playwright is a required runtime dependency for this operation; import
|
||||
failures will surface at module import time rather than being silently
|
||||
swallowed by per-call guards.
|
||||
"""
|
||||
|
||||
# Prefer headful-first attempts for Vimm to mirror real browser behaviour
|
||||
cfg = {}
|
||||
try:
|
||||
from SYS.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
except Exception:
|
||||
cfg = {}
|
||||
|
||||
tool = PlaywrightTool(cfg)
|
||||
result = tool.download_file(
|
||||
detail_url,
|
||||
selector=selector,
|
||||
out_dir=out_dir,
|
||||
timeout_sec=timeout_sec,
|
||||
headless_first=False,
|
||||
debug_mode=False,
|
||||
)
|
||||
if result.ok and result.path:
|
||||
return result.path
|
||||
debug(f"[vimm] playwright helper failed: {result.error}")
|
||||
return None
|
||||
|
||||
def download(self, result: Any, output_dir: Path, progress_callback: Optional[Any] = None) -> Optional[Path]:
|
||||
"""Download an item identified on a Vimm detail page."""
|
||||
|
||||
payload = result.to_dict() if hasattr(result, "to_dict") else (result if isinstance(result, dict) else {})
|
||||
downloaded = self._download_from_payload(payload, output_dir)
|
||||
if downloaded:
|
||||
return downloaded
|
||||
|
||||
detail_url = str(payload.get("path") or "").strip()
|
||||
if not detail_url:
|
||||
return None
|
||||
|
||||
for row in self._fetch_detail_rows(detail_url):
|
||||
detail_payload = row.to_dict() if hasattr(row, "to_dict") else (row if isinstance(row, dict) else {})
|
||||
downloaded = self._download_from_payload(detail_payload, output_dir)
|
||||
if downloaded:
|
||||
return downloaded
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# Minimal provider registration
|
||||
|
||||
# Minimal provider registration
|
||||
try:
|
||||
from SYS.result_table_adapters import register_plugin
|
||||
from SYS.result_table_api import ResultModel, title_column, metadata_column
|
||||
|
||||
def _convert_search_result_to_model(sr):
|
||||
d = sr.to_dict() if hasattr(sr, "to_dict") else (sr if isinstance(sr, dict) else {"title": getattr(sr, "title", str(sr))})
|
||||
title = d.get("title") or ""
|
||||
path = d.get("path") or None
|
||||
columns = d.get("columns") or getattr(sr, "columns", None) or []
|
||||
metadata: Dict[str, Any] = {}
|
||||
for name, value in columns:
|
||||
key = str(name or "").strip().lower()
|
||||
if key in ("system", "region", "version", "languages", "size"):
|
||||
metadata[key] = value
|
||||
try:
|
||||
fm = d.get("full_metadata") or {}
|
||||
if isinstance(fm, dict):
|
||||
for k, v in fm.items():
|
||||
metadata[str(k).strip().lower()] = v
|
||||
except Exception:
|
||||
pass
|
||||
return ResultModel(title=str(title), path=str(path) if path else None, ext=None, size_bytes=None, metadata=metadata, source="vimm")
|
||||
|
||||
def _adapter(items):
|
||||
for it in items:
|
||||
yield _convert_search_result_to_model(it)
|
||||
|
||||
def _columns_factory(rows):
|
||||
cols = [title_column()]
|
||||
md = lambda key: any((r.metadata or {}).get(key) for r in rows)
|
||||
if md("system"):
|
||||
cols.append(metadata_column("system", "system"))
|
||||
if md("region"):
|
||||
cols.append(metadata_column("region", "Region"))
|
||||
if md("version"):
|
||||
cols.append(metadata_column("version", "Version"))
|
||||
if md("languages"):
|
||||
cols.append(metadata_column("languages", "Languages"))
|
||||
if md("size"):
|
||||
cols.append(metadata_column("size", "Size"))
|
||||
return cols
|
||||
|
||||
def _selection_fn(row):
|
||||
# Return explicit URL selection args so `select -run-cmd` and `@N` expansion
|
||||
# behave correctly when the downstream stage is a downloader (e.g., download-file).
|
||||
# Using '-url' is explicit and avoids ambiguity during argument parsing.
|
||||
if getattr(row, "path", None):
|
||||
return ["-url", row.path]
|
||||
return ["-title", row.title or ""]
|
||||
|
||||
|
||||
register_plugin(
|
||||
"vimm",
|
||||
_adapter,
|
||||
columns=_columns_factory,
|
||||
selection_fn=_selection_fn,
|
||||
metadata={"description": "Minimal Vimm provider"},
|
||||
)
|
||||
except Exception:
|
||||
# best-effort registration
|
||||
pass
|
||||
|
||||
@@ -1,218 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
from ProviderCore.base import Provider, SearchResult
|
||||
from SYS.provider_helpers import TableProviderMixin
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
class YouTube(TableProviderMixin, Provider):
|
||||
"""YouTube video search provider using yt_dlp.
|
||||
|
||||
This provider uses the new table system (strict ResultTable adapter pattern) for
|
||||
consistent selection and auto-stage integration. It exposes videos as SearchResult
|
||||
rows with metadata enrichment for:
|
||||
- video_id: Unique YouTube video identifier
|
||||
- uploader: Channel/creator name
|
||||
- duration: Video length in seconds
|
||||
- view_count: Number of views
|
||||
- _selection_args: For @N expansion control and download-file routing
|
||||
|
||||
SELECTION FLOW:
|
||||
1. User runs: search-file -plugin youtube "linux tutorial"
|
||||
2. Results show video rows with uploader, duration, views
|
||||
3. User selects a video: @1
|
||||
4. Selection metadata routes to download-file with the YouTube URL
|
||||
5. download-file uses yt_dlp to download the video
|
||||
"""
|
||||
|
||||
TABLE_AUTO_STAGES = {
|
||||
"youtube": ["download-file"],
|
||||
}
|
||||
# If the user provides extra args on the selection stage, forward them to download-file.
|
||||
AUTO_STAGE_USE_SELECTION_ARGS = True
|
||||
|
||||
@property
|
||||
def preserve_order(self) -> bool:
|
||||
return True
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 10,
|
||||
filters: Optional[Dict[str,
|
||||
Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
# Use the yt_dlp Python module (installed via requirements.txt).
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
|
||||
ydl_opts: Dict[str,
|
||||
Any] = {
|
||||
"quiet": True,
|
||||
"skip_download": True,
|
||||
"extract_flat": True
|
||||
}
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
search_query = f"ytsearch{limit}:{query}"
|
||||
info = ydl.extract_info(search_query, download=False)
|
||||
entries = info.get("entries") or []
|
||||
results: List[SearchResult] = []
|
||||
for video_data in entries[:limit]:
|
||||
title = video_data.get("title", "Unknown")
|
||||
video_id = video_data.get("id", "")
|
||||
url = video_data.get(
|
||||
"url"
|
||||
) or f"https://youtube.com/watch?v={video_id}"
|
||||
uploader = video_data.get("uploader", "Unknown")
|
||||
duration = video_data.get("duration", 0)
|
||||
view_count = video_data.get("view_count", 0)
|
||||
|
||||
duration_str = (
|
||||
f"{int(duration // 60)}:{int(duration % 60):02d}"
|
||||
if duration else ""
|
||||
)
|
||||
views_str = f"{view_count:,}" if view_count else ""
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="youtube",
|
||||
title=title,
|
||||
path=url,
|
||||
detail=f"By: {uploader}",
|
||||
annotations=[duration_str,
|
||||
f"{views_str} views"],
|
||||
media_kind="video",
|
||||
columns=[
|
||||
("Title",
|
||||
title),
|
||||
("Uploader",
|
||||
uploader),
|
||||
("Duration",
|
||||
duration_str),
|
||||
("Views",
|
||||
views_str),
|
||||
],
|
||||
full_metadata={
|
||||
"video_id": video_id,
|
||||
"uploader": uploader,
|
||||
"duration": duration,
|
||||
"view_count": view_count,
|
||||
# Selection metadata for table system and @N expansion
|
||||
"_selection_args": ["-url", url],
|
||||
},
|
||||
)
|
||||
)
|
||||
return results
|
||||
except Exception:
|
||||
log("[youtube] yt_dlp import failed", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def validate(self) -> bool:
|
||||
try:
|
||||
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# Minimal provider registration for the new table system
|
||||
try:
|
||||
from SYS.result_table_adapters import register_plugin
|
||||
from SYS.result_table_api import ResultModel, ColumnSpec, metadata_column, title_column
|
||||
|
||||
def _convert_search_result_to_model(sr: Any) -> ResultModel:
|
||||
"""Convert YouTube SearchResult to ResultModel for strict table display."""
|
||||
d = sr.to_dict() if hasattr(sr, "to_dict") else (sr if isinstance(sr, dict) else {"title": getattr(sr, "title", str(sr))})
|
||||
title = d.get("title") or ""
|
||||
path = d.get("path") or None
|
||||
columns = d.get("columns") or getattr(sr, "columns", None) or []
|
||||
|
||||
# Extract metadata from columns and full_metadata
|
||||
metadata: Dict[str, Any] = {}
|
||||
for name, value in columns:
|
||||
key = str(name or "").strip().lower()
|
||||
if key in ("uploader", "duration", "views", "video_id"):
|
||||
metadata[key] = value
|
||||
|
||||
try:
|
||||
fm = d.get("full_metadata") or {}
|
||||
if isinstance(fm, dict):
|
||||
for k, v in fm.items():
|
||||
metadata[str(k).strip().lower()] = v
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return ResultModel(
|
||||
title=str(title),
|
||||
path=str(path) if path else None,
|
||||
ext=None,
|
||||
size_bytes=None,
|
||||
metadata=metadata,
|
||||
source="youtube"
|
||||
)
|
||||
|
||||
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
|
||||
"""Adapter to convert SearchResults to ResultModels."""
|
||||
for it in items:
|
||||
try:
|
||||
yield _convert_search_result_to_model(it)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
def _has_metadata(rows: List[ResultModel], key: str) -> bool:
|
||||
"""Check if any row has a given metadata key with a non-empty value."""
|
||||
for row in rows:
|
||||
md = row.metadata or {}
|
||||
if key in md:
|
||||
val = md[key]
|
||||
if val is None:
|
||||
continue
|
||||
if isinstance(val, str) and not val.strip():
|
||||
continue
|
||||
return True
|
||||
return False
|
||||
|
||||
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
|
||||
"""Build column specifications from available metadata in rows."""
|
||||
cols = [title_column()]
|
||||
if _has_metadata(rows, "uploader"):
|
||||
cols.append(metadata_column("uploader", "Uploader"))
|
||||
if _has_metadata(rows, "duration"):
|
||||
cols.append(metadata_column("duration", "Duration"))
|
||||
if _has_metadata(rows, "views"):
|
||||
cols.append(metadata_column("views", "Views"))
|
||||
return cols
|
||||
|
||||
def _selection_fn(row: ResultModel) -> List[str]:
|
||||
"""Return selection args for @N expansion and auto-download integration.
|
||||
|
||||
Uses explicit -url flag to ensure the YouTube URL is properly routed
|
||||
to download-file for yt_dlp download processing.
|
||||
"""
|
||||
metadata = row.metadata or {}
|
||||
|
||||
# Check for explicit selection args first
|
||||
args = metadata.get("_selection_args") or metadata.get("selection_args")
|
||||
if isinstance(args, (list, tuple)) and args:
|
||||
return [str(x) for x in args if x is not None]
|
||||
|
||||
# Fallback to direct URL
|
||||
if row.path:
|
||||
return ["-url", row.path]
|
||||
|
||||
return ["-title", row.title or ""]
|
||||
|
||||
register_plugin(
|
||||
"youtube",
|
||||
_adapter,
|
||||
columns=_columns_factory,
|
||||
selection_fn=_selection_fn,
|
||||
metadata={"description": "YouTube video search using yt_dlp"},
|
||||
)
|
||||
except Exception:
|
||||
# best-effort registration
|
||||
pass
|
||||
-1432
File diff suppressed because it is too large
Load Diff
@@ -1,72 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from ProviderCore.base import Provider
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
class ZeroXZero(Provider):
|
||||
"""File provider for 0x0.st."""
|
||||
|
||||
PLUGIN_NAME = "0x0"
|
||||
PLUGIN_ALIASES = ("zeroxzero",)
|
||||
|
||||
def upload(self, file_path: str, **kwargs: Any) -> str:
|
||||
from API.HTTP import HTTPClient
|
||||
from SYS.models import ProgressFileReader
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "Medeia-Macina/1.0"
|
||||
}
|
||||
with HTTPClient(headers=headers) as client:
|
||||
with open(file_path, "rb") as handle:
|
||||
try:
|
||||
total = os.path.getsize(file_path)
|
||||
except Exception:
|
||||
total = None
|
||||
wrapped = ProgressFileReader(
|
||||
handle,
|
||||
total_bytes=total,
|
||||
label="upload"
|
||||
)
|
||||
response = client.post(
|
||||
"https://0x0.st",
|
||||
files={
|
||||
"file": wrapped
|
||||
}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
uploaded_url = response.text.strip()
|
||||
try:
|
||||
pipe_obj = kwargs.get("pipe_obj")
|
||||
if pipe_obj is not None:
|
||||
from Store import Store
|
||||
|
||||
Store(
|
||||
self.config,
|
||||
suppress_debug=True
|
||||
).try_add_url_for_pipe_object(pipe_obj,
|
||||
uploaded_url)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return uploaded_url
|
||||
|
||||
raise Exception(
|
||||
f"Upload failed: {response.status_code} - {response.text}"
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[0x0] Upload error: {exc}", file=sys.stderr)
|
||||
raise
|
||||
|
||||
def validate(self) -> bool:
|
||||
return True
|
||||
Reference in New Issue
Block a user